Blog

  • chastext for Windows

    I wrote a Windows Assembly version of my chastext program.

    #main.asm

    format PE console
    include 'win32ax.inc'
    include 'chastelibw32.asm'
    
    main:
    
    ;integer output settings: one digit minimum, printed in base 10
    mov [int_width],1
    mov [radix],10
    
    ;ask Windows for the complete command line (program name and arguments in one string)
    invoke GetCommandLineA
    mov [arg_string_index],eax ;remember where the command line starts
    
    ;strlen takes the string in eax and returns its length in eax
    call strlen
    
    ;end address = length + start address; keep it for every later end-of-arguments test
    mov ebx,eax
    add ebx,[arg_string_index]
    mov [arg_string_end],ebx
    
    ;debugging aid: uncomment to print the unmodified command line
    ;mov eax,[arg_string_index]
    ;call putstring
    ;call putline
    
    ;the argument filter that follows walks the string through ebx, starting at its first byte
    mov ebx,[arg_string_index]
    
    ;now ebx points to the first byte of the command line string returned by GetCommandLineA
    ;and we know that [arg_string_end] is where it ends
    
    ;the next step is to filter the arguments into separate zero terminated strings
    ;each space will be changed to a zero (normally)
    ;but we also need to account for spaces inside quotes that are considered part of the string
    ;Linux delivers the arguments already separated, but here the program receives one single string
    ;so this code has to do the splitting itself to make every version of the program behave the same
    
    mov cl,' ' ;set the default filter character (argument terminator) to a space
    mov ch,0   ;are we currently checking spaces 0 or quote characters 1 as terminators?
    
    ;this loop is the new and improved argument filter
    ;it keeps track of whether we are inside or outside a quote
    ;and also which type of quote started the quote
    ;the actual quote marks are not part of the string unless they
    ;are the opposite quote type than what started the string
    ;The important thing is that spaces can exist inside of quoted strings
    ;as one argument rather than each new word being a new argument
    ;could be important for filenames containing spaces, etc.
    
    ;register roles inside the loop:
    ; ebx = address of the character being examined
    ; cl  = character that currently ends an argument (space, or the quote that opened a quoted string)
    ; ch  = 0 outside of quotes, 1 inside of quotes
    
    argument_filter:
    
    cmp ebx,[arg_string_end] ;are we at the end of the arg string?
    jz argument_filter_end       ;if yes, stop the filter and terminate with zero
    
    cmp ch,1       ;are we inside a quoted string?
    jz quote_check ;if yes, don't do anything to the spaces
    
    cmp byte[ebx],cl ;compare the byte at address ebx to the argument terminator
    jnz ignore_char ;if it is not the same, we ignore it
    mov byte[ebx],0  ;but if it matches, change it to a zero
    ignore_char:
    
    cmp byte [ebx],0x22 ;is this a double quote -> "
    jz start_quote
    cmp byte [ebx],0x27 ;is this a single quote -> '
    jz start_quote
    jmp quote_no ;it was not a quote
    
    start_quote:
    
    mov ch,1    ;set ch to 1 to set that we are inside a quote now
    mov cl,[ebx] ;save this quote type as the new terminator
    mov byte[ebx],0 ;but delete the first quote with zero
    
    ;execution falls through into quote_check here; the byte was just set to zero
    ;so the comparison below cannot match and the opening quote does not close itself
    
    ;check for single or double quotes
    quote_check:
    
    cmp [ebx],cl ;is this character the same type of quote that started this sub string?
    jnz quote_no ;if it is not, then skip to quote_no section
    
    ;but if it was matching, change this byte to zero
    ;and change cl back to a space
    mov cl,' ' ;cl is now a space
    mov ch,0   ;ch is 0 because now we have ended the quoted string
    mov byte[ebx],0 ;delete the end quote with zero
    
    quote_no:
    
    inc ebx ;go to the next character
    jmp argument_filter   ;jump back to the beginning of argument filter
    
    argument_filter_end:
    mov byte [ebx],0 ;terminate the ending with a zero for safety
    
    ;the filtered command line begins with the name of the program
    ;debugging aid: uncomment to print it
    ;mov eax,[arg_string_index]
    ;call putstr_and_line
    
    ;step over the program name; eax receives the address of the next argument
    call get_next_arg
    cmp eax,[arg_string_end] ;reaching the end means no file name was given
    je no_filename_given
    
    mov [filename],eax ;the first real argument is the file to open
    jmp args_exist
    
    no_filename_given:
    mov eax,help    ;nothing to work on: print the usage text
    call putstring
    jmp ending     ;and leave, no file has been opened yet
    
    args_exist:
    ;call putstr_and_line ;print filename before text output
    
    ;This is where the main part of the chastext program really begins.
    
    ;now that the argument string is prepared, we will try to use the first argument as a filename to open
    
    ;https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
    ;https://learn.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights
    
    ;open the file with the CreateFileA function (arguments are pushed last to first)
    
    push 0           ;NULL: We are not using a template file
    push 0x80        ;FILE_ATTRIBUTE_NORMAL
    push 3           ;OPEN_EXISTING
    push 0           ;NULL: No security attributes
    push 1           ;FILE_SHARE_READ: a share mode of 0 asks for exclusive access and fails
                     ;when any other process already has the file open
    push 0x80000000  ;GENERIC_READ access mode
    push [filename] ;
    call [CreateFileA]
    
    ;eax is now either a file handle or -1 (INVALID_HANDLE_VALUE)
    ;call putint
    cmp eax,-1
    jnz file_ok
    
    ;The error code has to be fetched before anything else is called.
    ;putstring performs output of its own and a successful call may replace the stored error code.
    call [GetLastError]
    push eax ;keep the error code safe while the message is printed
    
    mov eax,file_error_message
    call putstring
    
    pop eax ;error code back into eax for putint
    call putint
    jmp ending ;no file was opened, so skip the CloseHandle at main_end and just exit
    
    ;this label is jumped to when the file is opened correctly
    file_ok:
    
    mov [filedesc],eax
    
    ;two optional arguments may follow the file name: a search string and a replacement string
    ;string_search and string_replace stay zero when the matching argument is missing
    
    call get_next_arg ;eax = address of the next argument
    cmp eax,[arg_string_end]
    je textdump ;nothing after the file name: plain text dump
    
    mov [string_search],eax ;second argument is the text to look for
    ;call putstr_and_line
    
    
    call get_next_arg ;eax = address of the next argument
    cmp eax,[arg_string_end]
    je textdump ;search string only: matches will be quoted
    
    mov [string_replace],eax ;third argument replaces every match
    ;call putstr_and_line
    
    ;anything after the third argument is ignored
    
    textdump:
    
    ;top of the main loop of chastext, re-entered after every piece of output
    
    ;a non-zero search string pointer selects search mode, handled at putchar_skip
    cmp dword[string_search],0
    jne putchar_skip
    
    ;plain dump mode: fetch a single byte from the file, print it, repeat until no byte arrives
    
    ;ReadFile(handle, buffer, bytes to read, address of byte counter, no OVERLAPPED structure)
    invoke ReadFile,[filedesc],byte_array,1,bytes_read,0
    
    mov eax,[bytes_read]
    
    cmp eax,1     ;anything other than exactly 1 byte means end of file or an error
    ;mov ax,end_of_file
    ;call putstring
    jne main_end  ;so close the file and end the program
    
    ;exactly 1 byte was read from the file
    file_success:
    
    mov al,[byte_array]
    call putchar
    jmp textdump
    
    ;the plain dump above never reaches this point, it jumps back to textdump after each character
    ;this section only runs when a search string was given and compares the file contents with it
    
    putchar_skip:
    
    ;this is the beginning of search mode
    ;it handles the file by seeking and reading to search every position for the search string
    ;each pass through this section examines the file position stored in [file_address]
    
    ;first, seek to the file_address we initialized to zero
    ;this variable will be added to depending on actions taken
    
    ;seek to address of file with SetFilePointer function
    ;https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-setfilepointer
    push 0             ;seek from beginning of file (FILE_BEGIN, the Win32 counterpart of SEEK_SET)
    push 0             ;NULL: We are not using a 64 bit address
    push [file_address] ;where we are seeking to
    push [filedesc] ;seek within this file
    call [SetFilePointer]
    
    ;obtain the length of the search string using my strlen function
    mov eax,[string_search]
    call strlen ;get the length of the search string
    
    mov ecx,eax ;store this length in ecx
    mov [search_length],ecx
    
    ;call putint_and_line ;check length of search string
    
    ;use the length of the string we are searching for as the number of bytes to read at this location
    ;NOTE(review): the length is not compared with the size of byte_array before reading;
    ;a search string longer than that buffer looks like it would be read past its end - confirm
    
    ;Win32 ReadFile system call.
    push 0              ;Optional Overlapped Structure 
    push bytes_read     ;Store Number of Bytes Read from this call
    push ecx            ;Number of bytes to read
    push byte_array     ;address to store bytes
    push [filedesc]     ;handle of the open file
    call [ReadFile]
    
    mov eax,[bytes_read]  ;get how many bytes were read with that last read operation
    
    mov ebx,byte_array    ;move the address of bytes read into ebx
    add ebx,eax           ;add number of bytes read (loaded from bytes_read into eax above)
    mov byte[ebx],0       ;terminate the string with zero
    
    cmp eax,[search_length] ;if the number of bytes is not what we expected to read, end this loop
    jnz textdump_end        ;fewer bytes than the search string are left, they are printed at textdump_end
    
    ;move our two strings into the esi and edi registers for comparison
    ;with my custom written strcmp function
    
    mov esi,[string_search]
    mov edi,byte_array
    call strcmp ;compare these two strings
    
    cmp eax,0 ;test if they are the same (if eax returned zero)
    jnz not_match ;if they are not a match go to that section for printing a character
    
    ;but if they are a match, then we either quote them
    ;or replace them if a replacement string is available
    
    ;but regardless of which action we do, since a match was found, let us add this count to the file address
    ;so that we read from beyond this point next time the textdump loop starts
    mov eax,[bytes_read]
    add [file_address],eax
    
    cmp dword[string_replace],0 ;check to see if a replacement string is available
    jz print_quotes ;if not, skip to the part where we just quote the strings that match
    
    ;otherwise, we will print the replacement string instead of the original!
    
    mov eax,[string_replace]
    call putstring ;print the string
    
    jmp textdump ;restart the main loop
    
    print_quotes:
    ;print quotes around matched string
    mov al,'"'
    call putchar
    
    mov eax,byte_array
    call putstring ;print the string
    
    mov al,'"'
    call putchar
    
    jmp textdump ;restart the main loop
    
    ;no match at this position: output only the first byte that was read and move forward by one
    not_match: 
    
    mov al,[byte_array]
    call putchar
    add [file_address],1 ;add 1 to the file address so we don't read this same position again
    
    jmp textdump
    
    textdump_end:
    
    ;search mode ends here with a short final read still sitting in byte_array
    ;it is zero terminated, so it can be printed as a string
    mov eax,byte_array
    call putstring
    
    main_end:
    
    ;common exit path once a file has been opened: release the handle first
    invoke CloseHandle,[filedesc]
    
    
    ending:
    ;leave the process with exit code 0
    invoke ExitProcess,0
    
    .end main
    
    arg_string_index  dd 0 ;address of the current argument (initially the start of the arg string)
    arg_string_end    dd 0 ;address of the end of the arg string
    
    ;get_next_arg: move ahead to the next argument
    ;only works after the filter has been applied to turn all separators into zeroes
    ;
    ;in:  [arg_string_index] = address of the current argument
    ;     [arg_string_end]   = address of the zero that ends the whole arg string
    ;out: eax = ebx = [arg_string_index] = address of the next argument,
    ;     or [arg_string_end] when there are no more arguments
    get_next_arg:
    mov ebx,[arg_string_index]
    
    ;A command line that begins with a quote, as in "C:\some dir\prog.exe" file,
    ;has its very first byte turned into a zero by the filter.
    ;Without this step the zero was mistaken for the end of the current argument
    ;and the program name itself was returned as the next argument.
    ;So any zeroes found at the current position are stepped over first.
    skip_leading_zero:
    cmp ebx,[arg_string_end]
    jz arg_finish ;already at the end, nothing left to find
    cmp byte [ebx],0
    jnz find_zero ;ebx is on a real character, continue as usual
    inc ebx
    jmp skip_leading_zero
    
    ;walk to the zero that ends the current argument
    ;this always stops because the byte at [arg_string_end] is zero
    find_zero:
    cmp byte [ebx],0
    jz found_zero
    inc ebx
    jmp find_zero ; this char is not zero, go to the next char
    found_zero:
    
    ;walk over the zeroes between arguments until a character or the end is found
    find_non_zero:
    cmp ebx,[arg_string_end]
    jz arg_finish ;if ebx is already at end, nothing left to find
    cmp byte [ebx],0
    jnz arg_finish ;if this char is not zero we have found the next string!
    inc ebx
    jmp find_non_zero ;otherwise, keep looking
    
    arg_finish:
    mov [arg_string_index],ebx ; save this index to variable
    mov eax,ebx ;but also return it in the eax register for use
    ret
    ;we can know that there are no more arguments when
    ;either [arg_string_index] or eax is equal to [arg_string_end]
    
    ;strlen and strcmp carry the names of the C functions they imitate
    ;both were written from scratch to match the behavior expected of them
    
    ;strlen: measure a zero terminated string
    ;in:  eax = address of the string
    ;out: eax = number of bytes before the terminating zero
    ;ebx is used as the scanning pointer and also holds the length on return
    
    strlen:
    
    lea ebx,[eax-1] ;begin one byte early because the loop advances before it tests
    
    strlen_start:
    
    inc ebx
    cmp byte [ebx],0
    jne strlen_start ;keep going until the terminating zero is under ebx
    
    strlen_end:
    sub ebx,eax ;address of the zero minus address of the start = length
    
    mov eax,ebx ;hand the length back in eax
    
    ret
    
    ;strcmp: compare the zero terminated string at esi with the one at edi
    ;in:  esi, edi = addresses of the two strings
    ;out: eax = 0 when the strings are identical
    ;     otherwise al = (byte from edi) - (byte from esi) at the first difference,
    ;     with the upper 24 bits of eax left at zero, so eax is non-zero
    ;esi and edi are advanced to the position where the comparison stopped and bl is overwritten
    
    ;how it works:
    ;eax is cleared once, then one byte of each string is loaded into al and bl
    ;different bytes end the loop at once
    ;equal bytes end the loop only when they are the terminating zero
    ;(testing al is enough because bl is known to hold the same value)
    ;in every other case both pointers move on by one and the loop repeats
    ;the final subtraction turns "last bytes equal" into a return value of zero
    
    strcmp:
    
    xor eax,eax
    
    strcmp_start:
    
    ;fetch the next byte of each string
    mov bl,[esi]
    mov al,[edi]
    cmp al,bl
    jne strcmp_end
    
    test al,al
    jz strcmp_end
    
    inc esi
    inc edi
    
    jmp strcmp_start
    
    strcmp_end:
    sub al,bl
    
    ret
    
    ;usage text printed by main when no file name argument is given
    ;one zero terminated string; lines end with CR LF (0Dh,0Ah) and the examples start with a tab (9)
    help db 'chastext by Chastity White Rose',0Dh,0Ah
    db '"cat" or "type" a file without changing it:',0Dh,0Ah,9,'chastext file',0Dh,0Ah
    db 'search for a string and quote it:',0Dh,0Ah,9,'chastext file search',0Dh,0Ah
    db 'replace string:',0Dh,0Ah,9,'chastext file search replace',0Dh,0Ah
    db 'Find or replace any string!',0Dh,0Ah,0
    
    file_error_message db 'Could not open the file! Error number: ',0 ;printed when CreateFileA fails
    filename dd 0 ;address of the file name argument
    filedesc dd 0 ;handle returned by CreateFileA
    file_address dd 0 ;file address defaults to zero AKA beginning of file
    end_of_file db 'EOF',0 ;only referenced by commented-out code in this listing
    
    ;where we will store data from the file
    bytes_read dd 0 ;number of bytes delivered by the most recent ReadFile call
    
    search_length dd 0 ;length of the search string in bytes
    string_search dd 0 ; place to hold the search string pointer
    string_replace dd 0 ; place to hold the replacement string pointer
    
    ;buffer of 0x73 (115) bytes that receives the bytes read from the file
    ;search mode also stores a terminating zero right after the bytes it read
    byte_array db 0x73 dup 0
    
  • chastelib test suite for RISC-V Assembly in riscemu simulator

    This time I really went wild with RISC-V Assembly. I discovered a new simulator written in Python that is much more strict about what it allows. Many of the same pseudo instructions that worked in RARS don’t work in riscemu. I took this as a challenge to adapt my code for compatibility with both riscemu and RARS. Other than the different exit call numbers, the functions work the same in both emulators. This gives me a viable foundation for learning and teaching about RISC-V Assembly and how it is different from Intel. I love them both but RISC-V is the new way that many people are excited about.

    main.asm

    # chastelib test suite for RISC-V Assembly in riscemu simulator
    
    # The same library of functions I commonly use in my Intel Assembly code
    # have now been translated to RISC-V.
    
    .data
    
    # These variables are used by the intstr function to convert an integer to a string
    # and what radix should be used as well as the width (how many leading zeros)
    # intstr fills int_string from its last byte backwards, starting at the byte just before int_end
    
    int_string: .space 32 #reserve space for 32 bytes for up to 32 bits if printed in binary
    int_end: .byte 0 #the terminating zero of the integer string
    radix: .byte 2   #the radix the number will be shown in
    int_width: .byte 1 #by default
    
    # These variables are for outputting special strings
    # such as a newline, space, or a single character based on s0
    # each one is a character followed by a zero so that putstr can print it as a string
    
    space: .byte 0x20, 0
    line:  .byte 0x0A, 0
    char:  .byte 0, 0 
    
    # These variables are for outputting specific messages
    # or to simulate user input as integers in the strint function
    
    string0: .asciz "chastelib test suite for RISC-V Assembly in riscemu simulator\n"
    
    input_int_0: .asciz "0"
    input_int_1: .asciz "100"
    
    .text
    
    # print the title line before the test output
    la s0, string0
    jal putstr
    
    # A real program would read user input at this point.
    # This test simulates it with the two global strings input_int_0 and input_int_1,
    # which strint converts using the radix that is selected here.
    
    # select radix 16 so that both strings are read as hexadecimal numbers
    la t1, radix # t1 = address of the radix variable
    li t0, 16    # t0 = new radix
    sb t0, 0(t1) # store the low byte of t0 at that address
    
    # s2 = first string converted to an integer
    la s0, input_int_0
    jal strint
    mv s2, s0
    
    # s3 = second string converted to an integer
    la s0, input_int_1
    jal strint
    mv s3, s0
    
    # the loop limits could also be loaded directly instead of converting strings
    # (left disabled in this example)
    # li s0, 0
    # li s1, 0x100
    
    # s0 counts upward from the first value, s1 is the limit the loop stops at
    mv s1, s3
    mv s0, s2
    
    loop:
    
    # column 1: s0 as an 8 digit binary number
    la t1, int_width # t1 = address of the width variable
    li t0, 8         # 8 digits for an 8 bit value
    sb t0, 0(t1)
    la t1, radix     # t1 = address of the radix variable
    li t0, 2         # binary
    sb t0, 0(t1)
    
    jal putint
    jal putspace
    
    # column 2: s0 as a 2 digit hexadecimal number
    la t1, int_width
    li t0, 2         # an 8 bit value needs 2 hex digits
    sb t0, 0(t1)
    la t1, radix
    li t0, 16        # hexadecimal
    sb t0, 0(t1)
    
    jal putint
    jal putspace
    
    # column 3: s0 as a 3 digit decimal number
    la t1, int_width
    li t0, 3         # an 8 bit value needs up to 3 decimal digits
    sb t0, 0(t1)
    la t1, radix
    li t0, 10        # decimal
    sb t0, 0(t1)
    
    jal putint
    
    # column 4: the character itself, only when s0 is in the range 0x20 to 0x7E
    li t1, 0x20
    blt s0, t1, not_char
    li t1, 0x7F
    bge s0, t1, not_char
    
    jal putspace
    jal putchar
    
    not_char:                # reached directly when s0 is outside the range to print
    
    jal putline
    
    # next value; repeat while s0 is still below the limit in s1
    addi s0, s0, 1
    blt s0, s1, loop
    
    # print the title line again after the last row
    la s0, string0
    jal putstr
    
    li a0, 0                # a0=0  (exit code for OS)
    li a7, 93               # a7=93 (exit system call)
    ecall                   #       (environment call)
    
    #################################################################################
    # The following functions are independent of a specific RISC-V Operating System #
    #                                                                               #
    # intstr = convert integer into a string ready for printing                     #
    # putint = prints integer using intstr and the OS specific putstr function      #
    # strint = convert string into an integer                                       #
    #                                                                               #
    # The s0 register is used for pass data in or out of these functions            #
    # See comments above those specific functions for full details                  #
    #################################################################################
    
    # The intstr function does several things at once and is the foundation for all integer output.
    # It uses the global radix variable to know which radix or number base to use when turning the integer to a string
    # It also uses the global int_width variable to determine how many leading zeros should be used for the string
    # The purpose of this is to make numbers look good when lined up when they are printed in a list.
    # radices 2 to 36 are supported. Digits higher than 9 will be capital letters
    #
    # in:  s0 = integer to convert (treated as unsigned)
    # out: s0 = address of the first character of the zero terminated result inside int_string
    # uses t0 to t5 as scratch registers
    #
    # The result is written backwards from the byte before int_end and never takes more than
    # the 32 bytes reserved for int_string. A larger int_width is treated as 32, and digit
    # generation stops after 32 digits. This limit is only reached by values that need more
    # digits than the buffer holds, or by a radix that never reduces the value (such as 1).
    # Before this limit existed both cases wrote into the memory in front of int_string.
    
    intstr:
    
    la t1, radix     # load address of radix into t1
    lb t2, 0(t1)     # load value of radix into t2
    la t1, int_width # load address of width into t1
    lb t4, 0(t1)     # load value of int_width into t4
    
    li t5, 32                    # size of the int_string buffer
    bge t5, t4, intstr_width_ok  # a width that fits is used unchanged
    mv t4, t5                    # anything wider is reduced to the buffer size
    intstr_width_ok:
    
    li t3, 1         # load current number of digits, always 1
    
    la t1, int_end # t1=address of terminating zero in string
    addi t1, t1, -1        # t1-- to go to lowest digit
    
    digits_start:
    
    remu t0, s0, t2 # t0=remainder of the division = value of the lowest digit
    divu s0, s0, t2 # s0=s0/t2 (divide s0 by the radix value in t2)
    
    li t5, 10 # load t5 with 10 because RISC-V does not allow constants for branches
    
    bge t0, t5, hexadecimal_digit # values of 10 and above become letters
    
    decimal_digit: # we continue here if it is only a digit 0 to 9
    
    addi t0, t0, 0x30
    
    j save_digit
    
    hexadecimal_digit:
    addi t0, t0, -10
    addi t0, t0, 0x41
    
    save_digit:
    sb t0, 0(t1) # store byte from t0 at address t1
    beq s0, zero, intstr_end # nothing left to divide, all digits are stored
    li t5, 32
    bge t3, t5, intstr_end   # the buffer is full, stop instead of writing in front of it
    addi t1, t1, -1
    addi t3, t3, 1
    j digits_start
    
    intstr_end:
    
    li t0, 0x30 # the character '0' used for padding
    prefix_zeros:
    bge t3, t4, end_zeros # stop once the number of digits has reached the width
    addi t1, t1, -1
    sb t0, 0(t1) # store byte from t0 at address t1
    addi t3, t3, 1
    j prefix_zeros
    end_zeros:
    
    mv s0, t1 # return the address of the first character
    
    ret
    
    # putint: print the integer in s0 to standard output
    # intstr turns s0 into a string and putstr prints that string
    # s0 and ra are kept on the stack while this happens:
    # s0 so that the caller gets back the integer it passed in,
    # ra because each jal below replaces it with a new return address
    
    putint:
    
    addi sp, sp, -8
    sw s0, 4(sp)
    sw ra, 0(sp)
    
    jal intstr
    jal putstr
    
    lw s0, 4(sp)
    lw ra, 0(sp)
    addi sp, sp, 8
    
    ret
    
    # strint: convert the zero terminated string at address s0 into an integer
    # in:  s0 = address of the string, global radix = number base of the digits
    # out: s0 = converted value
    # uses t0, t1, t2 and t5 as scratch registers
    #
    # The characters 0-9, A-Z and a-z are accepted as digits (letters count from 10 upward).
    # Conversion stops at the terminating zero, at the first character that is not a digit
    # or letter, and at the first digit whose value is not below the radix.
    #
    # RISC-V does not allow constants for branches
    # Because of this fact, the RISC-V version of strint
    # requires a lot more code than the MIPS version
    # Whatever value I wanted to compare in the branch statement
    # was placed in the t5 register on the line before the conditional branch
    
    strint:
    
    la t1, radix     # load address of radix into t1
    lb t2, 0(t1)     # load value of radix into t2
    
    mv t1, s0 # copy string address from s0 to t1
    li s0, 0  # the result starts at zero
    
    read_strint:
    lb t0, 0(t1)
    addi t1, t1, 1
    beq t0, zero, strint_end # terminating zero: the string is finished
    
    # if char is below '0' or above '9', it is outside the range of these and is not a digit
    li t5, 0x30
    blt t0, t5, not_digit
    li t5, 0x39
    blt t5, t0, not_digit
    
    # but if it is a digit, then correct and process the character
    is_digit:
    andi t0, t0, 0xF # the low four bits of '0' to '9' are the values 0 to 9
    j process_char
    
    not_digit:
    # it isn't a digit, but it could perhaps be an alphabet character
    # which is a digit in a higher base
    
    # if char is below 'A' or above 'Z', it is outside the range of these and is not capital letter
    li t5, 0x41
    blt t0, t5, not_upper
    li t5, 0x5A
    blt t5, t0, not_upper
    
    is_upper:
    li t5, 0x41
    sub t0, t0, t5
    addi t0, t0, 10 # 'A' has the value 10
    j process_char
    
    not_upper:
    
    # if char is below 'a' or above 'z', it is outside the range of these and is not lowercase letter
    li t5, 0x61
    blt t0, t5, not_lower
    li t5, 0x7A
    blt t5, t0, not_lower
    
    is_lower:
    li t5, 0x61
    sub t0, t0, t5
    addi t0, t0, 10 # 'a' has the value 10
    j process_char
    
    not_lower:
    
    # if we have reached this point, result invalid and end function
    # this is only reached if the byte was not a valid digit or alphabet character
    j strint_end
    
    process_char:
    
    # a digit has to be below the radix: the highest digit of base 10 is 9, not 10
    # the earlier test only rejected values above the radix, so 'A' was accepted as a decimal digit
    bge t0, t2, strint_end # if this value is above or equal to radix, it is too high despite being a valid digit/alpha
    
    
    mul s0, s0, t2 # multiply s0 by the radix
    add s0, s0, t0 # add the correct value of this digit
    
    j read_strint # jump back and continue the loop if nothing has exited it
    
    strint_end:
    
    ret
    
    ###############################################################################
    # putstr: print the zero terminated string at address s0 to standard output   #
    # The length is found by scanning for the terminating zero, the same method   #
    # used in my Intel Assembly programs for DOS and Linux                        #
    # Written to operate the same in both RARS and riscemu                        #
    # Overwrites t0, t1, a0, a1, a2 and a7                                        #
    ###############################################################################
    
    putstr:
    
    addi t1, s0, -1 # start one byte early because the loop advances before it loads
    
    putstr_strlen_start:
    addi t1, t1, 1                      # go to next byte
    lb t0, 0(t1)                        # load byte into t0 from address of t1
    bne t0, zero, putstr_strlen_start   # repeat until the terminating zero is found
    putstr_strlen_end:
    
    
    li   a7, 64       # a7=64    (write system call )
    li   a0, 1        # a0=1     (STDOUT file number)
    mv   a1, s0       # a1=s0    (address of string )
    sub  a2, t1, s0   # a2=t1-s0 (length of string  )
    ecall             #          (environment call  )
    
    ret
    
    #############################################################################
    # The next 3 functions print things to standard output                      #
    # Each of them hands a small string from the data section to putstr         #
    # Registers they change are saved on the stack first and restored afterward #
    #############################################################################
    
    # putchar is named after the C language function of the same name
    # it prints the lowest byte of the s0 register as a character to standard output
    # the byte is stored in front of the zero at the global char, which is then printed as a string
    
    putchar:
    
    addi sp, sp, -12
    sw t1, 8(sp)
    sw s0, 4(sp)
    sw ra, 0(sp)
    
    la t1, char
    sb s0, 0(t1) # place the character in the one character string
    mv s0, t1    # s0 = address of that string for putstr
    jal putstr
    
    lw t1, 8(sp)
    lw s0, 4(sp)
    lw ra, 0(sp)
    addi sp, sp, 12
    
    ret
    
    # putspace: print a single space to standard output
    # s0 and ra are saved on the stack and restored before returning
    
    putspace:
    
    addi sp, sp, -8
    sw s0, 4(sp)
    sw ra, 0(sp)
    
    la s0, space # the global string that holds one space
    jal putstr
    
    lw s0, 4(sp)
    lw ra, 0(sp)
    addi sp, sp, 8
    
    ret
    
    # putline: print a newline character to standard output
    # s0 and ra are saved on the stack and restored before returning
    
    putline:
    
    addi sp, sp, -8
    sw s0, 4(sp)
    sw ra, 0(sp)
    
    la s0, line # the global string that holds one newline
    jal putstr
    
    lw s0, 4(sp)
    lw ra, 0(sp)
    addi sp, sp, 8
    
    ret
    
  • Learning POSIX System Calls

    I have been doing Assembly language programming for some time now, and yet only today did I take the time to read the documentation and some online examples to help me learn how to use the system calls from C programs.

    On Linux systems like the Debian one I use, there are documentation pages already installed. There are hundreds of them, and yet only 6 are required to create all of my command-line tools.

    The following commands can be used to read each one of the 6 fundamental system calls available on Linux and Unix systems for C programmers.

    Six Supreme System Calls

    man 2 open
    man 2 close
    man 2 read
    man 2 write
    man 2 lseek
    man 2 exit
    

    The command-line utilities I have been creating, such as chastehex, chastecmp, and chastext, use these system calls in their assembly versions. However, I traditionally used the C standard library for the C versions of these programs.

    But after reading about the system calls and seeing some examples, I realized that I could make copies and rewrite these tools by calling only system calls. During this process, I learned how much easier they are to use compared to the C library functions.

    Here is a summary of each of these functions and how they are used in my programs.

    All of my tools use “open” to open a file and then “close” to close it when I am done with it. There can be no confusion as to what these functions do because of their names.

    Similarly, “read” and “write” do exactly what their names imply. They operate the same as fread and fwrite do in stdio. But they take only 3 arguments instead of 4, which makes a lot of sense. You give them a pointer, and then you tell them exactly how many bytes you want to read from or write to a file descriptor previously assigned with “open”.

    The “lseek” function stands for long seek and is capable of moving to a different position in a file before the next read or write operation. Not every program needs this, but chastehex does because one of the arguments is an address in hexadecimal to read or write. Jumping around in a file is sometimes necessary if you are working with large files or the address matters a lot.

    The final call to any program is “exit” because it ends the program. There isn’t much to say about it except that it also lets you return a number to the operating system. Usually, 0 means no errors happened, and a value of anything else indicates a specific type of error you have defined in your program. All of my programs return 1 if a file could not be opened.

    Each of these functions has various arguments that have clearly defined meanings. The return values are also specified in their manual pages.

    Interestingly, these calls are available on every operating system that I know about except for Windows. However, considering how easy these are to implement using the C standard library, it would be possible to write Windows versions of these. In fact, some people have already done this.

    See the Cygwin and MinGW projects for more information about how to use these calls on Windows. On all other operating systems, such as Linux and the Unix family (FreeBSD, OpenBSD, NetBSD, Minix, and ChromiumOS), these calls are already available if you have a working C compiler.

    You might wonder why I spent the time learning and explaining this. It is because having a super small library of functions that I can memorize allows faster programming and less time spent looking at my references when I have forgotten which order the arguments go in.

    This knowledge gives me an alternative library of functions I can use that is easier than the C standard library. However, I am keeping both versions of every program I have written.

    But the final point I want to make is that because these are the same calls used in my Assembly programs, I can make C programs that map 1 to 1 when comparing and teaching Assembly in the books I write!

  • new program: chastearg

    I wrote a small program for both Linux and DOS assembly. It is very easy to explain what it does with some pictures. The first picture is what it looks like when I use the Linux version on my Debian system. The second is the DOS version running under the DOSBOX emulator.

    As you can see, the words surrounded by quotes are displayed on the same line because they count as one argument. Linux handles this by default but DOS needed some help. I had to rewrite my entire argument filter for the DOS version.

    The reason I wrote this project and worked to make it consistent for both DOS and Linux is because I wanted to do an upgrade to the DOS version of chastext. As you can see from the picture below, I have succeeded!

    When I first posted about my chastext project, some people said it was useless because we can already use sed for Linux or other tools for find and replace. However, my assembly versions are simpler and faster than sed when you don’t need regular expressions. They also don’t depend on anything other than interrupt calls of the operating system.

    But more importantly, their argument is stupid. Writing similar programs to existing programs is a great programming exercise and is especially important for tiny projects where I don’t want to implement all the features of a program or its dependencies. I can also bring the program to platforms that the original program does not support, such as DOS.

    This attitude some people have is one that I don’t like. Should I not sing just because Taylor Swift can sing better than me? Should I not play the piano just because other people can do it better than me? Or should I not play Chess just because I can’t do it as well as Magnus Carlsen or a chess engine?

    I started programming for the joy of learning and writing my own things. I often reinvent the wheel, such as when I wrote my own strlen and strcmp functions for my chastext project. I don’t have access to the C standard library with the way I am doing it. I can’t imagine criticizing someone else’s programming project just because it has features similar to a tool that may already exist. Otherwise, I would be saying Linus Torvalds should not have created Linux just because Unix and Minix existed which had similar file systems.

  • new program: chastext

    I wrote another assembly program. This one works with text files instead of binary files. It can do a search and replace of all occurrences of a string in a text file. This could be useful for translating programs between programming languages or editing text configuration files. This screenshot is an example of how it can be used.

    main.asm

    ;Linux 32-bit Assembly Source for chastext
    ;a basic text search and replace program
    format ELF executable
    entry main
    
    ;a reduced form of chastelib without functions this program doesn't use
    include 'chastext-chastelib32.asm'
    
    main:
    
    ;chastext entry point. Usage: chastext file [search [replace]]
    ;With only a file name, the file is copied to standard output one byte at a time.
    ;With a search string, every match is printed surrounded by double quotes.
    ;With a replacement string, every match is printed as the replacement instead.
    ;The program exits with status 0, or with status 1 if the file could not be opened.
    
    pop eax
    mov [argc],eax ;save the argument count for later
    
    cmp [argc],1
    ja help_skip ;if more than 1 argument is given, skip the help message and process the other arguments
    
    help:
    mov eax,help_message
    call putstring
    mov ebx,0     ;showing the help text is not an error
    jmp main_exit ;no file was opened, so there is nothing to close
    help_skip:
    
    pop eax ;pop the next arg which is the name of the program we are running
    
    get_filename:
    pop eax ;pop the next arg which is the name of the file we will open
    
    mov [filename],eax ; save the name of the file we will open to read
    
    arg_open_file:
    
    ;Linux system call to open a file
    
    mov ecx,0   ;open file in read only mode
    mov ebx,eax ;filename should be in eax before this function was called
    mov eax,5   ;invoke SYS_OPEN (kernel opcode 5)
    int 80h     ;call the kernel
    
    cmp eax,0
    jns file_open_no_errors ;if eax is not negative/signed there was no error
    
    ;Otherwise, if it was signed, then this code will display an error message.
    
    mov eax,open_error_message
    call putstr_and_line
    
    mov ebx,1     ;return 1 to the operating system because the file could not be opened
    jmp main_exit ;there is no file descriptor to close, so go straight to the exit call
    
    file_open_no_errors:
    
    mov [filedesc],eax ; save the file descriptor number for later use
    
    ;before we just textdump or "cat" the file, we need to check for the existence of more arguments which will modify the output
    
    cmp [argc],3
    jb search_skip
    
    pop eax ;pop the next arg which is the string we are searching for
    mov [string_search],eax
    
    search_skip:
    
    cmp [argc],4
    jb replace_skip
    
    pop eax ;pop the next arg which is the string that replaces each match
    mov [string_replace],eax
    
    replace_skip:
    
    ;now we begin displaying the file but also searching for the search string if it exists. We will check for these based on the number of arguments like we did earlier
    
    textdump:
    
    mov edx,1            ;number of bytes to read
    mov ecx,byte_array   ;address to store the bytes
    mov ebx,[filedesc]   ;move the opened file descriptor into EBX
    mov eax,3            ;invoke SYS_READ (kernel opcode 3)
    int 80h              ;call the kernel
    
    mov [bytes_read],eax
    
    ;zero means end of file and a negative number means the read failed
    ;in both cases there is nothing more to process, so the program ends
    ;(testing only for zero would repeat a failing read forever)
    cmp eax,0
    jle main_end
    
    ; this point is reached if file was read from successfully
    
    file_success:
    
    cmp [argc],2 ;if only 2 arguments, just putchar and read next one
    jnz putchar_skip
    
    ;normally, we will print the last read character
    mov al,[byte_array]
    call putchar
    
    putchar_skip:
    
    cmp [argc],3 ;if not enough arguments, skip the search string section
    jb textdump
    
    mov ebx,[string_search]
    
    mov al,[ebx]
    cmp al,0
    jz print_unmatched_byte ;an empty search string can never match, so the byte is printed as it is
    
    mov ah,[byte_array]
    cmp al,ah ;compare the first character of search string with the byte read already
    jz search_start ; if they are equal, skip putchar and begin searching for the string
    
    ;otherwise, if they are not equal, just putchar the last byte read and repeat the loop
    print_unmatched_byte:
    mov al,[byte_array]
    call putchar
    jmp textdump
    
    search_start:
    mov eax,[string_search]
    call strlen ;get the length of the search string
    
    ;attempt to read the length-1 bytes because the first one is already read into the byte array
    
    dec eax
    push eax ;remember how many more bytes a complete match needs
    
    ;byte_array holds 0x100 bytes: the byte already read, at most 0xFE more, and the terminating zero
    ;a longer search string can not fit, so the read is limited to what the buffer can hold
    cmp eax,0xFE
    jbe search_read
    mov eax,0xFE
    
    search_read:
    mov edx,eax            ;number of bytes to read
    mov ecx,byte_array+1   ;address to store the bytes
    mov ebx,[filedesc]     ;move the opened file descriptor into EBX
    mov eax,3              ;invoke SYS_READ (kernel opcode 3)
    int 80h                ;call the kernel
    
    cmp eax,0
    jge search_terminate
    mov eax,0 ;a failed read stored nothing, so count it as zero bytes
    
    search_terminate:
    mov ebx,ecx
    add ebx,eax
    mov byte [ebx],0 ;terminate the string with zero
    
    pop edx ;the number of bytes a complete match needs
    cmp eax,edx
    jnz normal_print ;fewer bytes arrived (end of file or buffer limit), so this can not be a match
    
    mov esi,[string_search]
    mov edi,byte_array
    call strcmp ;compare these two strings
    
    cmp eax,0 ;test if they are the same (if eax returned zero)
    jnz normal_print ;if they are not a match print them unmodified and unquoted
    
    ;but if they are a match, then we either quote them
    ;or replace them if a replacement string is available
    
    cmp [argc],4 ;if less than 4 args, no replacement exist, so we quote the strings
    jb print_quotes
    
    ;otherwise, we will print the replacement string instead of the original!
    
    mov eax,[string_replace]
    call putstring ;print the string
    
    jmp normal_print_skip
    
    print_quotes:
    ;print quotes around matched string
    mov al,'"'
    call putchar
    
    mov eax,byte_array
    call putstring ;print the string
    
    mov al,'"'
    call putchar
    
    jmp normal_print_skip
    
    normal_print: ;print normal / unquoted because it doesn't match
    
    mov eax,byte_array
    call putstring ;print the string
    
    normal_print_skip:
    
    jmp textdump
    
    main_end:
    
    ;this is the end of the program
    ;we close the open file and then use the exit call
    
    ;Linux system call to close a file
    
    mov ebx,[filedesc] ;file number to close
    mov eax,6          ;invoke SYS_CLOSE (kernel opcode 6)
    int 80h            ;call the kernel
    
    mov ebx, 0  ; return 0 status on exit - 'No Errors'
    
    main_exit: ;paths that never opened a file jump here with their exit status already in ebx
    
    mov eax, 1  ; invoke SYS_EXIT (kernel opcode 1)
    int 80h
    
    ;strlen: measure a zero terminated string
    ;input:  eax = address of the first character
    ;output: eax = number of bytes before the terminating zero
    ;ebx is used as the scanning pointer and also holds the length on return
    
    strlen:
    
    mov ebx,eax ;start scanning at the first character
    dec ebx     ;step back one byte because the loop below advances before it tests
    
    strlen_scan:
    inc ebx
    cmp byte [ebx],0
    jnz strlen_scan ;keep going until the terminating zero is found
    
    sub ebx,eax ;distance from the start to the terminator is the length
    mov eax,ebx ;return the length in eax
    
    ret
    
    ;compare the zero terminated string at esi to the one at edi
    ;returns eax=0 if both strings are identical, including their length
    ;returns eax=1 if they differ anywhere, which includes one string being only the beginning of the other
    ;ebx, esi and edi are modified by this function
    
    strcmp:
    
    mov eax,0 ;this will stay zero unless the strings are different
    
    strcmp_start:
    mov bl,[edi]
    mov bh,[esi]
    
    ;compare before testing for the terminator
    ;so that a string ending earlier than the other counts as a difference
    cmp bl,bh
    jnz strcmp_different
    
    cmp bl,0
    jz strcmp_end ;both strings ended at the same position, so they are the same
    
    inc edi
    inc esi
    jmp strcmp_start ;the characters were the same, continue to next character
    
    strcmp_different:
    inc eax ;if they were different, eax will be incremented and the function ends
    
    strcmp_end:
    ret
    
    ;text printed when the program is started without a file name
    help_message db 'chastext by Chastity White Rose',0Ah,0Ah
    db '"cat" a file:',0Ah,0Ah,9,'chastext file',0Ah,0Ah
    db 'search for a string:',0Ah,0Ah,9,'chastext file search',0Ah,0Ah
    db 'replace string:',0Ah,0Ah,9,'chastext file search replace',0Ah,0Ah
    db 'Find or replace any string!',0Ah,0
    
    ;text printed when the open system call reports an error
    open_error_message db 'error while opening file',0
    
    ;uninitialized 32 bit variables for the arguments and the file
    argc dd ?           ; number of command line arguments
    filename dd ?       ; pointer to the name of the file to be opened
    filedesc dd ?       ; file descriptor returned by the open call
    bytes_read dd ?     ; result of the most recent one byte read
    
    string_search dd ?  ; pointer to the search string
    string_replace dd ? ; pointer to the replacement string
    
    ;256 byte buffer that receives the data read from the file
    byte_array rb 256
    

    chastext-chastelib32.asm

    ; chastelib assembly header file for 32 bit Linux
    
    ;This file has been modified for the chastext program
    ;Only string related functions are included because this program transforms text but does not process integers
    
    ;putstring: write the zero terminated string at eax to standard output
    ;input: eax = address of the string
    ;eax, ebx, ecx and edx are saved on entry and restored before returning
    
    putstring:
    
    push eax
    push ebx
    push ecx
    push edx
    
    mov ecx,eax ;ecx keeps the start of the string, which is also the buffer argument of the write call
    mov edx,eax ;edx walks forward until it reaches the terminating zero
    dec edx     ;step back one byte because the loop below advances before it tests
    
    putstring_scan:
    inc edx
    cmp byte [edx],0
    jnz putstring_scan
    
    sub edx,ecx ;end pointer minus start pointer is the number of bytes to write
    
    ;Write string using Linux Write system call.
    ;Reference for 32 bit x86 syscalls is below.
    ;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/syscalls/#x86-32-bit
    
    mov ebx,1 ;write to the STDOUT file
    mov eax,4 ;invoke SYS_WRITE (kernel opcode 4 on 32 bit systems)
    int 80h   ;system call to write the message
    
    pop edx
    pop ecx
    pop ebx
    pop eax
    
    ret
    
    ;The utility functions below simply print a space or a newline.
    ;these help me save code when printing lots of strings and integers.
    
    line db 0Ah,0
    
    ;putline: write a single line feed to standard output
    ;takes no input; every general register is saved and restored
    
    putline:
    pushad       ;save all general registers
    mov edx,1    ;the line feed is one byte long
    mov ecx,line ;address of the line feed character
    mov ebx,1    ;write to the STDOUT file
    mov eax,4    ;invoke SYS_WRITE (kernel opcode 4 on 32 bit systems)
    int 80h
    popad        ;restore all general registers
    ret
    
    ;a function for printing a single character that is the value of al
    ;the byte is passed to the write system call with a length of exactly 1
    ;so every value is written to standard output, including a byte of zero
    ;(going through putstring would treat a zero as an empty string and print nothing)
    ;eax, ebx, ecx and edx are saved on entry and restored before returning
    
    char: db 0,0
    
    putchar:
    push eax
    push ebx
    push ecx
    push edx
    
    mov [char],al ;the write call needs the byte to be in memory
    
    mov edx,1     ;number of bytes to write
    mov ecx,char  ;address of the byte to write
    mov ebx,1     ;write to the STDOUT file
    mov eax,4     ;invoke SYS_WRITE (kernel opcode 4 on 32 bit systems)
    int 80h
    
    pop edx
    pop ecx
    pop ebx
    pop eax
    ret
    
    ;putstr_and_line: print the zero terminated string at eax followed by a line feed
    ;this combines the two most common calls into one
    ;so strings do not need to contain their own newline
    
    putstr_and_line:
    call putstring ;print the string whose address is in eax
    jmp putline    ;putline prints the line feed and its ret returns to our caller