Blog

  • AAA-Linux: Chapter 8: User Input

    This post is a preview of the Linux version of Assembly Arithmetic Algorithms. It is planned to be bigger than the DOS book was and so far I am up to chapter 8 and trying to explain everything I can for Linux users who want to write assembly for modern computers at a speed higher than what was possible using a DOS emulator.

    The first seven chapters have been about teaching the basics of Assembly and getting output of strings and numbers to the screen. All those steps were required for learning Assembly. However, at some point, when you have a program that is meant to do something, you need to have a way for other people, especially those who are not programmers, to be able to give input to direct what the program does.

    There are two main ways of doing this in a console program. The first way is to have the program ask the user to type something on the keyboard and then wait until they have typed it and pressed Enter. The next program does exactly this. Copy it and try it out, and after the code I will explain how it works.

    FASM Keyboard Input

    format ELF executable
    
    ;main: ask for a line of text, print it back together with its length,
    ;and keep doing that until the line entered is exactly "exit"
    main:

            mov     dword [radix],10        ;settings read by the chastelib output routines
            mov     dword [int_width],1

    loop_input:

            mov     eax,string0             ;show the prompt
            call    putstring

            call    getstring               ;eax = address of the text that was typed

            mov     edi,string3             ;edi = the "exit" command text
            mov     esi,eax                 ;esi = what the user entered
            call    strcmp                  ;eax = 0 only when both strings are identical
            test    eax,eax
            je      the_end                 ;"exit" was typed, so leave the loop

            mov     eax,string1             ;label for the echoed text
            call    putstring
            mov     eax,buf                 ;the text itself
            call    putstring
            call    putline

            mov     eax,string2             ;label for the length
            call    putstring
            mov     eax,[count]             ;how many characters getstring stored
            call    putint
            call    putline

            jmp     loop_input              ;ask again

    the_end:
            mov     eax,1                   ;SYS_EXIT (kernel opcode 1)
            xor     ebx,ebx                 ;exit status 0
            int     80h
    
    string0 db 'Enter a string from the keyboard: ',0
    string1 db 'string: ',0
    string2 db 'length: ',0
    string3 db 'exit',0
    
    buf db 0x100 dup '?'
    count dd 0
    
    ;getstring reads characters from standard input into buf, one byte per read call
    ;
    ;input:   none
    ;output:  eax = address of buf, which holds the zero terminated string
    ;         [count] = number of characters stored (not counting the zero)
    ;changes: ebx, ecx, edx
    ;
    ;reading stops when
    ; - a byte outside the range 0x20 (space) to 0x7E (tilde) arrives, such as the newline from Enter
    ; - the read call does not return exactly 1 byte (end of input or an error)
    ; - buf is full: at most getstring_max characters are kept so the terminating zero always fits
    ;   (anything typed beyond that stays in stdin and is returned by the next call)
    
    getstring_max = 0xFF ;buf is 0x100 bytes and the last one is reserved for the zero
    
    getstring:
    
    mov dword [count],0 ;set count of characters read during this function to zero
    mov edx,1     ;number of bytes to read
    mov ecx,buf   ;address to store the bytes
    
    getstring_chars:
    
    ;never store past the end of buf: without this check a long line would
    ;overwrite count and whatever follows the buffer in memory
    cmp dword [count],getstring_max
    jae getstring_end
    
    mov ebx,0     ;read from stdin
    mov eax,3     ;invoke SYS_READ (kernel opcode 3)
    int 80h       ;call the kernel
    
    cmp eax,1     ;was 1 character read?
    jnz getstring_end ; if not, then end this loop
    
    mov al,[ecx]  ;mov last character read into al register
    
    ;check if this character is in the proper range to be part of the string
    
    cmp al,0x20      ;compare with 0x20 (space)
    jb getstring_end ;jump if below to getstring_end label
    cmp al,0x7E      ;compare with 0x7E (tilde)
    ja getstring_end ;jump if above to getstring_end label
    
    ;if neither jump happened, keep the character and move on to the next one
    
    inc dword [count] ;increment how many characters we have read
    inc ecx       ;increment address where the next byte will be stored
    jmp getstring_chars ;jump back to start of loop and keep reading
    
    getstring_end:
    
    mov byte[ecx],0 ;terminate this string with a zero (this overwrites a rejected byte, if any)
    
    mov eax,buf ;mov the buffer address to eax for returning the string
    
    ret
    
    ;strcmp compares the zero terminated string at esi with the one at edi
    ;
    ;input:   esi, edi = addresses of the two strings
    ;output:  eax = 0 when the strings are identical
    ;         eax = nonzero when they differ (al holds the byte at edi minus the byte at esi
    ;               for the first position that differs, the upper bytes of eax are zero)
    ;changes: bl, esi and edi (both are left pointing at the position where the comparison stopped)
    ;
    ;how it works:
    ;eax is cleared once so that only al carries the result
    ;one byte of each string is loaded into al and bl and the two are compared
    ;if they differ we are done
    ;if they are equal and that byte is zero, both strings ended together and we are also done
    ;(al and bl are known to be equal at that point, so testing al alone is enough)
    ;otherwise both addresses advance and the loop repeats
    ;the final subtraction of bl from al gives zero exactly when the last bytes were the same
    
    strcmp:
    
            xor     eax,eax         ;upper 24 bits of the return value stay zero
    
    .compare:
            mov     al,[edi]        ;next byte of each string
            mov     bl,[esi]
            cmp     al,bl
            jne     .finish         ;first difference found
    
            test    al,al           ;equal bytes: was it the terminating zero?
            jz      .finish
    
            inc     edi
            inc     esi
            jmp     .compare
    
    .finish:
            sub     al,bl           ;0 when equal, nonzero otherwise
            ret
    
    include 'chastelib32.asm'
    

    The getstring function uses a read system call to read from file descriptor 0 which represents standard input or the keyboard. It reads one character each time with a loop and starts at an address labeled “buf” which was declared as a global variable of 256 bytes which were initialized with question marks. I also defined a variable named count which was used to automatically count how many bytes were read.

    buf db 0x100 dup '?'
    count dd 0
    

    But I feel that the part of this function that needs the most explaining is this section:

    cmp al,0x20      ;compare with 0x20 (space)
    jb getstring_end ;jump if below to getstring_end label
    cmp al,0x7E      ;compare with 0x7E (tilde)
    ja getstring_end ;jump if above to getstring_end label
    

    This is because the range of characters from space to tilde is what I have identified as the acceptable range of characters. Any byte outside that range, including the newline that arrives when the user presses Enter, ends the string. There is no standard way that makes sense for all strings. For example, someone may want to make a getstring function that only accepts capital letters or that only accepts numbers 0 to 9. I can’t say that there is one way that is the best.

    The program listed above will keep running the loop until the user types “exit” as the string. Each time after it gets the string, it compares what the user entered to the “exit” string. If the strcmp function returns 0, it means the two strings are the same.

    This particular variant of strcmp is based off of the C function of the same name. You may also remember that I wrote a strlen function for the first example in chapter 7 when I had a string that I wanted to write to a new file.

    I believe that using conventional names of C functions is a good idea because C programmers who read my books will already be familiar with that function and what it does in the C programming language.

    In any case, “exit” was the perfect name for a command to “exit” the program. It is also how you log out of a Linux terminal and is the official name for the system call that exits every program in this book!

    Although using the keyboard for input during a running program is a great interactive way of doing things, there is one way that I enjoy even more. The next program is one that I wrote long before I started writing this book and has been referred to as “chastearg” on my blog and the Flat Assembler Forum. It prints the command line arguments when you add them after the name of the program.

    FASM Command Line Arguments

    format ELF executable
    entry main
    
    include 'chastelib32.asm'
    
    ;main: print every command line argument after the program name, one per line
    ;when the program starts, the top of the stack holds the argument count
    ;followed by the address of each argument string
    main:
    
            pop     eax             ;number of arguments, program name included
            dec     eax             ;the program name itself is not printed
            mov     [argc],eax      ;[argc] = arguments still to print
            pop     eax             ;throw away argument 0 (name of the program)
            jmp     putarg_check
    
    putarg:
            pop     eax             ;address of the next argument string
            call    putstring       ;print it followed by a new line
            call    putline
            dec     [argc]          ;one fewer left
    
    putarg_check:
            cmp     [argc],0        ;anything left to print?
            jnz     putarg
    
    putarg_end:
    
            mov     eax,1           ;invoke SYS_EXIT (kernel opcode 1)
            xor     ebx,ebx         ;return 0 status on exit - 'No Errors'
            int     0x80
    
    argc dd 0
    

    What is a Command Line Argument?

    People who come from a Windows environment may not even know what a command line argument is because they are used to pointing and clicking with a mouse. You can’t enter an argument this way. To make the idea concrete, here is some terminal text that shows what arguments are.

    fasm main.asm
    flat assembler  version 1.73.30  (16384 kilobytes memory)
    2 passes, 481 bytes.
    chmod +x main
    ./main this program has command line arguments
    this
    program
    has
    command
    line
    arguments
    

    When we run fasm and give it the name of the Assembly file we want to assemble, the file is an argument or an option we provide to it. In the above example, main.asm is the file I provide to fasm as an argument.

    After the file is assembled, I run the chmod command with the arguments “+x” and “main” which adds the execution permission to the main executable that was just created.

    Finally, running “./main” followed by more words on the same line causes Linux to interpret them as arguments. They are pushed onto the stack.

    When a program begins on Linux, you can access the number of how many arguments were passed to the program by getting the first number you pop off the stack. In the chastearg program, there is a loop that keeps track of how many arguments are left. While there are some remaining, it keeps popping them into the eax register and calling putstring until there are none left.

    Arguments vs Keyboard Input

    The primary difference between input from the keyboard during a program and passing arguments is that the arguments do not stop the execution of a program and wait for anything. If you have an install script which is meant to compile and install a large program, it is better not to pause it for any reason unless an error happens. Arguments are best in this case so that someone can pass information to it that they want the program to know.

    Keyboard input does have a benefit though. For example, suppose that you ask the user to input a number and then they accidentally input a string that is not recognizable as a number. With keyboard input, you can tell them they made a mistake and ask them to try again. With arguments, you cannot edit them during the program because they are only pushed at the start when the program is run from the terminal.

    Only you can decide which of these methods your program needs, but I hope that my explanation and my strcmp function is helpful for you when you try to write a program that needs input to do different things conditionally.

    Later in this book, I will present a calculator written in Assembly language that builds from this chapter’s keyboard input loop. However, we are not ready for that until I teach you how to separate regular strings from numbers. That will be the subject of the next chapter and I can promise you it is simultaneously the hardest task but also the most useful feature you will need for writing any program that has to read numbers.

  • Abortion prevention: don’t forget about the men

    Consider this situation: A man chooses to have sex with a woman who he knows will go for an abortion if she gets pregnant. She does get pregnant. She goes to an abortionist, who performs the abortion. Which of the three people killed the baby? The father, mother, or abortionist?

    It should be obvious that all three people played a part in the abortion. So no matter what your answer, you are partially correct. The father chose to do the one thing that could result in a pregnancy the mother did not want. The mother chose to abort rather than seeking out alternatives. The abortionist was the final step in causing the death of the baby.

    Keeping all of that in mind, if you could go back and talk to one of the people—the father, mother, or abortionist—and convince them to become pro-life, which one would you choose?

    Again, there’s no one right answer, but I would pick the father. Talking to the abortionist could have a major impact if he’s the only abortionist in town and has no one to replace him, but otherwise, the mother will just go elsewhere for the abortion. Of course talking to the mother is good because if her mind changes, the child will live. But will she have the support of the father? He had expected the mother to have an abortion even before they had sex, which implies that he has no interest in taking care of the child.

    Talking to the father makes sense because he has the power to change his ways and stop creating children who will be killed. He also has the ability to support the mother in taking care of the children he is responsible for, making the mother less likely to want to abort. Many women abort due to fear of being a single mother.

    Outreach to women in crisis pregnancy situations is great, but we need to make sure we’re reaching the men too, rather than placing all of the weight on the women. The idea that men are irrelevant to the abortion debate is incredibly misguided.


    This was a post that I submitted to Kelsey Hazzard at Secular Pro-Life back in 2014. At that time, I was still known as Chandler Klebs. You can still read it where Kelsey posted it on the Secular Pro-Life Blog:

    http://blog.secularprolife.org/2014/08/abortion-prevention-dont-forget-about.html

  • update for chastext on DOS

    I used my new getarg function I wrote to improve the DOS version of chastext. Nothing about the behavior of the program has changed but the code is a lot smaller and more readable. This chastext program was the original reason I wrote the chastearg program. I needed to get the command line arguments just right.

    But more importantly, people may not see the value of the chastext program and why it is useful to transform text. For that reason, I took this screenshot of a demo batch file that shows just how much I can modify a text file in stages.

    Besides changing a funny tongue twister about seashells into other things (which made a really great example), I also used the Linux version of chastext to make it possible to assemble my DOS programs with either FASM or NASM. The two assemblers are mostly compatible with each other when doing DOS programming due to the lack of headers in ‘.com’ files. Simply replacing “include” with “%include” is enough to transform my FASM source into NASM source because the % is required for the NASM include directive.

    FASM is my main assembler but making sure it assembles my code in NASM will also make a difference for those following my book, Assembly Arithmetic Algorithms for DOS. The book is complete and available on leanpub but there are possible corrections to the code if necessary.

    And for now, I am also trying to work on the Linux version of the book which will be more work because I have a new angle where I want to compare the Assembly to the C code of the same program. Since Linux developers are more familiar with C, it will help those with C language experience to learn the nature of Assembly language and how it is specifically very useful for Linux systems even more than it is for DOS or Windows.

    Also, the source code of the chastext program is available in its own repository.

    https://github.com/chastitywhiterose/chastext

    C and assembly versions are available, which means that it can be either compiled or assembled and run on any operating system that I know about. Almost every platform has a C compiler, and my custom assembly programs perform even better on DOS and Linux than the C version did.

    Between chastehex, chastecmp, and now chastext, I have a small set of development tools that I can use for verifying when my programs are producing the output I want. Each tool was made for a specific need I had in mind.

  • chastarg for DOS

    The chastearg program, which is shortened to chastarg to respect DOS 8.3 filename limits, is a tool for separating command line arguments into multiple lines except preserving those that are quoted and therefore counting as one argument. Quoted strings will print on the same line.

    A key aspect of how this works is the new “getarg” function that I wrote. If you take a look at this small program that uses it, it is very simple.

    main.asm

    org 100h     ;DOS programs start at this address
    
    ;this program gets all the command line arguments and prints them on separate lines
    ;getarg returns the address of a zero terminated string in ax, or zero when nothing is left
    
    call getarg ;this first call will get the command string (its return value is not used here)
    
    arg_loop:
    call getarg
    cmp ax,0 ;did the getarg function return 0?
    jz arg_loop_end ;if ax was zero, there are no more args
    call putstring ;otherwise ax is the address of the next argument, so print it
    call putline
    jmp arg_loop
    arg_loop_end:
    
    ending:
    mov ax,4C00h ; Exit program (DOS function 4Ch, return code 0)
    int 21h
    
    include 'getarg.asm'
    include 'chastelib16.asm'
    
    ;add extra zero bytes so the assembled file is exactly 512 bytes
    ;the amount is computed from how many bytes exist so far ($ is the current address
    ;and $$ is the address the program started at) instead of being a fixed number,
    ;so it stays correct when the code above or one of the included files changes size
    ;if the program ever grows beyond 512 bytes, no padding is added
    if $-$$ < 512
    times 512-($-$$) db 0
    end if
    

    But the getarg function itself is a little bit complicated. I tried my best to comment it so that hopefully other DOS programmers can benefit from this useful function.

    getarg.asm

    ;The getarg function was something I badly needed in order to make my assembly code for DOS easier to read.
    ;It will automatically process the command line arguments if they are available.
    ;
    ;input:   none (the length of the command string is read from address 80h and its text starts at 81h)
    ;output:  ax = address of a zero terminated string, or zero when there is nothing (more) to return
    ;changes: bx, cl, the flags, and the command string itself (separators are overwritten with zeros)
    ;
    ;The first time it is run, it returns the whole command string or zero if no args are given
    ;DOS does not allow the program name to be part of the arguments
    ;
    ;Each time after that, it will give you the next argument which is a substring of the original.
    ;When no more arguments are available, it will always return zero
    ;This includes the case where nothing but spaces is left at the end of the command string
    ;The program calling this is expected to check for this error and then terminate
    ;or print a message depending on the goals of that program
    
    ;A word of warning though, this function has multiple return statements and is long
    ;However, it is fully featured in that it can recognize quoted strings as being the same argument
    ;This brings full compatibility between my DOS and Linux programs which expect consistent behavior
    
    getarg:
    
    mov bx,[arguments_start] ;get the address of start of arguments
    cmp bx,0 ;is this address zero? (meaning no argument data has been saved yet)
    jz get_arg_data ;if it was zero, then get the argument data for the first execution of this function
    
    ;if the start was not zero, then clearly arguments exist and addresses have been saved
    cmp bx,[arguments_end]  ;is the address of the start and end the same?
    jnz find_next_string  ;if they are not the same, find the next sub string
    mov ax,0 ;otherwise, return ax as zero and check this in the main program
    
    ret
    
    find_next_string:
    
    mov bx,[arguments_start] ;get address of current arg
    
    skip_spaces:
    
    cmp byte[bx],' ' ;is this byte a space?
    jnz skip_spaces_end ;if it is not a space, we can end this loop
    inc bx ;otherwise, go to next byte
    jmp skip_spaces ;and keep looping till we find non-space
    skip_spaces_end:
    
    ;if the non-space we stopped at is the terminating zero, only spaces were left
    ;there is no argument to return, so we must not hand back an empty string
    cmp byte[bx],0 ;did we reach the end of the command string?
    jnz getarg_found_arg ;if not, a real argument starts here
    mov [arguments_start],bx ;remember that we are at the end so later calls also return zero
    mov ax,0 ;return zero: no more arguments
    ret
    
    getarg_found_arg:
    mov ax,bx ;copy this non-space address to ax register
    
    ;we have found a non-space which is the start of a printable string
    ;but we still have to find the next space and terminate it with a zero!
    
    ;however, there is a special case where we want a string to contain spaces. In this case, I have another routine!
    
    ;check for quoted strings
    cmp byte[bx],0x22 ;is this a double quote -> "
    jz scan_quoted_string
    cmp byte[bx],0x27 ;is this a single quote -> '
    jz scan_quoted_string
    
    find_space:
    cmp byte [bx],' ' ;is this a space?
    jz found_space ;if this was a space, end the loop and terminate with zero
    
    ;we must also check to see if we have reached the terminating zero of the arguments string
    cmp byte[bx],0 ;is this byte a zero?
    jz no_more_args ;if yes this string is already terminated
    
    inc bx
    jmp find_space ; this char was not space, go to the next char
    found_space:
    mov byte[bx],0 ;terminate this string
    
    inc bx ;but go to the next byte
    mov [arguments_start],bx ;and set the new start address for the next call
    
    ret ;We can return ax safely knowing the string ends in a zero
    
    scan_quoted_string:
    
    mov cl,byte[bx] ;mov this quote type to cl
    inc bx ;go to next byte
    mov ax,bx ;set ax to this address which is assumed to be the start of a quoted string
    
    find_end_quote:
    cmp byte[bx],cl ;is this the same quote we started with?
    jz found_end_quote ;if it is, end this loop
    
    ;we must also check to see if we have reached the terminating zero of the arguments string
    ;this avoids a crash if I forgot to add the second quotation mark in the arguments
    cmp byte[bx],0 ;is this byte a zero?
    jz no_more_args ;if yes this string is already terminated
    
    inc bx
    jmp find_end_quote
    found_end_quote:
    mov byte[bx],0 ;terminate this string (the closing quote is replaced by the zero)
    
    inc bx ;but go to the next byte
    mov [arguments_start],bx ;and set the new start address for the next call
    
    ret
    
    ;reached when the argument in ax runs up to the end of the command string
    no_more_args:
    
    mov [arguments_start],bx ;mov the start to where the string ended
    
    ;ax still holds the address of this last argument and is returned now
    ;now that the start and end addresses are the same
    ;every later call of this function will return zero
    ret
    
    ;this will happen first time this function is called to get the argument data
    get_arg_data:
    mov ax,0      ;zero ax (upper half of ax)
    mov al,[80h] ;load length of the command string from this address
    cmp ax,0
    jz getarg_end ;a length of zero means no arguments were given: return ax as zero
    
    mov bx,0x81  ;mov into bx the address of the start of the argument string
    mov [arguments_start],bx ;save the start of the arguments to this variable
    add bx,ax    ;add the length of the command string to this address
    mov byte[bx],0 ;terminate this with a zero so that putstring knows where the text ends
    mov [arguments_end],bx ;save the end of the arguments to this variable
    mov ax,[arguments_start] ;copy the address of the arguments start to ax
    
    getarg_end:
    ret
    
    ;start and end default to address of zero, which means we have not tested the arguments yet
    arguments_start dw 0
    arguments_end dw 0
    

  • chastext for Windows

    I wrote a Windows Assembly version of my chastext program.

    #main.asm

    format PE console
    include 'win32ax.inc'
    include 'chastelibw32.asm'
    
    ;main: program entry. This first part fetches the command line with GetCommandLineA,
    ;records where it starts and ends, and then splits it in place into zero terminated
    ;strings (one per argument) so that get_next_arg can walk through them afterwards.
    main:
    
    mov [radix],10 ; Choose radix for integer output.
    mov [int_width],1
    
    ;get command line argument string (its address comes back in eax)
    call [GetCommandLineA]
    
    mov [arg_string_index],eax ;save the start address of the string because eax is reused below
    
    call strlen ;get the length of the string whose address is in eax (length comes back in eax)
    
    mov ebx,[arg_string_index] ;mov the address of the string start into ebx
    add ebx,eax                ;add eax which contains the length
    mov [arg_string_end],ebx   ;move end of string address to permanent location
    
    ;optionally display the arg string to make sure it is working correctly
    ;mov eax,[arg_string_index]
    ;call putstring
    ;call putline
    
    ;set ebx back to the start of the arg string for the filter loop
    mov ebx,[arg_string_index]
    
    ;now ebx points to the first character of the command line string
    ;and we know that [arg_string_end] is where it ends
    
    ;the next step is to filter the arguments into separate zero terminated strings
    ;each space will be changed to a zero (normally)
    ;but we also need to account for spaces inside quotes that are considered part of the string
    ;Linux handles this before the program starts but here I have to write the code to mimic this behavior
    ;because the program needs to function identically on every system it was written for
    
    mov cl,' ' ;set the default filter character (argument terminator) to a space
    mov ch,0   ;are we currently checking spaces 0 or quote characters 1 as terminators?
    
    ;this loop is the new and improved argument filter
    ;it keeps track of whether we are inside or outside a quote
    ;and also which type of quote started the quote
    ;the actual quote marks are not part of the string unless they
    ;are the opposite quote type than what started the string
    ;The important thing is that spaces can exist inside of quoted strings
    ;as one argument rather than each new word being a new argument
    ;could be important for filenames containing spaces, etc.
    ;
    ;registers used by the loop:
    ;ebx = address of the character being examined
    ;cl  = character that currently ends an argument (a space, or the quote that opened a quoted string)
    ;ch  = 0 while outside quotes, 1 while inside a quoted string
    
    argument_filter:
    
    cmp ebx,[arg_string_end] ;are we at the end of the arg string?
    jz argument_filter_end       ;if yes, stop the filter and terminate with zero
    
    cmp ch,1       ;are we inside a quoted string?
    jz quote_check ;if yes, don't do anything to the spaces
    
    cmp byte[ebx],cl ;compare the byte at address ebx to the string terminator
    jnz ignore_char ;if it is not the same, we ignore it
    mov byte[ebx],0  ;but if it matches, change it to a zero
    ignore_char:
    
    cmp byte [ebx],0x22 ;is this a double quote -> "
    jz start_quote
    cmp byte [ebx],0x27 ;is this a single quote -> '
    jz start_quote
    jmp quote_no ;it was not a quote
    
    start_quote:
    
    mov ch,1    ;set ch to 1 to set that we are inside a quote now
    mov cl,[ebx] ;save this quote type as the new terminator
    mov byte[ebx],0 ;but delete the first quote with zero
    
    ;check for single or double quotes
    ;(when we fall in here from start_quote the byte was just zeroed, so the compare
    ;below cannot match and the opening quote is not mistaken for the closing one)
    quote_check:
    
    cmp [ebx],cl ;is this character the same type of quote that started this sub string?
    jnz quote_no ;if it is not, then skip to quote_no section
    
    ;but if it was matching, change this byte to zero
    ;and change cl back to a space
    mov cl,' ' ;cl is now a space
    mov ch,0   ;ch is 0 because now we have ended the quoted string
    mov byte[ebx],0 ;delete the end quote with zero
    
    quote_no:
    
    inc ebx ;go to the next character
    jmp argument_filter   ;jump back to the beginning of argument filter
    
    argument_filter_end:
    mov byte [ebx],0 ;terminate the ending with a zero for safety
    
    ;This section turns the prepared argument strings into program settings:
    ; 1. the first argument is the name of the file to read (the help text is shown if it is missing)
    ; 2. that file is opened for reading and its handle is saved in [filedesc]
    ; 3. the optional second and third arguments become the search and replacement strings
    ;get_next_arg returns an address in eax which equals [arg_string_end] when no argument is left
    
    ;check first argument which is name of program
    ;mov eax,[arg_string_index]
    ;call putstr_and_line
    
    call get_next_arg ;get address of next arg and return into eax register
    cmp eax,[arg_string_end] ;if there is no filename arg, we end
    jnz args_exist
    
    mov eax,help    ;if no arguments were given, show a help message
    call putstring
    jmp ending     ;and end the program because there is nothing to do
    
    args_exist:
    
    mov [filename],eax
    ;call putstr_and_line ;print filename before text output
    
    ;This is where the main part of the chastext program really begins.;
    
    ;now that the argument string is prepared, we will try to use the first argument as a filename to open
    
    ;https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
    ;https://learn.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights
    
    ;open first file with the CreateFileA function
    ;the arguments are pushed in reverse order, so the last push is the first parameter
    
    push 0           ;NULL: We are not using a template file
    push 0x80        ;FILE_ATTRIBUTE_NORMAL
    push 3           ;OPEN_EXISTING
    push 0           ;NULL: No security attributes
    push 0           ;NULL: Share mode irrelevant. Only this program reads the file.
    push 0x80000000  ;GENERIC_READ access mode
    push [filename] ;
    call [CreateFileA]
    
    ;check eax for file handle or error code
    ;call putint
    cmp eax,-1 ;-1 is INVALID_HANDLE_VALUE, which means the file was not opened
    jnz file_ok
    
    ;the error code has to be fetched right away, before any other system function
    ;gets a chance to replace it (putstring has to call the system to print)
    call [GetLastError]
    push eax ;keep the error code safe while the message is printed
    mov eax,file_error_message
    call putstring
    pop eax ;get the error code back
    call putint
    jmp ending ;end program if the file was not opened (there is no file handle to close)
    
    ;this label is jumped to when the file is opened correctly
    file_ok:
    
    mov [filedesc],eax
    
    ;before we proceed, we also check for more arguments.
    
    call get_next_arg ;get address of next arg and return into eax register
    cmp eax,[arg_string_end] ;if at end, no search string argument
    jz textdump ;jump to textdump section
    
    ;otherwise, we save the address in eax as our search string
    mov [string_search],eax
    ;call putstr_and_line
    
    
    call get_next_arg ;get address of next arg and return into eax register
    cmp eax,[arg_string_end] ;if at end, no replacement string argument
    jz textdump ;jump to textdump section
    
    ;otherwise, we save the address in eax as our replacement string
    mov [string_replace],eax
    ;call putstr_and_line
    
    ;all other arguments that may exist after this are irrelevant
    
    ;textdump: the main loop of chastext. Every pass handles one position of the file.
    ; - without a search string: read 1 byte, print it, repeat until nothing more can be read
    ; - with a search string: read as many bytes as the search string is long from [file_address],
    ;   compare them with the search string, then either print the match in quotes,
    ;   print the replacement string instead, or print one byte and move one position ahead
    ;NOTE(review): string_search, string_replace and file_address are defined outside this part
    ;of the file; the loop assumes they start as zero - confirm at their definitions
    textdump:
    
    ;this is the beginning of the textdump main loop of chastext
    
    ;first, check to see if there is a search string
    ;if there is a search string, skip the normal putchar
    
    cmp dword[string_search],0 ;do we have a search string?
    jnz putchar_skip
    
    ;but if there is not a search string
    ;we will read one character, then display it to stdout
    ;and then jump to the beginning of the textdump loop to print them until EOF
    ;we start the loop with a call to read exactly 1 byte
    
    ;read only 1 byte using Win32 ReadFile system call.
    push 0              ;Optional Overlapped Structure 
    push bytes_read     ;Store Number of Bytes Read from this call
    push 1              ;Number of bytes to read
    push byte_array     ;address to store bytes
    push [filedesc]     ;handle of the open file
    call [ReadFile]
    
    mov eax,[bytes_read] ;the count stored by ReadFile decides what happens next
    
    cmp eax,1        ;check to see if exactly 1 byte was read
    jz file_success ;if true, proceed to display
    ;mov ax,end_of_file
    ;call putstring
    jmp main_end ;otherwise close the file and end program (end of file or a failed read)
    
    ; this point is reached if 1 byte was read from the file successfully
    file_success:
    
    mov al,[byte_array]
    call putchar
    jmp textdump
    
    ;without a search string the code above just repeats the loop
    ;with a search string we arrive in the next section that compares the input with the search string
    
    putchar_skip:
    
    ;this is the beginning of search mode
    ;it handles the file by seeking and reading to search every position for the search string
    
    ;first, seek to the position stored in file_address
    ;this variable will be added to depending on actions taken
    
    ;seek to address of file with SetFilePointer function
    ;https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-setfilepointer
    push 0             ;seek from beginning of file (SEEK_SET)
    push 0             ;NULL: We are not using a 64 bit address
    push [file_address] ;where we are seeking to
    push [filedesc] ;seek within this file
    call [SetFilePointer]
    
    ;obtain the length of the search string using my strlen function
    mov eax,[string_search]
    call strlen ;get the length of the search string
    
    mov ecx,eax ;store this length in ecx
    mov [search_length],ecx
    
    ;call putint_and_line ;check length of search string
    
    ;use the length of the string we are searching for as the number of bytes to read at this location
    ;NOTE(review): byte_array is defined outside this part of the file; it has to hold
    ;search_length bytes plus the terminating zero written below - confirm its size
    
    ;Win32 ReadFile system call.
    push 0              ;Optional Overlapped Structure 
    push bytes_read     ;Store Number of Bytes Read from this call
    push ecx            ;Number of bytes to read
    push byte_array     ;address to store bytes
    push [filedesc]     ;handle of the open file
    call [ReadFile]
    
    mov eax,[bytes_read]  ;get how many bytes were read with that last read operation
    
    mov ebx,byte_array    ;move the address of bytes read into ebx
    add ebx,eax           ;add number of bytes read (loaded from bytes_read into eax)
    mov byte[ebx],0       ;terminate the string with zero
    
    cmp eax,[search_length] ;if the number of bytes is not what we expected to read, end this loop
    jnz textdump_end
    
    ;move our two strings into the esi and edi registers for comparison
    ;with my custom written strcmp function
    
    mov esi,[string_search]
    mov edi,byte_array
    call strcmp ;compare these two strings
    
    cmp eax,0 ;test if they are the same (if eax returned zero)
    jnz not_match ;if they are not a match go to that section for printing a character
    
    ;but if they are a match, then we either quote them
    ;or replace them if a replacement string is available
    
    ;but regardless of which action we do, since a match was found, let us add this count to the file address
    ;so that we read from beyond this point next time the textdump loop starts
    mov eax,[bytes_read]
    add [file_address],eax
    
    cmp dword[string_replace],0 ;check to see if a replacement string is available
    jz print_quotes ;if not, skip to the part where we just quote the strings that match
    
    ;otherwise, we will print the replacement string instead of the original!
    
    mov eax,[string_replace]
    call putstring ;print the string
    
    jmp textdump ;restart the main loop
    
    print_quotes:
    ;print quotes around matched string
    mov al,'"'
    call putchar
    
    mov eax,byte_array
    call putstring ;print the string
    
    mov al,'"'
    call putchar
    
    jmp textdump ;restart the main loop
    
    ;no match at this position: print only the first byte that was read and try again one byte later
    not_match: 
    
    mov al,[byte_array]
    call putchar
    add [file_address],1 ;add 1 to the file address so we don't read this same position again
    
    jmp textdump
    
    ;reached when fewer bytes than the search string length could be read (the end of the file is near)
    textdump_end:
    
    ;print the remaining bytes, if any, left after the main loop ended
    mov eax,byte_array
    call putstring
    
    main_end:
    
    ;this is the end of the program
    ;we close the open file and then use the exit call
    
    ;close the file
    push [filedesc]
    call [CloseHandle]
    
    
    ending:
    ;Exit the process with code 0
    push 0
    call [ExitProcess]
    
    .end main
    
    arg_string_index  dd 0 ;address of the argument currently selected inside the arg string
    arg_string_end    dd 0 ;address of the end of the arg string
    
    ;get_next_arg: step forward to the next argument in the arg string
    ;
    ;requirement: the filter that turns every space into a zero must have run already,
    ;             so each argument is its own zero terminated string
    ;input:   [arg_string_index] = address inside the current argument
    ;output:  eax = address of the next argument, also saved to [arg_string_index]
    ;         when eax equals [arg_string_end] there are no more arguments
    ;clobbers: ebx (it holds the same address as eax on return)
    get_next_arg:
    mov ebx,[arg_string_index]
    jmp .check_char ;test the current char before moving anywhere
    
    ;step 1: walk to the zero that terminates the current argument
    .next_char:
    inc ebx
    .check_char:
    cmp byte [ebx],0
    jne .next_char ;not a zero yet, so we are still inside the current argument
    
    ;step 2: walk over the zero(es) until a non zero char or the end of the arg string
    .skip_zeroes:
    cmp ebx,[arg_string_end]
    je .done ;already at the end, nothing left to find
    cmp byte [ebx],0
    jne .done ;a non zero char is the first char of the next argument
    inc ebx
    jmp .skip_zeroes
    
    .done:
    mov eax,ebx ;return the address in eax
    mov [arg_string_index],eax ;and remember it for the next call
    ret
    
    ;the strlen and strcmp are named after the equivalent C functions
    ;but are written from scratch by me based on their expected behavior
    
    ;strlen: measure a zero terminated string
    ;input:   eax = address of the string
    ;output:  eax = number of bytes before the terminating zero
    ;clobbers: ebx (it also holds the length on return)
    
    strlen:
    
    mov ebx,eax ;ebx walks along the string while eax keeps the start address
    jmp .check ;test the first byte before advancing
    
    .advance:
    inc ebx
    
    .check:
    cmp byte [ebx],0 ;is this the terminating zero?
    jne .advance ;no, keep walking
    
    ;ebx now points at the terminating zero
    sub ebx,eax ;end pointer minus start pointer is the length
    mov eax,ebx ;return the length in eax
    
    ret
    
    ;strcmp: compare the zero terminated string at esi with the one at edi
    ;
    ;input:   esi = address of the first string
    ;         edi = address of the second string
    ;output:  eax = 0 when the strings are the same
    ;         eax = non zero when they differ (the low byte is the edi byte minus
    ;               the esi byte at the first difference, the upper bits are zero)
    ;clobbers: bl, and esi/edi are left pointing at the position where the loop stopped
    ;
    ;how it works:
    ;eax is cleared, then one byte from each string is loaded into al and bl
    ;if the bytes differ we stop right away
    ;if they are the same and that byte is zero, both strings ended together and we stop
    ;(only al needs to be tested because al and bl are known to be equal at that point)
    ;otherwise both pointers move forward one byte and the next pair is loaded
    ;at the end bl is subtracted from al, which leaves zero only if the last pair was equal
    
    strcmp:
    
    xor eax,eax ;clear all of eax so only al can carry a non zero result
    jmp .load_pair
    
    .advance:
    inc edi
    inc esi
    
    .load_pair:
    mov al,[edi]
    mov bl,[esi]
    cmp al,bl
    jne .finish ;bytes differ, the strings are different
    
    test al,al
    jnz .advance ;equal and not the terminator, so compare the next pair
    
    .finish:
    sub al,bl ;zero if the final bytes matched, non zero otherwise
    
    ret
    
    ;usage text: one zero terminated string spread over several db lines
    ;each line ends with CR LF (0Dh,0Ah) and the example commands start with a tab (9)
    help db 'chastext by Chastity White Rose',0Dh,0Ah
    db '"cat" or "type" a file without changing it:',0Dh,0Ah,9,'chastext file',0Dh,0Ah
    db 'search for a string and quote it:',0Dh,0Ah,9,'chastext file search',0Dh,0Ah
    db 'replace string:',0Dh,0Ah,9,'chastext file search replace',0Dh,0Ah
    db 'Find or replace any string!',0Dh,0Ah,0
    
    ;file related state
    file_error_message db 'Could not open the file! Error number: ',0
    filename dd 0 ;presumably a pointer to the file name argument - it is set outside this section, confirm there
    filedesc dd 0 ;handle of the open file, passed to SetFilePointer, ReadFile and CloseHandle
    file_address dd 0 ;file address defaults to zero AKA beginning of file
                      ;search mode seeks here before each read and then adds
                      ;bytes_read after a match or 1 after a non match
    end_of_file db 'EOF',0
    
    ;number of bytes the last ReadFile call stored in byte_array
    bytes_read dd 0
    
    search_length dd 0 ;length of the search string as returned by strlen
    string_search dd 0 ; place to hold the search string pointer
    string_replace dd 0 ; place to hold the replacement string pointer (0 means just quote matches)
    
    ;where we will store data from the file
    ;0x73 (115) bytes: search mode reads search_length bytes into it and then writes
    ;a terminating zero right after them, so it can hold at most 114 bytes of file data
    ;NOTE(review): the search code shown does not limit search_length to this size - confirm it is checked elsewhere
    byte_array db 0x73 dup 0