chastext for Windows

I wrote a Windows Assembly version of my chastext program.

#main.asm

format PE console
include 'win32ax.inc'
include 'chastelibw32.asm'

main:

;-----------------------------------------------------------------------
; Program entry point (32-bit Windows console, FASM syntax).
; This first part fetches the process command line and splits it, in
; place, into zero terminated argument strings.
;
; After this part has run:
;   [arg_string_index] = address of the first character of the program name
;   [arg_string_end]   = address of the zero byte that ends the command line
;   every argument separator (space or quote mark) has been replaced by 0
; Registers used as scratch: eax, ebx, ecx
;-----------------------------------------------------------------------

mov [radix],10 ; Choose radix for integer output.
mov [int_width],1 ; integer output setting defined alongside radix in chastelibw32.asm

;get command line argument string (address is returned in eax)
call [GetCommandLineA]

mov [arg_string_index],eax ;remember where the string starts

call strlen ;get the length of the string (eax in, eax out)

mov ebx,[arg_string_index] ;mov the address of the string start into ebx
add ebx,eax                ;add eax which contains the length
mov [arg_string_end],ebx   ;move end of string address to permanent location

;optionally display the arg string to make sure it is working correctly
;mov eax,[arg_string_index]
;call putstring
;call putline

;set ebx back to the start of the arg string for the filter loop
mov ebx,[arg_string_index]

;now ebx points to the first character of the command line
;and we know that [arg_string_end] is where it ends

;the next step is to filter the arguments into separate zero terminated strings
;each space will be changed to a zero (normally)
;but we also need to account for spaces inside quotes that are considered part of the string
;Windows hands the program one flat command line string, so this code
;has to do the splitting that a Linux shell would already have done

mov cl,' ' ;set the default filter character (argument terminator) to a space
mov ch,0   ;are we currently checking spaces 0 or quote characters 1 as terminators?

;this loop keeps track of whether we are inside or outside a quote
;and also which type of quote started the quote
;the actual quote marks are not part of the string unless they
;are the opposite quote type than what started the string
;The important thing is that spaces can exist inside of quoted strings
;as one argument rather than each new word being a new argument
;could be important for filenames containing spaces, etc.

argument_filter:

cmp ebx,[arg_string_end] ;are we at the end of the arg string?
jz argument_filter_end   ;if yes, stop the filter and terminate with zero

cmp ch,1       ;are we inside a quoted string?
jz quote_check ;if yes, don't do anything to the spaces

cmp byte[ebx],cl ;compare the byte at address ebx to the string terminator
jnz ignore_char  ;if it is not the same, we ignore it
mov byte[ebx],0  ;but if it matches, change it to a zero
ignore_char:

cmp byte [ebx],0x22 ;is this a double quote -> "
jz start_quote
cmp byte [ebx],0x27 ;is this a single quote -> '
jz start_quote
jmp quote_no ;it was not a quote

start_quote:

mov ch,1        ;set ch to 1 to set that we are inside a quote now
mov cl,[ebx]    ;save this quote type as the new terminator
mov byte[ebx],0 ;but delete the first quote with zero

;check for single or double quotes
;(when we fall in from start_quote the byte is already 0, so it can not match cl)
quote_check:

cmp [ebx],cl ;is this character the same type of quote that started this sub string?
jnz quote_no ;if it is not, then skip to quote_no section

;but if it was matching, change this byte to zero
;and change cl back to a space
mov cl,' ' ;cl is now a space
mov ch,0   ;ch is 0 because now we have ended the quoted string
mov byte[ebx],0 ;delete the end quote with zero

quote_no:

inc ebx ;go to the next character
jmp argument_filter   ;jump back to the beginning of argument filter

argument_filter_end:
mov byte [ebx],0 ;terminate the ending with a zero for safety

;If the command line began with a quote, for example
;  "C:\some dir\chastext.exe" file.txt
;then the opening quote was just turned into a zero, so the string now
;STARTS with a zero byte. get_next_arg assumes [arg_string_index] points
;inside the current argument; starting on a zero would make it return the
;program name as if it were the first real argument.
;Move the index forward past any leading zero bytes to prevent that.
mov ebx,[arg_string_index]
skip_leading_zeros:
cmp ebx,[arg_string_end] ;never walk past the end of the command line
jz leading_zeros_skipped
cmp byte [ebx],0
jnz leading_zeros_skipped ;found the first real character of the program name
inc ebx
jmp skip_leading_zeros
leading_zeros_skipped:
mov [arg_string_index],ebx

;-----------------------------------------------------------------------
; Argument dispatch and file open.
;   argument 1 (required) : file to read            -> [filename]
;   argument 2 (optional) : string to search for    -> [string_search]
;   argument 3 (optional) : replacement string      -> [string_replace]
; With no arguments the help text is printed and the program exits.
; On success execution falls through (or jumps) to textdump with the
; open file handle stored in [filedesc].
;-----------------------------------------------------------------------

;check first argument which is name of program
;mov eax,[arg_string_index]
;call putstr_and_line

call get_next_arg ;get address of next arg and return into eax register
cmp eax,[arg_string_end] ;if there is no filename arg, we end
jnz args_exist

mov eax,help    ;if no arguments were given, show a help message
call putstring
jmp ending     ;and end the program because there is nothing to do

args_exist:

mov [filename],eax
;call putstr_and_line ;print filename before text output

;This is where the main part of the chastext program really begins.

;now that the argument string is prepared, we will try to use the first argument as a filename to open

;https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
;https://learn.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights

;open first file with the CreateFileA function (arguments are pushed last to first)

push 0           ;NULL: We are not using a template file
push 0x80        ;FILE_ATTRIBUTE_NORMAL
push 3           ;OPEN_EXISTING
push 0           ;NULL: No security attributes
push 1           ;FILE_SHARE_READ: let other readers keep the file open. A share mode of 0 asks for exclusive access.
push 0x80000000  ;GENERIC_READ access mode
push [filename]  ;address of the zero terminated file name
call [CreateFileA]

;check eax for file handle or error code
;call putint
cmp eax,-1 ;-1 is INVALID_HANDLE_VALUE
jnz file_ok

;The open failed. Fetch the error code first, because the calls made
;while printing the message could replace the thread's last error value.
call [GetLastError]
push eax ;keep the error code safe while the message is printed
mov eax,file_error_message
call putstring
pop eax  ;error code back into eax for putint
call putint
jmp ending ;no file was opened, so skip the CloseHandle at main_end and just exit

;this label is jumped to when the file is opened correctly
file_ok:

mov [filedesc],eax

;before we proceed, we also check for more arguments.

call get_next_arg ;get address of next arg and return into eax register
cmp eax,[arg_string_end] ;if at end, no search string argument
jz textdump ;jump to textdump section

;otherwise, we save the address in eax as our search string
mov [string_search],eax
;call putstr_and_line


call get_next_arg ;get address of next arg and return into eax register
cmp eax,[arg_string_end] ;if at end, no replacement string argument
jz textdump ;jump to textdump section

;otherwise, we save the address in eax as our replacement string
mov [string_replace],eax
;call putstr_and_line

;all other arguments that may exist after this are irrelevant

;-----------------------------------------------------------------------
; textdump: main loop of chastext.
;
; Plain mode  ([string_search] = 0):
;   read the file one byte at a time and print each byte until a read
;   returns anything other than exactly 1 byte.
; Search mode ([string_search] != 0):
;   at every file position read as many bytes as the search string is
;   long and compare. On a match print the replacement string (or the
;   match wrapped in double quotes when [string_replace] = 0) and skip
;   past the match; otherwise print one byte and advance by one.
;
; byte_array receives the bytes plus one zero terminator, so the search
; string may be at most SEARCH_LENGTH_MAX bytes long.
; Every path leaves this block through main_end.
;-----------------------------------------------------------------------

;byte_array is defined as 0x73 bytes; one of them is needed for the zero terminator.
;Keep this value in step with the byte_array definition.
SEARCH_LENGTH_MAX = 0x72

textdump:

;first, check to see if there is a search string
;if there is a search string, skip the normal putchar

cmp dword[string_search],0 ;do we have a search string?
jnz putchar_skip

;but if there is not a search string
;we will read one character, then display it to stdout
;and then jump to the beginning of the textdump loop to print them until EOF

;read only 1 byte using Win32 ReadFile system call.
push 0              ;Optional Overlapped Structure 
push bytes_read     ;Store Number of Bytes Read from this call
push 1              ;Number of bytes to read
push byte_array     ;address to store bytes
push [filedesc]     ;handle of the open file
call [ReadFile]

mov eax,[bytes_read]

cmp eax,1     ;check to see if exactly 1 byte was read
jnz main_end  ;if not (end of file or error), close the file and end the program
;mov ax,end_of_file
;call putstring

;this point is reached if 1 byte was read from the file successfully
mov al,[byte_array]
call putchar
jmp textdump

putchar_skip:

;this is the beginning of search mode
;it handles the file by seeking and reading to search every position for the search string

;first, seek to the file_address we initialized to zero
;this variable will be added to depending on actions taken

;seek to address of file with SetFilePointer function
;https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-setfilepointer
push 0              ;seek from beginning of file (FILE_BEGIN)
push 0              ;NULL: We are not using a 64 bit address
push [file_address] ;where we are seeking to
push [filedesc]     ;seek within this file
call [SetFilePointer]

;obtain the length of the search string using my strlen function
mov eax,[string_search]
call strlen ;get the length of the search string

;a longer search string would make ReadFile and the zero terminator
;write past the end of byte_array, so refuse it instead
cmp eax,SEARCH_LENGTH_MAX
ja search_too_long

mov ecx,eax ;store this length in ecx
mov [search_length],ecx

;call putint_and_line ;check length of search string

;use the length of the string we are searching for as the number of bytes to read at this location

;Win32 ReadFile system call.
push 0              ;Optional Overlapped Structure 
push bytes_read     ;Store Number of Bytes Read from this call
push ecx            ;Number of bytes to read
push byte_array     ;address to store bytes
push [filedesc]     ;handle of the open file
call [ReadFile]

mov eax,[bytes_read]  ;get how many bytes were read with that last read operation

mov ebx,byte_array    ;move the address of bytes read into ebx
add ebx,eax           ;add number of bytes read
mov byte[ebx],0       ;terminate the string with zero

cmp eax,[search_length] ;if the number of bytes is not what we expected to read, end this loop
jnz textdump_end

;move our two strings into the esi and edi registers for comparison
;with my custom written strcmp function

mov esi,[string_search]
mov edi,byte_array
call strcmp ;compare these two strings

cmp eax,0 ;test if they are the same (if eax returned zero)
jnz not_match ;if they are not a match go to that section for printing a character

;but if they are a match, then we either quote them
;or replace them if a replacement string is available

;but regardless of which action we do, since a match was found, let us add this count to the file address
;so that we read from beyond this point next time the textdump loop starts
mov eax,[bytes_read]
add [file_address],eax

cmp dword[string_replace],0 ;check to see if a replacement string is available
jz print_quotes ;if not, skip to the part where we just quote the strings that match

;otherwise, we will print the replacement string instead of the original!

mov eax,[string_replace]
call putstring ;print the string

jmp textdump ;restart the main loop

print_quotes:
;print quotes around matched string
mov al,'"'
call putchar

mov eax,byte_array
call putstring ;print the string

mov al,'"'
call putchar

jmp textdump ;restart the main loop

not_match: 

;no match at this position: output the first byte that was read and move on by one
mov al,[byte_array]
call putchar
add [file_address],1 ;add 1 to the file address so we don't read this same position again

jmp textdump

;reached only when the search string does not fit into byte_array
search_too_long:
mov eax,search_too_long_message
call putstring
jmp main_end

;this text sits between two jumps and is never executed
search_too_long_message db 'Search string is too long!',0Dh,0Ah,0

textdump_end:

;fewer bytes than the search string length were left in the file
;print the remaining bytes, if any, left after the main loop ended
mov eax,byte_array
call putstring

main_end:

;Normal shutdown path: release the file handle kept in [filedesc],
;then continue into the exit code below.
invoke CloseHandle,[filedesc]


ending:
;Leave the process and report exit code 0.
invoke ExitProcess,0

.end main

arg_string_index  dd 0 ;address of the argument currently being looked at
arg_string_end    dd 0 ;address of the zero byte that ends the whole arg string

;-----------------------------------------------------------------------
; get_next_arg
; Step from the current argument to the one that follows it.
; Only meaningful after the argument filter has replaced every
; separator with a zero byte.
;
; In:    [arg_string_index] = address inside the current argument
; Out:   eax = ebx = address of the next argument, or [arg_string_end]
;        when nothing is left; [arg_string_index] holds the same address
; Clobb: ebx, flags
;
; Callers detect "no more arguments" by comparing eax with [arg_string_end].
;-----------------------------------------------------------------------
get_next_arg:
mov ebx,[arg_string_index]
dec ebx                    ;cancel out the first inc below

;walk to the zero that ends the current argument
;(no end check here: the scan stops at the first zero byte it meets)
.to_terminator:
inc ebx
cmp byte [ebx],0
jne .to_terminator

;walk over the zeros between arguments, but never beyond the end
.over_gap:
cmp ebx,[arg_string_end]
je .store                  ;reached the end: there is no further argument
cmp byte [ebx],0
jne .store                 ;a non zero byte is the start of the next argument
inc ebx
jmp .over_gap

.store:
mov [arg_string_index],ebx ;remember the position for the next call
mov eax,ebx                ;and hand it back in eax as well
ret

;the strlen and strcmp are named after the equivalent C functions
;but are written from scratch by me based on their expected behavior

;-----------------------------------------------------------------------
; strlen
; Count the bytes of a zero terminated string.
;
; In:    eax = address of the string
; Out:   eax = number of bytes before the zero terminator
; Clobb: ebx (also holds the length on return), flags
;-----------------------------------------------------------------------
strlen:

mov ebx,eax      ;ebx walks along the string, eax keeps the start
jmp .check

.step:
inc ebx          ;this byte was not the terminator, move on

.check:
cmp byte [ebx],0 ;is this the terminating zero?
jne .step

sub ebx,eax      ;terminator address - start address = length
mov eax,ebx      ;return the length in eax

ret

;-----------------------------------------------------------------------
; strcmp
; Compare two zero terminated strings byte by byte.
;
; In:    esi = address of the first string
;        edi = address of the second string
; Out:   eax = 0 when the strings are equal; otherwise al holds the
;        difference (byte from edi - byte from esi) at the first
;        position where they differ, and the rest of eax is zero
; Clobb: bl, esi, edi (both are left at the position where the
;        comparison stopped), flags
;
; How it works: one byte of each string is loaded into al and bl.
; Different bytes end the loop at once. Equal bytes end the loop only
; when they are the terminating zero (checking al is enough, because bl
; is known to be the same). The final subtraction turns the last pair
; of bytes into the return value, which is zero exactly when they match.
;-----------------------------------------------------------------------
strcmp:

xor eax,eax  ;clear all of eax so only al carries the result
jmp .fetch

.next:
inc edi
inc esi

.fetch:
mov al,[edi]
mov bl,[esi]
cmp al,bl
jne .result  ;mismatch: the strings differ here

test al,al
jnz .next    ;same byte and not the terminator: keep comparing

.result:
sub al,bl

ret

;help text printed when the program is started without arguments
;(one zero terminated string made of CR LF separated lines, 9 is a tab)
help db 'chastext by Chastity White Rose',0Dh,0Ah
db '"cat" or "type" a file without changing it:',0Dh,0Ah,9,'chastext file',0Dh,0Ah
db 'search for a string and quote it:',0Dh,0Ah,9,'chastext file search',0Dh,0Ah
db 'replace string:',0Dh,0Ah,9,'chastext file search replace',0Dh,0Ah
db 'Find or replace any string!',0Dh,0Ah,0

;printed in front of the GetLastError number when CreateFileA fails
file_error_message db 'Could not open the file! Error number: ',0
filename dd 0 ;address of the file name argument
filedesc dd 0 ;handle returned by CreateFileA
file_address dd 0 ;file address defaults to zero AKA beginning of file
end_of_file db 'EOF',0 ;only referenced by commented out code in main

;number of bytes the most recent ReadFile call stored in byte_array
bytes_read dd 0

search_length dd 0 ;length of the search string, set on each pass of search mode
string_search dd 0 ; place to hold the search string pointer
string_replace dd 0 ; place to hold the replacement string pointer

;where we will store data from the file
;0x73 (115) bytes: search mode reads search_length bytes into it and
;then writes one zero terminator after them
byte_array db 0x73 dup 0

Comments

Leave a comment