Tag: computers

new program: chastdin

I wrote another program which is actually a modification of chastack. This gets input from a user while it is running. Despite how simple it may seem, I had to work at reading from the keyboard because there are multiple ways to read a string from the user. I may add more to this program later, but it has all the important functions of a stack based calculator. Here is a screenshot that shows me using it. You can probably figure out what the commands do based on their name and the numbers printed. I have also attached the full assembly source code to this post.

main.asm

format ELF executable
entry main

include 'chastelib32.asm'
include 'chastdin32.asm'

main:

;Entry point of chastdin.
;Register roles for the whole program:
;  ebp = address of the top entry of the virtual stack (ebp = chastack means empty)
;  esi = address of the token currently being processed

    mov dword[radix],10     ;radix used by the integer output routines
    mov dword[int_width],1  ;minimum number of digits printed (1 = no zero padding)

    mov ebp,chastack        ;start with an empty virtual stack

    ;this program does not read command line arguments
    ;it always displays a message to tell the user what the program does
    mov eax,string_help
    call putstring

    mov byte[last_char],0xA ;pretend a newline was just read so the first prompt is shown

main_loop:

    ;show the arrow only when the previous token was ended by a newline
    ;otherwise one prompt would be printed per token when several are typed on one line
    cmp byte[last_char],0xA
    jnz skip_prompt
    mov eax,string_prompt
    call putstring
skip_prompt:

    call getstring          ;read one token; eax = address of the zero terminated token

    cmp dword[count],0      ;did the token contain any characters?
    jnz main_have_token

    ;Empty token. Normally this is just extra whitespace and we read again:
    ;passing it to strint would push an unwanted 0.
    ;But when read delivered no byte at all (end of input), getstring stores the low byte
    ;of read's return value, which is 0 at EOF, in last_char. Retrying would spin forever,
    ;so a last_char of 0 ends the program. A literal NUL byte in the input is treated the same way.
    cmp byte[last_char],0
    jz main_loop_end
    jmp main_loop

main_have_token:

    mov esi,eax             ;keep the token address in esi for the string comparisons

    ;Compare the token with each command name.
    ;strcmp leaves the zero flag set when both strings are equal.

    mov edi,string_add
    call strcmp
    jz command_add

    mov edi,string_sub
    call strcmp
    jz command_sub

    mov edi,string_mul
    call strcmp
    jz command_mul

    mov edi,string_div
    call strcmp
    jz command_div

    mov edi,string_rem
    call strcmp
    jz command_rem

    mov edi,string_query
    call strcmp
    jz command_query

    mov edi,string_clear
    call strcmp
    jz command_clear

    mov edi,string_exit
    call strcmp
    jz command_exit

;The default command is to turn the token into a number and push it
command_num:

    mov eax,esi             ;strint takes the string address in eax
    call strint             ;eax = value of the number
    cmp [strint_error],0    ;did strint report zero errors?
    jz num_push

    ;not a number and not a command: report the token and ignore it
    mov eax,string_err
    call putstring
    mov eax,esi
    call putstring
    call putline
    jmp num_push_end

num_push:
    ;chastack reserves 0x100 dwords, so the last usable slot is chastack+0xFF*4.
    ;Refuse the push when ebp already points there instead of writing past the reservation.
    cmp ebp,chastack+0xFF*4
    jae num_push_full
    add ebp,4
    mov [ebp],eax
num_push_end:
    jmp main_loop

num_push_full:
    mov eax,string_stack_full
    call putstring
    jmp main_loop

;message for num_push_full. It sits after an unconditional jmp so it is never executed.
string_stack_full db 'Error: stack is full',0xA,0

;These are the labels and code for each of the commands
;When a command is done, we jump back to the beginning of the loop

;Binary arithmetic commands.
;Each one removes the top entry of the virtual stack and combines it with the entry below,
;which is replaced by the result. All arithmetic is unsigned 32 bit.
;take_operands (below) performs the shared "pop one, load the other" step and refuses
;to run when fewer than two numbers are on the stack.

command_add:
    call take_operands      ;ebx = removed top entry, [ebp] = entry below it
    jc main_loop            ;not enough operands: stack unchanged
    add [ebp],ebx
    jmp main_loop

command_sub:
    call take_operands
    jc main_loop
    sub [ebp],ebx           ;(entry below) - (removed top)
    jmp main_loop

command_mul:
    call take_operands      ;eax = entry below, ebx = removed top
    jc main_loop
    mul ebx                 ;edx:eax = eax*ebx, only the low 32 bits are kept
    mov [ebp],eax
    jmp main_loop

command_div:
    call take_operands      ;eax = dividend, ebx = divisor
    jc main_loop
    test ebx,ebx
    jz command_div_zero     ;div would raise a divide error and end the process
    mov edx,0               ;dividend is edx:eax, so edx must be zero
    div ebx
    mov [ebp],eax           ;store quotient on stack
    jmp main_loop

command_rem:
    call take_operands      ;eax = dividend, ebx = divisor
    jc main_loop
    test ebx,ebx
    jz command_div_zero
    mov edx,0               ;dividend is edx:eax, so edx must be zero
    div ebx
    mov [ebp],edx           ;store remainder on stack
    jmp main_loop

;Shared by div and rem when the divisor is zero.
;take_operands already removed the divisor and zeroed its slot; since the divisor
;was 0 the slot still holds its value, so moving ebp back restores the stack exactly.
command_div_zero:
    add ebp,4
    mov eax,string_div_zero
    call putstring
    jmp main_loop

;take_operands
;in:  ebp = top of the virtual stack
;out: carry clear: ebx = removed top entry, its slot zeroed, ebp moved down one entry,
;                  eax = new top entry
;     carry set:   fewer than two entries were present, an error was printed,
;                  ebp and the stack are unchanged
take_operands:
    cmp ebp,chastack+8      ;two entries means ebp is at least chastack+8
    jb take_operands_fail
    mov ebx,[ebp]
    mov dword[ebp],0
    sub ebp,4
    mov eax,[ebp]
    clc
    ret
take_operands_fail:
    mov eax,string_too_few
    call putstring
    stc
    ret

string_too_few db 'Error: command needs two numbers on the stack',0xA,0
string_div_zero db 'Error: division by zero',0xA,0

command_query: ;print every number on the stack, newest first
    push ebp                    ;ebp is used as the walking pointer, keep the real top
    jmp command_query_loop      ;test first so an empty stack prints nothing
command_query_print:
    mov eax,[ebp]
    sub ebp,4                   ;step down to the next older entry
    call putint_and_line
command_query_loop:
    cmp ebp,chastack            ;reached the base address of the stack?
    jne command_query_print
command_query_end:
    pop ebp                     ;stack top is exactly what it was before this command
    jmp main_loop

command_clear: ;remove every number from the stack, zeroing each slot on the way down
    jmp command_clear_loop      ;test first so an empty stack is left alone
command_clear_wipe:
    mov dword[ebp],0
    sub ebp,4
command_clear_loop:
    cmp ebp,chastack            ;stop once ebp is back at the base address
    jne command_clear_wipe
command_clear_end:
    jmp main_loop

command_exit: ;leave the program with exit status 0

main_loop_end:

    xor ebx,ebx      ;ebx = exit status 0
    mov eax,1        ;exit is call number 1 on the 32 bit int 80h interface
    int 80h          ;does not return

;argc is not referenced by the main.asm code shown here
;NOTE(review): presumably left over from chastack, which read command line arguments - confirm before removing
argc dd 0

;printed in front of the offending token when strint reports an error
string_err db 'Error: invalid number or command: ',0 ;Generic error message

;command names: main_loop compares every input token against these with strcmp
string_add db 'add',0
string_sub db 'sub',0
string_mul db 'mul',0
string_div db 'div',0
string_rem db 'rem',0
string_exit db 'exit',0
string_query db '?',0
string_clear db 'clear',0

;prompt printed by main_loop when the previous token was ended by a newline
string_prompt db '-> ',0

;help text printed once at startup (a single zero terminated string written over several db lines)
string_help db 'chastdin is a stack based interactive calculator',0xA
            db 'Numbers are pushed on the stack and commands can do math.',0xA
            db 'It is a fork of chastack that reads from stdin instead of arguments.',0xA
            db 'Each line can contain multiple numbers or commands.',0xA
            db 'Math commands are add,sub,mul,div,rem',0xA
            db 'The exit command ends the program',0xA
            db 'The ? command prints the entire stack',0xA,0xA,0

;This program uses a virtual stack for convenience and portability
;I allocate memory for a virtual stack that we can index as if it was the real stack
;I name it "chastack" for Chastity's stack.
;ebp holds the address of the newest entry and ebp = chastack means the stack is empty.
;A push adds 4 to ebp before storing, so the dword at chastack itself never holds an entry.

db 6 dup 0 ;extra padding bytes
chastack: rd 0x100 ;reserves 0x100 dwords (1024 bytes) without initializing them

chastdin32.asm

;Chastity's Standard Input header file
;The functions here are designed to read strings and numbers from standard input.

;getstring ;read characters from stdin until the first whitespace
;getline   ;read characters from stdin until the first newline,EOF,tab,etc.
;strcmp    ;compare two strings similar to the same function in C

;these variables are used as the default controllers
;for the getstring and getline functions
;buf stores keyboard input during those functions
;count stores how many bytes were read
;last_char stores the last character read
;usually this will be a space, tab, or newline

buf db 0x100 dup '?' ;0x100 (256) byte input buffer shared by getstring and getline
count dd 0 ;number of characters the most recent getstring/getline call stored in buf
last_char db 0 ;written by getstring only; getline does not store it

;summary
;the getstring function is the reverse function of putstring
;instead of printing a string to standard output
;it reads a string from standard input (AKA the keyboard)

;details
;the getstring function is designed to get a string of text
;which is terminated by whitespace or any non printable character
;the idea is that multiple strings can be passed on one line
;separated by spaces, similar to command line arguments
;this function was written for the specific purpose of converting any of
;my programs that used command line arguments to read from stdin instead

getstring:

;Reads one whitespace delimited token from stdin into buf, one byte per read call.
;in:  nothing
;out: eax         = address of buf, holding the zero terminated token
;     [count]     = number of characters stored (0 for an empty token)
;     [last_char] = the byte that ended the token, or 0 when read returned
;                   no byte (end of input or read error)
;clobbers ebx, ecx, edx
;A token longer than 0xFF characters is truncated: the extra characters are
;consumed and discarded so they are not mistaken for the start of the next token.

    mov dword[count],0  ;nothing stored yet
    mov edx,1           ;always request exactly one byte
    mov ecx,buf         ;ecx = address where the next byte is stored

getstring_chars:

    mov ebx,0           ;file descriptor 0 = stdin
    mov eax,3           ;invoke SYS_READ (kernel opcode 3)
    int 80h             ;call the kernel

    cmp eax,1           ;was exactly 1 character read?
    jz getstring_check
    mov al,0            ;no byte arrived: report 0 rather than whatever al happens to hold
    jmp getstring_end

getstring_check:

    mov al,[ecx]        ;the character that was just read

    ;only printable non-space characters 0x21 ('!') to 0x7E ('~') belong to a token

    cmp al,0x21
    jb getstring_end
    cmp al,0x7E
    ja getstring_end

    ;buf is 0x100 bytes and its final byte is reserved for the zero terminator.
    ;Once ecx has reached that byte, stop advancing: later characters overwrite
    ;the same byte and are dropped, so nothing is written past the end of buf.
    cmp ecx,buf+0xFF
    jae getstring_chars

    inc dword[count]    ;one more character kept
    inc ecx             ;next byte goes to the following address
    jmp getstring_chars

getstring_end:

    mov [last_char],al  ;remember what ended the token
    mov byte[ecx],0     ;replace the ending byte with the zero terminator

    mov eax,buf         ;return the address of the token

    ret

;the getline function gets an entire line of text from the keyboard
;calling this function allows for a string that can contain spaces
;it treats anything outside the range of 0x20 to 0x7E as the end of line character
;this is because the end of the line might be 0x0A on Linux
;or it might be 0x0D,0x0A on DOS or Windows.
;technically, it means tab will also terminate a line
;the intended use of this function is to read a filename
;filenames can contain spaces

getline:

;Reads one line from stdin into buf, one byte per read call.
;Characters 0x20 (space) to 0x7E ('~') are kept; anything else ends the line.
;in:  nothing
;out: eax     = address of buf, holding the zero terminated line
;     [count] = number of characters stored
;clobbers ebx, ecx, edx
;Unlike getstring this function does not store last_char.
;A line longer than 0xFF characters is truncated: the extra characters are
;consumed and discarded so the rest of the line is not returned by the next call.

    mov dword[count],0  ;nothing stored yet
    mov edx,1           ;always request exactly one byte
    mov ecx,buf         ;ecx = address where the next byte is stored

getline_chars:

    mov ebx,0           ;file descriptor 0 = stdin
    mov eax,3           ;invoke SYS_READ (kernel opcode 3)
    int 80h             ;call the kernel

    cmp eax,1           ;was exactly 1 character read?
    jnz getline_end     ;end of input or error: finish with what we have

    mov al,[ecx]        ;the character that was just read

    cmp al,0x20         ;compare with 0x20 (space)
    jb getline_end      ;jump if below to getline_end label
    cmp al,0x7E         ;compare with 0x7E (tilde)
    ja getline_end      ;jump if above to getline_end label

    ;buf is 0x100 bytes and its final byte is reserved for the zero terminator.
    ;Once ecx has reached that byte, stop advancing: later characters overwrite
    ;the same byte and are dropped, so nothing is written past the end of buf.
    cmp ecx,buf+0xFF
    jae getline_chars

    inc dword[count]    ;one more character kept
    inc ecx             ;next byte goes to the following address
    jmp getline_chars

getline_end:

    mov byte[ecx],0     ;terminate this string with a zero

    mov eax,buf         ;return the address of the line

    ret

;summary
;strcmp compares the string at esi to the one at edi
;eax returns 0 if the strings are the same and a nonzero value if they differ (al holds the edi byte minus the esi byte at the first mismatch)
;the algorithm is simple but I will explain it for those who are confused

;details
;eax is initialized to zero
;a byte from each string is loaded into the al and bl registers
;the bytes are compared. if they are different, then we jump to the end
;However, if they are the same, then we check if one of them is zero
;for this purpose it doesn't matter whether we compare al or bl with zero
;because it is known that they are the same if the jnz did not take place
;if it is zero, this also jumps to the end of the function
;If neither jump took place, then we jump to the start of the loop
;but when the function finally ends bl will be subtracted from al
;this ensures that the function returns zero if the final characters are the same
;ebx,esi,and edi are preserved but eax is the return value
;also, the sub instruction at the end of the function also updates the flags
;so you can "jz" or "jnz" to a label after calling this function based on results

strcmp:

;Compares the zero terminated strings at esi and edi.
;out: al = (byte from edi string) - (byte from esi string) at the position where
;     the scan stopped, upper 24 bits of eax are zero; the zero flag is set
;     exactly when the strings are equal, so the caller can jz/jnz directly
;ebx, esi and edi are restored before returning

    push ebx
    push esi
    push edi

    xor eax,eax         ;clear all of eax so only al carries the result

strcmp_start:

    mov bl,[esi]        ;next byte of each string
    mov al,[edi]
    inc esi             ;advance now; the pointers are not read again unless we loop
    inc edi
    cmp al,bl
    jne strcmp_end      ;first difference found
    test al,al
    jnz strcmp_start    ;equal and not the terminator: keep scanning

strcmp_end:
    sub al,bl           ;0 when the last pair matched; also sets the flags for the caller

    pop edi
    pop esi
    pop ebx

    ret

July 2, 2026

AAA Linux: Chapter 15: chastecmp
This post is a chapter from my recently published Linux edition of Assembly Arithmetic Algorithms. The story behind why I wrote the program featured in chapter 15 goes back to when I discovered how to cheat at video games. This story is worth sharing to inspire the next generation of gamers to learn computer math and programming just as I did.
Chapter 15: chastecmp

In this chapter, I will show you the source code of a file comparison program. This program is meant to find which bytes are different between two files that are similar but contain a few differences.

I will use text files for my examples in this chapter, but the program actually does a binary file comparison and displays the different bytes in hexadecimal because it is a universally understood shorthand for binary that most C and Assembly programmers are already familiar with.

First, here is the source code of chastecmp, which is the short name for “Chastity’s Comparison tool”. The name is also meant to refer to the “cmp” instruction, which is used a lot more in this program because it is essential.

FASM chastecmp source
```
;Linux 32-bit Assembly Source for chastecmp
format ELF executable

main:

;every number this program prints is hexadecimal
    mov dword[int_width],1
    mov dword[radix],16

;at program start the stack holds the argument count followed by the argument pointers
    pop eax                 ;eax = number of arguments, program name included
    pop ebx                 ;discard the pointer to the program name
    dec eax                 ;eax = number of arguments after the program name

    cmp eax,2               ;two filenames are required
    jae main_args_ok

help:
    mov eax,help_message
    call putstring
    jmp main_end

main_args_ok:
    mov dword[offset],0     ;comparison starts at the first byte of both files
    jmp arg_open_file_1

arg_open_file_1:
    pop eax                 ;eax = pointer to the first filename
    mov [filename1],eax     ;kept for the EOF message later
    call putstring          ;show the name; eax must still hold the pointer afterwards

    mov ebx,eax             ;ebx = filename for the open call
    xor ecx,ecx             ;ecx = 0 selects read mode
    mov eax,5               ;invoke SYS_OPEN (kernel opcode 5)
    int 80h

    test eax,eax
    js file_error_display   ;a negative result means the file could not be opened
    mov [fd1],eax           ;remember the descriptor for reading and closing
    mov eax,file_open
    call putstr_and_line

arg_open_file_2:
    pop eax                 ;eax = pointer to the second filename
    mov [filename2],eax     ;kept for the EOF message later

    call putstring          ;show the name; eax must still hold the pointer afterwards

    mov ebx,eax             ;ebx = filename for the open call
    xor ecx,ecx             ;ecx = 0 selects read mode
    mov eax,5               ;invoke SYS_OPEN (kernel opcode 5)
    int 80h

    test eax,eax
    js file_error_display   ;a negative result means the file could not be opened
    mov [fd2],eax           ;remember the descriptor for reading and closing
    mov eax,file_open
    call putstr_and_line

files_compare:

;One pass of this loop reads a single byte from each file, prints a line when the
;two bytes differ, and then advances the shared offset counter.

file_1_read_one_byte:
    mov eax,3               ;invoke SYS_READ (kernel opcode 3)
    mov ebx,[fd1]
    mov ecx,buf1            ;destination of the byte
    mov edx,1               ;one byte per call
    int 80h

    mov [count1],eax        ;result of the read, checked again further down
    test eax,eax
    jnz file_2_read_one_byte

    ;zero bytes read: announce the end of the first file
    mov eax,[filename1]
    call putstring
    mov eax,end_of_file_string
    call putstr_and_line

;the second file is read even after the first one has ended,
;so that its EOF message appears too when both end at the same offset

file_2_read_one_byte:
    mov eax,3               ;invoke SYS_READ (kernel opcode 3)
    mov ebx,[fd2]
    mov ecx,buf2            ;destination of the byte
    mov edx,1               ;one byte per call
    int 80h

    mov [count2],eax        ;result of the read, checked again further down
    test eax,eax
    jnz check_both_bytes

    ;zero bytes read: announce the end of the second file and stop
    mov eax,[filename2]
    call putstring
    mov eax,end_of_file_string
    call putstr_and_line

    jmp main_end

check_both_bytes:

    ;continue only when the two results add up to 2, meaning a byte came from each file
    mov eax,[count2]
    add eax,[count1]
    cmp eax,2
    jne main_end

compare_bytes:

    mov bl,[buf2]
    mov al,[buf1]
    cmp al,bl
    je bytes_are_same       ;identical bytes are not reported

    ;print "offset byte1 byte2": offset as 8 digits, each byte as 2 digits
    mov dword[int_width],8
    mov eax,[offset]
    call putint_and_space
    mov dword[int_width],2
    movzx eax,byte[buf1]    ;byte in al with the rest of eax zero
    call putint_and_space
    mov al,[buf2]           ;relies on the upper bits of eax still being zero after the call
    call putint_and_line

bytes_are_same:

    inc dword[offset]

    jmp files_compare

file_error_display:

;reached when an open call fails: the filename is already on screen, append " error"
    mov eax,file_error
    call putstr_and_line
    mov edi,1               ;edi = exit status 1 so the caller can see the failure
    jmp close_and_exit

main_end:

;normal end of the program (also used after printing the help text)
    mov edi,0               ;edi = exit status 0 - 'No Errors'

close_and_exit:

;Close only the files that were really opened.
;fd1 and fd2 are reserved with "dd ?" and are written only after a successful open,
;so they still read as 0 for a file that was never opened (assumes the loader zero
;fills reserved data - NOTE(review): confirm). Closing 0 would close stdin, so skip it.

    mov ebx,[fd1]
    test ebx,ebx
    jz fd1_not_open
    mov eax,6               ;invoke SYS_CLOSE (kernel opcode 6)
    int 80h
fd1_not_open:

    mov ebx,[fd2]
    test ebx,ebx
    jz fd2_not_open
    mov eax,6               ;invoke SYS_CLOSE (kernel opcode 6)
    int 80h
fd2_not_open:

    mov ebx,edi             ;exit status chosen above
    mov eax,1               ;invoke SYS_EXIT (kernel opcode 1)
    int 80h

include 'chastelib32.asm'

;variables for displaying information
;help_message is one zero terminated string written over four db lines (9 is a tab)
help_message db 'chastecmp by Chastity White Rose',0Ah,0Ah
db 9,'chastecmp file1 file2',0Ah,0Ah
db 'Differing bytes are shown in hexadecimal',0Ah
db 'until the EOF has been reached.',0Ah,0

;suffixes printed directly after a filename
file_open db ' opened',0
file_error db ' error',0
end_of_file_string db ' EOF',0

db 23 dup 0 ;fill with extra space to match 1024 executable size

;variables for managing files
;everything below is reserved with ? and has no initial value in the source
filename1 dd ? ;name of the file to be opened
filename2 dd ? ;name of the file to be opened
fd1 dd ?       ;file descriptor 1
fd2 dd ?       ;file descriptor 2
buf1 db ?      ;store byte from file 1 here
buf2 db ?      ;store byte from file 2 here
count1 dd ?    ;value returned by the most recent read from file 1
count2 dd ?    ;value returned by the most recent read from file 2
offset dd ?    ;index of the byte pair being compared, printed for differing bytes
```
How to use chastecmp

Using the chastecmp program requires two filenames to be passed as command-line arguments. Although you can use any files you have, it makes sense to use a simple example with text files because they are so easy to create with the echo command.

Run these commands to create the two files.
```
echo "chandler is my birth name" > file1.txt
echo "chastity is my trans name" > file2.txt
```
Now that the files exist, run the program with both filenames as arguments:
```
./main file1.txt file2.txt
```
If you have created these files and run the chastecmp program on them, you will see this result:
```
file1.txt opened
file2.txt opened
00000003 6E 73
00000004 64 74
00000005 6C 69
00000006 65 74
00000007 72 79
0000000F 62 74
00000010 69 72
00000011 72 61
00000012 74 6E
00000013 68 73
file1.txt EOF
file2.txt EOF
```
How does chastecmp work?

This program is much simpler than chastack or chastext, but it is close to 180 lines and still has some logic to follow. First thing it does is check to see how many command-line arguments were passed to the program. Since the name of the program always counts as 1, we subtract from this number and also pop the next argument into ebx just to get rid of it. The actual register used doesn’t matter in this case as long as it is not eax, which holds the number of arguments.

The eax register is compared with 2. If this number is below 2, then there are not enough arguments to continue the program, and it will end. Otherwise, it will proceed to use the open call with both filenames and assume these files exist. If they do not exist, it will print the filename and then say error.

If both files are opened, it will keep reading 1 byte from each file descriptor and store each in its own buffer of 1 byte. If the two bytes are the same, they will be ignored. However, if they are different, the address and the values of both bytes at that address will be displayed.

The variable “offset” is used to keep track of which address we are at in both files, but it isn’t used to lseek in this program because we are going from beginning to end.

If at any time the read system call returns 0, a message is displayed with the filename and EOF to tell the user that the end of that file has been reached.

In the example I just used, both files are the same length of 26 bytes and will reach the end at the same time.

But why should I care?

The average person probably does not know why it matters to see the hexadecimal differences between two files. I know it seems silly, especially for small text files as I used in this chapter’s examples. However, I can give two examples of times I have used this information.

The first example is relevant to Chapter 2, where I presented the header file “chaste-elf-32.nasm” which can be included to make a loadable program using the NASM assembler.

I read the specification document for ELF files to describe what the fields were named and what the values meant. However, this information alone was not enough for me to successfully create the custom ELF header. I had to create ELF executable files with FASM because it has this feature built in. By creating slightly different programs, I was able to compare the binary differences between the executables that FASM produced from the different source files. The chastecmp program was extremely helpful to me as I used it hundreds of times in reverse engineering the ELF format.

One of my discoveries was that when the size of a program increased, either by adding more code or adding more data statements, there was a number in the header that also increased. As it turns out, the memory size of the file increased even when data reservation keywords (such as rb,rw,rd, and rq) were used, even when the size of the file itself didn’t.

The specification could tell me a lot, but without the example ELF headers FASM was already creating, I would not have been able to create dynamic headers to match programs written in FASM. I probably spent 12 hours on that project, but at least I can assemble any of my programs with NASM if I make the necessary syntax changes.

But perhaps a more fun example, and also the reason I got started with programming, was that I used a file comparison tool to cheat at a Norse mythology game years ago. The game was called Castle of the Winds, and it ran on Windows 3.1, 98, and even XP.

One of the features of that specific game was that it let you save the game at any time. I remember that I had 5 mana points. I saved the first file and then cast the magic arrow spell to spend one point. I then saved a second file and ran the Windows “fc” command to compare the two files in binary mode.
```
fc /b 1.cwg 2.cwg
```
It told me the address of the byte that had changed from 5 to 4. I then opened this in a hex editor named XVI32 and changed this byte to different values.

In time, I was able to not only change my mana points but also hit points and experience points to make myself invincible in that game.

I didn’t really know much about hexadecimal at this point, but by trial and error, I accidentally started understanding it. It was this experience of cheating in a video game that led me to learn about binary and hexadecimal number systems originally.

I had seen for the first time that an understanding of computer arithmetic could allow me to break the rules and do things in a video game that the developer could not predict or prevent me from doing. In those days, I learned to do the same with many video games and had many fun adventures.

In modern times, developers have gotten smarter and have put measures in place to prevent this form of cheating. Most notably, more games are multiplayer and read data from a server that stores the game data, where no user can hack it.

But you have to understand that back in the 90s, nearly every single-player game that stored its data locally and didn’t connect to the internet could be hacked. I have had people criticize my habit of cheating in single-player games and say that it ruins the experience of the game.

But what they don’t understand is that I didn’t care about the video game I was hacking, because Arithmetic had become my favorite game. My love of math was so great that I learned computer programming and had more fun writing programs in BASIC, C, and Assembly than I did playing video games in the first place.

I can’t hack most modern games with these tricks, but I have found the art of computer programming, which is much more satisfying than any video game I have played in my life.

In summary, the chastecmp program does the same thing as the “fc /b” command from DOS and Windows did. When I switched to Linux as my primary operating system, I wrote my own file comparison tool to always keep the fond memories of my childhood with me.
Share this:
Tweet
Email
Like Loading…
June 28, 2026

chastecmp 64-bit

This post is the source of the 64-bit edition of chastecmp, my file comparison tool. It behaves exactly the same as the 32-bit edition. However, I am slowly translating my best programs to use the 64-bit calling convention for the eventual 64-bit edition of my book. This will be a few years away but there is not too much work that needs to be done. Mostly I just have to use different registers and different numbers along with the syscall instruction instead of interrupt 0x80. The calls are standard and part of the Linux kernel. All I am doing is translating it to be more compatible with how 64-bit Linux does things.

main.asm

;Linux 64-bit Assembly Source for chastecmp
format ELF64 executable
entry main

include 'chastelib64.asm'

main:

;every number this program prints is hexadecimal
    mov qword[int_width],1
    mov qword[radix],16

;at program start the stack holds the argument count followed by the argument pointers
    pop rax                 ;rax = number of arguments, program name included
    dec rax                 ;do not count the program name
    mov [argc],rax          ;argc = number of arguments after the program name
    add rsp,8               ;step over the pointer to the program name

    cmp rax,2               ;two filenames are required
    jae main_args_ok

help:
    mov rax,help_message
    call putstring
    jmp main_end

main_args_ok:
    mov qword[file_offset],0 ;comparison starts at the first byte of both files
    jmp arg_open_file_1

arg_open_file_1:
    pop rax                 ;rax = pointer to the first filename
    mov [filename1],rax     ;kept for the EOF message later

    call putstring          ;show the name; putstring restores rax before returning

    mov rdi,rax             ;rdi = filename for the open call
    xor esi,esi             ;rsi = 0 selects read mode
    mov rax,2               ;invoke SYS_OPEN (kernel opcode 2 on 64 bit systems)
    syscall

    test rax,rax
    js file_error_display   ;a negative result means the file could not be opened
    mov [filedesc1],rax     ;remember the descriptor for reading and closing
    mov rax,file_open
    call putstr_and_line

arg_open_file_2:
    pop rax                 ;rax = pointer to the second filename
    mov [filename2],rax     ;kept for the EOF message later

    call putstring          ;show the name; putstring restores rax before returning

    mov rdi,rax             ;rdi = filename for the open call
    xor esi,esi             ;rsi = 0 selects read mode
    mov rax,2               ;invoke SYS_OPEN (kernel opcode 2 on 64 bit systems)
    syscall

    test rax,rax
    js file_error_display   ;a negative result means the file could not be opened
    mov [filedesc2],rax     ;remember the descriptor for reading and closing
    mov rax,file_open
    call putstr_and_line

files_compare:

;One pass of this loop reads a single byte from each file, prints a line when the
;two bytes differ, and then advances the shared offset counter.

file_1_read_one_byte:
    xor eax,eax             ;rax = 0: invoke SYS_READ (kernel opcode 0 on 64 bit Intel)
    mov rdi,[filedesc1]
    mov rsi,byte1           ;destination of the byte
    mov rdx,1               ;one byte per call
    syscall

    mov [file_1_bytes_read],rax ;result of the read, checked again further down
    test rax,rax
    jnz file_2_read_one_byte

    ;zero bytes read: announce the end of the first file
    mov rax,[filename1]
    call putstring
    mov rax,end_of_file_string
    call putstr_and_line

;the second file is read even after the first one has ended,
;so that its EOF message appears too when both end at the same offset

file_2_read_one_byte:
    xor eax,eax             ;rax = 0: invoke SYS_READ (kernel opcode 0 on 64 bit Intel)
    mov rdi,[filedesc2]
    mov rsi,byte2           ;destination of the byte
    mov rdx,1               ;one byte per call
    syscall

    mov [file_2_bytes_read],rax ;result of the read, checked again further down
    test rax,rax
    jnz check_both_bytes

    ;zero bytes read: announce the end of the second file and stop
    mov rax,[filename2]
    call putstring
    mov rax,end_of_file_string
    call putstr_and_line

    jmp main_end

check_both_bytes:

    ;continue only when the two results add up to 2, meaning a byte came from each file
    mov rax,[file_2_bytes_read]
    add rax,[file_1_bytes_read]
    cmp rax,2
    jne main_end

compare_bytes:

    mov bl,[byte2]
    mov al,[byte1]
    cmp al,bl
    je bytes_are_same       ;identical bytes are not reported

    ;print "offset byte1 byte2": offset as 8 digits, each byte as 2 digits
    mov qword[int_width],8
    mov rax,[file_offset]
    call putint_and_space
    mov qword[int_width],2
    movzx eax,byte[byte1]   ;byte in al with the rest of rax zero
    call putint_and_space
    mov al,[byte2]          ;relies on the upper bits of rax still being zero after the call
    call putint_and_line

bytes_are_same:

    inc qword[file_offset]

    jmp files_compare

file_error_display:

;reached when an open call fails: the filename is already on screen, append " error"
    mov rax,file_error
    call putstr_and_line
    mov rbx,1               ;rbx = exit status 1 so the caller can see the failure
    jmp close_and_exit

main_end:

;normal end of the program (also used after printing the help text)
    mov rbx,0               ;rbx = exit status 0 - 'No Errors'

close_and_exit:

;Close only the files that were really opened.
;filedesc1 and filedesc2 are reserved with "dq ?" and are written only after a
;successful open, so they still read as 0 for a file that was never opened (assumes
;the loader zero fills reserved data - NOTE(review): confirm).
;Closing 0 would close stdin, so skip it.
;rbx carries the exit status because syscall overwrites rcx and r11 but not rbx.

    mov rdi,[filedesc1]
    test rdi,rdi
    jz filedesc1_not_open
    mov rax,3               ;invoke SYS_CLOSE (kernel opcode 3 for 64 bit Intel)
    syscall
filedesc1_not_open:

    mov rdi,[filedesc2]
    test rdi,rdi
    jz filedesc2_not_open
    mov rax,3               ;invoke SYS_CLOSE (kernel opcode 3 for 64 bit Intel)
    syscall
filedesc2_not_open:

    mov rdi,rbx             ;exit status chosen above
    mov rax,0x3C            ;invoke SYS_EXIT (kernel opcode 0x3C (60 decimal) on 64 bit systems)
    syscall

;variables for displaying information

;help_message is one zero terminated string written over four db lines (9 is a tab)
help_message db 'chastecmp by Chastity White Rose',0Ah,0Ah
db 9,'chastecmp file1 file2',0Ah,0Ah
db 'Differing bytes are shown in hexadecimal',0Ah
db 'until the EOF has been reached.',0Ah,0

;suffixes printed directly after a filename
file_open db ' opened',0
file_error db ' error',0
end_of_file_string db ' EOF',0

db 48 dup 0 ;fill with extra space to match 1280 executable size

;variables for managing arguments and files
;everything below is reserved with ? and has no initial value in the source
argc dq ? ; number of arguments after the program name (main decrements it once)
filename1 dq ? ; name of the file to be opened
filename2 dq ? ; name of the file to be opened
filedesc1 dq ? ; file descriptor
filedesc2 dq ? ; file descriptor
byte1 db ? ; byte most recently read from file 1
byte2 db ? ; byte most recently read from file 2
file_1_bytes_read dq ? ; value returned by the most recent read from file 1
file_2_bytes_read dq ? ; value returned by the most recent read from file 2
file_offset dq ? ; index of the byte pair being compared, printed for differing bytes

chastelib64.asm

; chastelib assembly header file for 64 bit Linux
; This file is where I keep the source of my most important Assembly functions
; These are my string and integer output and conversion routines.

; To simplify documentation. The Accumulator/Arithmetic register
; (ax,eax,rax) depending on bit size shall be referred to as register A
; for the description of these core functions because the A register
; is treated special both by the Intel company and my code;

; putstring; Prints a zero terminated string from the address pointed to by the A register.
; intstr;    Converts the number in A into a zero terminated string and points A to that address
; putint;    Prints the integer in A by calling intstr and then putstring.
; strint;    Converts the zero terminated string into an integer and sets A to that value
   
; Now, the source of the functions begins, with comments included for parts that I felt needed explanation.

putstring:

; Writes the zero terminated string at rax to standard output.
; in:  rax = address of the string
; out: nothing; every register is restored before returning
; The 64-bit write call takes its arguments in rdi, rsi and rdx, and the syscall
; instruction itself overwrites rcx and r11, so all of those are saved here
; together with rax and rbx. Callers can keep values in rsi and rdi across the call.

    push rax
    push rbx
    push rcx
    push rdx
    push rsi
    push rdi
    push r11

    mov rbx,rax ; rbx scans forward while rax keeps the start of the string

putstring_strlen_start: ; this loop finds the length of the string

    cmp byte[rbx],0 ; is the byte at address rbx the zero terminator?
    jz putstring_strlen_end
    inc rbx
    jmp putstring_strlen_start

putstring_strlen_end:
    sub rbx,rax ; end pointer minus start pointer = length of the string

;Write string using Linux Write system call
;Reference for 64 bit x86 syscalls is below.
;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/syscalls/#x86_64-64-bit

    mov rdx,rbx      ;number of bytes to write
    mov rsi,rax      ;pointer/address of string to write
    mov rdi,1        ;write to the STDOUT file
    mov rax,1        ;invoke SYS_WRITE (kernel opcode 1 on 64 bit systems)
    syscall          ;system call to write the message

    pop r11
    pop rdi
    pop rsi
    pop rdx
    pop rcx
    pop rbx
    pop rax

    ret ; this is the end of the putstring function return to calling location

; This is the location in memory where digits are written to by the intstr function
; The string of bytes and settings such as the radix and width are global variables defined below.

;intstr writes digits into this buffer from the last byte backwards
int_string db 64 dup '?' ;enough bytes to hold maximum size 64-bit binary integer

;must stay directly after int_string: intstr uses int_string_end-1 as the slot of the lowest digit
int_string_end db 0 ;zero byte terminator for the integer string

radix dq 2 ;radix or base for integer output. 2=binary, 8=octal, 10=decimal, 16=hexadecimal
int_width dq 8 ;default width of integers. Extra zeros prefixed if more than 1

;this function creates a string of the integer in rax
;it uses the above radix variable to determine base from 2 to 36
;it then loads rax with the address of the string
;this means that it can be used with the putstring function

intstr:

;Input:  rax = unsigned integer to convert
;Output: rax = address of the first character of the zero terminated string
;Uses:   rbx (write pointer), rcx (character count), rdx (current digit). These are not restored.
;The digits are written from right to left, ending at the byte just before int_string_end.
;[radix] is expected to be from 2 to 36.
;Zero padding up to [int_width] never goes past the start of int_string,
;so a width larger than the buffer gives a 64 character string instead of overwriting other memory.

mov rbx,int_string_end-1 ;find address of lowest digit (the byte just before the zero terminator)
mov rcx,1                ;count of characters in the string so far

digits_start:

mov rdx,0          ;div divides the 128 bit value rdx:rax, so the upper half must be zero
div qword [radix]  ;rax = quotient, rdx = remainder which is the next digit
cmp rdx,10
jnb hexadecimal_digit ;remainders of 10 or more are written as letters

decimal_digit: ;we fall through to here if it is only a digit 0 to 9
add rdx,'0'
jmp save_digit

hexadecimal_digit:
sub rdx,10
add rdx,'A'

save_digit:

mov [rbx],dl
cmp rax,0      ;is there anything left to divide?
jz intstr_end  ;if not, every digit has been written
dec rbx
inc rcx
jmp digits_start

intstr_end:

prefix_zeros:
cmp rcx,[int_width]
jnb end_zeros
cmp rbx,int_string ;is the write pointer already at the first byte of the buffer?
jbe end_zeros      ;if yes, stop padding so nothing is written before int_string
dec rbx
mov [rbx],byte '0'
inc rcx
jmp prefix_zeros
end_zeros:

mov rax,rbx ; now that the digits have been written to the string, return its address in rax

ret

; function to print string form of whatever integer is in rax
; The radix determines which number base the string form takes.
; Anything from 2 to 36 is a valid radix
; in practice though, only bases 2,8,10,and 16 will make sense to other programmers
; this function does not process anything by itself but calls the combination of my other
; functions in the order I intended them to be used.

putint:

;Prints the integer in rax as text by converting it with intstr
;and handing the resulting string to putstring.
;rax, rbx, rcx and rdx have the same values after the call as before it.

        push    rax             ;intstr overwrites rax, rbx, rcx and rdx,
        push    rbx             ;so all four are saved here
        push    rcx
        push    rdx

        call    intstr          ;rax = address of the digit string
        call    putstring       ;print that string

        pop     rdx             ;restore in the reverse order of the pushes
        pop     rcx
        pop     rbx
        pop     rax

        ret

;this function converts a string pointed to by rax into an integer returned in rax instead
;it is a little complicated because it has to account for whether the character in
;a string is a decimal digit 0 to 9, or an alphabet character for bases higher than ten
;it also checks for both uppercase and lowercase letters for bases 11 to 36
;finally, it checks if that letter makes sense for the base.
;For example, G to Z cannot be used in hexadecimal, only A to F can
;The purpose of writing this function was to be able to accept user input as integers
;This function is improved with error checking and uses the new strint_error variable
;The program can check this value after the call to see whether an error happened.

strint_error db 0 ;error flag: 0 after a successful conversion, 1 if the string was rejected

strint:

;Input:  rax = address of a zero terminated string of digits in base [radix]
;Output: rax = the integer value when [strint_error] is 0
;        When [strint_error] is 1, rax should not be used as a number.
;Uses:   rbx (read pointer), rcx (current character), rdx (upper half of mul). These are not restored.
;Conversion stops at the first invalid character, so at most one error is recorded per call.
;A value that does not fit in 64 bits is also reported as an error instead of silently wrapping around.

mov rbx,rax ;copy string address from rax to rbx because rax will be replaced soon!
mov rax,0
mov [strint_error],0 ;set errors to 0 at the start of this function

read_strint:
mov rcx,0 ; zero rcx so only lower 8 bits are used
mov cl,[rbx]
inc rbx
cmp cl,0 ; compare the byte that was just read with 0
jz strint_end ; if comparison was zero, this is the end of string

;if char is below '0' or above '9', it is outside the range of these and is not a digit
cmp cl,'0'
jb not_digit
cmp cl,'9'
ja not_digit

;but if it is a digit, then correct and process the character
is_digit:
sub cl,'0'
jmp process_char

not_digit:
;it isn't a digit, but it could be an alphabet character which is a digit in a higher base

;if char is below 'A' or above 'Z', it is outside the range of these and is not capital letter
cmp cl,'A'
jb not_upper
cmp cl,'Z'
ja not_upper

is_upper:
sub cl,'A'
add cl,10
jmp process_char

not_upper:

;if char is below 'a' or above 'z', it is outside the range of these and is not lowercase letter
cmp cl,'a'
jb not_lower
cmp cl,'z'
ja not_lower

is_lower:
sub cl,'a'
add cl,10
jmp process_char

not_lower:

;if we have reached this point, result invalid and end function with error
jmp strint_end_error

process_char:

cmp rcx,[radix] ;compare char with radix
jnb strint_end_error ;if this value is above or equal to radix, it is too high despite being a valid digit/alpha

mul qword [radix] ;rdx:rax = rax * radix. The carry flag is set when the result needs more than 64 bits
jc strint_end_error ;the number is too big for rax
add rax,rcx ;add the value of this digit
jc strint_end_error ;adding the digit carried out of 64 bits, so the number is too big

jmp read_strint ;jump back and continue the loop if nothing has exited it

strint_end_error: ;we jump here if there was an error with one of the chars
inc [strint_error] ;increment error counter because char invalid

strint_end: ;we jump here when no errors happened

ret

;The utility functions below simply print a space or a newline.
;these help me save code when printing lots of strings and integers.

space db ' ',0 ;a zero terminated string holding one space character

putspace:
        push    rax             ;keep the caller's rax
        mov     rax,space       ;rax = address of the space string
        call    putstring
        pop     rax             ;put the caller's rax back
        ret

line db 0Ah,0 ;a zero terminated string holding one line feed character

;putline prints a line feed.
;rax is saved on the stack, loaded with the address of the line string for putstring,
;and then restored, so the caller's rax is the same after the call as before it.

putline:
        push    rax
        mov     rax,line
        call    putstring
        pop     rax
        ret

;a function for printing a single character that is the value of al

char: db 0,0 ;one byte for the character followed by a zero terminator

putchar:
        push    rax             ;rax is restored before returning
        mov     [char],al       ;build a one character string from al
        mov     rax,char        ;rax = address of that string
        call    putstring
        pop     rax
        ret

;a small function just for the common operation
;printing an integer followed by a space
;this saves a few bytes in the assembled code
;by reducing the number of function calls in the main program

putint_and_space:
        call    putint          ;print the integer in rax
        jmp     putspace        ;tail call: the ret inside putspace returns to our caller

;a small function just for the common operation
;printing an integer followed by a line feed
;this saves a few bytes in the assembled code
;by reducing the number of function calls in the main program

putint_and_line:
        call    putint          ;print the integer in rax
        jmp     putline         ;tail call: the ret inside putline returns to our caller

;a small function just for the common operation
;printing a string followed by a line feed
;this saves a few bytes in the assembled code
;by reducing the number of function calls in the main program
;it also means we don't need to include a newline in every string!

putstr_and_line:
        call    putstring       ;print the string whose address is in rax
        jmp     putline         ;tail call: the ret inside putline returns to our caller

June 15, 2026

chastack prototype

I have created a small prototype of a calculator in the C programming language. It is a stack based calculator similar to the Forth programming language. It is written in C as a testing ground but uses methods designed to be translatable to Assembly language. It will be a featured program in chapter ten of my Linux Assembly book.

main.c

#include <stdio.h>
#include <string.h>
#include "chastelib.h"

/*number of int slots in the calculator stack*/
#define stack_length 0x10
int stack[stack_length]; /*stack array of size stack_length*/

/*
variables named after registers

esp is declared as a pointer because its only purpose in Assembly is managing the stack
ebp is declared as a pointer to keep track of the original stack pointer address

main sets esp to stack+stack_length before anything is pushed.
push decrements esp before storing and pop increments it after loading,
so the stack grows downward from the end of the array.

all other registers are used as normal integers
*/
int eax,ebx,ecx,edx,esi,edi,*ebp,*esp;

void push(i)
{
 esp--;
 *esp=i;
}

int pop()
{
 int i=*esp;
 *esp=0; /*set the value at [esp] to 0 to delete it*/
 esp++;
 return i;
}


/*
Each command line argument is either a command (add, mul, sub, div)
or a number in the current radix.
Numbers are pushed to the stack. A command pops two numbers and pushes the result.
When all arguments have been processed, the stack is printed from top to bottom.
*/
int main(int argc, char **argv)
{
 int x=1; /*index of the argument being processed. argv[0] is the program name*/

 /*set the radix used for integer display*/
 radix=10;
 int_width=1;

 /*set the stack pointer to where it should start*/
 esp=stack+stack_length;
 ebp=esp; /*backup address of esp to ebp*/

 /*
 Now the fun begins. Each argument is processed as a number or command
 x<argc is used instead of x!=argc so the loop also ends if argc is less than 1
 */

 while(x<argc)
 {
  /*
  putstr(argv[x]);
  putstr("\n");
  */

  /*first, we check for commands before we check for integers*/

  if(!strcmp(argv[x],"add"))
  {
   /*putstr("The add command adds using the top two numbers on the stack.\n");*/
   ebx=pop();
   eax=pop();
   eax+=ebx;
   push(eax);
  }

  else if(!strcmp(argv[x],"mul"))
  {
   /*putstr("The mul command multiplies using the top two numbers on the stack.\n");*/
   ebx=pop();
   eax=pop();
   eax*=ebx;
   push(eax);
  }

  else if(!strcmp(argv[x],"sub"))
  {
   /*putstr("The sub command subtracts using the top two numbers on the stack.\n");*/
   ebx=pop();
   eax=pop();
   eax-=ebx;
   push(eax);
  }

  else if(!strcmp(argv[x],"div"))
  {
   /*putstr("The div command divides using the top two numbers on the stack.\n");*/
   ebx=pop();
   eax=pop();
   if(ebx==0)
   {
    /*dividing by zero would crash the program, so put both numbers back*/
    putstr("Cannot divide by zero! The numbers were left on the stack.\n");
    push(eax);
    push(ebx);
   }
   else
   {
    if(ebx==-1)
    {
     /*
     dividing by -1 is the same as negating.
     It is done with unsigned arithmetic because the most negative int
     divided by -1 does not fit in an int and the division would crash.
     */
     eax=(int)(0u-(unsigned int)eax);
    }
    else
    {
     eax/=ebx;
    }
    push(eax);
   }
  }

  else /*try to get a number and push it to the stack*/
  {
   eax=strint(argv[x]); /*get a number from the string*/
   if(strint_errors)
   {
    putstr("Last argument was not a number, but it could be a command!\n");
   }
   else
   {
    /*
    putstr("number returned by strint(argv[x]) is: ");
    putint(eax);
    putstr("\n");
    putstr("It will be pushed to the stack.");
    */
    push(eax);
   }
  }

  x++;
 }

 /*print every number left on the stack, starting with the top*/
 while(esp<ebp)
 {
  putint(*esp);
  putstr("\n");
  esp++;
 }

 return 0;
}

June 11, 2026

AAA-Linux: Chapter 8: User Input
This post is a preview of the Linux version of Assembly Arithmetic Algorithms. It is planned to be bigger than the DOS book was and so far I am up to chapter 8 and trying to explain everything I can for Linux users who want to write assembly for modern computers at a speed higher than what was possible using a DOS emulator.
The first seven chapters have been about teaching the basics of Assembly and getting output of strings and numbers to the screen. All those steps were required for learning Assembly. However, at some point, when you have a program that is meant to do something, you need to have a way for other people, especially those who are not programmers, to be able to give input to direct what the program does.

There are two main ways of doing this in a console program. The first way is to have the program ask the user to type something from the keyboard and then wait until they write something and press enter. The next program will achieve this. Copy this and try it out and then I will explain after the code how it works.

FASM Keyboard Input
```
format ELF executable

main:

        mov     dword [radix],10        ;integers are printed in base ten
        mov     dword [int_width],1     ;with no extra zeros in front

loop_input:

        mov     eax,string0
        call    putstring               ;ask the user for a string

        call    getstring               ;eax = address of what was typed

        mov     esi,eax                 ;first string: the user's input
        mov     edi,string3             ;second string: "exit"
        call    strcmp
        test    eax,eax                 ;eax is zero only when the strings are the same
        jz      the_end                 ;the user typed "exit", so leave the loop

        mov     eax,string1
        call    putstring

        mov     eax,buf                 ;print the string that was read
        call    putstring
        call    putline

        mov     eax,string2
        call    putstring

        mov     eax,[count]             ;print how many characters were read
        call    putint
        call    putline

        jmp     loop_input

the_end:
        mov     eax,1                   ;invoke SYS_EXIT (kernel opcode 1)
        xor     ebx,ebx                 ;exit status 0
        int     80h

string0 db 'Enter a string from the keyboard: ',0
string1 db 'string: ',0
string2 db 'length: ',0
string3 db 'exit',0

buf db 0x100 dup '?' ;buffer that receives the characters read by getstring
count dd 0 ;number of characters stored in buf by the last getstring call

getstring:

;Reads characters from standard input one at a time and stores them in buf.
;Reading stops at the first character outside the range 0x20 to 0x7E (such as the newline),
;when the read system call does not return exactly 1 byte,
;or when only the last byte of buf is left, so that the terminating zero always fits.
;Characters that did not fit are not read and remain for the next call.
;Output: eax = address of buf, which holds the zero terminated string
;        [count] = number of characters in the string
;Uses:   ebx, ecx and edx. These are not restored.

mov [count],0 ;set count of characters read during this function to zero
mov edx,1     ;number of bytes to read
mov ecx,buf   ;address to store the bytes

getstring_chars:

cmp ecx,buf+0x100-1 ;is ecx at the last byte of the 0x100 byte buffer?
jnb getstring_end   ;if yes, stop here because that byte is needed for the terminating zero

mov ebx,0     ;read from stdin
mov eax,3     ;invoke SYS_READ (kernel opcode 3)
int 80h       ;call the kernel

cmp eax,1     ;was 1 character read?
jnz getstring_end ; if not, then end this loop

mov al,[ecx]  ;mov last character read into al register

;check if this character is in the proper range to be part of the string

cmp al,0x20      ;compare with 0x20 (space)
jb getstring_end ;jump if below to getstring_end label
cmp al,0x7E      ;compare with 0x7E (tilde)
ja getstring_end ;jump if above to getstring_end label

;if neither jump happened, keep the character and move on to the next one

inc [count]   ;increment how many characters we have read
inc ecx       ;increment address where next byte will be stored
jmp getstring_chars ;jump back to start of loop and keep reading

getstring_end:

mov byte[ecx],0 ;terminate this string with a zero

mov eax,buf ;mov the buffer address to eax for returning the string

ret

;strcmp compares the string at esi to the one at edi
;eax returns 0 if the strings are the same and a nonzero value if they are different
;the algorithm is simple but I will explain it for those who are confused

;eax is initialized to zero
;a byte from each string is loaded into the al and bl registers
;the bytes are compared. if they are different, then we jump to the end
;However, if they are the same, then we check if one of them is zero
;for this purpose it doesn't matter whether we compare al or bl with zero
;because it is known that they are the same if the jnz did not take place
;if it is zero, this also jumps to the end of the function
;If neither jump took place, then we jump to the start of the loop
;but when the function finally ends bl will be subtracted from al
;this ensures that the function returns zero if the final characters are the same

strcmp:

        xor     eax,eax         ;clear eax so only al holds the result

strcmp_start:

        mov     al,[edi]        ;al = current byte of the string at edi
        mov     bl,[esi]        ;bl = current byte of the string at esi
        cmp     al,bl
        jne     strcmp_end      ;the bytes differ, so the strings differ

        test    al,al           ;the bytes are equal, but are they the terminating zero?
        je      strcmp_end      ;if so, both strings ended together

        inc     edi             ;move to the next byte of each string
        inc     esi
        jmp     strcmp_start

strcmp_end:
        sub     al,bl           ;al = 0 when the last two bytes compared were equal
        ret

include 'chastelib32.asm'
```
The getstring function uses a read system call to read from file descriptor 0 which represents standard input or the keyboard. It reads one character each time with a loop and starts at an address labeled “buf” which was declared as a global variable of 256 bytes which were initialized with question marks. I also defined a variable named count which was used to automatically count how many bytes were read.
```
buf db 0x100 dup '?'
count dd 0
```
But I feel that the part of this function that needs the most explaining is this section:
```
cmp al,0x20      ;compare with 0x20 (space)
jb getstring_end ;jump if below to getstring_end label
cmp al,0x7E      ;compare with 0x7E (tilde)
ja getstring_end ;jump if above to getstring_end label
```
Because this range of characters from space to tilde is what I have identified as the acceptable range of characters. There is no standard way that makes sense for all strings. For example, someone may want to make a getstring function that only accepts capital letters or that only accepts numbers 0 to 9. I can’t say that there is one way that is the best.

The program listed above will keep running the loop until the user types “exit” as the string. Each time after it gets the string, it compares what the user entered to the “exit” string. If the strcmp function returns 0, it means the two strings are the same.

This particular variant of strcmp is based off of the C function of the same name. You may also remember that I wrote a strlen function for the first example in chapter 7 when I had a string that I wanted to write to a new file.

I believe that using conventional names of C functions is a good idea because C programmers who read my books will already be familiar with that function and what it does in the C programming language.

In any case, “exit” was the perfect name for a command to “exit” the program. It is also how you log out of a Linux terminal and is the official name for the system call that exits every program in this book!

Although using the keyboard for input during a running program is a great interactive way of doing things, there is one way that I enjoy even more. The next program is one that I wrote long before I started writing this book and has been referred to as “chastearg” on my blog and the Flat Assembler Forum. It prints the command line arguments when you add them after the name of the program.

FASM Command Line Arguments
```
format ELF executable
entry main

include 'chastelib32.asm'

main:

        pop     eax                     ;first value on the stack: the number of arguments
        mov     [argc],eax              ;remember it in the argc variable

        pop     eax                     ;argument 0 is the program name, which is not printed
        dec     dword [argc]            ;one less argument remains

putarg:

        cmp     dword [argc],0          ;are any arguments left?
        je      putarg_end              ;no, so stop printing
        pop     eax                     ;eax = address of the next argument string
        call    putstring               ;print it on its own line
        call    putline
        dec     dword [argc]            ;one less argument remains
        jmp     putarg                  ;go around again

putarg_end:

        mov     eax, 1                  ; invoke SYS_EXIT (kernel opcode 1)
        mov     ebx, 0                  ; return 0 status on exit - 'No Errors'
        int     0x80

argc dd 0
```
What is a Command Line Argument?

People who come from a Windows environment may not even know what a command line argument is because they are used to pointing and clicking with a mouse. You can’t enter an argument this way. To illustrate, here is some terminal text that shows what arguments are.
```
fasm main.asm
flat assembler  version 1.73.30  (16384 kilobytes memory)
2 passes, 481 bytes.
chmod +x main
./main this program has command line arguments
this
program
has
command
line
arguments
```
When we run fasm and give it the name of the Assembly file we want to assemble, the file is an argument or an option we provide to it. In the above example, main.asm is the file I provide to fasm as an argument.

After the file is assembled, I run the chmod command with the arguments “+x” and “main” which adds the execution permission to the main executable that was just created.

Finally, running “./main” followed by more words on the same line causes Linux to interpret them as arguments. They are pushed onto the stack.

When a program begins on Linux, you can access the number of how many arguments were passed to the program by getting the first number you pop off the stack. In the chastearg program, there is a loop that keeps track of how many arguments are left. While there are some remaining, it keeps popping them into the eax register and calling putstring until there are none left.

Arguments vs Keyboard Input

The primary difference between input from the keyboard during a program and passing arguments is that the arguments do not stop the execution of a program and wait for anything. If you have an install script which is meant to compile and install a large program, it is better not to pause it for any reason unless an error happens. Arguments are best in this case so that someone can pass information to it that they want the program to know.

Keyboard input does have a benefit though. For example, suppose that you ask the user to input a number and then they accidentally input a string that is not recognizable as a number. With keyboard input, you can tell them they made a mistake and ask them to try again. With arguments, you cannot edit them during the program because they are only pushed at the start when the program is run from the terminal.

Only you can decide which of these methods your program needs, but I hope that my explanation and my strcmp function is helpful for you when you try to write a program that needs input to do different things conditionally.

Later in this book, I will present a calculator written in Assembly language that builds from this chapter’s keyboard input loop. However, we are not ready for that until I teach you how to separate regular strings from numbers. That will be the subject of the next chapter and I can promise you it is simultaneously the hardest task but also the most useful feature you will need for writing any program that has to read numbers.
Share this:
Tweet
Email
Like Loading…
June 7, 2026

Tag: computers

new program: chastdin

main.asm

chastdin32.asm

Share this:

AAA Linux: Chapter 15: chastecmp

Chapter 15: chastecmp

FASM chastecmp source

How to use chastecmp

How does chastecmp work?

But why should I care?

Share this:

chastecmp 64-bit

main.asm

chastelib64.asm

Share this:

chastack prototype

main.c

Share this:

AAA-Linux: Chapter 8: User Input

FASM Keyboard Input

FASM Command Line Arguments

What is a Command Line Argument?

Arguments vs Keyboard Input

Share this: