Blog

  • AAA Linux: Chapter 15: chastecmp

    This post is a chapter from my recently published Linux edition of Assembly Arithmetic Algorithms. The story behind why I wrote the program featured in chapter 15 goes back to when I discovered how to cheat at video games. This story is worth sharing to inspire the next generation of gamers to learn computer math and programming just as I did.

    Chapter 15: chastecmp

    In this chapter, I will show you the source code of a file comparison program. This program is meant to find which bytes are different between two files that are similar but contain a few differences.

    I will use text files for my examples in this chapter, but the program actually does a binary file comparison and displays the different bytes in hexadecimal because it is a universally understood shorthand for binary that most C and Assembly programmers are already familiar with.

    First, here is the source code of chastecmp, which is the short name for “Chastity’s Comparison tool”. The name is also meant to refer to the “cmp” instruction, which is used a lot more in this program because it is essential.

    FASM chastecmp source

    ;Linux 32-bit Assembly Source for chastecmp
    format ELF executable
    
    main:
    
    ;Entry point of chastecmp (32-bit Linux, system calls through int 80h).
    ;Opens the two files named on the command line, reads one byte at a
    ;time from each and prints "address byte1 byte2" in hexadecimal for
    ;every position where the two bytes differ.
    ;
    ;Exit status: 0 when the comparison ran (or the help text was shown),
    ;             1 when one of the files could not be opened.
    ;
    ;putstring, putstr_and_line, putint_and_space, putint_and_line, radix
    ;and int_width are not defined in this listing; presumably they come
    ;from chastelib32.asm, which is included after this code.
    ;
    ;NOTE: this version is a few instructions longer than the code the
    ;"db 23 dup 0" padding line in the data section was tuned for. Adjust
    ;that padding if the executable must stay exactly 1024 bytes.
    
    ;radix will be 16 because this whole program is about hexadecimal
    mov dword[radix],16 ; can choose radix for integer input/output!
    mov dword[int_width],1
    
    ;mark both file descriptors as "not open" (-1) so that the cleanup
    ;code at the end only closes files that were really opened
    mov dword[fd1],-1
    mov dword[fd2],-1
    
    pop eax ;get the number of arguments
    dec eax ;subtract 1 because we will ignore the name of the program
    pop ebx ;pop program name into a register to delete it from stack
    
    cmp eax,2 ;do we have two arguments to be used as filenames?
    jb help
    mov dword[offset],0 ;assume the offset is 0,beginning of file
    jmp arg_open_file_1
    
    help:
    mov eax,help_message
    call putstring
    jmp main_end
    
    arg_open_file_1:
    pop eax
    mov [filename1],eax ; save the name of the file we will open to read
    call putstring ;print the name of the file we will try opening
    
    ;the filename is taken from eax again below, so this assumes that
    ;putstring leaves eax unchanged (putstring is not visible here)
    mov ecx,0   ;open file in read mode 
    mov ebx,eax ;move filename for system call
    mov eax,5   ;invoke SYS_OPEN (kernel opcode 5)
    int 80h     ;call the kernel
    
    cmp eax,0
    js file_error_display ;end program if the file can't be opened (negative return value)
    mov [fd1],eax ; save the file descriptor number for later use
    mov eax,file_open
    call putstr_and_line
    
    arg_open_file_2:
    pop eax
    mov [filename2],eax ; save the name of the file we will open to read
    
    call putstring ;print the name of the file we will try opening
    
    mov ecx,0   ;open file in read mode 
    mov ebx,eax ;move filename for system call
    mov eax,5   ;invoke SYS_OPEN (kernel opcode 5)
    int 80h     ;call the kernel
    
    cmp eax,0
    js file_error_display ;end program if the file can't be opened (negative return value)
    mov [fd2],eax ; save the file descriptor number for later use
    mov eax,file_open
    call putstr_and_line
    
    ;main loop: one pass reads one byte from each file and compares them
    files_compare:
    
    file_1_read_one_byte:
    mov edx,1       ;number of bytes to read
    mov ecx,buf1    ;address to store the bytes
    mov ebx,[fd1]   ;move the opened file descriptor into EBX
    mov eax,3       ;invoke SYS_READ (kernel opcode 3)
    int 80h         ;call the kernel
    
    ;eax will have the number of bytes read after system call
    mov [count1],eax ;we save the number of bytes read for later
    cmp eax,0
    jnz file_2_read_one_byte ;unless zero bytes were read, proceed to read from next file
    
    ;zero bytes read: report the end of the first file
    mov eax,[filename1]
    call putstring
    mov eax,end_of_file_string
    call putstr_and_line
    
    ;Even if we have reached the end of the first file,
    ;we still proceed to read a byte from the second file
    ;to see if it also ends at the same address
    
    file_2_read_one_byte:
    mov edx,1       ;number of bytes to read
    mov ecx,buf2    ;address to store the bytes
    mov ebx,[fd2]   ;move the opened file descriptor into EBX
    mov eax,3       ;invoke SYS_READ (kernel opcode 3)
    int 80h         ;call the kernel
    
    ;eax will have the number of bytes read after system call
    mov [count2],eax ;we save the number of bytes read for later
    cmp eax,0
    jnz check_both_bytes ;unless zero bytes were read, proceed to compare bytes from both files
    
    ;zero bytes read: report the end of the second file
    mov eax,[filename2]
    call putstring
    mov eax,end_of_file_string
    call putstr_and_line
    
    jmp main_end ;we have reached the end of one file and should end the program
    
    check_both_bytes:
    
    ;we add the number of bytes read from both files
    ;the sum is 2 only when each read delivered exactly one byte;
    ;anything else (end of file 1, or a negative error value) ends the program
    mov eax,[count1]
    add eax,[count2]
    cmp eax,2
    jnz main_end
    
    compare_bytes:
    
    mov al,[buf1]
    mov bl,[buf2]
    
    ;compare the two bytes and skip printing them if they are the same
    cmp al,bl
    jz bytes_are_same
    
    ;print the address and the bytes at that address
    mov eax,[offset]
    mov dword[int_width],8 ;the address is printed with 8 digits
    call putint_and_space
    mov dword[int_width],2 ;each byte is printed with 2 digits
    mov eax,0              ;clear the upper bits before loading single bytes into al
    mov al,[buf1]
    call putint_and_space
    mov al,[buf2]
    call putint_and_line
    
    bytes_are_same:
    
    inc dword[offset]
    
    jmp files_compare
    
    file_error_display:
    
    ;the filename has already been printed, so this completes the line
    mov eax,file_error
    call putstr_and_line
    mov esi,1 ;exit status 1: a file could not be opened
    jmp main_close_files
    
    main_end:
    
    mov esi,0 ;exit status 0 - 'No Errors'
    
    main_close_files:
    
    ;this is the end of the program
    ;we close the files that were opened and then use the exit call
    ;esi holds the exit status; it is not used by the close calls below
    
    mov ebx,[fd1] ;file number to close
    cmp ebx,0
    js main_close_file_2 ;still -1: file 1 was never opened
    mov eax,6   ;invoke SYS_CLOSE (kernel opcode 6)
    int 80h     ;call the kernel
    
    main_close_file_2:
    
    mov ebx,[fd2] ;file number to close
    cmp ebx,0
    js main_exit ;still -1: file 2 was never opened
    mov eax,6   ;invoke SYS_CLOSE (kernel opcode 6)
    int 80h     ;call the kernel
    
    main_exit:
    
    mov eax, 1   ; invoke SYS_EXIT (kernel opcode 1)
    mov ebx, esi ; exit status chosen above
    int 80h
    
    include 'chastelib32.asm'
    
    ;variables for displaying information
    ;every string ends with a 0 byte; 0Ah is a line feed and 9 is a tab
    help_message db 'chastecmp by Chastity White Rose',0Ah,0Ah
    db 9,'chastecmp file1 file2',0Ah,0Ah
    db 'Differing bytes are shown in hexadecimal',0Ah
    db 'until the EOF has been reached.',0Ah,0
    
    ;these three are printed right after a filename
    file_open db ' opened',0
    file_error db ' error',0
    end_of_file_string db ' EOF',0
    
    db 23 dup 0 ;fill with extra space to match 1024 executable size
    
    ;variables for managing files
    ;they are declared with ? and so take no initial value from this source
    filename1 dd ? ;address of the name of the first file (popped from the stack in main)
    filename2 dd ? ;address of the name of the second file
    fd1 dd ?       ;file descriptor 1 (return value of SYS_OPEN)
    fd2 dd ?       ;file descriptor 2 (return value of SYS_OPEN)
    buf1 db ?      ;store byte from file 1 here
    buf2 db ?      ;store byte from file 2 here
    count1 dd ?    ;return value of the last SYS_READ on file 1
    count2 dd ?    ;return value of the last SYS_READ on file 2
    offset dd ?    ;address in both files of the bytes being compared
    

    How to use chastecmp

    Using the chastecmp program requires two filenames to be passed as command-line arguments. Although you can use any files you have, it makes sense to use a simple example with text files because they are so easy to create with the echo command.

    Run these commands to create the two files.

    echo "chandler is my birth name" > file1.txt
    echo "chastity is my trans name" > file2.txt
    

    Now that the files exist, run the program with both filenames as arguments:

    ./main file1.txt file2.txt
    

    If you have created these files and run the chastecmp program on them, you will see this result:

    file1.txt opened
    file2.txt opened
    00000003 6E 73
    00000004 64 74
    00000005 6C 69
    00000006 65 74
    00000007 72 79
    0000000F 62 74
    00000010 69 72
    00000011 72 61
    00000012 74 6E
    00000013 68 73
    file1.txt EOF
    file2.txt EOF
    

    How does chastecmp work?

    This program is much simpler than chastack or chastext, but it is close to 180 lines and still has some logic to follow. First thing it does is check to see how many command-line arguments were passed to the program. Since the name of the program always counts as 1, we subtract from this number and also pop the next argument into ebx just to get rid of it. The actual register used doesn’t matter in this case as long as it is not eax, which holds the number of arguments.

    The eax register is compared with 2. If this number is below 2, then there are not enough arguments to continue the program, and it will end. Otherwise, it will proceed to use the open call with both filenames and assume these files exist. If they do not exist, it will print the filename and then say error.

    If both files are opened, it will keep reading 1 byte from each file descriptor and store each in its own buffer of 1 byte. If the two bytes are the same, they will be ignored. However, if they are different, the address and the values of both bytes at that address will be displayed.

    The variable “offset” is used to keep track of which address we are at in both files, but it isn’t used to lseek in this program because we are going from beginning to end.

    If at any time the read system call returns 0, a message is displayed with the filename and EOF to tell the user that the end of that file has been reached.

    In the example I just used, both files are the same length of 26 bytes and will reach the end at the same time.

    But why should I care?

    The average person probably does not know why it matters to see the hexadecimal differences between two files. I know it seems silly, especially for small text files as I used in this chapter’s examples. However, I can give two examples of times I have used this information.

    The first example is relevant to Chapter 2, where I presented the header file “chaste-elf-32.nasm” which can be included to make a loadable program using the NASM assembler.

    I read the specification document for ELF files to describe what the fields were named and what the values meant. However, this information alone was not enough for me to successfully create the custom ELF header. I had to create ELF executable files with FASM because it has this feature built in. By creating slightly different programs, I was able to compare the binary differences in the different source files fed to FASM. The chastecmp program was extremely helpful to me as I used it hundreds of times in reverse engineering the ELF format.

    One of my discoveries was that when the size of a program increased, either by adding more code or adding more data statements, there was a number in the header that also increased. As it turns out, the memory size of the file increased even when data reservation keywords (such as rb,rw,rd, and rq) were used, even when the size of the file itself didn’t.

    The specification could tell me a lot, but without the example ELF headers FASM was already creating, I would not have been able to create dynamic headers to match programs written in FASM. I probably spent 12 hours on that project, but at least I can assemble any of my programs with NASM if I make the necessary syntax changes.

    But perhaps a more fun example, and also the reason I got started with programming, was that I used a file comparison tool to cheat at a Norse mythology game years ago. The game was called Castle of the Winds, and it ran on Windows 3.1, 98, and even XP.

    One of the features of that specific game was that it let you save the game at any time. I remember that I had 5 mana points. I saved the first file and then cast the magic arrow spell to spend one point. I then saved a second file and ran the Windows “fc” command to compare the two files in binary mode.

    fc /b 1.cwg 2.cwg
    

    It told me the address of the byte that had changed from 5 to 4. I then opened this in a hex editor named XVI32 and changed this byte to different values.

    In time, I was able to not only change my mana points but also hit points and experience points to make myself invincible in that game.

    I didn’t really know much about hexadecimal at this point, but by trial and error, I accidentally started understanding it. It was this experience of cheating in a video game that led me to learn about binary and hexadecimal number systems originally.

    I had seen for the first time that an understanding of computer arithmetic could allow me to break the rules and do things in a video game that the developer could not predict or prevent me from doing. In those days, I learned to do the same with many video games and had many fun adventures.

    In modern times, developers have gotten smarter and have put measures in place to prevent this form of cheating. Most notably, more games are multiplayer and read data from a server that stores the game data, where no user can hack it.

    But you have to understand that back in the 90s, nearly every single-player game that stored its data locally and didn’t connect to the internet could be hacked. I have had people criticize my habit of cheating in single-player games and say that it ruins the experience of the game.

    But what they don’t understand is that I didn’t care about the video game I was hacking, because Arithmetic had become my favorite game. My love of math was so great that I learned computer programming and had more fun writing programs in BASIC, C, and Assembly than I did playing video games in the first place.

    I can’t hack most modern games with these tricks, but I have found the art of computer programming, which is much more satisfying than any video game I have played in my life.

    In summary, the chastecmp program does the same thing as the “fc /b” command from DOS and Windows did. When I switched to Linux as my primary operating system, I wrote my own file comparison tool to always keep the fond memories of my childhood with me.

  • ELF Header Assembly Gists on Github

    I had just finished the 14th chapter of the Linux edition of Assembly Arithmetic Algorithms, and then I took a look at some old source code where I had learned enough of the ELF format to create my own ELF headers using either FASM or NASM. The differences between these two assemblers make them incompatible with each other. Still, I have made four separate gists on GitHub so that if someone wants to create their own ELF header for a program without relying on the built-in features of FASM, I have found the best way to do it.

    The purposes of this are both for my own greater understanding of the ELF format, because it is used in all Linux distributions, and also to provide a way for NASM users to benefit from features that currently only exist in FASM, which is still my favorite Assembler.

    FASM Hello 32-bit

    FASM Hello 64-bit

    NASM Hello 32-bit

    NASM Hello 64-bit

    Although my Linux Assembly book mostly focuses on using FASM because it is easier for beginners, I do officially support those who wish to port my programs to other assemblers. Porting is easy once you get past the small differences in the directives of each.

  • putchar

    putchar

    int putchar(int c);

    What does this function do?

    Writes 1 character to standard output (terminal or redirected to file).

    When would you use this function?

    1. When you need to write a simple character for formatting, such as a space, tab, newline, etc. to separate the output of other output functions like printf or fwrite.
    2. To print a specific range of characters in a loop.

    Where in your program would you call it?

    Any place where you need a character to separate data or you are unsure of the value of a byte and need to debug something. This is especially true in loops to identify where bugs are.

    Why is this function better or worse than alternatives?

    putchar is simpler to use than printf and only requires one argument: the character number.

    How can you use it?

    Here is a complete program using putchar that writes all the printable characters to the terminal using a loop. It then prints a new line character with the escape sequence '\n'.

    #include <stdio.h>
    
    int main(int argc, char *argv[])
    {
     int ch;
    
     /* 0x20 (space) up to 0x7E ('~') are the printable ASCII characters */
     for(ch=0x20; ch<0x7F; ch++)
     {
      putchar(ch);
     }
    
     /* finish the line */
     putchar('\n');
    
     return 0;
    }
    

    Compile and run

    gcc -Wall -ansi -pedantic main.c -o main && ./main

    Output

     !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
    
  • Chapter 9: Chad and Luke Share Bible Stories

    Chad was hard at work finding Bible stories he could tell the kids while they ate after soccer. He had managed to speak the story from Daniel chapter 1 in his own words, which made more sense than how it sounded in the King James Bible.

    Meanwhile, Luke was also busy picking his own set of Bible stories. He was looking for something he could use to fight against Chad’s Vegan diet and also justify his own beliefs about eating meat. He also remembered that Chad said he could bring any food he wanted for the children. He decided to buy some regular hot dogs and buns because he knew it would make Chad angry.

    On the next Sunday, Chad and Luke coached the kids in soccer, and everyone had a lot of fun. There seemed to be more children there than the previous week. Chad said it was probably because they told their friends about the fun of soccer and free food.

    When it was time for lunch, Chad got a big cooler out of his car and bags of chips. This choice of food surprised Luke, because he had expected Tofu sandwiches. He also noticed that there was no grill, and Stacy wasn’t there.

    Chad served everyone a plate full of chips and filled several cups with cold lemonade. He brought a cup to each child and told them to come to the table for more if they were still thirsty.

    “Chad, do you have a Bible story prepared today?” said Luke.

    “You bet I do, but first I need to drink some lemonade,” said Chad.

    “In that case, I will tell my story,” said Luke.

    “The story begins in Genesis chapter 6, when humans were so evil that God regretted making them. He decided the earth needed a great reset. He liked a man named Noah, who was righteous, and told him to build a big boat called an ark. He and his family would collect animals and lots of food to eat during a flood that would drown every other living thing. The only humans on the ark were Noah, his wife, their 3 sons, and their wives. They also had a male and female of every kind of animal,” said Luke.

    “Why did all the animals on the earth have to die?” said Simon.

    “They were not necessary because the animals on the ark would reproduce after the flood, and there would be more of all the animals,” said Luke.

    “What does it mean to reproduce?” asked Simon.

    “You might want to ask your parents about that. I am not qualified to answer that, especially for children. Anyhow, that’s not the point. After the flood ended, God gave humans permission to eat animals for food. In Genesis chapter 9, God said:

    
    And God blessed Noah and his sons, and said unto them, Be fruitful, and multiply, and replenish the earth.
    
    2 And the fear of you and the dread of you shall be upon every beast of the earth, and upon every fowl of the air, upon all that moveth [upon] the earth, and upon all the fishes of the sea; into your hand are they delivered.
    
    3 Every moving thing that liveth shall be meat for you; even as the green herb have I given you all things.
    
    

    “So you see, animals exist to be eaten, which reminds me, I brought hot dogs,” said Luke.

    The children hissed and booed at Luke. They wanted no part of eating animals, and they were quite traumatized by the Noah’s Ark story. They asked Chad to tell another Daniel story.

    “Of course, I can tell you a story,” said Chad.

    “Many years after King Nebuchadnezzar died, and also his son Belshazzar was killed, a new king named Darius took over. In spite of this, Daniel was still trusted by the new king and still considered a wise man or a prophet because he had interpreted dreams for King Nebuchadnezzar and had translated the writing on the wall for Belshazzar. He was assigned to be in authority over the princes and presidents,” said Chad.

    “The presidents and princes became jealous of Daniel because he was their boss and the king liked him more. Therefore, they tried to find a way to turn the king against him. They asked King Darius to write a decree that for the next 30 days, anyone who prays to any God or human other than him would be thrown into a den of lions. He agreed and wrote a decree that anyone caught praying to someone else would be executed by being thrown into the Lion’s Den. Though he didn’t realize that this was a trap being set for his most trusted servant, Daniel,” said Chad.

    “The men who asked him to make the decree knew they would find Daniel praying near his window as he always had. Daniel knew about the law, but he still prayed to God 3 times a day and didn’t change anything he did.”

    “Why didn’t he just keep his prayers hidden or hide somewhere where the bad guys couldn’t catch him?” said a young boy named Trevor.

    “I think that he knew it was better to be faithful to God and live in his truth, even though he knew he might be in trouble for it,” said Chad.

    “What happened after that?” said multiple children.

    “The princes and presidents told the king that Daniel was still praying to his God, despite the decree. They demanded that he be thrown into the Lion’s Den as was written in the executive order,” said Chad.

    “The king tried to talk his way out of this, but the law of the king could not be disobeyed even by the king himself because he would be admitting he made a mistake and was not a God. He knew he had been tricked by the bad guys. Daniel was thrown into the Lion’s Den, and the king could not eat or sleep that night because he was so worried about Daniel. The next day, he went and found that Daniel was still alive. The lions had not even touched him. Daniel said that God closed the mouth of the lions because he was innocent,” said Chad.

    “At that moment, the king ordered Daniel to be taken out of the den and to throw in all the men who had accused Daniel. The lions tore them each up before they even hit the ground. He then made a new executive order that everyone should worship and pray to the God of Daniel. He believed that he was the true God who could make sure that even hungry lions would not eat the innocent,” said Chad.

    “Come on, Chad, do you really believe this story? Those lions would have eaten Daniel if he were in there all night,” said Luke.

    “Tell me, Luke, do you believe the flood story you told the children in Genesis? Do you really think that a God who prevents lions from eating an innocent man would drown all the innocent animals of the whole earth, and that only 8 humans were righteous enough to be saved?” said Chad.

    “Well, these stories are both part of the Bible, so I guess they are both true,” said Luke.

    “But that’s not what I asked you, Luke. I asked whether YOU believe the story you read to the children,” said Chad.

    “I am not sure. But do you believe the story of Daniel in the Lion’s Den?” said Luke.

    “You bet I do. It only makes sense. Daniel was innocent, and he had done nothing wrong. We also know that he did not eat meat from last week’s story. He did not have the smell of meat on him, and the lions probably didn’t think he smelled like food. They probably would have left him alone even if God had not intervened. This story not only matches what I believe, but it makes sense to my brain as well,” said Chad.

    The children clapped and continued to enjoy the salty chips and the cool lemonade. Chad knew just what their bodies needed in the summer heat and told a story that they could understand and relate to.

    Nobody touched Luke’s hot dogs, not even him. He was too angry to eat. He was jealous of Chad, just like the princes and presidents who had tried to turn King Darius against Daniel.

  • chastecmp 64-bit

    This post is the source of the 64-bit edition of chastecmp, my file comparison tool. It behaves exactly the same as the 32-bit edition. However, I am slowly translating my best programs to use the 64-bit calling convention for the eventual 64-bit edition of my book. This will be a few years away but there is not too much work that needs to be done. Mostly I just have to use different registers and different numbers along with the syscall instruction instead of interrupt 0x80. The calls are standard and part of the Linux kernel. All I am doing is translating it to be more compatible with how 64-bit Linux does things.

    main.asm

    ;Linux 64-bit Assembly Source for chastecmp
    format ELF64 executable
    entry main
    
    include 'chastelib64.asm'
    
    main:
    
    ;Entry point of chastecmp (64-bit Linux, system calls through syscall).
    ;Opens the two files named on the command line, reads one byte at a
    ;time from each and prints "address byte1 byte2" in hexadecimal for
    ;every position where the two bytes differ.
    ;
    ;Exit status: 0 when the comparison ran (or the help text was shown),
    ;             1 when one of the files could not be opened.
    ;
    ;putstr_and_line, putint_and_space and putint_and_line are not part of
    ;this listing; presumably they are defined in chastelib64.asm.
    ;
    ;NOTE: this version is a few instructions longer than the code the
    ;"db 48 dup 0" padding line in the data section was tuned for. Adjust
    ;that padding if the executable must stay exactly 1280 bytes.
    
    ;radix will be 16 because this whole program is about hexadecimal
    mov [radix],16 ; can choose radix for integer input/output!
    mov [int_width],1
    
    ;mark both file descriptors as "not open" (-1) so that the cleanup
    ;code at the end only closes files that were really opened
    mov qword [filedesc1],-1
    mov qword [filedesc2],-1
    
    pop rax
    mov [argc],rax ;save the argument count for later
    
    ;first arg is the name of the program. we skip past it
    pop rax
    dec [argc]
    mov rax,[argc]
    
    cmp rax,2 ;do we have two arguments to be used as filenames?
    jb help
    mov [file_offset],0 ;assume the offset is 0,beginning of file
    jmp arg_open_file_1
    
    help:
    mov rax,help_message
    call putstring
    jmp main_end
    
    arg_open_file_1:
    pop rax
    mov [filename1],rax ; save the name of the file we will open to read
    
    call putstring ;print the name of the file we will try opening
    
    mov rsi,0   ;open file in read mode 
    mov rdi,rax ;rax still holds the filename because putstring restores rax
    mov rax,2   ;invoke SYS_OPEN (kernel opcode 2 on 64 bit systems)
    syscall     ;call the kernel
    
    cmp rax,0
    js file_error_display ;end program if the file can't be opened (negative return value)
    mov [filedesc1],rax ; save the file descriptor number for later use
    mov rax,file_open
    call putstr_and_line
    
    arg_open_file_2:
    pop rax
    mov [filename2],rax ; save the name of the file we will open to read
    
    call putstring ;print the name of the file we will try opening
    
    mov rsi,0   ;open file in read mode 
    mov rdi,rax ;rax still holds the filename because putstring restores rax
    mov rax,2   ;invoke SYS_OPEN (kernel opcode 2 on 64 bit systems)
    syscall     ;call the kernel
    
    cmp rax,0
    js file_error_display ;end program if the file can't be opened (negative return value)
    mov [filedesc2],rax ; save the file descriptor number for later use
    mov rax,file_open
    call putstr_and_line
    
    ;main loop: one pass reads one byte from each file and compares them
    files_compare:
    
    file_1_read_one_byte:
    mov rdx,1            ;number of bytes to read
    mov rsi,byte1        ;address to store the bytes
    mov rdi,[filedesc1]  ;move the opened file descriptor into rdi
    mov rax,0            ;invoke SYS_READ (kernel opcode 0 on 64 bit Intel)
    syscall              ;call the kernel
    
    ;rax will have the number of bytes read after system call
    mov [file_1_bytes_read],rax ;we save the number of bytes read for later
    cmp rax,0
    jnz file_2_read_one_byte ;unless zero bytes were read, proceed to read from next file
    
    ;zero bytes read: report the end of the first file
    mov rax,[filename1]
    call putstring
    mov rax,end_of_file_string
    call putstr_and_line
    
    ;Even if we have reached the end of the first file,
    ;we still proceed to read a byte from the second file
    ;to see if it also ends at the same address
    
    file_2_read_one_byte:
    mov rdx,1            ;number of bytes to read
    mov rsi,byte2        ;address to store the bytes
    mov rdi,[filedesc2]  ;move the opened file descriptor into rdi
    mov rax,0            ;invoke SYS_READ (kernel opcode 0 on 64 bit Intel)
    syscall              ;call the kernel
    
    ;rax will have the number of bytes read after system call
    mov [file_2_bytes_read],rax ;we save the number of bytes read for later
    cmp rax,0
    jnz check_both_bytes ;unless zero bytes were read, proceed to compare bytes from both files
    
    ;zero bytes read: report the end of the second file
    mov rax,[filename2]
    call putstring
    mov rax,end_of_file_string
    call putstr_and_line
    
    jmp main_end ;we have reached the end of one file and should end the program
    
    check_both_bytes:
    
    ;we add the number of bytes read from both files
    ;the sum is 2 only when each read delivered exactly one byte;
    ;anything else (end of file 1, or a negative error value) ends the program
    mov rax,[file_1_bytes_read]
    add rax,[file_2_bytes_read]
    cmp rax,2
    jnz main_end
    
    compare_bytes:
    
    mov al,[byte1]
    mov bl,[byte2]
    
    ;compare the two bytes and skip printing them if they are the same
    cmp al,bl
    jz bytes_are_same
    
    ;print the address and the bytes at that address
    mov rax,[file_offset]
    mov [int_width],8 ;the address is printed with 8 digits
    call putint_and_space
    mov [int_width],2 ;each byte is printed with 2 digits
    mov rax,0         ;clear the upper bits before loading single bytes into al
    mov al,[byte1]
    call putint_and_space
    mov al,[byte2]
    call putint_and_line
    
    bytes_are_same:
    
    inc [file_offset]
    
    jmp files_compare
    
    file_error_display:
    
    ;the filename has already been printed, so this completes the line
    mov rax,file_error
    call putstr_and_line
    mov rbx,1 ;exit status 1: a file could not be opened
    jmp main_close_files
    
    main_end:
    
    mov rbx,0 ;exit status 0 - 'No Errors'
    
    main_close_files:
    
    ;this is the end of the program
    ;we close the files that were opened and then use the exit call
    ;rbx holds the exit status; syscall only overwrites rax, rcx and r11
    
    mov rdi,[filedesc1] ;file number to close
    cmp rdi,0
    js main_close_file_2 ;still -1: file 1 was never opened
    mov rax,3           ;invoke SYS_CLOSE (kernel opcode 3 for 64 bit Intel)
    syscall             ;call the kernel
    
    main_close_file_2:
    
    mov rdi,[filedesc2] ;file number to close
    cmp rdi,0
    js main_exit ;still -1: file 2 was never opened
    mov rax,3           ;invoke SYS_CLOSE (kernel opcode 3 for 64 bit Intel)
    syscall             ;call the kernel
    
    main_exit:
    
    mov rax, 0x3C ; invoke SYS_EXIT (kernel opcode 0x3C (60 decimal) on 64 bit systems)
    mov rdi,rbx   ; exit status chosen above
    syscall
    
    ;variables for displaying information
    ;every string ends with a 0 byte; 0Ah is a line feed and 9 is a tab
    
    help_message db 'chastecmp by Chastity White Rose',0Ah,0Ah
    db 9,'chastecmp file1 file2',0Ah,0Ah
    db 'Differing bytes are shown in hexadecimal',0Ah
    db 'until the EOF has been reached.',0Ah,0
    
    ;these three are printed right after a filename
    file_open db ' opened',0
    file_error db ' error',0
    end_of_file_string db ' EOF',0
    
    db 48 dup 0 ;fill with extra space to match 1280 executable size
    
    ;variables for managing arguments and files
    ;they are declared with ? and so take no initial value from this source
    argc dq ?      ; argument count popped from the stack, then decremented to skip the program name
    filename1 dq ? ; address of the name of the first file to be opened
    filename2 dq ? ; address of the name of the second file to be opened
    filedesc1 dq ? ; file descriptor of file 1 (return value of SYS_OPEN)
    filedesc2 dq ? ; file descriptor of file 2 (return value of SYS_OPEN)
    byte1 db ?     ; byte most recently read from file 1
    byte2 db ?     ; byte most recently read from file 2
    file_1_bytes_read dq ? ; return value of the last SYS_READ on file 1
    file_2_bytes_read dq ? ; return value of the last SYS_READ on file 2
    file_offset dq ?       ; address in both files of the bytes being compared
    

    chastelib64.asm

    ; chastelib assembly header file for 64 bit Linux
    ; This file is where I keep the source of my most important Assembly functions
    ; These are my string and integer output and conversion routines.
    
    ; To simplify documentation, the Accumulator/Arithmetic register
    ; (ax,eax,rax) depending on bit size shall be referred to as register A
    ; for the description of these core functions because the A register
    ; is treated special both by the Intel company and my code.
    
    ; putstring; Prints a zero terminated string from the address pointed to by A register.
    ; intstr;    Converts the number in A into a zero terminated string and points A to that address
    ; putint;    Prints the integer in A by calling intstr and then putstring.
    ; strint;    Converts the zero terminated string into an integer and sets A to that value
       
    ; Now, the source of the functions begins, with comments included for parts that I felt needed explanation.
    
    ; putstring: print a zero terminated string to standard output
    ; In:  rax = address of the string
    ; Out: nothing. The result of the write system call is discarded.
    ; Every register this function touches (rax, rbx, rcx, rdx, rsi, rdi
    ; and r11) is saved on entry and restored on exit, so callers can keep
    ; values in them across the call. The flags are not preserved.
    
    putstring:
    
    push rax
    push rbx
    push rcx
    push rdx
    push rsi ;rsi and rdi carry system call arguments below
    push rdi
    push r11 ;the syscall instruction overwrites rcx and r11
    
    mov rbx,rax ; copy rax to rbx as well. Now both registers have the address of the main_string
    
    putstring_strlen_start: ; this loop finds the length of the string as part of the putstring function
    
    cmp [rbx],byte 0 ; compare byte at address rbx with 0
    jz putstring_strlen_end ; if comparison was zero, jump to loop end because we have found the length
    inc rbx
    jmp putstring_strlen_start
    
    putstring_strlen_end:
    sub rbx,rax ;subtract start pointer from current pointer to get length of string
    
    ;Write string using Linux Write system call
    ;Reference for 64 bit x86 syscalls is below.
    ;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/syscalls/#x86_64-64-bit
    
    mov rdx,rbx      ;number of bytes to write
    mov rsi,rax      ;pointer/address of string to write
    mov rdi,1        ;write to the STDOUT file
    mov rax,1        ;invoke SYS_WRITE (kernel opcode 1 on 64 bit systems)
    syscall          ;system call to write the message
    
    ;restore in the reverse order of the pushes
    pop r11
    pop rdi
    pop rsi
    pop rdx
    pop rcx
    pop rbx
    pop rax
    
    ret ; this is the end of the putstring function return to calling location
    
    ; Output buffer and formatting settings shared by the integer conversion routines.
    ; intstr writes digits into int_string from right to left, starting at the byte
    ; just before int_string_end, so the finished string always ends at the zero byte below.
    
    int_string db 64 dup '?' ;64 bytes: enough for every digit of a 64-bit integer written in binary (radix 2)
    
    int_string_end db 0 ;zero byte terminator for the integer string; must stay directly after int_string
    
    radix dq 2 ;number base read by both intstr and strint. 2=binary, 8=octal, 10=decimal, 16=hexadecimal
    int_width dq 8 ;minimum number of digits intstr produces. Shorter numbers are prefixed with '0' characters
    
    ;intstr: converts the unsigned integer in rax into a zero terminated string
    ;In:  rax = the number to convert
    ;     [radix] = number base. It should be from 2 to 36; smaller values are not handled here
    ;     [int_width] = minimum number of digits. Zeros are prefixed until this many digits exist
    ;Out: rax = address of the first character of the string (somewhere inside int_string)
    ;rbx, rcx, and rdx are overwritten. Callers that need them must save them (putint does this)
    ;Because rax ends up pointing at a zero terminated string, it can be passed directly to putstring
    
    intstr:
    
    mov rbx,int_string_end-1 ;find address of lowest digit (the byte just before the zero terminator)
    mov rcx,1 ;rcx counts how many digits have been written so far
    
    digits_start:
    
    mov rdx,0 ;div divides the 128 bit value rdx:rax, so the high half must be zero
    div qword [radix] ;rax = quotient, rdx = remainder which is the next digit
    cmp rdx,10
    jb decimal_digit
    
    hexadecimal_digit: ;we fall into here if the digit is 10 or higher and needs a letter
    sub rdx,10
    add rdx,'A'
    jmp save_digit
    
    decimal_digit: ;we go here if it is only a digit 0 to 9
    add rdx,'0'
    
    save_digit:
    
    mov [rbx],dl
    cmp rax,0
    jz intstr_end ;a quotient of zero means every digit has been written
    dec rbx
    inc rcx
    jmp digits_start
    
    intstr_end:
    
    prefix_zeros:
    cmp rcx,[int_width]
    jnb end_zeros ;we already have at least int_width digits
    cmp rcx,int_string_end-int_string
    jnb end_zeros ;the buffer is full, so stop instead of writing below int_string
    dec rbx
    mov [rbx],byte '0'
    inc rcx
    jmp prefix_zeros
    end_zeros:
    
    mov rax,rbx ;return the address of the first character of the finished string
    
    ret
    
    ; putint: print the integer held in rax.
    ; The work is delegated: intstr builds the digit string (using the radix and
    ; int_width variables) and returns its address in rax, then putstring writes
    ; that string to STDOUT.
    ; rax, rbx, rcx and rdx are pushed on entry and popped on exit, so the
    ; registers that intstr overwrites come back unchanged to the caller.
    
    putint:
            push    rax
            push    rbx
            push    rcx
            push    rdx
    
            call    intstr                  ; rax = address of the digit string
            call    putstring               ; print the string that rax points to
    
            pop     rdx
            pop     rcx
            pop     rbx
            pop     rax
            ret
    
    ; strint: parse the zero terminated string at the address in rax as an
    ; unsigned integer written in base [radix], and return the value in rax.
    ;
    ; Accepted characters are '0'-'9', 'A'-'Z' and 'a'-'z'; letters of either
    ; case stand for the digit values 10 to 35. A character is rejected when it
    ; is none of those, or when its digit value is not below the radix
    ; (for example 'G' in hexadecimal).
    ;
    ; In:  rax = address of the string
    ; Out: rax = value built from the characters read so far
    ;      [strint_error] = 0 when the whole string was accepted, 1 when
    ;      parsing stopped early at a rejected character
    ; rbx, rcx and rdx are overwritten (rdx by the mul instruction).
    
    strint_error db 0                       ; error flag, reset on every call to strint
    
    strint:
            mov     rbx, rax                ; rbx walks the string; rax becomes the result
            xor     eax, eax                ; result starts at 0 (also clears the upper half of rax)
            mov     byte [strint_error], 0
    
    .next_char:
            movzx   ecx, byte [rbx]         ; rcx = current character, upper bits zero
            inc     rbx
            cmp     cl, 0
            jz      .done                   ; zero terminator: the string is finished
    
            ; Classify the character by walking up the ASCII ranges.
            cmp     cl, '0'
            jb      .reject                 ; below '0': not a digit or a letter
            cmp     cl, '9'
            jbe     .digit
            cmp     cl, 'A'
            jb      .reject                 ; between '9' and 'A'
            cmp     cl, 'Z'
            jbe     .upper
            cmp     cl, 'a'
            jb      .reject                 ; between 'Z' and 'a'
            cmp     cl, 'z'
            ja      .reject                 ; above 'z'
    
            sub     cl, 'a'-10              ; 'a'..'z' -> 10..35
            jmp     .accumulate
    
    .upper:
            sub     cl, 'A'-10              ; 'A'..'Z' -> 10..35
            jmp     .accumulate
    
    .digit:
            sub     cl, '0'                 ; '0'..'9' -> 0..9
    
    .accumulate:
            cmp     rcx, [radix]
            jnb     .reject                 ; digit value does not exist in this base
    
            mul     qword [radix]           ; rax = rax * radix (mul overwrites rdx with the high half)
            add     rax, rcx                ; append the new digit
            jmp     .next_char
    
    .reject:
            inc     byte [strint_error]     ; record that parsing stopped on a bad character
    
    .done:
            ret
    
    ; Small output helpers follow; they print a single space or a single newline
    ; so the main program does not have to set up a string for each one.
    ;
    ; putspace: print one space character by passing the string below to putstring.
    ; rax is pushed before it is loaded with the string address and popped
    ; afterwards, so the caller's rax is kept.
    
    space db ' ',0                          ; a space followed by the zero terminator
    
    putspace:
            push    rax
            mov     rax, space
            call    putstring
            pop     rax
            ret
    
    line db 0Ah,0                           ; a line feed followed by the zero terminator
    
    ; putline: print one newline by passing the string above to putstring.
    ; rax is pushed before it is loaded with the string address and popped
    ; afterwards, so the caller's rax is kept. Every other register is left
    ; exactly as putstring leaves it.
    
    putline:
            push    rax
            mov     rax, line
            call    putstring
            pop     rax
            ret
    
    ; putchar: print the single character whose value is in al.
    ; The character is stored in the first byte of the two byte buffer below.
    ; The second byte is never written and stays zero, so the buffer is a one
    ; character zero terminated string that putstring can print.
    ; rax is pushed on entry and popped before returning.
    
    char: db 0,0                            ; [character][zero terminator]
    
    putchar:
            push    rax
            mov     [char], al              ; place the character in front of the terminator
            mov     rax, char
            call    putstring
            pop     rax
            ret
    
    ; putint_and_space: print the integer in rax, then a single space.
    ; Combining the two calls here means the main program needs only one call
    ; for this very common pair of operations.
    
    putint_and_space:
            call    putint
            jmp     putspace                ; tail call: the ret inside putspace returns to our caller
    
    ; putint_and_line: print the integer in rax, then a line feed.
    ; Combining the two calls here means the main program needs only one call
    ; for this very common pair of operations.
    
    putint_and_line:
            call    putint
            jmp     putline                 ; tail call: the ret inside putline returns to our caller
    
    ; putstr_and_line: print the zero terminated string at the address in rax,
    ; then a line feed. Because the newline is added here, strings in the main
    ; program do not need to contain one, and one call replaces two.
    
    putstr_and_line:
            call    putstring
            jmp     putline                 ; tail call: the ret inside putline returns to our caller