This post contains the source of the 64-bit edition of chastecmp, my file comparison tool. It behaves exactly the same as the 32-bit edition. I am slowly translating my best programs to use the 64-bit system call convention for the eventual 64-bit edition of my book. That edition is still a few years away, but there is not too much work that needs to be done. Mostly I just have to use different registers and different system call numbers, along with the syscall instruction instead of interrupt 0x80. The calls are standard and part of the Linux kernel. All I am doing is translating the program to be more compatible with how 64-bit Linux does things.
main.asm
;Linux 64-bit Assembly Source for chastecmp
format ELF64 executable
entry main
include 'chastelib64.asm'
;-----------------------------------------------------------------------
; main - entry point of chastecmp
;
; Usage:  chastecmp file1 file2
;
; Opens both files read-only and reads them one byte at a time in
; lockstep. For every offset where the two bytes differ it prints
;       offset byte1 byte2
; in hexadecimal. The run stops as soon as either file reaches its end.
;
; Stack on entry (as consumed by the pops below):
;       argc, address of program name, address of file1, address of file2
;
; Exit status: 0 after a normal run or after printing the help text,
;              1 when a file could not be opened or read.
; Only descriptors that were really opened are closed again.
;
; r12 holds the exit status for the whole run; none of the output
; routines touch it and system calls preserve it.
;-----------------------------------------------------------------------
SYS_READ  = 0
SYS_OPEN  = 2
SYS_CLOSE = 3
SYS_EXIT  = 60

main:
        ;radix will be 16 because this whole program is about hexadecimal
        mov     [radix],16
        mov     [int_width],1
        xor     r12d,r12d               ;exit status = 0 until something fails

        pop     rax                     ;argument count
        pop     rcx                     ;program name, not needed
        dec     rax
        mov     [argc],rax              ;arguments that follow the program name
        cmp     rax,2
        jl      help                    ;signed compare: a count of 0 or -1 also lands on help
        mov     [file_offset],0         ;comparison starts at the beginning of the files
        jmp     arg_open_file_1

help:
        mov     rax,help_message
        call    putstring
        jmp     exit_program            ;nothing was opened, so there is nothing to close

arg_open_file_1:
        pop     rax
        mov     [filename1],rax
        call    putstring               ;echo the name; putstring hands rax back unchanged
        mov     rdi,rax                 ;path of the file
        xor     esi,esi                 ;flags 0 = read only
        mov     eax,SYS_OPEN
        syscall
        test    rax,rax
        js      file_1_open_error       ;negative result means the open failed
        mov     [filedesc1],rax
        mov     rax,file_open
        call    putstr_and_line

arg_open_file_2:
        pop     rax
        mov     [filename2],rax
        call    putstring
        mov     rdi,rax
        xor     esi,esi
        mov     eax,SYS_OPEN
        syscall
        test    rax,rax
        js      file_2_open_error
        mov     [filedesc2],rax
        mov     rax,file_open
        call    putstr_and_line

files_compare:
file_1_read_one_byte:
        mov     edx,1                   ;one byte per read
        mov     rsi,byte1
        mov     rdi,[filedesc1]
        mov     eax,SYS_READ
        syscall
        mov     [file_1_bytes_read],rax
        test    rax,rax
        js      file_1_read_error       ;negative = read failed
        jnz     file_2_read_one_byte    ;got a byte, go fetch its partner
        mov     rax,[filename1]         ;zero bytes: file 1 has ended
        call    putstring
        mov     rax,end_of_file_string
        call    putstr_and_line
        ;Even if we have reached the end of the first file,
        ;we still read from the second file
        ;to see if it also ends at the same address

file_2_read_one_byte:
        mov     edx,1
        mov     rsi,byte2
        mov     rdi,[filedesc2]
        mov     eax,SYS_READ
        syscall
        mov     [file_2_bytes_read],rax
        test    rax,rax
        js      file_2_read_error
        jnz     check_both_bytes
        mov     rax,[filename2]         ;zero bytes: file 2 has ended
        call    putstring
        mov     rax,end_of_file_string
        call    putstr_and_line
        jmp     main_end

check_both_bytes:
        ;a comparison only makes sense when each file supplied a byte
        mov     rax,[file_1_bytes_read]
        add     rax,[file_2_bytes_read]
        cmp     rax,2
        jnz     main_end

compare_bytes:
        mov     al,[byte1]
        cmp     al,[byte2]
        je      bytes_are_same          ;identical bytes are not reported
        ;print the address and the bytes at that address
        mov     rax,[file_offset]
        mov     [int_width],8
        call    putint_and_space
        mov     [int_width],2
        movzx   eax,byte [byte1]
        call    putint_and_space
        movzx   eax,byte [byte2]
        call    putint_and_line

bytes_are_same:
        inc     [file_offset]
        jmp     files_compare

file_1_read_error:
        mov     rax,[filename1]
        jmp     read_error_display
file_2_read_error:
        mov     rax,[filename2]
read_error_display:
        call    putstring               ;name of the file that failed
        mov     rax,file_error
        call    putstr_and_line
        mov     r12d,1
        jmp     close_file_2            ;both files are open at this point

file_1_open_error:
        mov     rax,file_error          ;the file name was already printed
        call    putstr_and_line
        mov     r12d,1
        jmp     exit_program            ;no descriptor exists yet

file_2_open_error:
        mov     rax,file_error
        call    putstr_and_line
        mov     r12d,1
        jmp     close_file_1            ;only the first file is open

main_end:
        ;this is the end of the program
        ;we close the open files and then use the exit call
close_file_2:
        mov     rdi,[filedesc2]
        mov     eax,SYS_CLOSE
        syscall
close_file_1:
        mov     rdi,[filedesc1]
        mov     eax,SYS_CLOSE
        syscall
exit_program:
        mov     rdi,r12                 ;exit status collected above
        mov     eax,SYS_EXIT
        syscall
;variables for displaying information
;every string here ends with a 0 byte because putstring scans for that terminator
;help_message is the usage text main prints when too few arguments are given
help_message db 'chastecmp by Chastity White Rose',0Ah,0Ah
db 9,'chastecmp file1 file2',0Ah,0Ah
db 'Differing bytes are shown in hexadecimal',0Ah
db 'until the EOF has been reached.',0Ah,0
file_open db ' opened',0 ;printed after a file name once that file was opened
file_error db ' error',0 ;printed after a file name when that file cannot be opened
end_of_file_string db ' EOF',0 ;printed after a file name when a read returned zero bytes
db 48 dup 0 ;48 zero bytes of padding, fill with extra space to match 1280 executable size
;variables for managing arguments and files
;they are declared with ? so no initial value is given in the source
argc dq ? ;argument count saved by main
filename1 dq ? ; address of the name of the first file to be opened
filename2 dq ? ; address of the name of the second file to be opened
filedesc1 dq ? ; file descriptor returned by the open call for the first file
filedesc2 dq ? ; file descriptor returned by the open call for the second file
byte1 db ? ;most recent byte read from the first file
byte2 db ? ;most recent byte read from the second file
file_1_bytes_read dq ? ;result of the most recent read call on the first file
file_2_bytes_read dq ? ;result of the most recent read call on the second file
file_offset dq ? ;offset of the byte pair being compared, incremented once per pair
chastelib64.asm
; chastelib assembly header file for 64 bit Linux
; This file is where I keep the source of my most important Assembly functions
; These are my string and integer output and conversion routines.
; To simplify documentation, the Accumulator/Arithmetic register
; (ax, eax or rax, depending on bit size) shall be referred to as register A
; in the description of these core functions, because the A register
; is treated specially both by Intel and by my code.
; putstring; Prints a zero terminated string from the address pointed to by the A register.
; intstr; Converts the number in A into a zero terminated string and points A to that address
; putint; Prints the integer in A by calling intstr and then putstring.
; strint; Converts the zero terminated string into an integer and sets A to that value
; Now, the source of the functions begins, with comments included for parts that I felt needed explanation.
;-----------------------------------------------------------------------
; putstring - write a zero terminated string to standard output
;
; In:   rax = address of the first byte of the string
; Out:  nothing. rax and every other general register the routine or
;       the system call touches (rcx, rdx, rsi, rdi, r11) is restored,
;       so a caller can print at any point without saving anything.
;       Flags are not preserved. The result of the write call is ignored.
;
; The length is found by scanning for the 0 byte; an empty string
; results in a write of zero bytes.
;-----------------------------------------------------------------------
putstring:
        push    rax
        push    rcx                     ;syscall overwrites rcx and r11
        push    rdx
        push    rsi
        push    rdi
        push    r11

        mov     rsi,rax                 ;rsi = start of the string, also the buffer for the write
        mov     rdx,rax                 ;rdx walks forward to the terminator
putstring_strlen_start:                 ;this loop finds the length of the string
        cmp     byte [rdx],0
        jz      putstring_strlen_end    ;terminator found
        inc     rdx
        jmp     putstring_strlen_start
putstring_strlen_end:
        sub     rdx,rsi                 ;end pointer - start pointer = number of bytes to write

        ;Write string using Linux Write system call
        ;Reference for 64 bit x86 syscalls is below.
        ;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/syscalls/#x86_64-64-bit
        mov     edi,1                   ;write to the STDOUT file
        mov     eax,1                   ;invoke SYS_WRITE (kernel opcode 1 on 64 bit systems)
        syscall

        pop     r11
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rax
        ret
; This is the location in memory where digits are written to by the intstr function
; The string of bytes and settings such as the radix and width are global variables defined below.
int_string db 64 dup '?' ;enough bytes to hold maximum size 64-bit binary integer
int_string_end db 0 ;zero byte terminator for the integer string
radix dq 2 ;radix or base for integer output. 2=binary, 8=octal, 10=decimal, 16=hexadecimal
int_width dq 8 ;default width of integers. Extra zeros prefixed if more than 1

;-----------------------------------------------------------------------
; intstr - convert the unsigned integer in rax to text
;
; In:   rax       = value to convert
;       radix     = number base, 2 to 36 (digits above 9 become A..Z).
;                   Must be at least 2: the value is divided by radix
;                   until it reaches zero.
;       int_width = minimum number of digits; shorter results get
;                   leading zeros. The padding never goes past the start
;                   of the 64 byte buffer, so a width above 64 acts
;                   like 64.
; Out:  rax = address of the first digit inside int_string; the text is
;             terminated by the 0 byte at int_string_end, which means
;             it can be handed straight to putstring
; Clobbers: rbx, rcx, rdx, flags
;
; Digits are produced from least to most significant, so the buffer is
; filled from its last byte backwards.
;-----------------------------------------------------------------------
intstr:
        mov     rbx,int_string_end-1    ;slot of the lowest digit (just before the zero terminator)
        mov     rcx,1                   ;number of digits written, counting the one produced next
digits_start:
        xor     edx,edx                 ;div uses rdx:rax, the upper half has to be zero
        div     qword [radix]           ;rax = what is left of the value, rdx = next digit
        cmp     rdx,10
        jb      decimal_digit
        add     rdx,'A'-10              ;10..35 become 'A'..'Z'
        jmp     save_digit
decimal_digit:                          ;we go here if it is only a digit 0 to 9
        add     rdx,'0'
save_digit:
        mov     [rbx],dl
        test    rax,rax
        jz      intstr_end              ;nothing left to divide
        dec     rbx
        inc     rcx
        jmp     digits_start
intstr_end:
        mov     rdx,int_string          ;lowest address that may be written
prefix_zeros:
        cmp     rcx,[int_width]
        jnb     end_zeros               ;requested width reached
        cmp     rbx,rdx
        jbe     end_zeros               ;buffer is full, stop rather than write in front of it
        dec     rbx
        mov     byte [rbx],'0'
        inc     rcx
        jmp     prefix_zeros
end_zeros:
        mov     rax,rbx                 ;hand back the address of the first character
        ret
;-----------------------------------------------------------------------
; putint - print the integer held in rax
;
; The text form is produced by intstr, so the current radix and
; int_width settings decide how the number looks, and the result is
; sent to putstring.
;
; In:   rax = value to print
; Out:  rax, rbx, rcx and rdx hold the same values as on entry
;       (intstr overwrites all four, so they are kept on the stack)
;-----------------------------------------------------------------------
putint:
        push    rdx
        push    rcx
        push    rbx
        push    rax

        call    intstr                  ;rax now holds the address of the digit string
        call    putstring

        pop     rax
        pop     rbx
        pop     rcx
        pop     rdx
        ret
;-----------------------------------------------------------------------
; strint - convert a zero terminated string to an integer
;
; In:   rax   = address of the string
;       radix = number base used to interpret the digits
; Out:  rax          = value of the digits accepted so far
;       strint_error = 0 when the whole string was converted,
;                      1 when conversion stopped at a bad character
; Clobbers: rbx, rcx, flags, and rdx once at least one digit was accepted
;
; Accepted characters are 0-9, A-Z and a-z (letters count as 10..35 in
; either case). A character is rejected when it is none of these or when
; its value is not below radix, e.g. G to Z cannot be used in hexadecimal.
; Conversion stops at the first rejected character.
; Values that do not fit in 64 bits wrap around; this is not reported.
;-----------------------------------------------------------------------
strint_error db 0 ;error flag set by strint, the caller may inspect it after the call

strint:
        mov     rbx,rax                 ;rbx walks through the string, rax becomes the result
        xor     eax,eax
        mov     byte [strint_error],0   ;start every conversion without errors

read_strint:
        movzx   ecx,byte [rbx]          ;next character, upper bits of rcx cleared
        inc     rbx
        cmp     cl,0
        jz      strint_end              ;terminator: the string was converted completely

        ;sort the character into one of the three accepted ranges
        cmp     cl,'0'
        jb      strint_end_error        ;everything below '0' is invalid
        cmp     cl,'9'
        jbe     is_digit
        cmp     cl,'A'
        jb      strint_end_error        ;between '9' and 'A'
        cmp     cl,'Z'
        jbe     is_upper
        cmp     cl,'a'
        jb      strint_end_error        ;between 'Z' and 'a'
        cmp     cl,'z'
        ja      strint_end_error        ;everything above 'z' is invalid

is_lower:
        sub     cl,'a'-10               ;'a'..'z' -> 10..35
        jmp     process_char
is_upper:
        sub     cl,'A'-10               ;'A'..'Z' -> 10..35
        jmp     process_char
is_digit:
        sub     cl,'0'                  ;'0'..'9' -> 0..9

process_char:
        cmp     rcx,[radix]
        jnb     strint_end_error        ;a valid character, but too large for this base
        mul     qword [radix]           ;result = result * radix + digit
        add     rax,rcx
        jmp     read_strint

strint_end_error:                       ;a character was rejected
        inc     byte [strint_error]
strint_end:
        ret
;Small helpers that print a single space or a single newline.
;They keep the calling code short when many strings and integers are printed.

;putspace - print one space character
;rax is saved and restored around the call, so the caller's value survives
space db ' ',0 ;zero terminated string holding just the space
putspace:
        push    rax
        lea     rax,[space]             ;address of the string for putstring
        call    putstring
        pop     rax
        ret
;putline - print one newline character (0Ah)
;The caller's rax is parked on the stack while rax carries the address
;of the newline string to putstring, and is taken back before returning,
;so a newline can be printed in the middle of a computation that uses rax.
line db 0Ah,0 ;zero terminated string holding just the newline
putline:
        push    rax
        lea     rax,[line]              ;address of the string for putstring
        call    putstring
        pop     rax
        ret
;putchar - print the single character held in al
;The character is stored in the first byte of the two byte buffer below;
;the second byte stays 0 and acts as the string terminator for putstring.
;A value of 0 in al therefore prints nothing. rax is restored before returning.
char: db 0,0
putchar:
        push    rax
        mov     [char],al               ;build a one character string
        lea     rax,[char]
        call    putstring
        pop     rax
        ret
;putint_and_space - print the integer in rax, then one space
;Exists only so that callers need one call instead of two,
;which makes the main program a little smaller.
putint_and_space:
        call    putint
        jmp     putspace                ;putspace returns straight to our caller
;putint_and_line - print the integer in rax, then a line feed
;Exists only so that callers need one call instead of two,
;which makes the main program a little smaller.
putint_and_line:
        call    putint
        jmp     putline                 ;putline returns straight to our caller
;putstr_and_line - print the zero terminated string at rax, then a line feed
;Callers need one call instead of two, and strings do not have to
;carry their own newline.
putstr_and_line:
        call    putstring
        jmp     putline                 ;putline returns straight to our caller
