64 bit Linux chastehex

I took my previous 32 bit Linux program and translated it to use the “syscall” instruction, along with the new registers and function numbers that 64 bit Linux programs require.

The behavior of the program is identical to the 32 bit version of chastehex, however this was a stepping stone into 64 bit development in Assembly for Linux. There could be some use for 64 bit programs, but none that I can think of right now. Still, it is good to be prepared. Also, there was quite a bit of research that went into learning how to do the system calls in 64 bit mode. Those who are interested in learning how to make console programs for Linux in 64 bit can ask me questions about my source.

The reason chastehex makes such a good program to base my standard library design on is that it does all the basic things that are needed for most programs.

  • Writes strings and numbers to standard output
  • opens and closes a file and reads or writes to it
  • accepts command line arguments and changes behavior accordingly
  • displays a message of how to use it if it is launched with no arguments
  • uses only functions supplied by the Linux kernel

This post contains the full source for the 64 bit version of chastehex. You can also see the FASM forum thread about it here:

https://board.flatassembler.net/topic.php?p=246358

main.asm

;Linux 64-bit Assembly Source for chastehex
;a special tool originally written in C
format ELF64 executable
entry main

include 'chastelib64.asm'
include "chasteio64.asm"

main:
;Program entry point.
;Usage (see help_message): chastehex file [address [value ...]]
;  no arguments          -> print the help text
;  file                  -> hexdump the whole file, 16 bytes per row
;  file address          -> print the single byte stored at that address
;  file address value... -> write each value as one byte starting at address
;All numbers are read and printed in hexadecimal.
;A system call that fails (negative result) is reported as "File Error Code: n"
;instead of being treated as data, and a file is only closed if it was opened.

    ;radix will be 16 because this whole program is about hexadecimal
    mov     [radix],16
    ;overwrite the line feed that follows the digit buffer with a zero so that
    ;putint prints only the digits; spaces and newlines are printed manually
    mov     [int_newline],0

    ;at process start the stack holds argc followed by the argument pointers
    pop     rax
    mov     [argc],rax          ;save the argument count for later

    ;first arg is the name of the program. we skip past it
    pop     rax
    dec     [argc]

    ;before we try to get the first argument as a filename, check that it exists
    cmp     [argc],0
    jnz     arg_open_file

help:
    mov     rax,help_message
    call    putstring
    jmp     main_exit           ;no file was opened, so there is nothing to close

arg_open_file:

    pop     rax
    dec     [argc]
    mov     [filename],rax      ;save the name of the file we will open
    call    putstring           ;putstring and putline both preserve rax,
    call    putline             ;so rax still points to the name for open

    call    open

    test    rax,rax
    js      main_exit           ;open printed the error code; no descriptor to close

    mov     [filedesc],rax      ;save the file descriptor number for later use
    mov     [file_offset],0     ;assume the offset is 0, beginning of file

    ;if there are no more args after filename, just hexdump the file
    cmp     [argc],0
    jnz     next_arg_address    ;otherwise the next argument is an address

hexdump:

    mov     rdx,0x10            ;number of bytes to read
    mov     rsi,byte_array      ;address to store the bytes
    mov     rdi,[filedesc]      ;opened file descriptor
    mov     rax,0               ;SYS_READ (0 on 64 bit Intel)
    syscall

    mov     [bytes_read],rax
    test    rax,rax
    js      io_error            ;negative result is an error code, not a byte count
    jnz     file_success        ;more than zero bytes read: display them

    ;zero bytes read means end of file
    ;if the offset is still zero the file is empty, so display EOF
    ;otherwise rows were already printed and we end silently
    cmp     [file_offset],0
    jnz     main_end

    call    show_eof
    jmp     main_end

file_success:
    call    print_bytes_row     ;prints the row and advances file_offset
    jmp     hexdump

;address argument section
next_arg_address:

    pop     rax                 ;pop the argument and process it as a hex number
    dec     [argc]
    call    strint

    mov     rdx,0               ;whence argument (SEEK_SET)
    mov     rsi,rax             ;move the file cursor to this address
    mov     rdi,[filedesc]      ;opened file descriptor
    mov     rax,8               ;SYS_LSEEK (8 on 64 bit Intel)
    syscall

    test    rax,rax
    js      io_error            ;do not store an error code as the file offset

    mov     [file_offset],rax   ;remember the new offset

    ;if there are still arguments, skip the read and enter writing mode
    cmp     [argc],0
    jnz     next_arg_write

read_one_byte:
    mov     rdx,1               ;number of bytes to read
    mov     rsi,byte_array      ;address to store the byte
    mov     rdi,[filedesc]      ;opened file descriptor
    mov     rax,0               ;SYS_READ (0 on 64 bit Intel)
    syscall

    ;rax holds the number of bytes read, or a negative error code
    test    rax,rax
    js      io_error
    cmp     rax,1
    jz      print_byte_read     ;exactly 1 byte was read, print it

    call    show_eof            ;nothing was read at this address
    jmp     main_end

;print the address and the byte at that address
print_byte_read:
    call    print_byte_info
    ;falls through; argc is zero on this path so the check below ends the program

;this section interprets the rest of the args as bytes to write
next_arg_write:
    cmp     [argc],0
    jz      main_end

    pop     rax
    dec     [argc]
    call    strint              ;convert the string to a hex number

    mov     [byte_array],al     ;only the low byte of the number is written

    mov     rdx,1               ;write 1 byte
    mov     rsi,byte_array      ;address of the byte to write
    mov     rdi,[filedesc]      ;write to this file descriptor
    mov     rax,1               ;SYS_WRITE (1 on 64 bit systems)
    syscall

    test    rax,rax
    js      io_error            ;do not report a byte that was not written

    call    print_byte_info
    inc     [file_offset]

    jmp     next_arg_write

;a system call on the open file failed; rax holds the negative error code
;the message format matches the one used by the open function
io_error:
    neg     rax                 ;make the error code positive
    push    rax
    mov     rax,open_error_message
    call    putstring
    pop     rax
    call    putint
    call    putline
    ;fall through to close the file and exit

main_end:

    ;paths that reach this label have an open file, so close it before exiting
    mov     rax,[filedesc]      ;file number to close
    call    close

main_exit:

    mov     rax,0x3C            ;SYS_EXIT (0x3C, 60 decimal, on 64 bit systems)
    mov     rdi,0               ;exit status 0
    syscall


;this function prints one row of a hexdump:
;the file offset, up to 16 bytes from byte_array as two digit numbers,
;padding so that short rows line up, and then the same bytes as text
;
;input:   [bytes_read]  = number of valid bytes in byte_array
;         [file_offset] = offset of the first byte of this row
;output:  [file_offset] is advanced by [bytes_read]
;changes: rax, rbx, rcx, [int_width], and byte_array (see print_bytes_row_text)
;
;a count of zero, a negative count (error code from a failed read) or a count
;above 16 prints nothing and leaves file_offset alone; without this check the
;loops below would run far past the end of byte_array
print_bytes_row:
    mov     rcx,[bytes_read]
    test    rcx,rcx
    jz      .done
    cmp     rcx,0x10
    ja      .done               ;unsigned compare also rejects negative counts

    mov     rax,[file_offset]
    mov     [int_width],8       ;the offset is printed with at least 8 digits
    call    putint
    call    putspace

    mov     rbx,byte_array
    add     [file_offset],rcx   ;the next row starts after these bytes

    ;rbx = next byte to print, rcx = bytes left in this row (always above zero here)
    ;putint and putspace preserve both registers
.next_byte:
    movzx   eax,byte [rbx]      ;zero extend the byte into rax
    mov     [int_width],2
    call    putint
    call    putspace

    inc     rbx
    dec     rcx
    jnz     .next_byte

    ;a short row gets three spaces for every missing byte so the text column lines up
    mov     rcx,[bytes_read]
    mov     rax,space_three     ;putstring preserves rax, so load it once
    jmp     .pad_check
.pad:
    call    putstring
    inc     rcx
.pad_check:
    cmp     rcx,0x10
    jb      .pad

    ;print the characters after the hex bytes
    call    print_bytes_row_text
    call    putline

.done:
    ret

space_three db '   ',0

;prints the first [bytes_read] bytes of byte_array as text
;bytes outside the printable range 0x20 to 0x7E are replaced with '.'
;the replacement is done inside byte_array itself and a zero terminator is
;stored after the last byte, so the original data is modified
;
;changes: rax, rbx, rcx
;
;a count of zero, a negative count or a count above 16 prints an empty string;
;without this check the loop would write beyond the 17 bytes of byte_array
print_bytes_row_text:
    mov     rbx,byte_array
    mov     rcx,[bytes_read]
    test    rcx,rcx
    jz      .terminate
    cmp     rcx,0x10
    ja      .terminate          ;unsigned compare also rejects negative counts

    ;rbx = current byte, rcx = bytes left (always above zero here)
.next_char:
    mov     al,[rbx]

    ;anything below space or above '~' is not printable
    cmp     al,0x20
    jb      .replace
    cmp     al,0x7E
    jbe     .keep

.replace:
    mov     byte [rbx],'.'      ;placeholder for a non printable byte

.keep:
    inc     rbx
    dec     rcx
    jnz     .next_char

.terminate:
    mov     byte [rbx],0        ;make sure the string is zero terminated

    mov     rax,byte_array
    call    putstring

    ret


;show_eof: prints one line made of the current file offset, a space and "EOF"
;the offset is printed with a minimum width of 8 digits in the current radix
;changes: rax (holds the address of end_of_file_string on return), [int_width]
show_eof:
    mov     qword [int_width],8
    mov     rax,[file_offset]
    call    putint
    call    putspace

    mov     rax,end_of_file_string
    call    putstring

    jmp     putline             ;tail call: putline returns straight to our caller

;print_byte_info: prints one line made of the current file offset (at least
;8 digits), a space and the first byte of byte_array (at least 2 digits)
;changes: rax (holds the byte value on return), [int_width] (left at 2)
print_byte_info:
    mov     qword [int_width],8
    mov     rax,[file_offset]
    call    putint
    call    putspace

    mov     qword [int_width],2
    movzx   eax,byte [byte_array]   ;zero extend the byte into rax
    call    putint

    jmp     putline             ;tail call: putline returns straight to our caller

;zero terminated text that show_eof prints after the file offset
end_of_file_string db 'EOF',0

;zero terminated usage text that main prints when no file name argument is given
;0Ah is a line feed and 9 is a tab character
help_message db 'Welcome to chastehex! The tool for reading and writing bytes of a file!',0Ah,0Ah
db 'To hexdump an entire file:',0Ah,0Ah,9,'chastehex file',0Ah,0Ah
db 'To read a single byte at an address:',0Ah,0Ah,9,'chastehex file address',0Ah,0Ah
db 'To write a single byte at an address:',0Ah,0Ah,9,'chastehex file address value',0Ah,0Ah,0

;variables for managing arguments
argc dq 0 ; number of command line arguments that main has not consumed yet
filename dq 0 ; address of the name of the file to be opened
filedesc dq 0 ; file descriptor returned by open
bytes_read dq 0 ; result of the most recent read system call in the hexdump loop
file_offset dq 0 ; file position that is printed in front of the bytes




;where we will store data from the file
;main reads at most 16 bytes at a time; the 17th byte leaves room for the
;zero terminator that print_bytes_row_text stores after the data
byte_array db 17 dup ?

chastelib64.asm

; This file is where I keep my function definitions.
; These are usually my string and integer output routines.

; putstring: prints the zero terminated string pointed to by register rax
;
; input:   rax = address of the string
; output:  none; the result of the write system call is discarded
; changes: no registers. Every register this function or the system call
;          overwrites (rax, rbx, rcx, rdx, rsi, rdi, r11) is saved and restored,
;          so callers can keep values in them across the call. The 64 bit write
;          call takes its arguments in rdi/rsi/rdx and the syscall instruction
;          itself overwrites rcx and r11.

stdout dq 1 ; variable for standard output so that it can theoretically be redirected

putstring:

    push    rax
    push    rbx
    push    rcx
    push    rdx
    push    rsi
    push    rdi
    push    r11

    mov     rbx,rax             ; rbx walks the string while rax keeps its start

; this loop finds the length of the string
.find_end:
    cmp     byte [rbx],0        ; compare byte at address rbx with 0
    jz      .found_end          ; the terminating zero has been reached
    inc     rbx
    jmp     .find_end

.found_end:
    sub     rbx,rax             ; rbx = end - start = number of bytes to write

;write string using Linux Write system call
;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/syscalls/#x86_64-64-bit

    mov     rdx,rbx             ; number of bytes to write
    mov     rsi,rax             ; pointer/address of string to write
    mov     rdi,[stdout]        ; write to the STDOUT file
    mov     rax,1               ; SYS_WRITE (1 on 64 bit systems)
    syscall

    pop     r11
    pop     rdi
    pop     rsi
    pop     rdx
    pop     rcx
    pop     rbx
    pop     rax

    ret                         ; return to the calling location

;this is the location in memory where digits are written to by the intstr function
int_string     db 64 dup '?' ;enough bytes to hold maximum size 64-bit binary integer
; this is the end of the integer string: optional line feed and terminating zero
; storing a different byte at this label changes what follows the digits,
; for example a zero here removes the line feed
int_newline db 0Ah,0

radix dq 2 ;radix or base for integer output. 2=binary, 8=octal, 10=decimal, 16=hexadecimal
int_width dq 8 ;minimum number of digits; shorter numbers are prefixed with zeros

;intstr: creates a string of the unsigned integer in rax
;
;input:   rax = number to convert
;         [radix] = base from 2 to 36 (values below 2 are not checked)
;         [int_width] = minimum number of digits
;output:  rax = address of the first digit inside int_string
;         the string ends with whatever is stored at int_newline
;changes: rbx, rcx, rdx
;
;digits are written from right to left, starting at the last byte of int_string
;zero padding stops at the start of int_string, so an int_width above 64
;can no longer write in front of the buffer
;the result can be used with the putstring function

intstr:

    mov     rbx,int_newline-1   ;address of the lowest digit (just before int_newline)
    mov     rcx,1               ;rcx counts the digits written so far

.next_digit:
    xor     edx,edx             ;div divides rdx:rax, so the upper half must be zero
    div     qword [radix]       ;rax = quotient, rdx = value of the next digit
    cmp     rdx,10
    jb      .decimal_digit

    add     rdx,'A'-10          ;values 10 and up become the letters A, B, C, ...
    jmp     .save_digit

.decimal_digit:
    add     rdx,'0'             ;values 0 to 9 become the digits '0' to '9'

.save_digit:
    mov     [rbx],dl
    test    rax,rax
    jz      .prefix_zeros       ;quotient is zero, all digits have been written
    dec     rbx
    inc     rcx
    jmp     .next_digit

.prefix_zeros:
    cmp     rcx,[int_width]
    jnb     .done               ;already at least int_width digits
    cmp     rbx,int_string
    jbe     .done               ;buffer is full, never write before its first byte
    dec     rbx
    mov     byte [rbx],'0'
    inc     rcx
    jmp     .prefix_zeros

.done:
    mov     rax,rbx             ;return the address of the first digit

    ret


; putint: prints the integer in rax
;
; The number is converted by intstr, which uses the radix and int_width
; variables, and the resulting string is handed to putstring.
; intstr overwrites rax, rbx, rcx and rdx, so those four registers are saved
; here and restored afterwards; the caller gets them all back unchanged.

putint:

    push    rdx
    push    rcx
    push    rbx
    push    rax

    call    intstr              ; rax = address of the digit string
    call    putstring

    pop     rax
    pop     rbx
    pop     rcx
    pop     rdx

    ret

;strint: converts the zero terminated string pointed to by rax into an integer
;
;input:   rax = address of the string
;         [radix] = base used for the conversion
;output:  rax = value of the digits that were read
;changes: rbx, rcx, rdx
;
;characters '0' to '9' have the values 0 to 9; letters of either case have the
;values 10 to 35 ('A'/'a' = 10). Reading stops at the terminating zero, at the
;first character that is not a digit or letter, or at the first digit whose
;value is not below the radix (for example G in hexadecimal).
;Whatever was accumulated up to that point is returned.
;There is no check for results that do not fit in 64 bits.

strint:

    mov     rbx,rax             ;rbx walks the string because rax becomes the result
    xor     eax,eax             ;result starts at zero

.next_char:
    movzx   ecx,byte [rbx]      ;rcx = next character, upper bits zero
    inc     rbx
    test    ecx,ecx
    jz      .done               ;terminating zero: end of string

    ;sort the character into one of the three accepted ranges
    cmp     cl,'a'
    jae     .maybe_lower
    cmp     cl,'A'
    jae     .maybe_upper

    cmp     cl,'0'
    jb      .done               ;below '0': not a digit
    cmp     cl,'9'
    ja      .done               ;between '9' and 'A': not a digit
    sub     cl,'0'
    jmp     .accumulate

.maybe_upper:
    cmp     cl,'Z'
    ja      .done               ;between 'Z' and 'a': not a letter
    sub     cl,'A'-10           ;'A' becomes 10, 'B' becomes 11, ...
    jmp     .accumulate

.maybe_lower:
    cmp     cl,'z'
    ja      .done               ;above 'z': not a letter
    sub     cl,'a'-10           ;'a' becomes 10, 'b' becomes 11, ...

.accumulate:
    cmp     rcx,[radix]
    jae     .done               ;digit value is too high for this radix

    mul     qword [radix]       ;rax = rax * radix (rdx receives the upper half)
    add     rax,rcx

    jmp     .next_char

.done:

    ret
;putspace prints a single space, putline prints a line break
;(the line string holds a carriage return followed by a line feed)
;both preserve every register, so they can be dropped in anywhere
;the two functions only differ in the string they load, so they share one tail

space db ' ',0
line db 0Dh,0Ah,0

putspace:
    push    rax
    mov     rax,space
    jmp     putline.emit

putline:
    push    rax
    mov     rax,line
.emit:
    call    putstring
    pop     rax
    ret

chasteio64.asm

;this file is for managing the advanced Input and Output situations that occur when opening and closing files.
;I use the following references when using system calls.


;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/syscalls/#x86_64-64-bit
;https://www.chromium.org/chromium-os/developer-library/reference/linux-constants/errnos/


;open: opens an existing file for both reading and writing
;
;input:   rax = address of the file name as a zero terminated string
;output:  rax = result of the open system call: the file handle on success,
;               or a negative number on failure
;
;like my other functions it uses only rax for input and output; the access
;mode is fixed to read and write so no second argument is needed
;
;on failure the function also prints "File Error Code: " followed by the
;error code as a positive number and a line break. Linux reports errors as
;negative numbers, so the sign is flipped for display only; the caller still
;receives the original negative value and can test its sign bit.
;The most common codes are these, because the file doesn't exist or the user
;doesn't have permission to read or write it:
;
; 2 0x02 ENOENT No such file or directory
;13 0x0d EACCES Permission denied

open_error_message db 'File Error Code: ',0

open:

    mov     rdi,rax             ;file name
    mov     esi,2               ;flags: open in read and write mode
    mov     eax,2               ;SYS_OPEN (2 on 64 bit systems)
    syscall

    test    rax,rax
    jns     .done               ;not negative: rax is a valid file handle

    push    rax                 ;keep the negative result for the caller
    mov     rax,open_error_message
    call    putstring
    mov     rax,[rsp]           ;fetch the saved result again
    neg     rax                 ;positive errno code for display
    call    putint
    call    putline
    pop     rax                 ;return the original negative value

.done:

    ret

;close: closes the file whose handle is in rax
;
;input:   rax = file handle to close
;output:  rax = result of the close system call
;
;the kernel wants the handle in rdi; taking it in rax keeps this function
;consistent with the rest of my functions

close:

    mov     rdi,rax             ;file number to close
    mov     eax,3               ;SYS_CLOSE (3 for 64 bit Intel)
    syscall

    ret

Comments

Leave a comment