Open4

macOSでARM64 Assembly Language(Disassembling Machine Code in macOS)

tana00tana00

サンプルを動かしてみる

shell.s
; shell.s -- macOS/arm64: exec "/bin/sh" with a raw BSD system call.
; Equivalent C: execve("/bin/sh", NULL, NULL)
; Registers at the svc: x0 = path, x1 = NULL, x2 = NULL, x16 = syscall number.
.section __TEXT,__text ; Place the following code in the __text section of the __TEXT segment.
.global _main         ; Make _main globally visible so the linker can find it as the program entry point.
.align 2              ; Align the start of _main to a 4-byte boundary (2^2 = 4).

_main:    ; entry point
    adr  x0, sh_path  ; x0 = PC-relative address of the "/bin/sh" string below (first execve argument).
    mov  x1, xzr      ; x1 = NULL, the second execve argument.
    mov  x2, xzr      ; x2 = NULL, the third execve argument.    
    mov  x16, #59     ; x16 = 59, the execve syscall number.
    svc  #0x1337      ; Supervisor call. The immediate 0x1337 is arbitrary; the action is selected by the value in x16.
    ; NOTE(review): nothing follows the svc, so if the call ever returned
    ; (presumably only on failure -- confirm against the execve documentation)
    ; execution would fall through into the string bytes below.

sh_path: .asciz "/bin/sh"    ; NUL-terminated path; it sits in __text directly after the code.
hello.s
// hello.s -- macOS/arm64: write a greeting to stdout, then exit(0),
// using raw BSD system calls (syscall number in x16, arguments in x0..x2).
.equ    SYS_EXIT,  1            // BSD syscall number: exit
.equ    SYS_WRITE, 4            // BSD syscall number: write
.equ    STDOUT,    1            // file descriptor of standard output
.equ    MSG_LEN,   13           // bytes in "hello, world\n", not counting the NUL

.section __TEXT, __text
.global _main
.align 2

_main:
        // write(STDOUT, greeting, MSG_LEN)
        mov     x0,  #STDOUT
        adr     x1,  greeting
        mov     x2,  #MSG_LEN
        mov     x16, #SYS_WRITE
        svc     #0

        // exit(0)
        mov     x0,  xzr
        mov     x16, #SYS_EXIT
        svc     #0

greeting:
        .asciz  "hello, world\n"

Introduction to ARM64 - HackTricks
BSD system call
assemble,link and run

>as -o shell.o shell.s
>ld -o shell shell.o -syslibroot $(xcrun -sdk macosx --show-sdk-path) -lSystem
>./shell

retrieve symbols

>nm shell
0000000100000000 T __mh_execute_header
0000000100003f8c T _main
0000000100003fa0 t sh_path

strip symbols, disassemble and hex dump

>strip shell 
>objdump -d shell

shell:	file format mach-o arm64

Disassembly of section __TEXT,__text:

0000000100003f8c <__text>:
100003f8c: 100000a0    	adr	x0, #20
100003f90: aa1f03e1    	mov	x1, xzr
100003f94: aa1f03e2    	mov	x2, xzr
100003f98: d2800770    	mov	x16, #59
100003f9c: d40266e1    	svc	#0x1337
100003fa0: 6e69622f    	rsubhn2.8h	v15, v17, v9
100003fa4: 0068732f    	<unknown>
>od -t x4 shell
0000000          feedfacf        0100000c        00000000        00000002
0000020          00000010        000002e8        00200085        00000000
0000040          00000019        00000048        41505f5f        455a4547
0000060          00004f52        00000000        00000000        00000000
0000100          00000000        00000001        00000000        00000000
0000120          00000000        00000000        00000000        00000000
< omitted >
>hexdump -C shell
00000000  cf fa ed fe 0c 00 00 01  00 00 00 00 02 00 00 00  |................|
00000010  10 00 00 00 e8 02 00 00  85 00 20 00 00 00 00 00  |.......... .....|
00000020  19 00 00 00 48 00 00 00  5f 5f 50 41 47 45 5a 45  |....H...__PAGEZE|
00000030  52 4f 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |RO..............|
< omitted >

Arm64 LinuxでのAssembly Language(日本語)
Arm64(ARMv8) Assembly Programming (00)
Armv7 Assembly Language YouTube(英語)
x86-64プロセッサのスタックを理解する #assembly - Qiita

tana00tana00

引数を2つ取る関数の呼出

何処までディレクティブ(疑似命令)を削れるのか試してみた。.globl(.globalでも可)さえあればアセンブルできる。

// my-add.S
// _add(w0, w1) returns w0 + w1; _main calls _add(10, 20) and returns the
// result as the process exit status.
// The directives below are deliberately commented out: only `.globl _main`
// is kept.

    ; .section __TEXT,__text
    ; .globl _add
    ; .p2align 2
_add:
    // return value (w0) = first argument (w0) + second argument (w1)
    add w0, w0, w1
    ret

    .globl _main
_main:
    // Save the frame pointer (x29) and link register (x30) on the stack.
    // The pre-index write-back lowers sp by 16, keeping it 16-byte aligned.
    stp x29, x30, [sp, #-16]!

    // w0 = _add(10, 20); bl overwrites x30, which is why it was saved above.
    mov w0, #10
    mov w1, #20
    bl _add

    // Restore x29/x30. The post-index write-back raises sp by 16 again, so
    // sp is back at its entry value before returning to the caller.
    ldp x29, x30, [sp], #16

    // return w0
    ret

実行するには

$ gcc my-add.S
$ ./a.out;echo $? 
30

🔹lldb command, s: step into func, n: step over func
m read -l 8 -c 256 0x....: mem dump, reg read fp lr sp pc: reg
🔹AArch64 mnemonicの読み方
add Xd, Xn, #uimm {, lsl #12}
Xd = Xn + uimm({, lsl #12} を付けると uimm を 12 ビット左シフトしてから加算する。lsl = logical shift left)
shift operation: lsl (logical shift left), lsr (logical shift right), asr (arithmetic shift right), ror (rotate right)
uimm12: unsigned 12bit width integer(0 ... 4095)
simm9: signed 9bit width integer(-256 ... 255)

stp: store pair of registers
ret { Xn }: Xn を省略すると x30(lr) のアドレスへ戻る
subs wzr, w8, #0 // どのレジスタも変化させたくないのでXd = wzr
cset w9, eq // eq: condition suffix (cond abbr.); Z = 1 なら w9 = 1、それ以外は w9 = 0
tbnz w9,#0, break // w9 != 0 then break, test branch nz
fp (x29): frame pointer(x86 のベースポインタ rbp に相当)
bl: branch with link, setting the register x30 to pc+4
Apple M1で遊ぼう🍎 - ESM アジャイル事業部 開発者ブログ

tana00tana00

C言語ソースからAssembly Languageソースへ変換する際、汚い出力を防止

  1. cfi, cfa疑似命令の出力を防止(CFI = call frame information, CFA = canonical frame address)
$ clang -S -Xclang -funwind-tables=0 add.c
  1. 冗長なコード出力の防止
$ clang -O -S -Xclang -funwind-tables=0 add.c
  1. before なんだか冗長なコード
	.section	__TEXT,__text,regular,pure_instructions
	.build_version macos, 14, 0	sdk_version 14, 2
	.globl	_add                            ; -- Begin function add
	.p2align	2
_add:                                   ; @add
	.cfi_startproc
; %bb.0:
	sub	sp, sp, #16                     ; reserve 16 bytes of stack
	.cfi_def_cfa_offset 16
	str	w0, [sp, #12]                   ; store w0 (first argument) at sp+12
	str	w1, [sp, #8]                    ; store w1 (second argument) at sp+8
	ldr	w8, [sp, #12]                   ; reload the first argument into w8
	ldr	w9, [sp, #8]                    ; reload the second argument into w9
	add	w0, w8, w9                      ; w0 = w8 + w9 (the return value)
	add	sp, sp, #16                     ; release the 16 bytes reserved above
	ret
	.cfi_endproc
                                        ; -- End function
	.globl	_main                           ; -- Begin function main
	.p2align	2
_main:                                  ; @main
	.cfi_startproc
; %bb.0:
	sub	sp, sp, #32                     ; reserve 32 bytes of stack
	.cfi_def_cfa_offset 32
	stp	x29, x30, [sp, #16]             ; 16-byte Folded Spill
	add	x29, sp, #16                    ; x29 = address of the saved x29/x30 pair
	.cfi_def_cfa w29, 16
	.cfi_offset w30, -8
	.cfi_offset w29, -16
	stur	wzr, [x29, #-4]                 ; store 0 in the 4-byte slot at x29-4
	mov	w0, #10                         ; first argument for _add
	mov	w1, #20                         ; second argument for _add
	bl	_add                            ; call _add; its result comes back in w0
	ldp	x29, x30, [sp, #16]             ; 16-byte Folded Reload
	add	sp, sp, #32                     ; release the 32 bytes reserved above
	ret
	.cfi_endproc
                                        ; -- End function
.subsections_via_symbols

(1) [ARM64] What's the difference between ldr and ldur? : asm

  1. after コードがスッキリ
  	.section	__TEXT,__text,regular,pure_instructions
	.build_version macos, 14, 0	sdk_version 14, 2
	.globl	_add                            ; -- Begin function add
	.p2align	2
_add:                                   ; @add
; %bb.0:
	add	w0, w1, w0                      ; w0 = w1 + w0; this version uses no stack at all
	ret
                                        ; -- End function
	.globl	_main                           ; -- Begin function main
	.p2align	2
_main:                                  ; @main
; %bb.0:
	mov	w0, #30                         ; returns the constant 30 directly; there is no bl _add here
	ret
                                        ; -- End function
.subsections_via_symbols

c++ - What do the CFI directives mean? (and some more questions) - Stack Overflow
[llvm-dev] [RFC] Asynchronous unwind tables attribute
アセンブリコードを眺めてみる
5. C言語ソース

// add.c
// Returns the sum of n and m.
int add(int n, int m) {
    return n + m;
}
 
// Entry point: returns add(10, 20) as the program's result.
int main() {
    return add(10, 20);
}

おすすめ: x86アセンブリ言語での関数コール
ISA: Learn the architecture - A64 Instruction Set Architecture

tana00tana00

16-byte Folded Spill? A: "Write 16 bytes at a time"

// Before optimization: two separate 8-byte stores into adjacent slots
mov  x1, #0x5678
movk x1, #0x1234, lsl #16      // x1 = 0x12345678 (built in two 16-bit steps)
mov  x2, #0xef01
movk x2, #0xabcd, lsl #16      // x2 = 0xabcdef01
str  x1, [sp, #-16]            // x1 -> bytes sp-16 .. sp-9
str  x2, [sp, #-8]             // x2 -> bytes sp-8  .. sp-1

// After optimization: one store-pair writes the same 16 bytes
stp  x1, x2, [sp, #-16]        // x1 -> [sp-16], x2 -> [sp-8]