奇安信攻防社区-Window向之x86 ShellCode入门 (butian.net)

一步步学写Windows下的Shellcode-安全客 - 安全资讯平台 (anquanke.com)

Windows x64 Calling Convention: Stack Frame | Red Team Notes (ired.team)

Finding Kernel32 Base and Function Addresses in Shellcode | Red Team Notes (ired.team)

Windows/x64 - Dynamic Null-Free WinExec PopCalc Shellcode (205 Bytes) (exploit-db.com)

x64 calling convention | Microsoft Learn

default rel ; make registerless memory operands RIP-relative by default, which avoids some absolute-address relocations
global _start ; export the entry point label
section .text

%define u(x) __?utf16?__(x) ; shorthand: u('123') expands to the string encoded as UTF-16

_start:
    ; Smoke test of the resolver routines:
    ;   kernel32.dll base -> address of LoadLibraryA -> ntdll.dll base.
    ; The results of the last two calls are not used afterwards.

    ; rax = locate_dll("KERNEL32.DLL")
    lea     rdi, [.kernel32_dll]
    call    locate_dll

    ; rax = locate_function(base = rax, name = "LoadLibraryA")
    lea     rsi, [.loadlibrarya]
    mov     rdi, rax
    call    locate_function

    ; rax = locate_dll("NTDLL.DLL")
    lea     rdi, [.ntdll_dll]
    call    locate_dll

    jmp     stop                    ; jump over the string data that follows

; DLL names must be written in UPPER CASE: strcmp_u16_ascii only folds this
; (ASCII) side to lower case, so an upper-case name matches either case of
; the loaded module's name. Function names are compared exactly by strcmp.
.kernel32_dll:              db  "KERNEL32.DLL", 0
.ntdll_dll:                 db  "NTDLL.DLL", 0
.loadlibrarya:              db  "LoadLibraryA", 0

; Calling-convention annotation used in the function headers below.
;   Example: (linux)[linux, windows]
;       (linux)          -> the function itself is called with the Linux convention
;       [linux, windows] -> the function and the sub-functions it calls
;                           use the Linux and Windows conventions
;       The caller must make sure the callee-saved registers are preserved,
;       and if the function itself uses the Windows convention,
;       0x20 bytes of space must be reserved on the stack for it.
;   Example: (linux)[rdi, rsi, rcx, rdx, rax]
;       only `rdi, rsi, rcx, rdx, rax` are changed by the function.

; windows callee-saved ("safe") registers: rbx, rbp, rdi, rsi, rsp, r12, r13, r14, r15
; linux callee-saved ("safe") registers:   rbx, rbp, rsp, r12, r13, r14, r15 (the windows set without rdi and rsi)

; memcmp(addr1: rdi, addr2: rsi, length: edx)
;   Compare `length` bytes at addr1 and addr2 for equality.
;   rax: 0 -> same (including length == 0)
;        1 -> not the same
;   No cld is issued here: the direction flag must already be clear.
;   (linux)[rdi, rsi, rcx, rdx, rax]
memcmp:
    xor eax, eax    ; eax = 0 and ZF = 1, so a zero-length compare reports "same"
                    ; (repe cmpsb with rcx == 0 runs no iteration and leaves flags as they are)
    mov ecx, edx    ; rcx = byte count (zero-extended); mov does not touch flags
    repe cmpsb      ; stop at the first differing byte or when rcx reaches 0
    setne al        ; al = 1 if the last compared bytes differ; upper bits of rax are already 0
    ret

; memset(dst: rdi, value: sil, length: edx)
;   Store the byte `value` into `length` consecutive bytes starting at dst.
;   No cld is issued here: the direction flag must already be clear.
;   (linux)[rdi, rsi, rcx, rdx, rax]
memset:
    mov ecx, edx    ; rcx = number of bytes to store (zero-extended)
    mov eax, esi    ; al = fill byte, the only part of eax that stosb writes
    rep stosb       ; [rdi] = al, rdi advances, rcx counts down to 0
    ret

; memcpy(dst: rdi, src: rsi, length: edx)
;   Copy `length` bytes from src to dst, one byte at a time.
;   No cld is issued here: the direction flag must already be clear
;   for the copy to run forward.
;   On return rdi and rsi have been moved past the copied bytes and rcx is 0.
;   (linux)[rdi, rsi, rcx, rdx, rax]
memcpy:
    mov ecx, edx    ; rcx = byte count (the 32-bit move zero-extends into rcx)
    rep movsb       ; copy rcx bytes from [rsi] to [rdi]
    ret

; strlen(str: rdi)
;   Length of a NUL-terminated byte string, not counting the terminator.
;   eax: length
;   At most 0xFFFFFFFF bytes are scanned.
;   (linux)[rdi, rcx, rax]
strlen:
    cld             ; scan forward
    or ecx, -1      ; ecx = 0xFFFFFFFF (rcx upper half zeroed): scan limit
    xor eax, eax    ; al = 0, the byte to look for
    repne scasb     ; consumes length + 1 bytes, each one decrements rcx
    mov eax, ecx    ; ecx = 0xFFFFFFFF - (length + 1)
    not eax         ; eax = length + 1
    dec eax         ; drop the terminator from the count
    ret

; strcmp(str1: rdi, str2: rsi)
;   Equality test for two NUL-terminated byte strings (no ordering result).
;   rax: 0 -> same
;        1 -> not the same
;   (linux)[rdi, rsi, rcx, rdx, rax, r8]
strcmp:
    mov r8, rdi         ; keep str1: strlen advances rdi
    call strlen         ; eax = strlen(str1)
    lea edx, [rax + 1]  ; edx = length + 1, so the terminating NUL is compared too
    mov rdi, r8         ; rdi = str1 again
    jmp memcmp          ; tail call: memcmp(str1, str2, strlen(str1) + 1) returns to our caller

; locate_dll(name: rdi)
;   Find a loaded module by base name by walking
;   PEB.Ldr.InMemoryOrderModuleList.
;   name: NUL-terminated ASCII string, compared against each entry's
;         BaseDllName.Buffer with strcmp_u16_ascii
;   rax:  DllBase of the first matching entry,
;         0 when the whole list was walked without a match
;   (linux)[windows]
locate_dll:
    push rbx                    ; rbx (callee-saved) holds the current list node

    ; Offsets below are relative to the InMemoryOrderLinks field of an entry,
    ; because that is where the list pointers point.
    push 0x60
    pop rax                     ; rax = 0x60, offset of the PEB pointer in the TEB
    mov rax, [gs:rax]           ; rax = TIB.PEB
    mov rax, [rax + 0x18]       ; rax = PEB.LDR
    mov rbx, [rax + 0x20]       ; rbx = LDR.InMemoryOrderModuleList.Flink (first entry)
    lea rax, [rax + 0x20]       ; rax = address of the list head itself
    push rax                    ; [rsp] = list head; the list is circular, so
                                ; reaching it again means every entry was visited

.loop:      ; .xx is a local label, associated with the previous non-local label
            ; (here .loop == locate_dll.loop)
    cmp rbx, [rsp]              ; back at the list head?
    je .not_found               ; yes: the head is not an entry, stop here

    ; strcmp_u16_ascii uses the windows register convention (rcx, rdx) and
    ; preserves rdi/rsi. It is a leaf routine in this file that does not use
    ; shadow space, so none is reserved for it.
    mov rcx, [rbx + 0x50]       ; arg1: BaseDllName.Buffer (UTF-16)
    mov rdx, rdi                ; arg2: name (ASCII)
    call strcmp_u16_ascii
    test eax, eax
    jz .success
.next:
    mov rbx, [rbx]              ; InMemoryOrderLinks.Flink, next entry
    jmp .loop

.not_found:
    xor eax, eax                ; rax = 0: no such module
    jmp .ret

.success:
    mov rax, [rbx + 0x20]       ; DllBase
.ret:
    pop rcx                     ; drop the saved list head (rcx is scratch)
    pop rbx
    ret

; locate_function(base: rdi, name: rsi)
;   Look up an exported function by name in the export directory of the
;   image loaded at `base`.
;   base: module base address (e.g. the value returned by locate_dll)
;   name: NUL-terminated ASCII export name, compared exactly with strcmp
;   rax:  address of the function (rva + base),
;         0 when the image has no export directory or no export has that name
;   The 0x88 offset assumes a 64-bit (PE32+) image.
;   No check for forwarded exports is made.
;   (linux)[rdi, rsi, rcx, rdx, rax, r8, r9]
locate_function:
    push rbx
    push rbp
    push r10

    mov rbx, rdi            ; rbx = base
    mov r10, rsi            ; r10 = name (rsi is clobbered by strcmp)

    mov eax, [rbx + 0x3c]   ; e_lfanew, PE header offset
    add rax, rbx            ; IMAGE_NT_HEADERS addr
    ; rbp = Export Directory->Virtual address (rva + base)
    mov ebp, [rax + 0x88]
    test ebp, ebp
    jz .not_found           ; rva 0: nothing is exported
    add rbp, rbx
    ; r9 = AddressOfNames (rva + base)
    mov r9d, [rbp + 0x20]
    add r9, rbx

    xor eax, eax            ; rax = index into the name table
.find_loop:
    cmp eax, [rbp + 0x18]   ; NumberOfNames
    jae .not_found          ; every name was checked without a match
    ; rdi = AddressOfNames[rax] (rva + base)
    mov edi, [r9 + rax*4]
    add rdi, rbx

    mov rsi, r10            ; rsi = r10 = name
    push rax                ; strcmp returns in rax, keep the index
    call strcmp
    test eax, eax
    pop rax                 ; pop leaves the flags from test untouched
    jz .calc_addr
    inc eax
    jmp .find_loop

.not_found:
    xor eax, eax            ; rax = 0: no such export
    jmp .ret

.calc_addr:
    ; r9 = AddressOfNameOrdinals (rva + base)
    mov r9d, [rbp + 0x24]
    add r9, rbx
    ; rax = AddressOfNameOrdinals[rax], a 16-bit entry
    movzx eax, word [r9 + rax*2]
    ; r9 = AddressOfFunctions (rva + base)
    mov r9d, [rbp + 0x1c]
    add r9, rbx
    ; rax = AddressOfFunctions[ordinal] (rva + base)
    mov eax, [r9 + rax*4]
    add rax, rbx
.ret:
    pop r10
    pop rbp
    pop rbx
    ret

; locate_kernel32_simple:
;     mov rax, [first_ldr_entry]  ; current program image
;     mov rax, [rax]              ; ntdll.dll
;     mov rax, [rax]              ; kernel32.dll
;     mov rax, [rax + 0x20]       ; _LDR_DATA_TABLE_ENTRY.DllBase
;     ret

; strcmp_u16_ascii(str1: rcx, str2: rdx)
;   Compare a NUL-terminated UTF-16 string (str1) with a NUL-terminated
;   ASCII string (str2).
;   rax: 0 -> same
;        1 -> not the same
;   (windows)[rax, rcx, rdx]
; Case handling: a str2 character c matches a str1 character w when
;   w == c or w == (c | 0x20).
;   So an upper-case letter in str2 matches either case in str1, while
;   digits, lower-case letters and punctuation in 0x20..0x3f must match exactly.
;   Side effect: the non-letter characters in 0x40..0x5f also match their
;   counterpart 0x20 higher (for example '_' also matches 0x7f); such
;   characters are not expected in dll names.
;   The end of str2 only matches the end of str1.
strcmp_u16_ascii:
    push rdi                ; rdi is callee-saved in the windows convention
.loop:
    movzx edi, word [rcx]   ; edi = next UTF-16 unit of str1 (reads exactly 2 bytes)
    movzx eax, byte [rdx]   ; eax = next ASCII character of str2
    test eax, eax
    jz .name_end            ; check the terminator before any case folding
    cmp edi, eax
    je .next                ; exact match
    or al, 0x20             ; fold str2's character: KERNEL32.DLL -> kernel32.dll
    cmp edi, eax
    jne .ret                ; ZF = 0 -> result 1
.next:
    add rcx, 2              ; one UTF-16 unit
    inc rdx                 ; one ASCII byte
    jmp .loop
.name_end:
    test edi, edi           ; ZF = 1 only if str1 ends at the same position
.ret:
    setne al                ; ZF = 0 -> 1 (not the same), ZF = 1 -> 0 (same)
    movzx eax, al
    pop rdi
    ret

stop:
    ; End marker that _start jumps to: four one-byte NOPs (0x90 each).
    times 4 nop