Windows - x64 Assembly Optimization

Indlæg af Hanne Mølgaard Plasc

Problem



Jeg forsøger at optimere flere assemblerprocedurer med hensyn til størrelse; hastigheden bekymrer mig ikke.
De optimeringer, jeg kender til, er situationer som følger:


;the following two lines compute rbp = rsp + 50h with a copy followed by an add
    mov rbp, rsp
    add rbp, 50h
;can be changed to a single lea that forms the same address in one instruction
;(note: lea leaves the flags untouched, whereas add modifies them)
    lea rbp, [rsp+50h]


Hvilke andre optimeringer kan jeg bruge til at reducere antallet af byte i den følgende procedure?
Jeg beder ikke nogen om at optimere denne procedure fuldt ud, blot om at påpege, hvor jeg kan forbedre den.


;get procedure address
;
;Walks the PE export directory of a loaded module and returns the address of
;the named export whose name hashes (via asmHsh) to the requested value.
;
;in:    rcx - hash of the wanted function name (compared against asmHsh results)
;       rdx - DllBase address (IMAGE_DOS_HEADER pointer)
;out:   rax - DllBase + AddressOfFunctions[ordinal] of the matching export
;saved: rcx, rdx, r8, r9, rbx, r10 are pushed on entry and restored before ret
;
;NOTE(review): the search loop exits only on a hash match; there is no
;not-found path, so the requested hash must belong to one of the named exports.
asmGetProc proc
push rcx                    ;wanted hash (restored on exit)
push rdx                    ;DllBase address (IMAGE_DOS_HEADER pointer)
push r8                     ;will hold the pointer to IMAGE_EXPORT_DIRECTORY
push r9                     ;will hold the name index (from NumberOfNames), then
                            ;the ordinal read from AddressOfNameOrdinals[index]

push rbx                    ;will hold the wanted hash across the asmHsh calls

push r10                    ;will hold, in turn, the pointers to
                            ;AddressOfNames, AddressOfNameOrdinals and AddressOfFunctions

mov rbx, rcx                ;keep the wanted hash in rbx; rcx is reused as the asmHsh argument

mov r8d, [rdx+3ch]          ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (offset to IMAGE_NT_HEADERS64)
add r8, rdx                 ;add DllBase to the e_lfanew offset
add r8, 88h                 ;18h - offset of OptionalHeader inside IMAGE_NT_HEADERS64
                            ;70h - offset of the DataDirectory array inside IMAGE_OPTIONAL_HEADER64
                            ;r8 points to the first IMAGE_DATA_DIRECTORY entry (exports)
mov r8d, [r8]               ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
add r8, rdx                 ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)

mov r9d, [r8+18h]           ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov r10d, [r8+20h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD RVA)
add r10, rdx                ;add DllBase to AddressOfNames
for_each_function:
    ;step to the next name index (names are scanned from last to first)
    dec r9

    ;load the RVA stored in AddressOfNames[r9] into ecx
    lea rcx, [r10 + 4 * r9]     ;address of AddressOfNames[r9] (4-byte entries)
    mov ecx, [rcx]              ;ecx = function name string RVA (DWORD, zero-extended into rcx)
    add rcx, rdx                ;add DllBase to get the name string pointer

    call asmHsh                 ;hash the function name (result in rax)
    cmp rax, rbx                ;compare the name hash with the wanted hash
jnz for_each_function           ;no match: try the previous name


;r8  - export directory
;r9  - matching name index, then function ordinal
;r10 - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final pointer to the function
mov r10d, [r8+24h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD RVA)
add r10, rdx                ;add DllBase to AddressOfNameOrdinals
movzx r9d, word ptr [r10+2*r9] ;r9 = AddressOfNameOrdinals[r9] (2-byte entries); movzx clears the
                            ;upper bits so no stale index bits leak into the ordinal
                            ;(same encoded size as the former mov r9w)

mov r10d, [r8+1ch]          ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD RVA)
add r10, rdx                ;add DllBase to AddressOfFunctions
mov eax, [r10+r9*4]         ;eax = AddressOfFunctions[ordinal] (4-byte entries) - function RVA
add rax, rdx                ;add DllBase to the function RVA

pop r10
pop rbx
pop r9
pop r8
pop rdx
pop rcx

ret                         ;return with the function address in rax
asmGetProc endp


EDIT: Tilføjet asmHsh (min fejl)


;hash function (djb2, xor variant): hash = (hash * 33) xor byte, seeded with 5381
;
;in:    rcx - pointer to a null terminated function name
;out:   rax - hash of all bytes before the terminator (5381 for an empty string)
;saved: rcx and rdx are pushed on entry and restored before ret
asmHsh proc
push rcx
push rdx

mov eax, 5381               ;seed; a 32-bit write zero-extends into rax (2 bytes shorter than mov rax)
jmp short hl_check          ;test the first byte before hashing it, so an empty
                            ;name is never read past its terminator
hl:
    mov rdx, rax
    shl rax, 5
    add rax, rdx            ;rax = rax * 32 + rax = rax * 33
    xor al, [rcx]           ;mix in the current byte
    inc rcx
;check for null termination
hl_check:
mov dl, [rcx]
cmp dl, 00h
jne short hl

pop rdx
pop rcx
ret

asmHsh endp

Bedste reference


Når man optimerer assemblerkode for størrelse i 64-bit-tilstand, bør man: (1) bruge DWORD-bredde, når det er tilstrækkeligt (færre præfikser); (2) holde sig til de gamle x86-registre eax-edx/esi/edi/ebp (kortere kodning).


Forhåbentlig illustrerer nedenstående idéen. ML64 assemblerede de oprindelige rutiner til 135 bytes og den ændrede version til 103 bytes.


Eksempler på ændringer: (1) brugt rbp/rsi/rdi i stedet for r8/r9/r10; (2) forkortet instruktionssekvenser, der kunne udføres via adresseringsformer med flere komponenter; (3) brugt DWORD-dec, hvor dataene vides at være 32 bit; (4) brugt IMUL i stedet for skift/addition.


'; - ' står foran fjernede linjer, og '; ## delta ' er føjet til de tilføjede linjer, hvor delta er forskellen i antal bytes, som den nye kode medfører. Der er ikke gjort noget forsøg på at tilpasse kommentarerne.


;hash function (djb2, xor variant): hash = (hash * 33) xor byte, seeded with 5381
;
;in:    rcx - pointer to a null terminated function name
;out:   rax - hash of all bytes before the terminator (5381 for an empty string)
;saved: rcx is pushed on entry and restored before ret; rdx is no longer touched
asmHsh proc
;rcx - null terminated function name
push rcx
;-push rdx ;## -1

;-mov rax, 5381d
mov eax, 5381 ;## -2 (a 32-bit write zero-extends into rax)
jmp short hl_check ;## +2 (test the first byte before hashing, so an empty name is not read past its terminator)
hl:
;-  mov rdx, rax
;-  shl rax, 5
;-  add rax, rdx
    imul rax,rax,33 ;## -6
    xor al, [rcx]
    inc rcx
;check for null termination
hl_check:
;-mov dl, [rcx]
;-cmp dl, 00h
cmp byte ptr [rcx], 00h ;## -2
jne short hl

;-pop rdx ;## -1
pop rcx
ret
asmHsh endp

;get procedure address
;
;Walks the PE export directory of a loaded module and returns the address of
;the named export whose name hashes (via asmHsh) to the requested value.
;
;in:    rcx - hash of the wanted function name (compared against asmHsh results)
;       rdx - DllBase address (IMAGE_DOS_HEADER pointer)
;out:   rax - DllBase + AddressOfFunctions[ordinal] of the matching export
;saved: rcx, rdx, rbp, rsi, rbx, rdi are pushed on entry and restored before ret
;
;NOTE(review): the search loop exits only on a hash match; there is no
;not-found path, so the requested hash must belong to one of the named exports.
asmGetProc proc
push rcx                    ;wanted hash (restored on exit)
push rdx                    ;DllBase address (IMAGE_DOS_HEADER pointer)
;-push r8                    ;pointer to IMAGE_EXPORT_DIRECTORY
push rbp ;## -1             ;rbp will hold the pointer to IMAGE_EXPORT_DIRECTORY
;-push r9                     ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
push rsi ;## -1             ;rsi will hold the name index, then the ordinal
                            ;read from AddressOfNameOrdinals[index]

push rbx                    ;will hold the wanted hash across the asmHsh calls

;-push r10                    ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames
push rdi ;## -1             ;rdi will hold, in turn, the pointers to AddressOfNames,
                            ;AddressOfNameOrdinals and AddressOfFunctions

mov rbx, rcx                ;keep the wanted hash in rbx; rcx is reused as the asmHsh argument

;-mov r8d, [rdx+3ch]          ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
mov ebp, [rdx+3ch] ;## -1
;-add r8, rdx                 ;add DllBase to the e_lfanew offset
;-add r8, 88h                 ;18h - IMAGE_NT_HEADERS64->OptionalHeader (IMAGE_OPTIONAL_HEADER64) 18h bytes
;-                            ;70h - skip entire IMAGE_OPTIONAL_HEADER64 structure
;-                            ;r8 points to the IMAGE_DATA_DIRECTORY structure
;-mov r8d, [r8]               ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
mov ebp, [rbp+rdx+88h] ;## -5 ;one load from DllBase + e_lfanew + 88h replaces both adds
;-add r8, rdx                 ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)
add rbp, rdx ;## 0

;-mov r9d, [r8+18h]           ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov esi, [rbp+18h] ;## -1
;-mov r10d, [r8+20h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
mov edi, [rbp+20h] ;## -1
;-add r10, rdx                ;add DllBase to AddressOfNames (DWORD)
add rdi, rdx ;## 0
for_each_function:
    ;step to the next name index (names are scanned from last to first)
;-  dec r9
    dec esi ;## -1

    ;load the RVA stored in AddressOfNames[rsi] into ecx
;-  lea rcx, [r10 + 4 * r9]     ;AddressOfNames[i] - function string RVA (relative virtual address)
;-  mov ecx, [rcx]              ;r11d is the AddressOfName[r9] RVA (DWORD)
    mov ecx, [rdi + 4 * rsi] ;## -3
    add rcx, rdx                ;add DllBase to get the name string pointer

    call asmHsh                 ;hash the function name (result in rax)
    cmp rax, rbx                ;compare the name hash with the wanted hash
jnz for_each_function           ;no match: try the previous name


;rbp - export directory
;rsi - matching name index, then function ordinal
;rdi - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final pointer to the function
;-mov r10d, [r8+24h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
mov edi, [rbp+24h];## -1
;-add r10, rdx                ;add DllBase to AddressOfNameOrdinals DWORD
add rdi, rdx; ## 0
;-mov r9w, [r10+2*r9]         ;AddressOfNameOrdinals[2*r9] - (2*r9 = 2 bytes * function name counter)
movzx esi, word ptr [rdi+2*rsi] ;## -1 ;movzx clears the upper bits so no stale index bits
                            ;leak into the ordinal (same encoded size as mov si)

;-mov r10d, [r8+1ch]          ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
mov edi, [rbp+1ch] ;## -1
;-add r10, rdx                ;add DllBase to AddressOfFunctions DWORD
add rdi, rdx ;## 0
;-mov eax, [r10+r9*4]         ;AddressOfFunctions[4*r9] - (4*r9 = 4 bytes * function ordinal)
mov eax, [rdi+rsi*4] ; ## -1
add rax, rdx                ;add DllBase to the function RVA

;-pop r10
pop rdi ; ## -1
pop rbx
;-pop r9
pop rsi ;## -1
;-pop r8
pop rbp ;## -1
pop rdx
pop rcx

ret                         ;return with the function address in rax
asmGetProc endp