capstone-engine / capstone

Capstone disassembly/disassembler framework for ARM, ARM64 (ARMv8), Alpha, BPF, Ethereum VM, HPPA, M68K, M680X, Mips, MOS65XX, PPC, RISC-V(rv32G/rv64G), SH, Sparc, SystemZ, TMS320C64X, TriCore, Webassembly, XCore and X86.

Home Page:http://www.capstone-engine.org

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Is this a BUG or am I using it incorrectly?

kkptm opened this issue · comments

/**
 * Writes `len` bytes starting at `str` to stdout, each rendered as
 * "0x" plus two lowercase hex digits and followed by a single space.
 * No newline is emitted; nothing is printed when `len` is 0.
 */
static void print_string_hex(unsigned char* str, size_t len)
{
    for (size_t idx = 0; idx < len; ++idx) {
        // Mask kept from the original so only the low 8 bits are printed.
        const unsigned int byteValue = str[idx] & 0xff;
        printf("0x%02x ", byteValue);
    }
}
bool printAsm(unsigned char* codes,size_t codeSize, unsigned __int64 baseAddress)
{
    csh handle = NULL;
    cs_insn* insn = NULL;
    cs_err err = cs_open(CS_ARCH_X86, CS_MODE_64, &handle);
    if (err) {
	    abort();
	    return false;
    }
    cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
    size_t count = cs_disasm(handle, codes, codeSize, baseAddress, 0, &insn);
    if (count) {
	    for (int i = 0; i < count; i++) {

		    printf("0x%llx :\t", insn[i].address);
		    print_string_hex(insn[i].bytes, insn[i].size);
		    printf("\t%s\t%s\n", insn[i].mnemonic, insn[i].op_str);
	    }
	    cs_free(insn, count);
    }
    else {
	    abort();
    }
    cs_close(&handle);
    return true;
}
/**
 * Demo entry point: disassembles a fixed 16-byte x86-64 sequence at base
 * address 0x1000 and then prints a greeting.
 *
 * @return 0 on success, 1 if printAsm reports failure.
 */
int main()
{
#define X86_CODE64 "\x90\xFF\x25\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90"
    // Copy the literal into a mutable byte array so no cast that strips
    // const from a string literal is needed for printAsm's unsigned char*.
    unsigned char code[] = X86_CODE64;
    // The array includes the literal's terminating NUL; exclude it.
    const size_t codeSize = sizeof(code) - 1;

    if (!printAsm(code, codeSize, 0x1000)) {
        return 1;
    }
    std::cout << "Hello World!\n";
    return 0;
}

It outputs:

0x1000 :        0x90    nop
0x1001 :        0xff 0x25 0x00 0x00 0x00 0x00   jmp     qword ptr [rip]
0x1007 :        0x00 0x00       add     byte ptr [rax], al
0x1009 :        0x00 0x00       add     byte ptr [rax], al
0x100b :        0x00 0x00       add     byte ptr [rax], al
0x100d :        0x00 0x00       add     byte ptr [rax], al
0x100f :        0x90    nop

The bytes FF 25 00 00 00 00 [ptr] should correspond to: jmp far xx
Why does Capstone decode them as shown above instead?

llvm-mc seems to agree if I am not mistaken:

echo "0x90,0xFF,0x25,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x90" | llvm-mc-19 --disassemble --arch=x86 --x86-asm-syntax=intel --show-encoding
	.text
	nop                                     # encoding: [0x90]
	jmpl	*0                              # encoding: [0xff,0x25,0x00,0x00,0x00,0x00]
	addb	%al, (%eax)                     # encoding: [0x00,0x00]
	addb	%al, (%eax)                     # encoding: [0x00,0x00]
	addb	%al, (%eax)                     # encoding: [0x00,0x00]
	addb	%al, (%eax)                     # encoding: [0x00,0x00]
	nop  

Can you please confirm. My x86 assembly knowledge is not great.

00010000 - FF25 00000000 89674523A1000000 - jmp A123456789
0001000E - FF15 02000000 EB08 89674523A1000000 - call A123456789
These are jmp far and call far; this is true in an x64 process.

This may not seem like a compiler standard, but it is indeed correct executable binary code.