diff --git a/VEX/hacked104/vg_from_ucode.c b/VEX/hacked104/vg_from_ucode.c index bfab64d6e..2bb256ad1 100644 --- a/VEX/hacked104/vg_from_ucode.c +++ b/VEX/hacked104/vg_from_ucode.c @@ -810,22 +810,26 @@ static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 ) static void emit_get_fpu_state ( void ) { +#if 0 Int off = 4 * VGOFF_(m_fpustate); newEmit(); emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */ emitL ( off ); if (dis) VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off ); +#endif } static void emit_put_fpu_state ( void ) { +#if 0 Int off = 4 * VGOFF_(m_fpustate); newEmit(); emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */ emitL ( off ); if (dis) VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off ); +#endif } static void emit_fpu_no_mem ( UChar first_byte, diff --git a/VEX/hacked104/vg_helpers.S b/VEX/hacked104/vg_helpers.S index 4a285f3ed..e9b3cc3e4 100644 --- a/VEX/hacked104/vg_helpers.S +++ b/VEX/hacked104/vg_helpers.S @@ -206,9 +206,10 @@ cpuid__99: VG_(helper_fstsw_AX): pushl %eax pushl %esi - movl VGOFF_(m_fpustate), %esi - frstor (%ebp, %esi, 4) - fstsw %ax +# movl VGOFF_(m_fpustate), %esi +# frstor (%ebp, %esi, 4) +# fstsw %ax + movw $0, %ax popl %esi movw %ax, 8(%esp) popl %eax diff --git a/VEX/hacked104/vg_include.h b/VEX/hacked104/vg_include.h index d108667da..0d904ea3f 100644 --- a/VEX/hacked104/vg_include.h +++ b/VEX/hacked104/vg_include.h @@ -554,10 +554,11 @@ extern void VG_(__libc_freeres_wrapper)( void ); which need to go here to avoid ugly circularities. ------------------------------------------------------------------ */ -/* How big is the saved FPU state? */ -#define VG_SIZE_OF_FPUSTATE 108 /* ... and in words ... 
*/ -#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4) +#define VG_SIZE_OF_FPUSTATE_W (8*2 + 1) + +#define VG_SIZE_OF_FPUSTATE (4 * VG_SIZE_OF_FPUSTATE_W) + /* --------------------------------------------------------------------- @@ -737,7 +738,9 @@ typedef UInt m_cc_dflag; UInt m_eip; - UInt m_fpu[VG_SIZE_OF_FPUSTATE_W]; + + ULong m_f0, m_f1, m_f2, m_f3, m_f4, m_f5, m_f6, m_f7; + UInt m_ftop; UInt sh_eax; UInt sh_ebx; @@ -1533,7 +1536,7 @@ extern void* VG_(client_realloc) ( ThreadState* tst, extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ + 1 /* %eflags */ + 1 /* %eip */ - + VG_SIZE_OF_FPUSTATE_W /* FPU state */ + + (108/4) /* real FPU state */ ]; /* Handy fns for doing the copy back and forth. */ @@ -1969,9 +1972,18 @@ extern Int VGOFF_(m_cc_src); extern Int VGOFF_(m_cc_dst); extern Int VGOFF_(m_cc_dflag); -extern Int VGOFF_(m_fpustate); extern Int VGOFF_(m_eip); +extern Int VGOFF_(m_f0); +extern Int VGOFF_(m_f1); +extern Int VGOFF_(m_f2); +extern Int VGOFF_(m_f3); +extern Int VGOFF_(m_f4); +extern Int VGOFF_(m_f5); +extern Int VGOFF_(m_f6); +extern Int VGOFF_(m_f7); +extern Int VGOFF_(m_ftop); + /* Reg-alloc spill area (VG_MAX_SPILLSLOTS words long). 
*/ extern Int VGOFF_(spillslots); diff --git a/VEX/hacked104/vg_main.c b/VEX/hacked104/vg_main.c index db5ce8c7d..977cd3862 100644 --- a/VEX/hacked104/vg_main.c +++ b/VEX/hacked104/vg_main.c @@ -55,8 +55,18 @@ Int VGOFF_(m_cc_src) = INVALID_OFFSET; Int VGOFF_(m_cc_dst) = INVALID_OFFSET; Int VGOFF_(m_cc_dflag) = INVALID_OFFSET; -Int VGOFF_(m_fpustate) = INVALID_OFFSET; Int VGOFF_(m_eip) = INVALID_OFFSET; + +Int VGOFF_(m_f0) = INVALID_OFFSET; +Int VGOFF_(m_f1) = INVALID_OFFSET; +Int VGOFF_(m_f2) = INVALID_OFFSET; +Int VGOFF_(m_f3) = INVALID_OFFSET; +Int VGOFF_(m_f4) = INVALID_OFFSET; +Int VGOFF_(m_f5) = INVALID_OFFSET; +Int VGOFF_(m_f6) = INVALID_OFFSET; +Int VGOFF_(m_f7) = INVALID_OFFSET; +Int VGOFF_(m_ftop) = INVALID_OFFSET; + Int VGOFF_(spillslots) = INVALID_OFFSET; Int VGOFF_(sh_eax) = INVALID_OFFSET; Int VGOFF_(sh_ecx) = INVALID_OFFSET; @@ -165,12 +175,24 @@ static void vg_init_baseBlock ( void ) /* 6 */ VGOFF_(m_esi) = alloc_BaB(1); /* 7 */ VGOFF_(m_edi) = alloc_BaB(1); - /* 8 */ VGOFF_(m_cc_op) = alloc_BaB(1); + /* 8 */ VGOFF_(m_cc_op) = alloc_BaB(1); /* 9 */ VGOFF_(m_cc_src) = alloc_BaB(1); /* 10 */ VGOFF_(m_cc_dst) = alloc_BaB(1); - /* 11 */ VGOFF_(m_cc_dflag) = alloc_BaB(1); - /* 12 */ VGOFF_(m_eip) = alloc_BaB(1); + /* 11 */ VGOFF_(m_cc_dflag)= alloc_BaB(1); + /* 12 */ VGOFF_(m_eip) = alloc_BaB(1); + + /* 13 */ VGOFF_(m_f0) = alloc_BaB(2); + /* 15 */ VGOFF_(m_f1) = alloc_BaB(2); + /* 17 */ VGOFF_(m_f2) = alloc_BaB(2); + /* 19 */ VGOFF_(m_f3) = alloc_BaB(2); + /* 21 */ VGOFF_(m_f4) = alloc_BaB(2); + /* 23 */ VGOFF_(m_f5) = alloc_BaB(2); + /* 25 */ VGOFF_(m_f6) = alloc_BaB(2); + /* 27 */ VGOFF_(m_f7) = alloc_BaB(2); + /* 29 */ VGOFF_(m_ftop) = alloc_BaB(1); + + /* stated offsets are wrong after here */ /* 13 */ VGOFF_(sh_eax) = alloc_BaB(1); /* 14 */ VGOFF_(sh_ecx) = alloc_BaB(1); /* 15 */ VGOFF_(sh_edx) = alloc_BaB(1); @@ -255,8 +277,6 @@ static void vg_init_baseBlock ( void ) /* I gave up counting at this point. 
Since they're way above the short-amode-boundary, there's no point. */ - VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W); - VGOFF_(helper_idiv_64_32) = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_64_32) ); VGOFF_(helper_div_64_32) @@ -999,7 +1019,7 @@ static void process_cmd_line_options ( void ) UInt VG_(m_state_static) [8 /* int regs, in Intel order */ + 1 /* %eflags */ + 1 /* %eip */ - + VG_SIZE_OF_FPUSTATE_W /* FPU state */ + + (108/4) /* real FPU state */ ]; void VG_(copy_baseBlock_to_m_state_static) ( void ) @@ -1030,15 +1050,15 @@ void VG_(copy_baseBlock_to_m_state_static) ( void ) VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_eip)]; - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + /* Hack */ + for (i = 0; i < (108/4); i++) VG_(m_state_static)[40/4 + i] - = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; + = 0; } void VG_(copy_m_state_static_to_baseBlock) ( void ) { - Int i; VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[ 0/4]; VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[ 4/4]; VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[ 8/4]; @@ -1055,9 +1075,20 @@ void VG_(copy_m_state_static_to_baseBlock) ( void ) VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[36/4]; - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(baseBlock)[VGOFF_(m_fpustate) + i] - = VG_(m_state_static)[40/4 + i]; + /* Make the FPU register stack appear to be empty. */ + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]) = 0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]) = 0; + /* stack grows down, towards lower numbered registers, and ftop is + decremented prior to use when pushing. Hence the initial value + should be zero, as the decrement then changes it to 7 so we end + up first writing %f7. 
*/ + VG_(baseBlock)[VGOFF_(m_ftop)] = 0; } diff --git a/VEX/hacked104/vg_scheduler.c b/VEX/hacked104/vg_scheduler.c index 593da48ac..4a0b42271 100644 --- a/VEX/hacked104/vg_scheduler.c +++ b/VEX/hacked104/vg_scheduler.c @@ -373,7 +373,6 @@ ThreadId VG_(get_current_tid) ( void ) __inline__ void VG_(load_thread_state) ( ThreadId tid ) { - Int i; vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID); VG_(baseBlock)[VGOFF_(m_eax)] = VG_(threads)[tid].m_eax; @@ -392,8 +391,15 @@ void VG_(load_thread_state) ( ThreadId tid ) VG_(baseBlock)[VGOFF_(m_eip)] = VG_(threads)[tid].m_eip; - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i]; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]) = VG_(threads)[tid].m_f0; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]) = VG_(threads)[tid].m_f1; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]) = VG_(threads)[tid].m_f2; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]) = VG_(threads)[tid].m_f3; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]) = VG_(threads)[tid].m_f4; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]) = VG_(threads)[tid].m_f5; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]) = VG_(threads)[tid].m_f6; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]) = VG_(threads)[tid].m_f7; + VG_(baseBlock)[VGOFF_(m_ftop)] = VG_(threads)[tid].m_ftop; VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax; VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx; @@ -418,8 +424,8 @@ void VG_(load_thread_state) ( ThreadId tid ) __inline__ void VG_(save_thread_state) ( ThreadId tid ) { - Int i; - const UInt junk = 0xDEADBEEF; + const UInt junk = 0xDEADBEEF; + const ULong junk64 = 0xDEADBEEFDEADBEEFLL; vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); @@ -439,8 +445,15 @@ void VG_(save_thread_state) ( ThreadId tid ) VG_(threads)[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; + 
VG_(threads)[tid].m_f0 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]); + VG_(threads)[tid].m_f1 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]); + VG_(threads)[tid].m_f2 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]); + VG_(threads)[tid].m_f3 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]); + VG_(threads)[tid].m_f4 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]); + VG_(threads)[tid].m_f5 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]); + VG_(threads)[tid].m_f6 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]); + VG_(threads)[tid].m_f7 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]); + VG_(threads)[tid].m_ftop = VG_(baseBlock)[VGOFF_(m_ftop)]; VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; @@ -467,8 +480,15 @@ void VG_(save_thread_state) ( ThreadId tid ) VG_(baseBlock)[VGOFF_(m_cc_dflag)] = junk; VG_(baseBlock)[VGOFF_(m_eip)] = junk; - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]) = junk64; + *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]) = junk64; + VG_(baseBlock)[VGOFF_(m_ftop)] = junk; vg_tid_currently_in_baseBlock = VG_INVALID_THREADID; } diff --git a/VEX/hacked104/vg_signals.c b/VEX/hacked104/vg_signals.c index dead95e11..3cfe3bff0 100644 --- a/VEX/hacked104/vg_signals.c +++ b/VEX/hacked104/vg_signals.c @@ -899,7 +899,6 @@ typedef /* Safely-saved version of sigNo, as described above. */ Int sigNo_private; /* Saved processor state. 
*/ - UInt fpustate[VG_SIZE_OF_FPUSTATE_W]; UInt eax; UInt ecx; UInt edx; @@ -913,6 +912,8 @@ typedef UInt cc_src; UInt cc_dst; UInt cc_dflag; + ULong f0, f1, f2, f3, f4, f5, f6, f7; + UInt ftop; /* Scheduler-private stuff: what was the thread's status prior to delivering this signal? */ ThreadStatus status; @@ -930,7 +931,6 @@ typedef static void vg_push_signal_frame ( ThreadId tid, int sigNo ) { - Int i; Addr esp, esp_top_of_frame; VgSigFrame* frame; ThreadState* tst; @@ -971,8 +971,15 @@ void vg_push_signal_frame ( ThreadId tid, int sigNo ) frame->puContext = (Addr)NULL; frame->magicPI = 0x31415927; - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - frame->fpustate[i] = tst->m_fpu[i]; + frame->f0 = tst->m_f0; + frame->f1 = tst->m_f1; + frame->f2 = tst->m_f2; + frame->f3 = tst->m_f3; + frame->f4 = tst->m_f4; + frame->f5 = tst->m_f5; + frame->f6 = tst->m_f6; + frame->f7 = tst->m_f7; + frame->ftop = tst->m_ftop; frame->eax = tst->m_eax; frame->ecx = tst->m_ecx; @@ -1022,7 +1029,7 @@ static Int vg_pop_signal_frame ( ThreadId tid ) { Addr esp; - Int sigNo, i; + Int sigNo; VgSigFrame* frame; ThreadState* tst; @@ -1042,8 +1049,15 @@ Int vg_pop_signal_frame ( ThreadId tid ) "vg_pop_signal_frame (thread %d): valid magic", tid); /* restore machine state */ - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - tst->m_fpu[i] = frame->fpustate[i]; + tst->m_f0 = frame->f0; + tst->m_f1 = frame->f1; + tst->m_f2 = frame->f2; + tst->m_f3 = frame->f3; + tst->m_f4 = frame->f4; + tst->m_f5 = frame->f5; + tst->m_f6 = frame->f6; + tst->m_f7 = frame->f7; + tst->m_ftop = frame->ftop; /* Mark the frame structure as nonaccessible. 
*/ if (VG_(clo_instrument)) diff --git a/VEX/hacked104/vg_translate.c b/VEX/hacked104/vg_translate.c index ba6da033c..b62d691d2 100644 --- a/VEX/hacked104/vg_translate.c +++ b/VEX/hacked104/vg_translate.c @@ -3121,8 +3121,8 @@ void VG_(translate) ( ThreadState* tst, UChar* final; Bool debugging_translation; - static Int v0thresh = 87000; - static Int v2thresh = 87000; + static Int v0thresh = 940; + static Int v2thresh = 940; TranslateResult tres; static Bool vex_init_done = False; diff --git a/VEX/priv/guest-x86/gdefs.h b/VEX/priv/guest-x86/gdefs.h index 808e9a314..4e29f4e63 100644 --- a/VEX/priv/guest-x86/gdefs.h +++ b/VEX/priv/guest-x86/gdefs.h @@ -130,8 +130,46 @@ enum { /* EIP */ #define OFFB_EIP (12*4) +/* FPU. For now, just simulate 8 64-bit registers and the reg-stack + top pointer, of which only the least significant three bits are + relevant. -#define SIZEOF_X86H_STATE OFFB_EIP + The model is: + F0 .. F7 are the 8 registers. ftop[2:0] contains the + index of the current 'stack top' -- pretty meaningless, but + still. + + When a value is pushed onto the stack, ftop is first replaced by + (ftop-1) & 7, and then F[ftop] is assigned the value. + + When a value is popped off the stack, the value is read from + F[ftop], and then ftop is replaced by (ftop+1) & 7. + + In general, a reference to a register ST(i) actually references + F[ (ftop+i) & 7 ]. + + There should be an array of 8 booleans corresponding to F0 .. F7, + indicating whether the corresponding F reg contains a value or not. + + A read of an F reg marked empty, for any reason, elicits a stack + underflow fault. + + A load from memory into an F reg marked full elicits a stack overflow + fault. This appears to be the only way a stack overflow fault can + happen. 
+*/ +#define OFFB_F0 (13*4) +#define OFFB_F1 (15*4) +#define OFFB_F2 (17*4) +#define OFFB_F3 (19*4) +#define OFFB_F4 (21*4) +#define OFFB_F5 (23*4) +#define OFFB_F6 (25*4) +#define OFFB_F7 (27*4) +#define OFFB_FTOP (29*4) + +/* Don't forget to keep this up to date. NOTE(review): this is the offset at which FTOP starts, so FTOP's own bytes are not counted -- confirm that is what users of this size expect. */ +#define SIZEOF_X86H_STATE OFFB_FTOP diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c index b76085805..43f904479 100644 --- a/VEX/priv/guest-x86/toIR.c +++ b/VEX/priv/guest-x86/toIR.c @@ -3076,6 +3076,189 @@ UInt dis_imul_I_E_G ( UChar sorb, } +/*------------------------------------------------------------*/ +/*--- x87 floating point insns. ---*/ +/*------------------------------------------------------------*/ + +/* Get/set the top-of-stack pointer (guest state offset OFFB_FTOP; read as Ity_I32). */ + +static IRExpr* get_ftop ( void ) +{ + return IRExpr_Get( OFFB_FTOP, Ity_I32 ); +} + +static IRStmt* put_ftop ( IRExpr* e ) +{ + return IRStmt_Put( OFFB_FTOP, e ); +} + +/* Given i (0 .. 7), generate an expression which is the offset in the guest + state of ST(i), considering the current value of FTOP: OFFB_F0 + 8 * ((FTOP + i) & 7), where 8 is the byte spacing of the F slots. */ + +static IRExpr* off_ST ( Int i ) +{ + vassert(i >= 0 && i <= 7); + return + binop(Iop_Add32, + binop(Iop_Mul32, + binop(Iop_And32, + binop(Iop_Add32, get_ftop(), mkU32(i)), + mkU32(7)), + mkU32(8)), + mkU32(OFFB_F0) + ); +} + +/* Given i, and some expression e, generate 'ST(i) = e'. The two limit arguments span the first byte of F0 to the last byte of F7. */ + +static IRStmt* put_ST ( Int i, IRExpr* value ) +{ + return + IRStmt_PutI( off_ST(i), value, OFFB_F0, OFFB_F7+8-1 ); +} + +/* Given i, generate an expression yielding 'ST(i)' as an Ity_F64, using the same F0 .. F7 byte limits as put_ST. */ + +static IRExpr* get_ST ( Int i ) +{ + return + IRExpr_GetI( off_ST(i), Ity_F64, OFFB_F0, OFFB_F7+8-1 ); +} + +/* Adjust FTOP downwards by one register. 
*/
+
+static IRStmt* do_push ( void )
+{
+   return
+      put_ftop(
+         binop(Iop_And32,
+               binop(Iop_Sub32, get_ftop(), mkU32(1)),
+               mkU32(7))
+      );
+}
+
+/* Disassemble one x87 insn.  Only 0xDD /0 (FLD m64) is handled so far; all else sets *decode_ok = False.  Returns delta. */
+static
+UInt dis_FPU ( Bool* decode_ok, UChar sorb, UInt delta )
+{
+   Int   len;
+   Char  dis_buf[32];
+
+   /* On entry, delta points at the second byte of the insn (the modrm
+      byte); the primary opcode (0xD8 .. 0xDF) is the byte before it. */
+   UChar first_opcode = getIByte(delta-1);
+   UChar modrm        = getIByte(delta+0);
+   UInt  opc_aux      = gregOfRM(modrm); /* bits 5,4,3; an opcode extension when modrm < 0xC0 */
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
+
+   if (first_opcode == 0xD8) {
+      goto decode_fail;
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xD9) {
+      goto decode_fail;
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xDA) {
+      goto decode_fail;
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xDB) {
+      goto decode_fail;
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xDC) {
+      goto decode_fail;
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xDD) {
+
+      if (modrm < 0xC0) {
+
+         /* bits 5,4,3 are an opcode extension, and the modRM also
+            specifies an address. */
+         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+         delta += len;
+
+         switch (opc_aux) {
+
+            case 0: /* FLD double-real */
+               DIP("fldD %s\n", dis_buf);
+               stmt( do_push() );
+               stmt( put_ST(0, IRExpr_LDle(Ity_F64, mkexpr(addr))) );
+               break;
+
+#if 0
+            case 2: /* FST double-real */
+               IFDB( if (dis) printf("\tfstD\t%s\n",t_addr); )
+               if (!fp_is_empty_tag(fp_get_tag_ST(0))) {
+                  vd_addr = fp_get_reg_ST(0);
+               } else {
+                  vd_addr = NAN;
+                  fp_set_stack_underflow();
+               }
+               setDMem(a_addr,vd_addr);
+               break;
+
+            case 3: /* FSTP double-real */
+               IFDB( if (dis) printf("\tfstpD\t%s\n",t_addr); )
+               if (!fp_is_empty_tag(fp_get_tag_ST(0))) {
+                  vd_addr = fp_pop();
+               } else {
+                  vd_addr = fp_pop(); /* then throw away result */
+                  vd_addr = NAN;
+                  fp_set_stack_underflow();
+               }
+               setDMem(a_addr,vd_addr);
+               break;
+#endif
+
+            default: /* opc_aux is initialised above, so this reports the real extension */
+               vex_printf("unhandled opc_aux = 0x%2x\n", opc_aux);
+               vex_printf("first_opcode == 0xDD");
+               goto decode_fail;
+         }
+      } else {
+         goto decode_fail;
+      }
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xDE) {
+      goto decode_fail;
+   }
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
+   else
+   if (first_opcode == 0xDF) {
+      goto decode_fail;
+   }
+
+   else
+      vpanic("dis_FPU(x86): invalid primary opcode");
+
+  decode_success:
+   *decode_ok = True;
+   return delta;
+
+  decode_fail:
+   *decode_ok = False;
+   return delta;
+}
+
+
 //-- /* Handle FPU insns which read/write memory.  On entry, eip points to
 //--    the second byte of the insn (the one following D8 .. DF). 
*/ //-- static @@ -6249,16 +6432,23 @@ static UInt disInstr ( UInt delta, Bool* isEnd ) //-- DIP("fwait\n"); //-- break; //-- -//-- case 0xD8: -//-- case 0xD9: -//-- case 0xDA: -//-- case 0xDB: -//-- case 0xDC: -//-- case 0xDD: -//-- case 0xDE: -//-- case 0xDF: -//-- eip = dis_fpu ( cb, sorb, opc, eip ); -//-- break; + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: { + UInt delta0 = delta; + Bool decode_OK = False; + delta = dis_FPU ( &decode_OK, sorb, delta ); + if (!decode_OK) { + delta = delta0; + goto decode_failure; + } + break; + } /* ------------------------ INC & DEC ------------------ */ diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c index fe0a6010e..0c2fe7271 100644 --- a/VEX/priv/host-x86/hdefs.c +++ b/VEX/priv/host-x86/hdefs.c @@ -35,7 +35,7 @@ void ppHRegX86 ( HReg reg ) return; case HRcFloat: r = hregNumber(reg); - vassert(r >= 0 && r < 6); + vassert(r >= 0 && r < 4); vex_printf("%%fake%d", r); return; case HRcVector: @@ -54,9 +54,14 @@ HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt, False); } HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt, False); } HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt, False); } +HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFloat, False); } +HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFloat, False); } +HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFloat, False); } +HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFloat, False); } + void getAllocableRegs_X86 ( Int* nregs, HReg** arr ) { - *nregs = 6; + *nregs = 10; *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); (*arr)[0] = hregX86_EAX(); (*arr)[1] = hregX86_EBX(); @@ -64,6 +69,10 @@ void getAllocableRegs_X86 ( Int* nregs, HReg** arr ) (*arr)[3] = hregX86_EDX(); (*arr)[4] = hregX86_ESI(); (*arr)[5] = hregX86_EDI(); + (*arr)[6] = hregX86_FAKE0(); + (*arr)[7] = hregX86_FAKE1(); + (*arr)[8] = hregX86_FAKE2(); + (*arr)[9] = hregX86_FAKE3(); } @@ -399,6 +408,18 @@ Char* 
showX86ShiftOp ( X86ShiftOp op ) { } } +Char* showX86FpOp ( X86FpOp op ) { + switch (op) { + case Xfp_Add: return "add"; + case Xfp_Sub: return "sub"; + case Xfp_Mul: return "mul"; + case Xfp_Div: return "div"; + case Xfp_Sqrt: return "sqrt"; + case Xfp_Negate: return "chs"; + default: vpanic("ppX86FpOp"); + } +} + X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) { X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); i->tag = Xin_Alu32R; @@ -506,7 +527,7 @@ X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, vassert(szSmall == 1 || szSmall == 2); return i; } -X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { +X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); i->tag = Xin_Store; i->Xin.Store.sz = sz; @@ -515,6 +536,42 @@ X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { vassert(sz == 1 || sz == 2); return i; } +X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_FpUnary; + i->Xin.FpUnary.op = op; + i->Xin.FpUnary.src = src; + i->Xin.FpUnary.dst = dst; + return i; +} +X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_FpBinary; + i->Xin.FpBinary.op = op; + i->Xin.FpBinary.srcL = srcL; + i->Xin.FpBinary.srcR = srcR; + i->Xin.FpBinary.dst = dst; + return i; +} +X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_FpLdSt; + i->Xin.FpLdSt.isLoad = isLoad; + i->Xin.FpLdSt.sz = sz; + i->Xin.FpLdSt.reg = reg; + i->Xin.FpLdSt.addr = addr; + vassert(sz == 4 || sz == 8); + return i; +} +X86Instr* X86Instr_FpI64 ( Bool toInt, HReg freg, HReg iregHi, HReg iregLo ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_FpI64; + i->Xin.FpI64.toInt = toInt; + i->Xin.FpI64.freg = freg; + i->Xin.FpI64.iregHi = iregHi; + 
i->Xin.FpI64.iregLo = iregLo; + return i; +} void ppX86Instr ( X86Instr* i ) { @@ -617,6 +674,19 @@ void ppX86Instr ( X86Instr* i ) { vex_printf(","); ppX86AMode(i->Xin.Store.dst); return; + case Xin_FpLdSt: + if (i->Xin.FpLdSt.isLoad) { + vex_printf("gld%c" , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); + ppX86AMode(i->Xin.FpLdSt.addr); + vex_printf(", "); + ppHRegX86(i->Xin.FpLdSt.reg); + } else { + vex_printf("gst%c" , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); + ppHRegX86(i->Xin.FpLdSt.reg); + vex_printf(", "); + ppX86AMode(i->Xin.FpLdSt.addr); + } + return; default: vpanic("ppX86Instr"); } diff --git a/VEX/priv/host-x86/hdefs.h b/VEX/priv/host-x86/hdefs.h index f2b6c2cdd..3723889b0 100644 --- a/VEX/priv/host-x86/hdefs.h +++ b/VEX/priv/host-x86/hdefs.h @@ -242,6 +242,17 @@ typedef extern Char* showX86ShiftOp ( X86ShiftOp ); +/* --------- */ +typedef + enum { + Xfp_Add, Xfp_Sub, Xfp_Mul, Xfp_Div, + Xfp_Sqrt, Xfp_Negate + } + X86FpOp; + +extern Char* showX86FpOp ( X86FpOp ); + + /* --------- */ typedef enum { @@ -258,11 +269,15 @@ typedef Xin_Goto, /* conditional/unconditional jmp to dst */ Xin_CMov32, /* conditional move */ Xin_LoadEX, /* mov{s,z}{b,w}l from mem to reg */ - Xin_Store /* store 16/8 bit value in memory */ + Xin_Store, /* store 16/8 bit value in memory */ + Xin_FpUnary, /* FP fake unary op */ + Xin_FpBinary, /* FP fake binary op */ + Xin_FpLdSt, /* FP fake load/store */ + Xin_FpI64 /* FP fake to/from 64-bit signed int */ } X86InstrTag; -/* Destinations are on the RIGHT (second operand). 
*/ +/* Destinations are on the RIGHT (second operand) */ typedef struct { @@ -348,25 +363,53 @@ typedef HReg src; X86AMode* dst; } Store; - } Xin; + /* X86 Floating point (fake 3-operand, "flat reg file" insns) */ + struct { + X86FpOp op; + HReg src; + HReg dst; + } FpUnary; + struct { + X86FpOp op; + HReg srcL; + HReg srcR; + HReg dst; + } FpBinary; + struct { + Bool isLoad; /* True: load reg from addr; False: store reg to addr */ + UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */ + HReg reg; + X86AMode* addr; + } FpLdSt; + struct { + Bool toInt; /* True: F64->I64; False: I64->F64 */ + HReg freg; + HReg iregHi; /* presumably the high 32 bits of the I64 -- TODO confirm */ + HReg iregLo; /* presumably the low 32 bits of the I64 -- TODO confirm */ + } FpI64; + } Xin; } X86Instr; -extern X86Instr* X86Instr_Alu32R ( X86AluOp, X86RMI*, HReg ); -extern X86Instr* X86Instr_Alu32M ( X86AluOp, X86RI*, X86AMode* ); -extern X86Instr* X86Instr_Unary32 ( X86UnaryOp op, X86RM* dst ); -extern X86Instr* X86Instr_Sh32 ( X86ShiftOp, UInt, X86RM* ); -extern X86Instr* X86Instr_Test32 ( X86RI* src, X86RM* dst ); -extern X86Instr* X86Instr_MulL ( Bool syned, X86ScalarSz, X86RM* ); -extern X86Instr* X86Instr_Div ( Bool syned, X86ScalarSz, X86RM* ); -extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst ); -extern X86Instr* X86Instr_Push ( X86RMI* ); -extern X86Instr* X86Instr_Call ( HReg ); -extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst ); -extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst ); -extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, - X86AMode* src, HReg dst ); -extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ); +extern X86Instr* X86Instr_Alu32R ( X86AluOp, X86RMI*, HReg ); +extern X86Instr* X86Instr_Alu32M ( X86AluOp, X86RI*, X86AMode* ); +extern X86Instr* X86Instr_Unary32 ( X86UnaryOp op, X86RM* dst ); +extern X86Instr* X86Instr_Sh32 ( X86ShiftOp, UInt, X86RM* ); +extern X86Instr* X86Instr_Test32 ( X86RI* src, X86RM* dst ); +extern X86Instr* X86Instr_MulL ( Bool syned, X86ScalarSz, X86RM* ); +extern X86Instr* X86Instr_Div ( Bool syned, 
X86ScalarSz, X86RM* ); +extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst ); +extern X86Instr* X86Instr_Push ( X86RMI* ); +extern X86Instr* X86Instr_Call ( HReg ); +extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst ); +extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst ); +extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, + X86AMode* src, HReg dst ); +extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ); +extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ); +extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ); +extern X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* ); +extern X86Instr* X86Instr_FpI64 ( Bool toInt, HReg freg, HReg iregHi, HReg iregLo ); extern void ppX86Instr ( X86Instr* ); diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c index 00179e4ba..c28d12ae1 100644 --- a/VEX/priv/ir/irdefs.c +++ b/VEX/priv/ir/irdefs.c @@ -26,6 +26,8 @@ void ppIRType ( IRType ty ) case Ity_I16: vex_printf( "I16"); break; case Ity_I32: vex_printf( "I32"); break; case Ity_I64: vex_printf( "I64"); break; + case Ity_F32: vex_printf( "F32"); break; + case Ity_F64: vex_printf( "F64"); break; default: vex_printf("ty = 0x%x\n", (Int)ty); vpanic("ppIRType"); } @@ -196,6 +198,12 @@ void ppIRStmt ( IRStmt* s ) vex_printf( "PUT(%d) = ", s->Ist.Put.offset); ppIRExpr(s->Ist.Put.expr); break; + case Ist_PutI: + vex_printf( "PUTI[%d,%d](", s->Ist.PutI.minoff, s->Ist.PutI.maxoff); + ppIRExpr(s->Ist.PutI.offset); + vex_printf( ") = " ); + ppIRExpr(s->Ist.PutI.expr); + break; case Ist_Tmp: ppIRTemp(s->Ist.Tmp.tmp); vex_printf( " = " ); @@ -320,6 +328,16 @@ IRExpr* IRExpr_Get ( Int off, IRType ty ) { e->Iex.Get.ty = ty; return e; } +IRExpr* IRExpr_GetI ( IRExpr* off, IRType ty, + UShort minoff, UShort maxoff ) { + IRExpr* e = LibVEX_Alloc(sizeof(IRExpr)); + e->tag = Iex_GetI; + e->Iex.GetI.offset = off; + e->Iex.GetI.ty = 
ty; + e->Iex.GetI.minoff = minoff; + e->Iex.GetI.maxoff = maxoff; + return e; +} IRExpr* IRExpr_Tmp ( IRTemp tmp ) { IRExpr* e = LibVEX_Alloc(sizeof(IRExpr)); e->tag = Iex_Tmp; @@ -382,6 +400,17 @@ IRStmt* IRStmt_Put ( Int off, IRExpr* value ) { s->Ist.Put.expr = value; return s; } +IRStmt* IRStmt_PutI ( IRExpr* off, IRExpr* value, + UShort minoff, UShort maxoff ) { + IRStmt* s = LibVEX_Alloc(sizeof(IRStmt)); + s->tag = Ist_PutI; + s->link = NULL; + s->Ist.PutI.offset = off; + s->Ist.PutI.expr = value; + s->Ist.PutI.minoff = minoff; + s->Ist.PutI.maxoff = maxoff; + return s; +} IRStmt* IRStmt_Tmp ( IRTemp tmp, IRExpr* expr ) { IRStmt* s = LibVEX_Alloc(sizeof(IRStmt)); s->tag = Ist_Tmp; @@ -702,6 +731,10 @@ void useBeforeDef_Stmt ( IRBB* bb, IRStmt* stmt, Int* def_counts ) case Ist_Put: useBeforeDef_Expr(bb,stmt,stmt->Ist.Put.expr,def_counts); break; + case Ist_PutI: + useBeforeDef_Expr(bb,stmt,stmt->Ist.PutI.offset,def_counts); + useBeforeDef_Expr(bb,stmt,stmt->Ist.PutI.expr,def_counts); + break; case Ist_Tmp: useBeforeDef_Expr(bb,stmt,stmt->Ist.Tmp.expr,def_counts); break; @@ -807,6 +840,14 @@ void tcStmt ( IRBB* bb, IRStmt* stmt, IRType gWordTy ) if (typeOfIRExpr(tyenv,stmt->Ist.Put.expr) == Ity_Bit) sanityCheckFail(bb,stmt,"IRStmt.Put.expr: cannot Put :: Ity_Bit"); break; + case Ist_PutI: + tcExpr( bb, stmt, stmt->Ist.PutI.expr, gWordTy ); + tcExpr( bb, stmt, stmt->Ist.PutI.offset, gWordTy ); + if (typeOfIRExpr(tyenv,stmt->Ist.PutI.expr) == Ity_Bit) + sanityCheckFail(bb,stmt,"IRStmt.PutI.expr: cannot PutI :: Ity_Bit"); + if (typeOfIRExpr(tyenv,stmt->Ist.PutI.offset) != Ity_I32) + sanityCheckFail(bb,stmt,"IRStmt.PutI.offset: not :: Ity_I32"); + break; case Ist_Tmp: tcExpr( bb, stmt, stmt->Ist.Tmp.expr, gWordTy ); if (lookupIRTypeEnv(tyenv, stmt->Ist.Tmp.tmp) diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index c769d2134..9dd701ede 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -21,7 +21,9 @@ typedef enum { Ity_INVALID=0x10FFF, 
Ity_Bit=0x11000, - Ity_I8, Ity_I16, Ity_I32, Ity_I64 } + Ity_I8, Ity_I16, Ity_I32, Ity_I64, + Ity_F32, Ity_F64 + } IRType; extern void ppIRType ( IRType ); @@ -113,7 +115,11 @@ typedef Iop_32HLto64, // :: (I32,I32) -> I64 /* 1-bit stuff */ Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */ - Iop_1Uto8 /* :: Ity_Bit -> Ity_I8, unsigned widen */ + Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */ + /* FP stuff */ + Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, + Iop_SqrtF64, + Iop_I64toF64, Iop_F64toI64 } IROp; @@ -123,17 +129,35 @@ extern void ppIROp ( IROp ); /* ------------------ Expressions ------------------ */ /* data Expr - = GET Int Int -- offset, size + = GET Int Type -- offset, type + | GETI Expr Type Int Int -- offset, type, minoff, maxoff | TMP Temp -- value of temporary | BINOP Op Expr Expr -- binary op | UNOP Op Expr -- unary op | LDle Type Expr -- load of the given type, Expr:: 32 or 64 | CONST Const -- 8/16/32/64-bit int constant + +Re GETI. It carries two ints, which give the lowest and highest +possible byte offsets that the GetI can possibly reference. +For example, if the type is Ity_I32, and the Expr may have +a value of M, M+4 or M+8, where M is a translation-time known +constant, then the low and high limits are M and M+11 respectively. + +PUTI carries similar limit values. + +These can be used by IR optimisers to establish aliasing/non-aliasing +between separate GETI and PUTI terms, which could be used to do +reordering of them, or suchlike things. Clearly it's critical to give +the correct limit values -- this is something that can't be +automatically checked (in general), and so the front-end writers must +be very careful to tell the truth, since not doing so could lead to +obscure IR optimisation bugs. */ + typedef enum { Iex_Binder, /* Used only in pattern matching. Not an expression. 
*/ - Iex_Get, Iex_Tmp, Iex_Binop, Iex_Unop, Iex_LDle, + Iex_Get, Iex_GetI, Iex_Tmp, Iex_Binop, Iex_Unop, Iex_LDle, Iex_Const, Iex_CCall, Iex_Mux0X } IRExprTag; @@ -148,6 +172,12 @@ typedef Int offset; IRType ty; } Get; + struct { + struct _IRExpr* offset; + IRType ty; + UShort minoff; + UShort maxoff; + } GetI; struct { IRTemp tmp; } Tmp; @@ -183,6 +213,8 @@ typedef extern IRExpr* IRExpr_Binder ( Int binder ); extern IRExpr* IRExpr_Get ( Int off, IRType ty ); +extern IRExpr* IRExpr_GetI ( IRExpr* off, IRType ty, + UShort minoff, UShort maxoff ); extern IRExpr* IRExpr_Tmp ( IRTemp tmp ); extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 ); extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg ); @@ -194,7 +226,8 @@ extern IRExpr* IRExpr_Mux0X ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX ); extern void ppIRExpr ( IRExpr* ); /* CCall info. The name is the C helper function; the backends - will look it up in a table of known helpers, to get the address. + will hand the name to the front ends to get the address of a + host-code helper function to be called. The args are a NULL-terminated array of arguments. The stated return IRType, and the implied argument types, must match that @@ -220,7 +253,7 @@ data Stmt -- Const is destination guest addr */ typedef - enum { Ist_Put, Ist_Tmp, Ist_STle, Ist_Exit } + enum { Ist_Put, Ist_PutI, Ist_Tmp, Ist_STle, Ist_Exit } IRStmtTag; typedef @@ -231,6 +264,12 @@ typedef Int offset; IRExpr* expr; } Put; + struct { + IRExpr* offset; + IRExpr* expr; + UShort minoff; + UShort maxoff; + } PutI; struct { IRTemp tmp; IRExpr* expr; @@ -249,6 +288,8 @@ typedef IRStmt; extern IRStmt* IRStmt_Put ( Int off, IRExpr* value ); +extern IRStmt* IRStmt_PutI ( IRExpr* off, IRExpr* value, + UShort minoff, UShort maxoff ); extern IRStmt* IRStmt_Tmp ( IRTemp tmp, IRExpr* expr ); extern IRStmt* IRStmt_STle ( IRExpr* addr, IRExpr* value ); extern IRStmt* IRStmt_Exit ( IRExpr* cond, IRConst* dst );