Lots of spadework for getting x86 floating point to work.

git-svn-id: svn://svn.valgrind.org/vex/trunk@165
This commit is contained in:
Julian Seward
2004-08-12 20:46:53 +00:00
parent 0046704677
commit b00968b577
13 changed files with 583 additions and 78 deletions

View File

@@ -810,22 +810,26 @@ static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 )
/* Would emit "frstor d32(%ebp)" using the m_fpustate offset.  The whole
   body is compiled out with #if 0, so a call currently emits nothing. */
static void emit_get_fpu_state ( void )
{
#if 0
   const Int fpustate_off = 4 * VGOFF_(m_fpustate);

   newEmit();
   /* frstor d32(%ebp) */
   emitB ( 0xDD );
   emitB ( 0xA5 );
   emitL ( fpustate_off );
   if (dis) {
      VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", fpustate_off );
   }
#endif
}
/* Would emit "fnsave d32(%ebp)" using the m_fpustate offset.  The whole
   body is compiled out with #if 0, so a call currently emits nothing. */
static void emit_put_fpu_state ( void )
{
#if 0
   const Int fpustate_off = 4 * VGOFF_(m_fpustate);

   newEmit();
   /* fnsave d32(%ebp) */
   emitB ( 0xDD );
   emitB ( 0xB5 );
   emitL ( fpustate_off );
   if (dis) {
      VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", fpustate_off );
   }
#endif
}
static void emit_fpu_no_mem ( UChar first_byte,

View File

@@ -206,9 +206,10 @@ cpuid__99:
VG_(helper_fstsw_AX):
pushl %eax
pushl %esi
movl VGOFF_(m_fpustate), %esi
frstor (%ebp, %esi, 4)
fstsw %ax
# movl VGOFF_(m_fpustate), %esi
# frstor (%ebp, %esi, 4)
# fstsw %ax
movw $0, %ax
popl %esi
movw %ax, 8(%esp)
popl %eax

View File

@@ -554,10 +554,11 @@ extern void VG_(__libc_freeres_wrapper)( void );
which need to go here to avoid ugly circularities.
------------------------------------------------------------------ */
/* How big is the saved FPU state? */
#define VG_SIZE_OF_FPUSTATE 108
/* ... and in words ... */
#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4)
#define VG_SIZE_OF_FPUSTATE_W (8*2 + 1)
#define VG_SIZE_OF_FPUSTATE (4 * VG_SIZE_OF_FPUSTATE_W)
/* ---------------------------------------------------------------------
@@ -737,7 +738,9 @@ typedef
UInt m_cc_dflag;
UInt m_eip;
UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
ULong m_f0, m_f1, m_f2, m_f3, m_f4, m_f5, m_f6, m_f7;
UInt m_ftop;
UInt sh_eax;
UInt sh_ebx;
@@ -1533,7 +1536,7 @@ extern void* VG_(client_realloc) ( ThreadState* tst,
extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */
+ 1 /* %eflags */
+ 1 /* %eip */
+ VG_SIZE_OF_FPUSTATE_W /* FPU state */
+ (108/4) /* real FPU state */
];
/* Handy fns for doing the copy back and forth. */
@@ -1969,9 +1972,18 @@ extern Int VGOFF_(m_cc_src);
extern Int VGOFF_(m_cc_dst);
extern Int VGOFF_(m_cc_dflag);
extern Int VGOFF_(m_fpustate);
extern Int VGOFF_(m_eip);
extern Int VGOFF_(m_f0);
extern Int VGOFF_(m_f1);
extern Int VGOFF_(m_f2);
extern Int VGOFF_(m_f3);
extern Int VGOFF_(m_f4);
extern Int VGOFF_(m_f5);
extern Int VGOFF_(m_f6);
extern Int VGOFF_(m_f7);
extern Int VGOFF_(m_ftop);
/* Reg-alloc spill area (VG_MAX_SPILLSLOTS words long). */
extern Int VGOFF_(spillslots);

View File

@@ -55,8 +55,18 @@ Int VGOFF_(m_cc_src) = INVALID_OFFSET;
Int VGOFF_(m_cc_dst) = INVALID_OFFSET;
Int VGOFF_(m_cc_dflag) = INVALID_OFFSET;
Int VGOFF_(m_fpustate) = INVALID_OFFSET;
Int VGOFF_(m_eip) = INVALID_OFFSET;
Int VGOFF_(m_f0) = INVALID_OFFSET;
Int VGOFF_(m_f1) = INVALID_OFFSET;
Int VGOFF_(m_f2) = INVALID_OFFSET;
Int VGOFF_(m_f3) = INVALID_OFFSET;
Int VGOFF_(m_f4) = INVALID_OFFSET;
Int VGOFF_(m_f5) = INVALID_OFFSET;
Int VGOFF_(m_f6) = INVALID_OFFSET;
Int VGOFF_(m_f7) = INVALID_OFFSET;
Int VGOFF_(m_ftop) = INVALID_OFFSET;
Int VGOFF_(spillslots) = INVALID_OFFSET;
Int VGOFF_(sh_eax) = INVALID_OFFSET;
Int VGOFF_(sh_ecx) = INVALID_OFFSET;
@@ -165,12 +175,24 @@ static void vg_init_baseBlock ( void )
/* 6 */ VGOFF_(m_esi) = alloc_BaB(1);
/* 7 */ VGOFF_(m_edi) = alloc_BaB(1);
/* 8 */ VGOFF_(m_cc_op) = alloc_BaB(1);
/* 8 */ VGOFF_(m_cc_op) = alloc_BaB(1);
/* 9 */ VGOFF_(m_cc_src) = alloc_BaB(1);
/* 10 */ VGOFF_(m_cc_dst) = alloc_BaB(1);
/* 11 */ VGOFF_(m_cc_dflag) = alloc_BaB(1);
/* 12 */ VGOFF_(m_eip) = alloc_BaB(1);
/* 11 */ VGOFF_(m_cc_dflag)= alloc_BaB(1);
/* 12 */ VGOFF_(m_eip) = alloc_BaB(1);
/* 13 */ VGOFF_(m_f0) = alloc_BaB(2);
/* 15 */ VGOFF_(m_f1) = alloc_BaB(2);
/* 17 */ VGOFF_(m_f2) = alloc_BaB(2);
/* 19 */ VGOFF_(m_f3) = alloc_BaB(2);
/* 21 */ VGOFF_(m_f4) = alloc_BaB(2);
/* 23 */ VGOFF_(m_f5) = alloc_BaB(2);
/* 25 */ VGOFF_(m_f6) = alloc_BaB(2);
/* 27 */ VGOFF_(m_f7) = alloc_BaB(2);
/* 29 */ VGOFF_(m_ftop) = alloc_BaB(1);
/* stated offsets are wrong after here */
/* 13 */ VGOFF_(sh_eax) = alloc_BaB(1);
/* 14 */ VGOFF_(sh_ecx) = alloc_BaB(1);
/* 15 */ VGOFF_(sh_edx) = alloc_BaB(1);
@@ -255,8 +277,6 @@ static void vg_init_baseBlock ( void )
/* I gave up counting at this point. Since they're way above the
short-amode-boundary, there's no point. */
VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
VGOFF_(helper_idiv_64_32)
= alloc_BaB_1_set( (Addr) & VG_(helper_idiv_64_32) );
VGOFF_(helper_div_64_32)
@@ -999,7 +1019,7 @@ static void process_cmd_line_options ( void )
UInt VG_(m_state_static) [8 /* int regs, in Intel order */
+ 1 /* %eflags */
+ 1 /* %eip */
+ VG_SIZE_OF_FPUSTATE_W /* FPU state */
+ (108/4) /* real FPU state */
];
void VG_(copy_baseBlock_to_m_state_static) ( void )
@@ -1030,15 +1050,15 @@ void VG_(copy_baseBlock_to_m_state_static) ( void )
VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_eip)];
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
/* Hack */
for (i = 0; i < (108/4); i++)
VG_(m_state_static)[40/4 + i]
= VG_(baseBlock)[VGOFF_(m_fpustate) + i];
= 0;
}
void VG_(copy_m_state_static_to_baseBlock) ( void )
{
Int i;
VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[ 0/4];
VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[ 4/4];
VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[ 8/4];
@@ -1055,9 +1075,20 @@ void VG_(copy_m_state_static_to_baseBlock) ( void )
VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[36/4];
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_fpustate) + i]
= VG_(m_state_static)[40/4 + i];
/* Make the FPU register stack appear to be empty. */
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]) = 0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]) = 0;
/* stack grows down, towards lower numbered registers, and ftop is
decremented prior to use when pushing. Hence the initial value
should be zero, as the decrement then changes it to 7 so we end
up first writing %f7. */
VG_(baseBlock)[VGOFF_(m_ftop)] = 0;
}

View File

@@ -373,7 +373,6 @@ ThreadId VG_(get_current_tid) ( void )
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
Int i;
vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
VG_(baseBlock)[VGOFF_(m_eax)] = VG_(threads)[tid].m_eax;
@@ -392,8 +391,15 @@ void VG_(load_thread_state) ( ThreadId tid )
VG_(baseBlock)[VGOFF_(m_eip)] = VG_(threads)[tid].m_eip;
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]) = VG_(threads)[tid].m_f0;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]) = VG_(threads)[tid].m_f1;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]) = VG_(threads)[tid].m_f2;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]) = VG_(threads)[tid].m_f3;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]) = VG_(threads)[tid].m_f4;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]) = VG_(threads)[tid].m_f5;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]) = VG_(threads)[tid].m_f6;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]) = VG_(threads)[tid].m_f7;
VG_(baseBlock)[VGOFF_(m_ftop)] = VG_(threads)[tid].m_ftop;
VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
@@ -418,8 +424,8 @@ void VG_(load_thread_state) ( ThreadId tid )
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
Int i;
const UInt junk = 0xDEADBEEF;
const UInt junk = 0xDEADBEEF;
const ULong junk64 = 0xDEADBEEFDEADBEEFLL;
vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
@@ -439,8 +445,15 @@ void VG_(save_thread_state) ( ThreadId tid )
VG_(threads)[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
VG_(threads)[tid].m_f0 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]);
VG_(threads)[tid].m_f1 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]);
VG_(threads)[tid].m_f2 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]);
VG_(threads)[tid].m_f3 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]);
VG_(threads)[tid].m_f4 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]);
VG_(threads)[tid].m_f5 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]);
VG_(threads)[tid].m_f6 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]);
VG_(threads)[tid].m_f7 = *(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]);
VG_(threads)[tid].m_ftop = VG_(baseBlock)[VGOFF_(m_ftop)];
VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
@@ -467,8 +480,15 @@ void VG_(save_thread_state) ( ThreadId tid )
VG_(baseBlock)[VGOFF_(m_cc_dflag)] = junk;
VG_(baseBlock)[VGOFF_(m_eip)] = junk;
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f0)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f1)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f2)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f3)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f4)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f5)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f6)]) = junk64;
*(ULong*)(&VG_(baseBlock)[VGOFF_(m_f7)]) = junk64;
VG_(baseBlock)[VGOFF_(m_ftop)] = junk;
vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}

View File

@@ -899,7 +899,6 @@ typedef
/* Safely-saved version of sigNo, as described above. */
Int sigNo_private;
/* Saved processor state. */
UInt fpustate[VG_SIZE_OF_FPUSTATE_W];
UInt eax;
UInt ecx;
UInt edx;
@@ -913,6 +912,8 @@ typedef
UInt cc_src;
UInt cc_dst;
UInt cc_dflag;
ULong f0, f1, f2, f3, f4, f5, f6, f7;
UInt ftop;
/* Scheduler-private stuff: what was the thread's status prior to
delivering this signal? */
ThreadStatus status;
@@ -930,7 +931,6 @@ typedef
static
void vg_push_signal_frame ( ThreadId tid, int sigNo )
{
Int i;
Addr esp, esp_top_of_frame;
VgSigFrame* frame;
ThreadState* tst;
@@ -971,8 +971,15 @@ void vg_push_signal_frame ( ThreadId tid, int sigNo )
frame->puContext = (Addr)NULL;
frame->magicPI = 0x31415927;
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
frame->fpustate[i] = tst->m_fpu[i];
frame->f0 = tst->m_f0;
frame->f1 = tst->m_f1;
frame->f2 = tst->m_f2;
frame->f3 = tst->m_f3;
frame->f4 = tst->m_f4;
frame->f5 = tst->m_f5;
frame->f6 = tst->m_f6;
frame->f7 = tst->m_f7;
frame->ftop = tst->m_ftop;
frame->eax = tst->m_eax;
frame->ecx = tst->m_ecx;
@@ -1022,7 +1029,7 @@ static
Int vg_pop_signal_frame ( ThreadId tid )
{
Addr esp;
Int sigNo, i;
Int sigNo;
VgSigFrame* frame;
ThreadState* tst;
@@ -1042,8 +1049,15 @@ Int vg_pop_signal_frame ( ThreadId tid )
"vg_pop_signal_frame (thread %d): valid magic", tid);
/* restore machine state */
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
tst->m_fpu[i] = frame->fpustate[i];
tst->m_f0 = frame->f0;
tst->m_f1 = frame->f1;
tst->m_f2 = frame->f2;
tst->m_f3 = frame->f3;
tst->m_f4 = frame->f4;
tst->m_f5 = frame->f5;
tst->m_f6 = frame->f6;
tst->m_f7 = frame->f7;
tst->m_ftop = frame->ftop;
/* Mark the frame structure as nonaccessible. */
if (VG_(clo_instrument))

View File

@@ -3121,8 +3121,8 @@ void VG_(translate) ( ThreadState* tst,
UChar* final;
Bool debugging_translation;
static Int v0thresh = 87000;
static Int v2thresh = 87000;
static Int v0thresh = 940;
static Int v2thresh = 940;
TranslateResult tres;
static Bool vex_init_done = False;

View File

@@ -130,8 +130,46 @@ enum {
/* EIP */
#define OFFB_EIP (12*4)
/* FPU. For now, just simulate 8 64-bit registers and the reg-stack
top pointer, of which only the least significant three bits are
relevant.
#define SIZEOF_X86H_STATE OFFB_EIP
The model is:
F0 .. F7 are the 8 registers. ftop[2:0] contains the
index of the current 'stack top' -- pretty meaningless, but
still.
When a value is pushed onto the stack, ftop is first replaced by
(ftop-1) & 7, and then F[ftop] is assigned the value.
When a value is popped off the stack, the value is read from
F[ftop], and then ftop is replaced by (ftop+1) & 7.
In general, a reference to a register ST(i) actually references
F[ (ftop+i) & 7 ].
There should be an array of 8 booleans corresponding to F0 .. F7,
indicating whether the corresponding F reg contains a value or not.
A read of an F reg marked empty, for any reason, elicits a stack
underflow fault.
A load from memory into an F reg marked full elicits a stack overflow
fault. This appears to be the only way a stack overflow fault can
happen.
*/
/* Byte offsets in the guest state of the eight 64-bit FP registers
   F0 .. F7.  Each register spans two 32-bit words, hence the step of
   2 in the word index. */
#define OFFB_F0 (13*4)
#define OFFB_F1 (15*4)
#define OFFB_F2 (17*4)
#define OFFB_F3 (19*4)
#define OFFB_F4 (21*4)
#define OFFB_F5 (23*4)
#define OFFB_F6 (25*4)
#define OFFB_F7 (27*4)
/* Byte offset of the 32-bit register-stack top index (ftop). */
#define OFFB_FTOP (29*4)
/* Don't forget to keep this up to date. */
/* NOTE(review): this is the offset of the last field (FTOP), not that
   offset plus the field's size -- confirm this is what users of
   SIZEOF_X86H_STATE expect. */
#define SIZEOF_X86H_STATE OFFB_FTOP

View File

@@ -3076,6 +3076,189 @@ UInt dis_imul_I_E_G ( UChar sorb,
}
/*------------------------------------------------------------*/
/*--- x87 floating point insns. ---*/
/*------------------------------------------------------------*/
/* Get/set the top-of-stack pointer. */
/* Build an expression that reads the 32-bit FTOP field of the guest
   state. */
static IRExpr* get_ftop ( void )
{
   IRExpr* ftop = IRExpr_Get( OFFB_FTOP, Ity_I32 );
   return ftop;
}
/* Build a statement that writes e to the FTOP field of the guest
   state. */
static IRStmt* put_ftop ( IRExpr* e )
{
   IRStmt* st = IRStmt_Put( OFFB_FTOP, e );
   return st;
}
/* Build an expression for the guest-state byte offset of ST(i), taking
   the current FTOP into account:
      OFFB_F0 + 8 * ((FTOP + i) & 7)
   i must be in the range 0 .. 7. */
static IRExpr* off_ST ( Int i )
{
   IRExpr* slot;     /* (FTOP + i) & 7 : index of the F register */
   IRExpr* scaled;   /* slot * 8       : byte distance from F0   */

   vassert(i >= 0 && i <= 7);

   slot   = binop(Iop_And32,
                  binop(Iop_Add32, get_ftop(), mkU32(i)),
                  mkU32(7));
   scaled = binop(Iop_Mul32, slot, mkU32(8));
   return binop(Iop_Add32, scaled, mkU32(OFFB_F0));
}
/* Build the statement 'ST(i) = value'.  It is an indexed Put whose
   declared byte range covers the whole F0 .. F7 block. */
static IRStmt* put_ST ( Int i, IRExpr* value )
{
   IRExpr* where = off_ST(i);
   return IRStmt_PutI( where, value, OFFB_F0, OFFB_F7+8-1 );
}
/* Build an expression yielding ST(i) as an F64.  It is an indexed Get
   whose declared byte range covers the whole F0 .. F7 block. */
static IRExpr* get_ST ( Int i )
{
   IRExpr* where = off_ST(i);
   return IRExpr_GetI( where, Ity_F64, OFFB_F0, OFFB_F7+8-1 );
}
/* Adjust FTOP downwards by one register. */
static IRStmt* do_push ( void )
{
return
put_ftop(
binop(Iop_And32,
binop(Iop_Sub32, get_ftop(), mkU32(1)),
mkU32(7))
);
}
/* Decode one x87 instruction whose primary opcode is 0xD8 .. 0xDF.

   decode_ok  out-parameter: set to True if the insn was decoded,
              False if it was not.
   sorb       handed unchanged to disAMode.
   delta      on entry, points at the second byte of the insn (the
              modrm byte); the primary opcode is read from delta-1.

   Returns the updated delta.  On failure the returned value may
   already have been advanced past the address mode, so it should not
   be relied on when *decode_ok is False.

   Only FLD double-real (0xDD /0) is handled so far; every other
   encoding reports failure.  Panics if the byte at delta-1 is not in
   0xD8 .. 0xDF. */
static
UInt dis_FPU ( Bool* decode_ok, UChar sorb, UInt delta )
{
   Int  len;
   Char dis_buf[32];
   UInt opc_aux;

   /* On entry, delta points at the second byte of the insn (the modrm
      byte).*/
   UChar first_opcode = getIByte(delta-1);
   UChar modrm = getIByte(delta+0);

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
   if (first_opcode == 0xD8) {
      goto decode_fail;
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xD9) {
      goto decode_fail;
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDA) {
      goto decode_fail;
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDB) {
      goto decode_fail;
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDC) {
      goto decode_fail;
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDD) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         /* Record the opcode extension before dispatching, so that the
            diagnostic in the default case prints a defined value
            (opc_aux was previously read without ever being set). */
         opc_aux = gregOfRM(modrm);

         switch (opc_aux) {

            case 0: /* FLD double-real */
               DIP("fldD %s\n", dis_buf);
               stmt( do_push() );
               stmt( put_ST(0, IRExpr_LDle(Ity_F64, mkexpr(addr))) );
               break;

#if 0
            case 2: /* FST double-real */
               IFDB( if (dis) printf("\tfstD\t%s\n",t_addr); )
               if (!fp_is_empty_tag(fp_get_tag_ST(0))) {
                  vd_addr = fp_get_reg_ST(0);
               } else {
                  vd_addr = NAN;
                  fp_set_stack_underflow();
               }
               setDMem(a_addr,vd_addr);
               break;

            case 3: /* FSTP double-real */
               IFDB( if (dis) printf("\tfstpD\t%s\n",t_addr); )
               if (!fp_is_empty_tag(fp_get_tag_ST(0))) {
                  vd_addr = fp_pop();
               } else {
                  vd_addr = fp_pop(); /* then throw away result */
                  vd_addr = NAN;
                  fp_set_stack_underflow();
               }
               setDMem(a_addr,vd_addr);
               break;
#endif

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", opc_aux);
               vex_printf("first_opcode == 0xDD");
               goto decode_fail;
         }

      } else {
         goto decode_fail;
      }
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDE) {
      goto decode_fail;
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDF) {
      goto decode_fail;
   }

   else
      vpanic("dis_FPU(x86): invalid primary opcode");

  decode_success:
   *decode_ok = True;
   return delta;

  decode_fail:
   *decode_ok = False;
   return delta;
}
//-- /* Handle FPU insns which read/write memory. On entry, eip points to
//-- the second byte of the insn (the one following D8 .. DF). */
//-- static
@@ -6249,16 +6432,23 @@ static UInt disInstr ( UInt delta, Bool* isEnd )
//-- DIP("fwait\n");
//-- break;
//--
//-- case 0xD8:
//-- case 0xD9:
//-- case 0xDA:
//-- case 0xDB:
//-- case 0xDC:
//-- case 0xDD:
//-- case 0xDE:
//-- case 0xDF:
//-- eip = dis_fpu ( cb, sorb, opc, eip );
//-- break;
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
case 0xDC:
case 0xDD:
case 0xDE:
case 0xDF: {
UInt delta0 = delta;
Bool decode_OK = False;
delta = dis_FPU ( &decode_OK, sorb, delta );
if (!decode_OK) {
delta = delta0;
goto decode_failure;
}
break;
}
/* ------------------------ INC & DEC ------------------ */

View File

@@ -35,7 +35,7 @@ void ppHRegX86 ( HReg reg )
return;
case HRcFloat:
r = hregNumber(reg);
vassert(r >= 0 && r < 6);
vassert(r >= 0 && r < 4);
vex_printf("%%fake%d", r);
return;
case HRcVector:
@@ -54,9 +54,14 @@ HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt, False); }
/* Register number 0 of class HRcFloat (ppHRegX86 prints this class as
   %fakeN). */
HReg hregX86_FAKE0 ( void )
{
   return mkHReg(0, HRcFloat, False);
}
/* Register number 1 of class HRcFloat (ppHRegX86 prints this class as
   %fakeN). */
HReg hregX86_FAKE1 ( void )
{
   return mkHReg(1, HRcFloat, False);
}
/* Register number 2 of class HRcFloat (ppHRegX86 prints this class as
   %fakeN). */
HReg hregX86_FAKE2 ( void )
{
   return mkHReg(2, HRcFloat, False);
}
/* Register number 3 of class HRcFloat (ppHRegX86 prints this class as
   %fakeN). */
HReg hregX86_FAKE3 ( void )
{
   return mkHReg(3, HRcFloat, False);
}
void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
*nregs = 6;
*nregs = 10;
*arr = LibVEX_Alloc(*nregs * sizeof(HReg));
(*arr)[0] = hregX86_EAX();
(*arr)[1] = hregX86_EBX();
@@ -64,6 +69,10 @@ void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
(*arr)[3] = hregX86_EDX();
(*arr)[4] = hregX86_ESI();
(*arr)[5] = hregX86_EDI();
(*arr)[6] = hregX86_FAKE0();
(*arr)[7] = hregX86_FAKE1();
(*arr)[8] = hregX86_FAKE2();
(*arr)[9] = hregX86_FAKE3();
}
@@ -399,6 +408,18 @@ Char* showX86ShiftOp ( X86ShiftOp op ) {
}
}
/* Return the short mnemonic string for an X86FpOp.  Panics if op is
   not one of the known operations. */
Char* showX86FpOp ( X86FpOp op ) {
   if (op == Xfp_Add)    return "add";
   if (op == Xfp_Sub)    return "sub";
   if (op == Xfp_Mul)    return "mul";
   if (op == Xfp_Div)    return "div";
   if (op == Xfp_Sqrt)   return "sqrt";
   if (op == Xfp_Negate) return "chs";
   vpanic("ppX86FpOp");
}
X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_Alu32R;
@@ -506,7 +527,7 @@ X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
vassert(szSmall == 1 || szSmall == 2);
return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_Store;
i->Xin.Store.sz = sz;
@@ -515,6 +536,42 @@ X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
vassert(sz == 1 || sz == 2);
return i;
}
/* Allocate an Xin_FpUnary instruction and record op, src and dst in
   it.  Returns the new instruction. */
X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* insn = LibVEX_Alloc(sizeof(*insn));

   insn->tag             = Xin_FpUnary;
   insn->Xin.FpUnary.dst = dst;
   insn->Xin.FpUnary.src = src;
   insn->Xin.FpUnary.op  = op;

   return insn;
}
/* Allocate an Xin_FpBinary instruction and record op, both source
   registers and dst in it.  Returns the new instruction. */
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* insn = LibVEX_Alloc(sizeof(*insn));

   insn->tag               = Xin_FpBinary;
   insn->Xin.FpBinary.dst  = dst;
   insn->Xin.FpBinary.srcR = srcR;
   insn->Xin.FpBinary.srcL = srcL;
   insn->Xin.FpBinary.op   = op;

   return insn;
}
/* Allocate an Xin_FpLdSt instruction: an FP load (isLoad True) or
   store (isLoad False) of reg at addr.  sz is asserted to be 4 or 8.
   Returns the new instruction. */
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* insn = LibVEX_Alloc(sizeof(*insn));

   insn->tag               = Xin_FpLdSt;
   insn->Xin.FpLdSt.addr   = addr;
   insn->Xin.FpLdSt.reg    = reg;
   insn->Xin.FpLdSt.sz     = sz;
   insn->Xin.FpLdSt.isLoad = isLoad;

   /* Checked after the fields are filled in, as before. */
   vassert(sz == 4 || sz == 8);
   return insn;
}
/* Allocate an Xin_FpI64 instruction and record the conversion
   direction (toInt), the FP register and the hi/lo integer register
   pair in it.  Returns the new instruction. */
X86Instr* X86Instr_FpI64 ( Bool toInt, HReg freg, HReg iregHi, HReg iregLo ) {
   X86Instr* insn = LibVEX_Alloc(sizeof(*insn));

   insn->tag              = Xin_FpI64;
   insn->Xin.FpI64.iregLo = iregLo;
   insn->Xin.FpI64.iregHi = iregHi;
   insn->Xin.FpI64.freg   = freg;
   insn->Xin.FpI64.toInt  = toInt;

   return insn;
}
void ppX86Instr ( X86Instr* i ) {
@@ -617,6 +674,19 @@ void ppX86Instr ( X86Instr* i ) {
vex_printf(",");
ppX86AMode(i->Xin.Store.dst);
return;
case Xin_FpLdSt:
if (i->Xin.FpLdSt.isLoad) {
vex_printf("gld%c" , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
ppX86AMode(i->Xin.FpLdSt.addr);
vex_printf(", ");
ppHRegX86(i->Xin.FpLdSt.reg);
} else {
vex_printf("gst%c" , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
ppHRegX86(i->Xin.FpLdSt.reg);
vex_printf(", ");
ppX86AMode(i->Xin.FpLdSt.addr);
}
return;
default:
vpanic("ppX86Instr");
}

View File

@@ -242,6 +242,17 @@ typedef
extern Char* showX86ShiftOp ( X86ShiftOp );
/* --------- */
/* Operations carried by the fake FP instructions.  The trailing
   comments give the string showX86FpOp returns for each. */
typedef
   enum {
      Xfp_Add,      /* "add"  */
      Xfp_Sub,      /* "sub"  */
      Xfp_Mul,      /* "mul"  */
      Xfp_Div,      /* "div"  */
      Xfp_Sqrt,     /* "sqrt" */
      Xfp_Negate    /* "chs"  */
   }
   X86FpOp;
extern Char* showX86FpOp ( X86FpOp );
/* --------- */
typedef
enum {
@@ -258,11 +269,15 @@ typedef
Xin_Goto, /* conditional/unconditional jmp to dst */
Xin_CMov32, /* conditional move */
Xin_LoadEX, /* mov{s,z}{b,w}l from mem to reg */
Xin_Store /* store 16/8 bit value in memory */
Xin_Store, /* store 16/8 bit value in memory */
Xin_FpUnary, /* FP fake unary op */
Xin_FpBinary, /* FP fake binary op */
Xin_FpLdSt, /* FP fake load/store */
Xin_FpI64 /* FP fake to/from 64-bit signed int */
}
X86InstrTag;
/* Destinations are on the RIGHT (second operand). */
/* Destinations are on the RIGHT (second operand) */
typedef
struct {
@@ -348,25 +363,53 @@ typedef
HReg src;
X86AMode* dst;
} Store;
} Xin;
/* X86 Floating point (fake 3-operand, "flat reg file" insns) */
/* Xin_FpUnary: operation, source register, destination register. */
struct {
X86FpOp op;
HReg src;
HReg dst;
} FpUnary;
/* Xin_FpBinary: operation, left and right source registers,
   destination register. */
struct {
X86FpOp op;
HReg srcL;
HReg srcR;
HReg dst;
} FpBinary;
/* Xin_FpLdSt: FP load (isLoad True) or store (isLoad False) between
   reg and the memory location addr. */
struct {
Bool isLoad;
UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */
HReg reg;
X86AMode* addr;
} FpLdSt;
/* Xin_FpI64: conversion between an FP register and a 64-bit signed
   integer held in the register pair iregHi:iregLo. */
struct {
Bool toInt; /* True: F64->I64; False: I64->F64 */
HReg freg;
HReg iregHi;
HReg iregLo;
} FpI64;
} Xin;
}
X86Instr;
extern X86Instr* X86Instr_Alu32R ( X86AluOp, X86RMI*, HReg );
extern X86Instr* X86Instr_Alu32M ( X86AluOp, X86RI*, X86AMode* );
extern X86Instr* X86Instr_Unary32 ( X86UnaryOp op, X86RM* dst );
extern X86Instr* X86Instr_Sh32 ( X86ShiftOp, UInt, X86RM* );
extern X86Instr* X86Instr_Test32 ( X86RI* src, X86RM* dst );
extern X86Instr* X86Instr_MulL ( Bool syned, X86ScalarSz, X86RM* );
extern X86Instr* X86Instr_Div ( Bool syned, X86ScalarSz, X86RM* );
extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst );
extern X86Instr* X86Instr_Push ( X86RMI* );
extern X86Instr* X86Instr_Call ( HReg );
extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst );
extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst );
extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
X86AMode* src, HReg dst );
extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst );
extern X86Instr* X86Instr_Alu32R ( X86AluOp, X86RMI*, HReg );
extern X86Instr* X86Instr_Alu32M ( X86AluOp, X86RI*, X86AMode* );
extern X86Instr* X86Instr_Unary32 ( X86UnaryOp op, X86RM* dst );
extern X86Instr* X86Instr_Sh32 ( X86ShiftOp, UInt, X86RM* );
extern X86Instr* X86Instr_Test32 ( X86RI* src, X86RM* dst );
extern X86Instr* X86Instr_MulL ( Bool syned, X86ScalarSz, X86RM* );
extern X86Instr* X86Instr_Div ( Bool syned, X86ScalarSz, X86RM* );
extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst );
extern X86Instr* X86Instr_Push ( X86RMI* );
extern X86Instr* X86Instr_Call ( HReg );
extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst );
extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst );
extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
X86AMode* src, HReg dst );
extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst );
extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst );
extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst );
extern X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* );
extern X86Instr* X86Instr_FpI64 ( Bool toInt, HReg freg, HReg iregHi, HReg iregLo );
extern void ppX86Instr ( X86Instr* );

View File

@@ -26,6 +26,8 @@ void ppIRType ( IRType ty )
case Ity_I16: vex_printf( "I16"); break;
case Ity_I32: vex_printf( "I32"); break;
case Ity_I64: vex_printf( "I64"); break;
case Ity_F32: vex_printf( "F32"); break;
case Ity_F64: vex_printf( "F64"); break;
default: vex_printf("ty = 0x%x\n", (Int)ty);
vpanic("ppIRType");
}
@@ -196,6 +198,12 @@ void ppIRStmt ( IRStmt* s )
vex_printf( "PUT(%d) = ", s->Ist.Put.offset);
ppIRExpr(s->Ist.Put.expr);
break;
case Ist_PutI:
vex_printf( "PUTI[%d,%d](", s->Ist.PutI.minoff, s->Ist.PutI.maxoff);
ppIRExpr(s->Ist.PutI.offset);
vex_printf( ") = " );
ppIRExpr(s->Ist.PutI.expr);
break;
case Ist_Tmp:
ppIRTemp(s->Ist.Tmp.tmp);
vex_printf( " = " );
@@ -320,6 +328,16 @@ IRExpr* IRExpr_Get ( Int off, IRType ty ) {
e->Iex.Get.ty = ty;
return e;
}
/* Allocate an Iex_GetI expression: a read of type ty at the guest
   state offset computed by off.  minoff and maxoff are stored
   unchanged as the declared offset limits.  Returns the new
   expression. */
IRExpr* IRExpr_GetI ( IRExpr* off, IRType ty,
                      UShort minoff, UShort maxoff ) {
   IRExpr* getI = LibVEX_Alloc(sizeof(*getI));

   getI->tag             = Iex_GetI;
   getI->Iex.GetI.maxoff = maxoff;
   getI->Iex.GetI.minoff = minoff;
   getI->Iex.GetI.ty     = ty;
   getI->Iex.GetI.offset = off;

   return getI;
}
IRExpr* IRExpr_Tmp ( IRTemp tmp ) {
IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
e->tag = Iex_Tmp;
@@ -382,6 +400,17 @@ IRStmt* IRStmt_Put ( Int off, IRExpr* value ) {
s->Ist.Put.expr = value;
return s;
}
/* Allocate an Ist_PutI statement: a write of value at the guest state
   offset computed by off.  minoff and maxoff are stored unchanged as
   the declared offset limits.  The new statement's link is NULL.
   Returns the new statement. */
IRStmt* IRStmt_PutI ( IRExpr* off, IRExpr* value,
                      UShort minoff, UShort maxoff ) {
   IRStmt* putI = LibVEX_Alloc(sizeof(*putI));

   putI->tag             = Ist_PutI;
   putI->link            = NULL;
   putI->Ist.PutI.maxoff = maxoff;
   putI->Ist.PutI.minoff = minoff;
   putI->Ist.PutI.expr   = value;
   putI->Ist.PutI.offset = off;

   return putI;
}
IRStmt* IRStmt_Tmp ( IRTemp tmp, IRExpr* expr ) {
IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
s->tag = Ist_Tmp;
@@ -702,6 +731,10 @@ void useBeforeDef_Stmt ( IRBB* bb, IRStmt* stmt, Int* def_counts )
case Ist_Put:
useBeforeDef_Expr(bb,stmt,stmt->Ist.Put.expr,def_counts);
break;
case Ist_PutI:
useBeforeDef_Expr(bb,stmt,stmt->Ist.PutI.offset,def_counts);
useBeforeDef_Expr(bb,stmt,stmt->Ist.PutI.expr,def_counts);
break;
case Ist_Tmp:
useBeforeDef_Expr(bb,stmt,stmt->Ist.Tmp.expr,def_counts);
break;
@@ -807,6 +840,14 @@ void tcStmt ( IRBB* bb, IRStmt* stmt, IRType gWordTy )
if (typeOfIRExpr(tyenv,stmt->Ist.Put.expr) == Ity_Bit)
sanityCheckFail(bb,stmt,"IRStmt.Put.expr: cannot Put :: Ity_Bit");
break;
case Ist_PutI:
tcExpr( bb, stmt, stmt->Ist.PutI.expr, gWordTy );
tcExpr( bb, stmt, stmt->Ist.PutI.offset, gWordTy );
if (typeOfIRExpr(tyenv,stmt->Ist.PutI.expr) == Ity_Bit)
sanityCheckFail(bb,stmt,"IRStmt.PutI.expr: cannot PutI :: Ity_Bit");
if (typeOfIRExpr(tyenv,stmt->Ist.PutI.offset) != Ity_I32)
sanityCheckFail(bb,stmt,"IRStmt.PutI.offset: not :: Ity_I32");
break;
case Ist_Tmp:
tcExpr( bb, stmt, stmt->Ist.Tmp.expr, gWordTy );
if (lookupIRTypeEnv(tyenv, stmt->Ist.Tmp.tmp)

View File

@@ -21,7 +21,9 @@
typedef
enum { Ity_INVALID=0x10FFF,
Ity_Bit=0x11000,
Ity_I8, Ity_I16, Ity_I32, Ity_I64 }
Ity_I8, Ity_I16, Ity_I32, Ity_I64,
Ity_F32, Ity_F64
}
IRType;
extern void ppIRType ( IRType );
@@ -113,7 +115,11 @@ typedef
Iop_32HLto64, // :: (I32,I32) -> I64
/* 1-bit stuff */
Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
Iop_1Uto8 /* :: Ity_Bit -> Ity_I8, unsigned widen */
Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */
/* FP stuff */
Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
Iop_SqrtF64,
Iop_I64toF64, Iop_F64toI64
}
IROp;
@@ -123,17 +129,35 @@ extern void ppIROp ( IROp );
/* ------------------ Expressions ------------------ */
/*
data Expr
= GET Int Int -- offset, size
= GET Int Type -- offset, size
| GETI Expr Type Int Int -- offset, size, minoff, maxoff
| TMP Temp -- value of temporary
| BINOP Op Expr Expr -- binary op
| UNOP Op Expr -- unary op
| LDle Type Expr -- load of the given type, Expr:: 32 or 64
| CONST Const -- 8/16/32/64-bit int constant
Re GETI. It carries two ints, which give the lowest and highest
possible byte offsets that the GetI can possibly reference.
For example, if the type is Ity_I32, and the Expr may have
a value of M, M+4 or M+8, where M is a translation-time known
constant, then the low and high limits are M and M+11 respectively.
PUTI carries similar limit values.
These can be used by IR optimisers to establish aliasing/non-aliasing
between separate GETI and PUTI terms, which could be used to do
reordering of them, or suchlike things. Clearly it's critical to give
the correct limit values -- this is something that can't be
automatically checked (in general), and so the front-end writers must
be very careful to tell the truth, since not doing so could lead to
obscure IR optimisation bugs.
*/
typedef
enum { Iex_Binder, /* Used only in pattern matching.
Not an expression. */
Iex_Get, Iex_Tmp, Iex_Binop, Iex_Unop, Iex_LDle,
Iex_Get, Iex_GetI, Iex_Tmp, Iex_Binop, Iex_Unop, Iex_LDle,
Iex_Const, Iex_CCall, Iex_Mux0X }
IRExprTag;
@@ -148,6 +172,12 @@ typedef
Int offset;
IRType ty;
} Get;
/* Iex_GetI: read from the guest state at an offset given by an
   expression rather than a constant. */
struct {
struct _IRExpr* offset; /* expression giving the offset to read at */
IRType ty; /* type of the value read */
UShort minoff; /* lowest byte offset the read can possibly reference */
UShort maxoff; /* highest byte offset the read can possibly reference */
} GetI;
struct {
IRTemp tmp;
} Tmp;
@@ -183,6 +213,8 @@ typedef
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI ( IRExpr* off, IRType ty,
UShort minoff, UShort maxoff );
extern IRExpr* IRExpr_Tmp ( IRTemp tmp );
extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg );
@@ -194,7 +226,8 @@ extern IRExpr* IRExpr_Mux0X ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
extern void ppIRExpr ( IRExpr* );
/* CCall info. The name is the C helper function; the backends
will look it up in a table of known helpers, to get the address.
will hand the name to the front ends to get the address of a
host-code helper function to be called.
The args are a NULL-terminated array of arguments. The stated
return IRType, and the implied argument types, must match that
@@ -220,7 +253,7 @@ data Stmt
-- Const is destination guest addr
*/
typedef
enum { Ist_Put, Ist_Tmp, Ist_STle, Ist_Exit }
enum { Ist_Put, Ist_PutI, Ist_Tmp, Ist_STle, Ist_Exit }
IRStmtTag;
typedef
@@ -231,6 +264,12 @@ typedef
Int offset;
IRExpr* expr;
} Put;
/* Ist_PutI: write to the guest state at an offset given by an
   expression rather than a constant. */
struct {
IRExpr* offset; /* expression giving the offset to write at */
IRExpr* expr; /* value to write */
UShort minoff; /* lowest byte offset the write can possibly reference */
UShort maxoff; /* highest byte offset the write can possibly reference */
} PutI;
struct {
IRTemp tmp;
IRExpr* expr;
@@ -249,6 +288,8 @@ typedef
IRStmt;
extern IRStmt* IRStmt_Put ( Int off, IRExpr* value );
extern IRStmt* IRStmt_PutI ( IRExpr* off, IRExpr* value,
UShort minoff, UShort maxoff );
extern IRStmt* IRStmt_Tmp ( IRTemp tmp, IRExpr* expr );
extern IRStmt* IRStmt_STle ( IRExpr* addr, IRExpr* value );
extern IRStmt* IRStmt_Exit ( IRExpr* cond, IRConst* dst );