mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-06 19:54:18 +00:00
Extend the state components in VG_(m_state_static) and VG_(baseBlock)
to include the SSE/SSE2 architectural state. Automagically detect at startup, in vg_startup.S, whether or not this is an SSE-enabled CPU and act accordingly. All subsequent FPU/SSE state transfers between the simulated and real machine are then done either with fsave/frstor (as before) or fxsave/fxrstor (the SSE equivalents). Fragile and fiddly: (1) the SSE state needs to be stored on a 16-byte boundary, and (2) certain bits in the saved MXCSR reg in a state written by fxsave need to be anded out before we can safely restore using fxrstor. It does appear to work. I'd appreciate people trying it out on various CPUs to establish whether the SSE / not-SSE check works right, and/or whether anything else is broken. Unfortunately, this makes some programs run significantly slower. I don't know why. Perhaps it is due to copying around more processor state than there was before (SSE state is 512 bytes, FPU state was only 108). I will look into this. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1574
This commit is contained in:
parent
2d4e9e253f
commit
23ae8adf30
@ -46,15 +46,36 @@ VG_(do_syscall):
|
||||
|
||||
# and save the real FPU state too
|
||||
fwait
|
||||
fnsave VG_(real_fpu_state_saved_over_syscall)
|
||||
frstor VG_(real_fpu_state_saved_over_syscall)
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq1nosse
|
||||
fxsave VG_(real_sse_state_saved_over_syscall)
|
||||
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
|
||||
fxrstor VG_(real_sse_state_saved_over_syscall)
|
||||
jmp qq1merge
|
||||
qq1nosse:
|
||||
fnsave VG_(real_sse_state_saved_over_syscall)
|
||||
frstor VG_(real_sse_state_saved_over_syscall)
|
||||
qq1merge:
|
||||
popfl
|
||||
|
||||
# remember what the simulators stack pointer is
|
||||
movl %esp, VG_(esp_saved_over_syscall)
|
||||
|
||||
# Now copy the simulated machines state into the real one
|
||||
# esp still refers to the simulators stack
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq2nosse
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp qq2merge
|
||||
qq2nosse:
|
||||
frstor VG_(m_state_static)+64
|
||||
qq2merge:
|
||||
popfl
|
||||
|
||||
movl VG_(m_state_static)+56, %eax
|
||||
pushl %eax
|
||||
popfl
|
||||
@ -111,11 +132,32 @@ VG_(do_syscall):
|
||||
popl %eax
|
||||
movl %eax, VG_(m_state_static)+56
|
||||
fwait
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz pp2nosse
|
||||
fxsave VG_(m_state_static)+64
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp pp2merge
|
||||
pp2nosse:
|
||||
fnsave VG_(m_state_static)+64
|
||||
frstor VG_(m_state_static)+64
|
||||
|
||||
pp2merge:
|
||||
popfl
|
||||
|
||||
# Restore the state of the simulator
|
||||
frstor VG_(real_fpu_state_saved_over_syscall)
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz pp1nosse
|
||||
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
|
||||
fxrstor VG_(real_sse_state_saved_over_syscall)
|
||||
jmp pp1merge
|
||||
pp1nosse:
|
||||
frstor VG_(real_sse_state_saved_over_syscall)
|
||||
pp1merge:
|
||||
popfl
|
||||
|
||||
popal
|
||||
|
||||
ret
|
||||
|
||||
@ -1334,24 +1334,44 @@ static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 )
|
||||
/*--- FPU instruction emitters ---*/
|
||||
/*----------------------------------------------------*/
|
||||
|
||||
static void emit_get_fpu_state ( void )
|
||||
static void emit_get_sse_state ( void )
|
||||
{
|
||||
Int off = 4 * VGOFF_(m_fpustate);
|
||||
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
|
||||
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
|
||||
VG_(emitL) ( off );
|
||||
if (dis)
|
||||
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
|
||||
Int off = 4 * VGOFF_(m_ssestate);
|
||||
if (VG_(have_ssestate)) {
|
||||
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
|
||||
VG_(emitB) ( 0x0F );
|
||||
VG_(emitB) ( 0xAE ); VG_(emitB) ( 0x8D ); /* fxrstor d32(%ebp) */
|
||||
VG_(emitL) ( off );
|
||||
if (dis)
|
||||
VG_(printf)("\n\t\tfxrstor\t%d(%%ebp)\n", off );
|
||||
} else {
|
||||
/* Not a SSE-capable CPU. Just do frstor. */
|
||||
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
|
||||
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
|
||||
VG_(emitL) ( off );
|
||||
if (dis)
|
||||
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_put_fpu_state ( void )
|
||||
static void emit_put_sse_state ( void )
|
||||
{
|
||||
Int off = 4 * VGOFF_(m_fpustate);
|
||||
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
|
||||
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
|
||||
VG_(emitL) ( off );
|
||||
if (dis)
|
||||
VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
|
||||
Int off = 4 * VGOFF_(m_ssestate);
|
||||
if (VG_(have_ssestate)) {
|
||||
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
|
||||
VG_(emitB) ( 0x0F );
|
||||
VG_(emitB) ( 0xAE ); VG_(emitB) ( 0x85 ); /* fxsave d32(%ebp) */
|
||||
VG_(emitL) ( off );
|
||||
if (dis)
|
||||
VG_(printf)("\n\t\tfxsave\t%d(%%ebp)\n", off );
|
||||
} else {
|
||||
/* Not a SSE-capable CPU. Just do fnsave. */
|
||||
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
|
||||
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
|
||||
VG_(emitL) ( off );
|
||||
if (dis)
|
||||
VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_fpu_no_mem ( FlagSet uses_sflags,
|
||||
@ -3024,18 +3044,19 @@ Bool anyFlagUse ( UInstr* u )
|
||||
}
|
||||
|
||||
|
||||
/* *fplive==True indicates that the simulated machine's FPU state is in
|
||||
the real FPU. If so we need to be very careful not to trash it.
|
||||
If FPU state is live and we deem it necessary to copy it back to
|
||||
the simulated machine's FPU state, we do so. The final state of
|
||||
fpliveness is returned. In short we _must_ do put_fpu_state if
|
||||
/* *fplive==True indicates that the simulated machine's FPU/SSE state is in
|
||||
the real machine's cpu. If so we need to be very careful not to trash it.
|
||||
If FPU/SSE state is live and we deem it necessary to copy it back to
|
||||
the simulated machine's FPU/SSE state, we do so. The final state of
|
||||
fpliveness is returned. In short we _must_ do put_sse_state if
|
||||
there is any chance at all that the code generated for a UInstr
|
||||
will change the real FPU state.
|
||||
will change the real FPU/MMX/SSE/SSE2 state.
|
||||
*/
|
||||
static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
RRegSet regs_live_before,
|
||||
/* Running state, which we update. */
|
||||
Bool* fplive, /* True<==>FPU state in real FPU */
|
||||
Bool* sselive, /* True<==>FPU/SSE
|
||||
state in real FPU */
|
||||
Addr* orig_eip, /* previous curr_eip, or zero */
|
||||
Addr* curr_eip ) /* current eip */
|
||||
{
|
||||
@ -3248,9 +3269,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag2 == RealReg);
|
||||
vg_assert(u->size == 0);
|
||||
|
||||
if (*fplive) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (*sselive) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
|
||||
VG_(synth_ccall) ( (Addr) & VG_(do_useseg),
|
||||
@ -3339,9 +3360,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
case JMP: {
|
||||
vg_assert(u->tag2 == NoValue);
|
||||
vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
|
||||
if (*fplive) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (*sselive) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
if (u->cond == CondAlways) {
|
||||
switch (u->tag1) {
|
||||
@ -3382,9 +3403,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag1 == RealReg);
|
||||
vg_assert(u->tag2 == Literal);
|
||||
vg_assert(u->size == 4);
|
||||
if (*fplive) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (*sselive) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 );
|
||||
break;
|
||||
@ -3405,9 +3426,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag1 == Lit16);
|
||||
vg_assert(u->tag2 == NoValue);
|
||||
vg_assert(u->size == 0);
|
||||
if (*fplive) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (*sselive) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
/* Call to a helper which is pretending to be a real CPU
|
||||
instruction (and therefore operates on Real flags and
|
||||
@ -3433,9 +3454,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
else vg_assert(u->tag3 == NoValue);
|
||||
vg_assert(u->size == 0);
|
||||
|
||||
if (*fplive) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (*sselive) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
VG_(synth_ccall) ( u->lit32, u->argc, u->regparms_n, argv, tagv,
|
||||
ret_reg, regs_live_before, u->regs_live_after );
|
||||
@ -3459,9 +3480,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
case FPU_W:
|
||||
vg_assert(u->tag1 == Lit16);
|
||||
vg_assert(u->tag2 == RealReg);
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_fpu_regmem ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3472,9 +3493,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
case FPU:
|
||||
vg_assert(u->tag1 == Lit16);
|
||||
vg_assert(u->tag2 == NoValue);
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_fpu_no_mem ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3488,9 +3509,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag2 == RealReg);
|
||||
vg_assert(u->tag3 == NoValue);
|
||||
vg_assert(!anyFlagUse(u));
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_MMX2_regmem ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3503,9 +3524,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag2 == RealReg);
|
||||
vg_assert(u->tag3 == NoValue);
|
||||
vg_assert(!anyFlagUse(u));
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_MMX2_reg_to_mmxreg ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3518,9 +3539,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag2 == RealReg);
|
||||
vg_assert(u->tag3 == NoValue);
|
||||
vg_assert(!anyFlagUse(u));
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_MMX2_mmxreg_to_reg ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3532,9 +3553,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag1 == Lit16);
|
||||
vg_assert(u->tag2 == NoValue);
|
||||
vg_assert(u->tag3 == NoValue);
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_MMX1_no_mem ( u->flags_r, u->flags_w,
|
||||
u->val1 & 0xFF );
|
||||
@ -3544,9 +3565,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag1 == Lit16);
|
||||
vg_assert(u->tag2 == NoValue);
|
||||
vg_assert(u->tag3 == NoValue);
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_MMX2_no_mem ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3557,9 +3578,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
vg_assert(u->tag1 == Lit16);
|
||||
vg_assert(u->tag2 == Lit16);
|
||||
vg_assert(u->tag3 == NoValue);
|
||||
if (!(*fplive)) {
|
||||
emit_get_fpu_state();
|
||||
*fplive = True;
|
||||
if (!(*sselive)) {
|
||||
emit_get_sse_state();
|
||||
*sselive = True;
|
||||
}
|
||||
synth_MMX3_no_mem ( u->flags_r, u->flags_w,
|
||||
(u->val1 >> 8) & 0xFF,
|
||||
@ -3569,9 +3590,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
|
||||
default:
|
||||
if (VG_(needs).extended_UCode) {
|
||||
if (*fplive) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (*sselive) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
SK_(emit_XUInstr)(u, regs_live_before);
|
||||
} else {
|
||||
@ -3584,9 +3605,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
|
||||
}
|
||||
}
|
||||
|
||||
if (0 && (*fplive)) {
|
||||
emit_put_fpu_state();
|
||||
*fplive = False;
|
||||
if (0 && (*sselive)) {
|
||||
emit_put_sse_state();
|
||||
*sselive = False;
|
||||
}
|
||||
|
||||
/* Update UInstr histogram */
|
||||
@ -3604,7 +3625,7 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
|
||||
{
|
||||
Int i;
|
||||
UChar regs_live_before = 0; /* No regs live at BB start */
|
||||
Bool fplive;
|
||||
Bool sselive;
|
||||
Addr orig_eip, curr_eip;
|
||||
Int tgt;
|
||||
|
||||
@ -3627,7 +3648,7 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
|
||||
VG_(target_forward)(&tgt);
|
||||
|
||||
/* Set up running state. */
|
||||
fplive = False;
|
||||
sselive = False;
|
||||
orig_eip = cb->orig_eip; /* we know EIP is up to date on BB entry */
|
||||
curr_eip = cb->orig_eip;
|
||||
vg_assert(curr_eip != 0); /* otherwise the incremental updating
|
||||
@ -3645,12 +3666,12 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
|
||||
}
|
||||
vg_assert(sane);
|
||||
emitUInstr( cb, i, regs_live_before,
|
||||
&fplive, &orig_eip, &curr_eip );
|
||||
&sselive, &orig_eip, &curr_eip );
|
||||
}
|
||||
regs_live_before = u->regs_live_after;
|
||||
}
|
||||
if (dis) VG_(printf)("\n");
|
||||
vg_assert(!fplive); /* FPU state must be saved by end of BB */
|
||||
vg_assert(!sselive); /* SSE state must be saved by end of BB */
|
||||
vg_assert(eflags_state != UPD_Real); /* flags can't just be in CPU */
|
||||
|
||||
if (j != NULL) {
|
||||
|
||||
@ -176,8 +176,18 @@ cpuid__99:
|
||||
VG_(helper_fstsw_AX):
|
||||
pushl %eax
|
||||
pushl %esi
|
||||
movl VGOFF_(m_fpustate), %esi
|
||||
movl VGOFF_(m_ssestate), %esi
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz aa1nosse
|
||||
fxrstor (%ebp, %esi, 4)
|
||||
jmp aa1merge
|
||||
aa1nosse:
|
||||
frstor (%ebp, %esi, 4)
|
||||
aa1merge:
|
||||
popfl
|
||||
|
||||
fstsw %ax
|
||||
popl %esi
|
||||
movw %ax, 8(%esp)
|
||||
|
||||
@ -575,10 +575,19 @@ extern int VGR_(writev)(int fd,
|
||||
which need to go here to avoid ugly circularities.
|
||||
------------------------------------------------------------------ */
|
||||
|
||||
/* How big is the saved FPU state? */
|
||||
#define VG_SIZE_OF_FPUSTATE 108
|
||||
/* How big is the saved SSE/SSE2 state? Note that this subsumes the
|
||||
FPU state. On machines without SSE, we just save/restore the FPU
|
||||
state into the first part of this area. */
|
||||
/* A general comment about SSE save/restore: It appears that the 7th
|
||||
word (which is the MXCSR) has to be &ed with 0x0000FFBF in order
|
||||
that restoring from it later does not cause a GP fault (which is
|
||||
delivered as a segfault). I guess this will have to be done
|
||||
any time we do fxsave :-( 7th word means word offset 6 or byte
|
||||
offset 24 from the start address of the save area.
|
||||
*/
|
||||
#define VG_SIZE_OF_SSESTATE 512
|
||||
/* ... and in words ... */
|
||||
#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4)
|
||||
#define VG_SIZE_OF_SSESTATE_W ((VG_SIZE_OF_SSESTATE+3)/4)
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
@ -820,7 +829,12 @@ struct _ThreadState {
|
||||
UInt m_esp;
|
||||
UInt m_eflags;
|
||||
UInt m_eip;
|
||||
UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
|
||||
|
||||
/* The SSE/FPU state. This array does not (necessarily) have the
|
||||
16-byte alignment required to get stuff in/out by
|
||||
fxsave/fxrstor. So we have to do it "by hand".
|
||||
*/
|
||||
UInt m_sse[VG_SIZE_OF_SSESTATE_W];
|
||||
|
||||
UInt sh_eax;
|
||||
UInt sh_ebx;
|
||||
@ -1243,6 +1257,12 @@ extern void VG_(mini_stack_dump) ( ExeContext* ec );
|
||||
Exports of vg_main.c
|
||||
------------------------------------------------------------------ */
|
||||
|
||||
/* Is this a SSE/SSE2-capable CPU? If so, we had better save/restore
|
||||
the SSE state all over the place. This is set up very early, in
|
||||
vg_startup.S. We have to determine it early since we can't even
|
||||
correctly snapshot the startup machine state without it. */
|
||||
extern Bool VG_(have_ssestate);
|
||||
|
||||
/* Tell the logging mechanism whether we are logging to a file
|
||||
descriptor or a socket descriptor. */
|
||||
extern Bool VG_(logging_to_filedes);
|
||||
@ -1253,13 +1273,18 @@ extern void VG_(do_sanity_checks) ( Bool force_expensive );
|
||||
/* A structure used as an intermediary when passing the simulated
|
||||
CPU's state to some assembly fragments, particularly system calls.
|
||||
Stuff is copied from baseBlock to here, the assembly magic runs,
|
||||
and then the inverse copy is done.
|
||||
*/
|
||||
and then the inverse copy is done. Alignment: the SSE state must
|
||||
be 16-byte aligned. We ask for the whole struct to be 16-byte
|
||||
aligned, and the SSE state starts at word offset 6+8+1+1 == 16 (byte offset 64),
|
||||
so it too must be 16-byte aligned. Consequence: change this struct
|
||||
only _very carefully_ ! See also above comment re masking MXCSR.
|
||||
*/
|
||||
__attribute__ ((aligned (16)))
|
||||
extern UInt VG_(m_state_static) [6 /* segment regs, Intel order */
|
||||
+ 8 /* int regs, in Intel order */
|
||||
+ 1 /* %eflags */
|
||||
+ 1 /* %eip */
|
||||
+ VG_SIZE_OF_FPUSTATE_W /* FPU state */
|
||||
+ VG_SIZE_OF_SSESTATE_W /* SSE state */
|
||||
];
|
||||
|
||||
/* Handy fns for doing the copy back and forth. */
|
||||
@ -1543,7 +1568,7 @@ extern void VG_(signalreturn_bogusRA)( void );
|
||||
startup time, are given values denoting offsets into baseBlock.
|
||||
These offsets are in *words* from the start of baseBlock. */
|
||||
|
||||
#define VG_BASEBLOCK_WORDS 200
|
||||
#define VG_BASEBLOCK_WORDS 400
|
||||
|
||||
extern UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
|
||||
|
||||
@ -1562,7 +1587,7 @@ extern Int VGOFF_(m_ebp);
|
||||
extern Int VGOFF_(m_esi);
|
||||
extern Int VGOFF_(m_edi);
|
||||
extern Int VGOFF_(m_eflags);
|
||||
extern Int VGOFF_(m_fpustate);
|
||||
extern Int VGOFF_(m_ssestate);
|
||||
extern Int VGOFF_(m_eip);
|
||||
|
||||
extern Int VGOFF_(m_dflag); /* D flag is handled specially */
|
||||
|
||||
@ -49,7 +49,7 @@ Int VGOFF_(m_esi) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_edi) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_eflags) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_dflag) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_fpustate) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_ssestate) = INVALID_OFFSET;
|
||||
Int VGOFF_(ldt) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_cs) = INVALID_OFFSET;
|
||||
Int VGOFF_(m_ss) = INVALID_OFFSET;
|
||||
@ -256,7 +256,14 @@ static void vg_init_baseBlock ( void )
|
||||
|
||||
VGOFF_(m_dflag) = alloc_BaB(1);
|
||||
|
||||
VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
|
||||
/* The FPU/SSE state. This _must_ be 16-byte aligned. */
|
||||
(void)alloc_BaB(1); /* Padding, to achieve required alignment. */
|
||||
VGOFF_(m_ssestate) = alloc_BaB(VG_SIZE_OF_SSESTATE_W);
|
||||
vg_assert(
|
||||
( ((UInt)(& VG_(baseBlock)[VGOFF_(m_ssestate)]))
|
||||
% 16 )
|
||||
== 0
|
||||
);
|
||||
|
||||
/* This thread's LDT pointer, and segment registers. */
|
||||
VGOFF_(ldt) = alloc_BaB(1);
|
||||
@ -404,7 +411,8 @@ UInt VG_(stack)[10000];
|
||||
UInt VG_(sigstack)[10000];
|
||||
|
||||
/* Saving stuff across system calls. */
|
||||
UInt VG_(real_fpu_state_saved_over_syscall)[VG_SIZE_OF_FPUSTATE_W];
|
||||
__attribute__ ((aligned (16)))
|
||||
UInt VG_(real_sse_state_saved_over_syscall)[VG_SIZE_OF_SSESTATE_W];
|
||||
Addr VG_(esp_saved_over_syscall);
|
||||
|
||||
/* Counts downwards in vg_run_innerloop. */
|
||||
@ -428,6 +436,15 @@ UInt VG_(exitcode) = 0;
|
||||
descriptor or a socket descriptor. */
|
||||
Bool VG_(logging_to_filedes) = True;
|
||||
|
||||
/* Is this a SSE/SSE2-capable CPU? If so, we had better save/restore
|
||||
the SSE state all over the place. This is set up very early, in
|
||||
vg_startup.S. We have to determine it early since we can't even
|
||||
correctly snapshot the startup machine state without it. */
|
||||
/* Initially True. Safer to err on the side of SSEness and get SIGILL
|
||||
than to not notice for some reason that we have SSE and get weird
|
||||
errors later on. */
|
||||
Bool VG_(have_ssestate) = True;
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
Counters, for informational purposes only.
|
||||
@ -1177,11 +1194,15 @@ static void process_cmd_line_options ( void )
|
||||
Copying to/from m_state_static.
|
||||
------------------------------------------------------------------ */
|
||||
|
||||
/* See comment about this in vg_include.h. Change only with
|
||||
great care.
|
||||
*/
|
||||
__attribute__ ((aligned (16)))
|
||||
UInt VG_(m_state_static) [6 /* segment regs, Intel order */
|
||||
+ 8 /* int regs, in Intel order */
|
||||
+ 1 /* %eflags */
|
||||
+ 1 /* %eip */
|
||||
+ VG_SIZE_OF_FPUSTATE_W /* FPU state */
|
||||
+ VG_SIZE_OF_SSESTATE_W /* SSE state */
|
||||
];
|
||||
|
||||
UInt VG_(insertDflag)(UInt eflags, Int d)
|
||||
@ -1226,13 +1247,14 @@ void VG_(copy_baseBlock_to_m_state_static) ( void )
|
||||
VG_(m_state_static)[48/4] = VG_(baseBlock)[VGOFF_(m_esi)];
|
||||
VG_(m_state_static)[52/4] = VG_(baseBlock)[VGOFF_(m_edi)];
|
||||
|
||||
VG_(m_state_static)[56/4] = VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)]);
|
||||
VG_(m_state_static)[56/4]
|
||||
= VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)]);
|
||||
VG_(m_state_static)[60/4] = VG_(baseBlock)[VGOFF_(m_eip)];
|
||||
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
VG_(m_state_static)[64/4 + i]
|
||||
= VG_(baseBlock)[VGOFF_(m_fpustate) + i];
|
||||
= VG_(baseBlock)[VGOFF_(m_ssestate) + i];
|
||||
}
|
||||
|
||||
|
||||
@ -1255,13 +1277,15 @@ void VG_(copy_m_state_static_to_baseBlock) ( void )
|
||||
VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[48/4];
|
||||
VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[52/4];
|
||||
|
||||
VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[56/4] & ~EFlagD;
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)] = VG_(extractDflag)(VG_(m_state_static)[56/4]);
|
||||
VG_(baseBlock)[VGOFF_(m_eflags)]
|
||||
= VG_(m_state_static)[56/4] & ~EFlagD;
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)]
|
||||
= VG_(extractDflag)(VG_(m_state_static)[56/4]);
|
||||
|
||||
VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[60/4];
|
||||
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
VG_(baseBlock)[VGOFF_(m_fpustate) + i]
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
VG_(baseBlock)[VGOFF_(m_ssestate) + i]
|
||||
= VG_(m_state_static)[64/4 + i];
|
||||
}
|
||||
|
||||
@ -1361,6 +1385,11 @@ void VG_(main) ( void )
|
||||
VgSchedReturnCode src;
|
||||
ThreadState* tst;
|
||||
|
||||
if (VG_(have_ssestate))
|
||||
VG_(printf)("Looks like a SSE-capable CPU\n");
|
||||
else
|
||||
VG_(printf)("Looks like a MMX-only CPU\n");
|
||||
|
||||
/* Check skin and core versions are compatible */
|
||||
if (VG_CORE_INTERFACE_MAJOR_VERSION != VG_(skin_interface_major_version)) {
|
||||
VG_(printf)("Error:\n"
|
||||
|
||||
@ -419,12 +419,15 @@ void VG_(load_thread_state) ( ThreadId tid )
|
||||
VG_(baseBlock)[VGOFF_(m_edi)] = VG_(threads)[tid].m_edi;
|
||||
VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(threads)[tid].m_ebp;
|
||||
VG_(baseBlock)[VGOFF_(m_esp)] = VG_(threads)[tid].m_esp;
|
||||
VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(threads)[tid].m_eflags & ~EFlagD;
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)] = VG_(extractDflag)(VG_(threads)[tid].m_eflags);
|
||||
VG_(baseBlock)[VGOFF_(m_eflags)]
|
||||
= VG_(threads)[tid].m_eflags & ~EFlagD;
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)]
|
||||
= VG_(extractDflag)(VG_(threads)[tid].m_eflags);
|
||||
VG_(baseBlock)[VGOFF_(m_eip)] = VG_(threads)[tid].m_eip;
|
||||
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
VG_(baseBlock)[VGOFF_(m_ssestate) + i]
|
||||
= VG_(threads)[tid].m_sse[i];
|
||||
|
||||
if (VG_(needs).shadow_regs) {
|
||||
VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
|
||||
@ -500,12 +503,14 @@ void VG_(save_thread_state) ( ThreadId tid )
|
||||
VG_(threads)[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
|
||||
VG_(threads)[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
|
||||
VG_(threads)[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
|
||||
VG_(threads)[tid].m_eflags = VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)]);
|
||||
VG_(threads)[tid].m_eflags
|
||||
= VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
|
||||
VG_(baseBlock)[VGOFF_(m_dflag)]);
|
||||
VG_(threads)[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
|
||||
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
VG_(threads)[tid].m_sse[i]
|
||||
= VG_(baseBlock)[VGOFF_(m_ssestate) + i];
|
||||
|
||||
if (VG_(needs).shadow_regs) {
|
||||
VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
|
||||
@ -550,8 +555,8 @@ void VG_(save_thread_state) ( ThreadId tid )
|
||||
VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
|
||||
VG_(baseBlock)[VGOFF_(m_eip)] = junk;
|
||||
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
VG_(baseBlock)[VGOFF_(m_ssestate) + i] = junk;
|
||||
|
||||
vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
|
||||
}
|
||||
|
||||
@ -904,7 +904,7 @@ typedef
|
||||
/* Safely-saved version of sigNo, as described above. */
|
||||
Int sigNo_private;
|
||||
/* Saved processor state. */
|
||||
UInt fpustate[VG_SIZE_OF_FPUSTATE_W];
|
||||
UInt ssestate[VG_SIZE_OF_SSESTATE_W];
|
||||
UInt eax;
|
||||
UInt ecx;
|
||||
UInt edx;
|
||||
@ -988,8 +988,8 @@ void vg_push_signal_frame ( ThreadId tid, int sigNo )
|
||||
frame->puContext = (Addr)NULL;
|
||||
frame->magicPI = 0x31415927;
|
||||
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
frame->fpustate[i] = tst->m_fpu[i];
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
frame->ssestate[i] = tst->m_sse[i];
|
||||
|
||||
frame->eax = tst->m_eax;
|
||||
frame->ecx = tst->m_ecx;
|
||||
@ -1050,8 +1050,8 @@ Int vg_pop_signal_frame ( ThreadId tid )
|
||||
"vg_pop_signal_frame (thread %d): valid magic", tid);
|
||||
|
||||
/* restore machine state */
|
||||
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
|
||||
tst->m_fpu[i] = frame->fpustate[i];
|
||||
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
|
||||
tst->m_sse[i] = frame->ssestate[i];
|
||||
|
||||
/* Mark the frame structure as nonaccessible. */
|
||||
VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
|
||||
|
||||
@ -105,10 +105,39 @@ really_start_up:
|
||||
pushfl
|
||||
popl %eax
|
||||
movl %eax, VG_(m_state_static)+56
|
||||
|
||||
# now weve captured all the integer registers and
|
||||
# flags, figure out whether this is an sse-enabled
|
||||
# cpu or not.
|
||||
movb $0, VG_(have_ssestate) # assume sse-disabled
|
||||
movl $0, %eax
|
||||
cpuid
|
||||
cmpl $1, %eax
|
||||
jl get_fpu # we cant do cpuid(1) ?!
|
||||
movl $1, %eax
|
||||
cpuid
|
||||
testl $(1<<25), %edx
|
||||
jz get_fpu # edx bit 25 is set iff sse
|
||||
# well, it looks like were sse-enabled
|
||||
movb $1, VG_(have_ssestate)
|
||||
|
||||
# next, capture the FPU/SSE state
|
||||
get_fpu:
|
||||
fwait
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq3nosse
|
||||
fxsave VG_(m_state_static)+64
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp qq3merge
|
||||
qq3nosse:
|
||||
fnsave VG_(m_state_static)+64
|
||||
frstor VG_(m_state_static)+64
|
||||
|
||||
qq3merge:
|
||||
popfl
|
||||
|
||||
# keep the first and last 10 words free to check for overruns
|
||||
movl $VG_(stack)+39996 -40, %esp
|
||||
|
||||
@ -145,7 +174,18 @@ VG_(switch_to_real_CPU):
|
||||
# of the rest of the program continues on the real CPU,
|
||||
# and there is no way for the simulator to regain control
|
||||
# after this point.
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq4nosse
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp qq4merge
|
||||
qq4nosse:
|
||||
frstor VG_(m_state_static)+64
|
||||
qq4merge:
|
||||
popfl
|
||||
|
||||
movl VG_(m_state_static)+56, %eax
|
||||
pushl %eax
|
||||
popfl
|
||||
@ -172,8 +212,19 @@ VG_(switch_to_real_CPU):
|
||||
call VG_(sigshutdown_actions)
|
||||
popfl
|
||||
popal
|
||||
|
||||
# re-restore the FPU state anyway ...
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq5nosse
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp qq5merge
|
||||
qq5nosse:
|
||||
frstor VG_(m_state_static)+64
|
||||
qq5merge:
|
||||
popfl
|
||||
|
||||
jmp *VG_(m_state_static)+60
|
||||
|
||||
|
||||
|
||||
@ -46,15 +46,36 @@ VG_(do_syscall):
|
||||
|
||||
# and save the real FPU state too
|
||||
fwait
|
||||
fnsave VG_(real_fpu_state_saved_over_syscall)
|
||||
frstor VG_(real_fpu_state_saved_over_syscall)
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq1nosse
|
||||
fxsave VG_(real_sse_state_saved_over_syscall)
|
||||
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
|
||||
fxrstor VG_(real_sse_state_saved_over_syscall)
|
||||
jmp qq1merge
|
||||
qq1nosse:
|
||||
fnsave VG_(real_sse_state_saved_over_syscall)
|
||||
frstor VG_(real_sse_state_saved_over_syscall)
|
||||
qq1merge:
|
||||
popfl
|
||||
|
||||
# remember what the simulators stack pointer is
|
||||
movl %esp, VG_(esp_saved_over_syscall)
|
||||
|
||||
# Now copy the simulated machines state into the real one
|
||||
# esp still refers to the simulators stack
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz qq2nosse
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp qq2merge
|
||||
qq2nosse:
|
||||
frstor VG_(m_state_static)+64
|
||||
qq2merge:
|
||||
popfl
|
||||
|
||||
movl VG_(m_state_static)+56, %eax
|
||||
pushl %eax
|
||||
popfl
|
||||
@ -111,11 +132,32 @@ VG_(do_syscall):
|
||||
popl %eax
|
||||
movl %eax, VG_(m_state_static)+56
|
||||
fwait
|
||||
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz pp2nosse
|
||||
fxsave VG_(m_state_static)+64
|
||||
andl $0x0000FFBF, VG_(m_state_static)+64+24
|
||||
fxrstor VG_(m_state_static)+64
|
||||
jmp pp2merge
|
||||
pp2nosse:
|
||||
fnsave VG_(m_state_static)+64
|
||||
frstor VG_(m_state_static)+64
|
||||
|
||||
pp2merge:
|
||||
popfl
|
||||
|
||||
# Restore the state of the simulator
|
||||
frstor VG_(real_fpu_state_saved_over_syscall)
|
||||
pushfl
|
||||
cmpb $0, VG_(have_ssestate)
|
||||
jz pp1nosse
|
||||
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
|
||||
fxrstor VG_(real_sse_state_saved_over_syscall)
|
||||
jmp pp1merge
|
||||
pp1nosse:
|
||||
frstor VG_(real_sse_state_saved_over_syscall)
|
||||
pp1merge:
|
||||
popfl
|
||||
|
||||
popal
|
||||
|
||||
ret
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user