Extend the state components in VG_(m_state_static) and VG_(baseBlock)

to include the SSE/SSE2 architectural state.  Automagically detect
at startup, in vg_startup.S, whether or not this is a SSE-enabled
CPU and act accordingly.  All subsequent FPU/SSE state transfers
between the simulated and real machine are then done either with
fsave/frstor (as before) or fxsave/fxrstor (the SSE equivalents).

Fragile and fiddly: (1) the SSE state needs to be stored on a 16-byte
boundary, and (2) certain bits of the MXCSR value in a state image
written by fxsave need to be masked off (the code ANDs it with
0x0000FFBF) before the image can safely be restored using fxrstor.

It does appear to work.  I'd appreciate people trying it out on
various CPUs to establish whether the SSE / not-SSE check works
correctly, and whether anything else is broken.

Unfortunately this makes some programs run significantly slower.
I don't know why.  Perhaps it is due to copying around more processor
state than before (the SSE state is 512 bytes, whereas the FPU state
was only 108).  I will look into this.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1574
This commit is contained in:
Julian Seward 2003-04-29 23:50:00 +00:00
parent 2d4e9e253f
commit 23ae8adf30
9 changed files with 341 additions and 116 deletions

View File

@ -46,15 +46,36 @@ VG_(do_syscall):
# and save the real FPU state too
fwait
fnsave VG_(real_fpu_state_saved_over_syscall)
frstor VG_(real_fpu_state_saved_over_syscall)
pushfl
cmpb $0, VG_(have_ssestate)
jz qq1nosse
fxsave VG_(real_sse_state_saved_over_syscall)
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
fxrstor VG_(real_sse_state_saved_over_syscall)
jmp qq1merge
qq1nosse:
fnsave VG_(real_sse_state_saved_over_syscall)
frstor VG_(real_sse_state_saved_over_syscall)
qq1merge:
popfl
# remember what the simulators stack pointer is
movl %esp, VG_(esp_saved_over_syscall)
# Now copy the simulated machines state into the real one
# esp still refers to the simulators stack
pushfl
cmpb $0, VG_(have_ssestate)
jz qq2nosse
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp qq2merge
qq2nosse:
frstor VG_(m_state_static)+64
qq2merge:
popfl
movl VG_(m_state_static)+56, %eax
pushl %eax
popfl
@ -111,11 +132,32 @@ VG_(do_syscall):
popl %eax
movl %eax, VG_(m_state_static)+56
fwait
pushfl
cmpb $0, VG_(have_ssestate)
jz pp2nosse
fxsave VG_(m_state_static)+64
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp pp2merge
pp2nosse:
fnsave VG_(m_state_static)+64
frstor VG_(m_state_static)+64
pp2merge:
popfl
# Restore the state of the simulator
frstor VG_(real_fpu_state_saved_over_syscall)
pushfl
cmpb $0, VG_(have_ssestate)
jz pp1nosse
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
fxrstor VG_(real_sse_state_saved_over_syscall)
jmp pp1merge
pp1nosse:
frstor VG_(real_sse_state_saved_over_syscall)
pp1merge:
popfl
popal
ret

View File

@ -1334,24 +1334,44 @@ static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 )
/*--- FPU instruction emitters ---*/
/*----------------------------------------------------*/
static void emit_get_fpu_state ( void )
static void emit_get_sse_state ( void )
{
Int off = 4 * VGOFF_(m_fpustate);
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
VG_(emitL) ( off );
if (dis)
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
Int off = 4 * VGOFF_(m_ssestate);
if (VG_(have_ssestate)) {
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
VG_(emitB) ( 0x0F );
VG_(emitB) ( 0xAE ); VG_(emitB) ( 0x8D ); /* fxrstor d32(%ebp) */
VG_(emitL) ( off );
if (dis)
VG_(printf)("\n\t\tfxrstor\t%d(%%ebp)\n", off );
} else {
/* Not a SSE-capable CPU. Just do frstor. */
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
VG_(emitL) ( off );
if (dis)
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
}
}
static void emit_put_fpu_state ( void )
static void emit_put_sse_state ( void )
{
Int off = 4 * VGOFF_(m_fpustate);
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
VG_(emitL) ( off );
if (dis)
VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
Int off = 4 * VGOFF_(m_ssestate);
if (VG_(have_ssestate)) {
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
VG_(emitB) ( 0x0F );
VG_(emitB) ( 0xAE ); VG_(emitB) ( 0x85 ); /* fxsave d32(%ebp) */
VG_(emitL) ( off );
if (dis)
VG_(printf)("\n\t\tfxsave\t%d(%%ebp)\n", off );
} else {
/* Not a SSE-capable CPU. Just do fnsave. */
VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
VG_(emitL) ( off );
if (dis)
VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
}
}
static void emit_fpu_no_mem ( FlagSet uses_sflags,
@ -3024,18 +3044,19 @@ Bool anyFlagUse ( UInstr* u )
}
/* *fplive==True indicates that the simulated machine's FPU state is in
the real FPU. If so we need to be very careful not to trash it.
If FPU state is live and we deem it necessary to copy it back to
the simulated machine's FPU state, we do so. The final state of
fpliveness is returned. In short we _must_ do put_fpu_state if
/* *fplive==True indicates that the simulated machine's FPU/SSE state is in
the real machine's cpu. If so we need to be very careful not to trash it.
If FPU/SSE state is live and we deem it necessary to copy it back to
the simulated machine's FPU/SSE state, we do so. The final state of
fpliveness is returned. In short we _must_ do put_sse_state if
there is any chance at all that the code generated for a UInstr
will change the real FPU state.
will change the real FPU/MMX/SSE/SSE2 state.
*/
static void emitUInstr ( UCodeBlock* cb, Int i,
RRegSet regs_live_before,
/* Running state, which we update. */
Bool* fplive, /* True<==>FPU state in real FPU */
Bool* sselive, /* True<==>FPU/SSE
state in real FPU */
Addr* orig_eip, /* previous curr_eip, or zero */
Addr* curr_eip ) /* current eip */
{
@ -3248,9 +3269,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag2 == RealReg);
vg_assert(u->size == 0);
if (*fplive) {
emit_put_fpu_state();
*fplive = False;
if (*sselive) {
emit_put_sse_state();
*sselive = False;
}
VG_(synth_ccall) ( (Addr) & VG_(do_useseg),
@ -3339,9 +3360,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
case JMP: {
vg_assert(u->tag2 == NoValue);
vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
if (*fplive) {
emit_put_fpu_state();
*fplive = False;
if (*sselive) {
emit_put_sse_state();
*sselive = False;
}
if (u->cond == CondAlways) {
switch (u->tag1) {
@ -3382,9 +3403,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag1 == RealReg);
vg_assert(u->tag2 == Literal);
vg_assert(u->size == 4);
if (*fplive) {
emit_put_fpu_state();
*fplive = False;
if (*sselive) {
emit_put_sse_state();
*sselive = False;
}
synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 );
break;
@ -3405,9 +3426,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == NoValue);
vg_assert(u->size == 0);
if (*fplive) {
emit_put_fpu_state();
*fplive = False;
if (*sselive) {
emit_put_sse_state();
*sselive = False;
}
/* Call to a helper which is pretending to be a real CPU
instruction (and therefore operates on Real flags and
@ -3433,9 +3454,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
else vg_assert(u->tag3 == NoValue);
vg_assert(u->size == 0);
if (*fplive) {
emit_put_fpu_state();
*fplive = False;
if (*sselive) {
emit_put_sse_state();
*sselive = False;
}
VG_(synth_ccall) ( u->lit32, u->argc, u->regparms_n, argv, tagv,
ret_reg, regs_live_before, u->regs_live_after );
@ -3459,9 +3480,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
case FPU_W:
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == RealReg);
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_fpu_regmem ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3472,9 +3493,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
case FPU:
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == NoValue);
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_fpu_no_mem ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3488,9 +3509,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag2 == RealReg);
vg_assert(u->tag3 == NoValue);
vg_assert(!anyFlagUse(u));
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_MMX2_regmem ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3503,9 +3524,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag2 == RealReg);
vg_assert(u->tag3 == NoValue);
vg_assert(!anyFlagUse(u));
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_MMX2_reg_to_mmxreg ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3518,9 +3539,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag2 == RealReg);
vg_assert(u->tag3 == NoValue);
vg_assert(!anyFlagUse(u));
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_MMX2_mmxreg_to_reg ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3532,9 +3553,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == NoValue);
vg_assert(u->tag3 == NoValue);
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_MMX1_no_mem ( u->flags_r, u->flags_w,
u->val1 & 0xFF );
@ -3544,9 +3565,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == NoValue);
vg_assert(u->tag3 == NoValue);
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_MMX2_no_mem ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3557,9 +3578,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
vg_assert(u->tag3 == NoValue);
if (!(*fplive)) {
emit_get_fpu_state();
*fplive = True;
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
synth_MMX3_no_mem ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
@ -3569,9 +3590,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
default:
if (VG_(needs).extended_UCode) {
if (*fplive) {
emit_put_fpu_state();
*fplive = False;
if (*sselive) {
emit_put_sse_state();
*sselive = False;
}
SK_(emit_XUInstr)(u, regs_live_before);
} else {
@ -3584,9 +3605,9 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
}
}
if (0 && (*fplive)) {
emit_put_fpu_state();
*fplive = False;
if (0 && (*sselive)) {
emit_put_sse_state();
*sselive = False;
}
/* Update UInstr histogram */
@ -3604,7 +3625,7 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
{
Int i;
UChar regs_live_before = 0; /* No regs live at BB start */
Bool fplive;
Bool sselive;
Addr orig_eip, curr_eip;
Int tgt;
@ -3627,7 +3648,7 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
VG_(target_forward)(&tgt);
/* Set up running state. */
fplive = False;
sselive = False;
orig_eip = cb->orig_eip; /* we know EIP is up to date on BB entry */
curr_eip = cb->orig_eip;
vg_assert(curr_eip != 0); /* otherwise the incremental updating
@ -3645,12 +3666,12 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
}
vg_assert(sane);
emitUInstr( cb, i, regs_live_before,
&fplive, &orig_eip, &curr_eip );
&sselive, &orig_eip, &curr_eip );
}
regs_live_before = u->regs_live_after;
}
if (dis) VG_(printf)("\n");
vg_assert(!fplive); /* FPU state must be saved by end of BB */
vg_assert(!sselive); /* SSE state must be saved by end of BB */
vg_assert(eflags_state != UPD_Real); /* flags can't just be in CPU */
if (j != NULL) {

View File

@ -176,8 +176,18 @@ cpuid__99:
VG_(helper_fstsw_AX):
pushl %eax
pushl %esi
movl VGOFF_(m_fpustate), %esi
movl VGOFF_(m_ssestate), %esi
pushfl
cmpb $0, VG_(have_ssestate)
jz aa1nosse
fxrstor (%ebp, %esi, 4)
jmp aa1merge
aa1nosse:
frstor (%ebp, %esi, 4)
aa1merge:
popfl
fstsw %ax
popl %esi
movw %ax, 8(%esp)

View File

@ -575,10 +575,19 @@ extern int VGR_(writev)(int fd,
which need to go here to avoid ugly circularities.
------------------------------------------------------------------ */
/* How big is the saved FPU state? */
#define VG_SIZE_OF_FPUSTATE 108
/* How big is the saved SSE/SSE2 state? Note that this subsumes the
FPU state. On machines without SSE, we just save/restore the FPU
state into the first part of this area. */
/* A general comment about SSE save/restore: It appears that the 7th
word (which is the MXCSR) has to be &ed with 0x0000FFBF in order
that restoring from it later does not cause a GP fault (which is
delivered as a segfault). I guess this will have to be done
any time we do fxsave :-( 7th word means word offset 6 or byte
offset 24 from the start address of the save area.
*/
#define VG_SIZE_OF_SSESTATE 512
/* ... and in words ... */
#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4)
#define VG_SIZE_OF_SSESTATE_W ((VG_SIZE_OF_SSESTATE+3)/4)
/* ---------------------------------------------------------------------
@ -820,7 +829,12 @@ struct _ThreadState {
UInt m_esp;
UInt m_eflags;
UInt m_eip;
UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
/* The SSE/FPU state. This array does not (necessarily) have the
16-byte alignment required to get stuff in/out by
fxsave/fxrstor. So we have to do it "by hand".
*/
UInt m_sse[VG_SIZE_OF_SSESTATE_W];
UInt sh_eax;
UInt sh_ebx;
@ -1243,6 +1257,12 @@ extern void VG_(mini_stack_dump) ( ExeContext* ec );
Exports of vg_main.c
------------------------------------------------------------------ */
/* Is this a SSE/SSE2-capable CPU? If so, we had better save/restore
the SSE state all over the place. This is set up very early, in
vg_startup.S. We have to determine it early since we can't even
correctly snapshot the startup machine state without it. */
extern Bool VG_(have_ssestate);
/* Tell the logging mechanism whether we are logging to a file
descriptor or a socket descriptor. */
extern Bool VG_(logging_to_filedes);
@ -1253,13 +1273,18 @@ extern void VG_(do_sanity_checks) ( Bool force_expensive );
/* A structure used as an intermediary when passing the simulated
CPU's state to some assembly fragments, particularly system calls.
Stuff is copied from baseBlock to here, the assembly magic runs,
and then the inverse copy is done.
*/
and then the inverse copy is done. Alignment: the SSE state must
be 16-byte aligned. We ask for the whole struct to be 16-byte
aligned, and the SSE state starts after 6+8+1+1 == 16 words (byte
offset 64), so it too is 16-byte aligned. Consequence: change this
struct only _very carefully_ ! See also above comment re masking MXCSR.
*/
__attribute__ ((aligned (16)))
extern UInt VG_(m_state_static) [6 /* segment regs, Intel order */
+ 8 /* int regs, in Intel order */
+ 1 /* %eflags */
+ 1 /* %eip */
+ VG_SIZE_OF_FPUSTATE_W /* FPU state */
+ VG_SIZE_OF_SSESTATE_W /* SSE state */
];
/* Handy fns for doing the copy back and forth. */
@ -1543,7 +1568,7 @@ extern void VG_(signalreturn_bogusRA)( void );
startup time, are given values denoting offsets into baseBlock.
These offsets are in *words* from the start of baseBlock. */
#define VG_BASEBLOCK_WORDS 200
#define VG_BASEBLOCK_WORDS 400
extern UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
@ -1562,7 +1587,7 @@ extern Int VGOFF_(m_ebp);
extern Int VGOFF_(m_esi);
extern Int VGOFF_(m_edi);
extern Int VGOFF_(m_eflags);
extern Int VGOFF_(m_fpustate);
extern Int VGOFF_(m_ssestate);
extern Int VGOFF_(m_eip);
extern Int VGOFF_(m_dflag); /* D flag is handled specially */

View File

@ -49,7 +49,7 @@ Int VGOFF_(m_esi) = INVALID_OFFSET;
Int VGOFF_(m_edi) = INVALID_OFFSET;
Int VGOFF_(m_eflags) = INVALID_OFFSET;
Int VGOFF_(m_dflag) = INVALID_OFFSET;
Int VGOFF_(m_fpustate) = INVALID_OFFSET;
Int VGOFF_(m_ssestate) = INVALID_OFFSET;
Int VGOFF_(ldt) = INVALID_OFFSET;
Int VGOFF_(m_cs) = INVALID_OFFSET;
Int VGOFF_(m_ss) = INVALID_OFFSET;
@ -256,7 +256,14 @@ static void vg_init_baseBlock ( void )
VGOFF_(m_dflag) = alloc_BaB(1);
VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
/* The FPU/SSE state. This _must_ be 16-byte aligned. */
(void)alloc_BaB(1); /* Padding, to achieve required alignment. */
VGOFF_(m_ssestate) = alloc_BaB(VG_SIZE_OF_SSESTATE_W);
vg_assert(
( ((UInt)(& VG_(baseBlock)[VGOFF_(m_ssestate)]))
% 16 )
== 0
);
/* This thread's LDT pointer, and segment registers. */
VGOFF_(ldt) = alloc_BaB(1);
@ -404,7 +411,8 @@ UInt VG_(stack)[10000];
UInt VG_(sigstack)[10000];
/* Saving stuff across system calls. */
UInt VG_(real_fpu_state_saved_over_syscall)[VG_SIZE_OF_FPUSTATE_W];
__attribute__ ((aligned (16)))
UInt VG_(real_sse_state_saved_over_syscall)[VG_SIZE_OF_SSESTATE_W];
Addr VG_(esp_saved_over_syscall);
/* Counts downwards in vg_run_innerloop. */
@ -428,6 +436,15 @@ UInt VG_(exitcode) = 0;
descriptor or a socket descriptor. */
Bool VG_(logging_to_filedes) = True;
/* Is this a SSE/SSE2-capable CPU? If so, we had better save/restore
the SSE state all over the place. This is set up very early, in
vg_startup.S. We have to determine it early since we can't even
correctly snapshot the startup machine state without it. */
/* Initially True. Safer to err on the side of SSEness and get SIGILL
than to not notice for some reason that we have SSE and get weird
errors later on. */
Bool VG_(have_ssestate) = True;
/* ---------------------------------------------------------------------
Counters, for informational purposes only.
@ -1177,11 +1194,15 @@ static void process_cmd_line_options ( void )
Copying to/from m_state_static.
------------------------------------------------------------------ */
/* See comment about this in vg_include.h. Change only with
great care.
*/
__attribute__ ((aligned (16)))
UInt VG_(m_state_static) [6 /* segment regs, Intel order */
+ 8 /* int regs, in Intel order */
+ 1 /* %eflags */
+ 1 /* %eip */
+ VG_SIZE_OF_FPUSTATE_W /* FPU state */
+ VG_SIZE_OF_SSESTATE_W /* SSE state (subsumes the FPU state) */
];
UInt VG_(insertDflag)(UInt eflags, Int d)
@ -1226,13 +1247,14 @@ void VG_(copy_baseBlock_to_m_state_static) ( void )
VG_(m_state_static)[48/4] = VG_(baseBlock)[VGOFF_(m_esi)];
VG_(m_state_static)[52/4] = VG_(baseBlock)[VGOFF_(m_edi)];
VG_(m_state_static)[56/4] = VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
VG_(baseBlock)[VGOFF_(m_dflag)]);
VG_(m_state_static)[56/4]
= VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
VG_(baseBlock)[VGOFF_(m_dflag)]);
VG_(m_state_static)[60/4] = VG_(baseBlock)[VGOFF_(m_eip)];
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
VG_(m_state_static)[64/4 + i]
= VG_(baseBlock)[VGOFF_(m_fpustate) + i];
= VG_(baseBlock)[VGOFF_(m_ssestate) + i];
}
@ -1255,13 +1277,15 @@ void VG_(copy_m_state_static_to_baseBlock) ( void )
VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[48/4];
VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[52/4];
VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[56/4] & ~EFlagD;
VG_(baseBlock)[VGOFF_(m_dflag)] = VG_(extractDflag)(VG_(m_state_static)[56/4]);
VG_(baseBlock)[VGOFF_(m_eflags)]
= VG_(m_state_static)[56/4] & ~EFlagD;
VG_(baseBlock)[VGOFF_(m_dflag)]
= VG_(extractDflag)(VG_(m_state_static)[56/4]);
VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[60/4];
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_fpustate) + i]
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_ssestate) + i]
= VG_(m_state_static)[64/4 + i];
}
@ -1361,6 +1385,11 @@ void VG_(main) ( void )
VgSchedReturnCode src;
ThreadState* tst;
if (VG_(have_ssestate))
VG_(printf)("Looks like a SSE-capable CPU\n");
else
VG_(printf)("Looks like a MMX-only CPU\n");
/* Check skin and core versions are compatible */
if (VG_CORE_INTERFACE_MAJOR_VERSION != VG_(skin_interface_major_version)) {
VG_(printf)("Error:\n"

View File

@ -419,12 +419,15 @@ void VG_(load_thread_state) ( ThreadId tid )
VG_(baseBlock)[VGOFF_(m_edi)] = VG_(threads)[tid].m_edi;
VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(threads)[tid].m_ebp;
VG_(baseBlock)[VGOFF_(m_esp)] = VG_(threads)[tid].m_esp;
VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(threads)[tid].m_eflags & ~EFlagD;
VG_(baseBlock)[VGOFF_(m_dflag)] = VG_(extractDflag)(VG_(threads)[tid].m_eflags);
VG_(baseBlock)[VGOFF_(m_eflags)]
= VG_(threads)[tid].m_eflags & ~EFlagD;
VG_(baseBlock)[VGOFF_(m_dflag)]
= VG_(extractDflag)(VG_(threads)[tid].m_eflags);
VG_(baseBlock)[VGOFF_(m_eip)] = VG_(threads)[tid].m_eip;
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_ssestate) + i]
= VG_(threads)[tid].m_sse[i];
if (VG_(needs).shadow_regs) {
VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
@ -500,12 +503,14 @@ void VG_(save_thread_state) ( ThreadId tid )
VG_(threads)[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
VG_(threads)[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
VG_(threads)[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
VG_(threads)[tid].m_eflags = VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
VG_(baseBlock)[VGOFF_(m_dflag)]);
VG_(threads)[tid].m_eflags
= VG_(insertDflag)(VG_(baseBlock)[VGOFF_(m_eflags)],
VG_(baseBlock)[VGOFF_(m_dflag)]);
VG_(threads)[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
VG_(threads)[tid].m_sse[i]
= VG_(baseBlock)[VGOFF_(m_ssestate) + i];
if (VG_(needs).shadow_regs) {
VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
@ -550,8 +555,8 @@ void VG_(save_thread_state) ( ThreadId tid )
VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
VG_(baseBlock)[VGOFF_(m_eip)] = junk;
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
VG_(baseBlock)[VGOFF_(m_ssestate) + i] = junk;
vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}

View File

@ -904,7 +904,7 @@ typedef
/* Safely-saved version of sigNo, as described above. */
Int sigNo_private;
/* Saved processor state. */
UInt fpustate[VG_SIZE_OF_FPUSTATE_W];
UInt ssestate[VG_SIZE_OF_SSESTATE_W];
UInt eax;
UInt ecx;
UInt edx;
@ -988,8 +988,8 @@ void vg_push_signal_frame ( ThreadId tid, int sigNo )
frame->puContext = (Addr)NULL;
frame->magicPI = 0x31415927;
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
frame->fpustate[i] = tst->m_fpu[i];
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
frame->ssestate[i] = tst->m_sse[i];
frame->eax = tst->m_eax;
frame->ecx = tst->m_ecx;
@ -1050,8 +1050,8 @@ Int vg_pop_signal_frame ( ThreadId tid )
"vg_pop_signal_frame (thread %d): valid magic", tid);
/* restore machine state */
for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
tst->m_fpu[i] = frame->fpustate[i];
for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
tst->m_sse[i] = frame->ssestate[i];
/* Mark the frame structure as nonaccessible. */
VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );

View File

@ -105,10 +105,39 @@ really_start_up:
pushfl
popl %eax
movl %eax, VG_(m_state_static)+56
# now weve captured all the integer registers and
# flags, figure out whether this is an sse-enabled
# cpu or not.
movb $0, VG_(have_ssestate) # assume sse-disabled
movl $0, %eax
cpuid
cmpl $1, %eax
jl get_fpu # we cant do cpuid(1) ?!
movl $1, %eax
cpuid
testl $(1<<25), %edx
jz get_fpu # edx bit 25 is set iff sse
# well, it looks like were sse-enabled
movb $1, VG_(have_ssestate)
# next, capture the FPU/SSE state
get_fpu:
fwait
pushfl
cmpb $0, VG_(have_ssestate)
jz qq3nosse
fxsave VG_(m_state_static)+64
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp qq3merge
qq3nosse:
fnsave VG_(m_state_static)+64
frstor VG_(m_state_static)+64
qq3merge:
popfl
# keep the first and last 10 words free to check for overruns
movl $VG_(stack)+39996 -40, %esp
@ -145,7 +174,18 @@ VG_(switch_to_real_CPU):
# of the rest of the program continues on the real CPU,
# and there is no way for the simulator to regain control
# after this point.
pushfl
cmpb $0, VG_(have_ssestate)
jz qq4nosse
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp qq4merge
qq4nosse:
frstor VG_(m_state_static)+64
qq4merge:
popfl
movl VG_(m_state_static)+56, %eax
pushl %eax
popfl
@ -172,8 +212,19 @@ VG_(switch_to_real_CPU):
call VG_(sigshutdown_actions)
popfl
popal
# re-restore the FPU state anyway ...
pushfl
cmpb $0, VG_(have_ssestate)
jz qq5nosse
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp qq5merge
qq5nosse:
frstor VG_(m_state_static)+64
qq5merge:
popfl
jmp *VG_(m_state_static)+60

View File

@ -46,15 +46,36 @@ VG_(do_syscall):
# and save the real FPU state too
fwait
fnsave VG_(real_fpu_state_saved_over_syscall)
frstor VG_(real_fpu_state_saved_over_syscall)
pushfl
cmpb $0, VG_(have_ssestate)
jz qq1nosse
fxsave VG_(real_sse_state_saved_over_syscall)
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
fxrstor VG_(real_sse_state_saved_over_syscall)
jmp qq1merge
qq1nosse:
fnsave VG_(real_sse_state_saved_over_syscall)
frstor VG_(real_sse_state_saved_over_syscall)
qq1merge:
popfl
# remember what the simulators stack pointer is
movl %esp, VG_(esp_saved_over_syscall)
# Now copy the simulated machines state into the real one
# esp still refers to the simulators stack
pushfl
cmpb $0, VG_(have_ssestate)
jz qq2nosse
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp qq2merge
qq2nosse:
frstor VG_(m_state_static)+64
qq2merge:
popfl
movl VG_(m_state_static)+56, %eax
pushl %eax
popfl
@ -111,11 +132,32 @@ VG_(do_syscall):
popl %eax
movl %eax, VG_(m_state_static)+56
fwait
pushfl
cmpb $0, VG_(have_ssestate)
jz pp2nosse
fxsave VG_(m_state_static)+64
andl $0x0000FFBF, VG_(m_state_static)+64+24
fxrstor VG_(m_state_static)+64
jmp pp2merge
pp2nosse:
fnsave VG_(m_state_static)+64
frstor VG_(m_state_static)+64
pp2merge:
popfl
# Restore the state of the simulator
frstor VG_(real_fpu_state_saved_over_syscall)
pushfl
cmpb $0, VG_(have_ssestate)
jz pp1nosse
andl $0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
fxrstor VG_(real_sse_state_saved_over_syscall)
jmp pp1merge
pp1nosse:
frstor VG_(real_sse_state_saved_over_syscall)
pp1merge:
popfl
popal
ret