Tonight's installment of SSE madness. Add a couple more nasties

(pextrw/pinsrw).


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1683
This commit is contained in:
Julian Seward 2003-06-13 00:26:02 +00:00
parent c4f91b60a1
commit dfaded70ea
4 changed files with 183 additions and 2 deletions

View File

@ -1486,6 +1486,60 @@ static void emit_SSE3g ( FlagSet uses_sflags,
);
}
/* Emit a five-byte instruction of the form
      b1 : b2 : b3 : 11 ireg bbb : b5
   i.e. the fourth byte is a mod-reg-rm byte whose reg field
   (bits 5:3) names an integer register.  Bytes one to three and
   byte five are emitted exactly as supplied.  The fourth byte is
   rebuilt: mod is forced to 11b, the reg field is replaced by the
   low three bits of ireg, and the r/m field (bits 2:0) is carried
   over from the caller's fourth_byte.  When 'dis' is set, the bytes
   actually emitted are printed along with the 32-bit name of
   ireg. */
static void emit_SSE3g1_RegWr ( FlagSet uses_sflags,
                                FlagSet sets_sflags,
                                UChar first_byte,
                                UChar second_byte,
                                UChar third_byte,
                                UChar fourth_byte,
                                UChar fifth_byte,
                                Int ireg )
{
   /* r/m bits kept, mod = 11b, ireg placed in the reg field. */
   UChar patched_modrm
      = (fourth_byte & 0x07) | 0xC0 | ((ireg & 7) << 3);

   VG_(new_emit)(True, uses_sflags, sets_sflags);
   VG_(emitB) ( first_byte );
   VG_(emitB) ( second_byte );
   VG_(emitB) ( third_byte );
   VG_(emitB) ( patched_modrm );
   VG_(emitB) ( fifth_byte );

   if (!dis)
      return;

   /* Trace output shows the patched fourth byte, not the original. */
   VG_(printf)(
      "\n\t\tssereg-to-ireg--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n",
      (UInt)first_byte, (UInt)second_byte,
      (UInt)third_byte, (UInt)patched_modrm, (UInt)fifth_byte,
      nameIReg(4,ireg)
   );
}
/* Emit a five-byte instruction of the form
      b1 : b2 : b3 : 11 bbb ireg : b5
   i.e. the fourth byte is a mod-reg-rm byte whose r/m field
   (bits 2:0) names an integer register.  Bytes one to three and
   byte five are emitted exactly as supplied.  The fourth byte is
   rebuilt: mod is forced to 11b, the r/m field is replaced by the
   low three bits of ireg, and the reg field (bits 5:3) is carried
   over from the caller's fourth_byte.  When 'dis' is set, the bytes
   actually emitted are printed along with the 32-bit name of
   ireg. */
static void emit_SSE3g1_RegRd ( FlagSet uses_sflags,
                                FlagSet sets_sflags,
                                UChar first_byte,
                                UChar second_byte,
                                UChar third_byte,
                                UChar fourth_byte,
                                UChar fifth_byte,
                                Int ireg )
{
   /* reg bits kept, mod = 11b, ireg placed in the r/m field. */
   UChar patched_modrm
      = (fourth_byte & 0x38) | 0xC0 | (ireg & 7);

   VG_(new_emit)(True, uses_sflags, sets_sflags);
   VG_(emitB) ( first_byte );
   VG_(emitB) ( second_byte );
   VG_(emitB) ( third_byte );
   VG_(emitB) ( patched_modrm );
   VG_(emitB) ( fifth_byte );

   if (!dis)
      return;

   /* Trace output shows the patched fourth byte, not the original. */
   VG_(printf)(
      "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n",
      (UInt)first_byte, (UInt)second_byte,
      (UInt)third_byte, (UInt)patched_modrm, (UInt)fifth_byte,
      nameIReg(4,ireg)
   );
}
static void emit_SSE4 ( FlagSet uses_sflags,
FlagSet sets_sflags,
UChar first_byte,
@ -3750,6 +3804,44 @@ static void emitUInstr ( UCodeBlock* cb, Int i,
u->opcode==SSE3g_RegRd ? True : False );
break;
case SSE3g1_RegWr:
vg_assert(u->size == 4);
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
vg_assert(u->tag3 == RealReg);
vg_assert(!anyFlagUse(u));
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
emit_SSE3g1_RegWr ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
u->val1 & 0xFF,
(u->val2 >> 8) & 0xFF,
u->val2 & 0xFF,
u->lit32 & 0xFF,
u->val3 );
break;
case SSE3g1_RegRd:
vg_assert(u->size == 2);
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
vg_assert(u->tag3 == RealReg);
vg_assert(!anyFlagUse(u));
if (!(*sselive)) {
emit_get_sse_state();
*sselive = True;
}
emit_SSE3g1_RegRd ( u->flags_r, u->flags_w,
(u->val1 >> 8) & 0xFF,
u->val1 & 0xFF,
(u->val2 >> 8) & 0xFF,
u->val2 & 0xFF,
u->lit32 & 0xFF,
u->val3 );
break;
case SSE4:
vg_assert(u->size == 0);
vg_assert(u->tag1 == Lit16);

View File

@ -3754,6 +3754,16 @@ static Addr disInstr ( UCodeBlock* cb, Addr eip, Bool* isEnd )
goto decode_success;
}
#if 0
/* SUBSD */
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
vg_assert(sz == 4);
eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+3, 8, "subsd",
insn[0], insn[1], insn[2] );
goto decode_success;
}
#endif
/* ADDSD */
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
vg_assert(sz == 4);
@ -3880,6 +3890,49 @@ static Addr disInstr ( UCodeBlock* cb, Addr eip, Bool* isEnd )
goto decode_success;
}
/* PEXTRW from SSE register; writes ireg */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC5) {
t1 = newTemp(cb);
modrm = insn[2];
vg_assert(epartIsReg(modrm));
vg_assert((modrm & 0xC0) == 0xC0);
uInstr3(cb, SSE3g1_RegWr, 4,
Lit16, (((UShort)0x66) << 8) | (UShort)insn[0],
Lit16, (((UShort)insn[1]) << 8) | (UShort)modrm,
TempReg, t1 );
uLiteral(cb, insn[3]);
uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, gregOfRM(modrm));
if (dis)
VG_(printf)("pextrw %s, %d, %s\n",
nameXMMReg(eregOfRM(modrm)), (Int)insn[3],
nameIReg(4, gregOfRM(modrm)));
eip += 4;
goto decode_success;
}
/* PINSRW to SSE register; reads mem or ireg */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
t1 = newTemp(cb);
modrm = insn[2];
if (epartIsReg(modrm)) {
uInstr2(cb, GET, 2, ArchReg, eregOfRM(modrm), TempReg, t1);
uInstr3(cb, SSE3g1_RegRd, 2,
Lit16, (((UShort)0x66) << 8) | (UShort)insn[0],
Lit16, (((UShort)insn[1]) << 8) | (UShort)modrm,
TempReg, t1 );
uLiteral(cb, insn[3]);
if (dis)
VG_(printf)("pinsrw %s, %d, %s\n",
nameIReg(2, eregOfRM(modrm)),
(Int)insn[3],
nameXMMReg(gregOfRM(modrm)));
eip += 4;
} else {
VG_(core_panic)("PINSRW mem");
}
goto decode_success;
}
/* Fall through into the non-SSE decoder. */
} /* if (VG_(have_ssestate)) */

View File

@ -402,6 +402,7 @@ Int VG_(realreg_to_rank) ( Int realReg )
Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
{
# define LIT0 (u->lit32 == 0)
# define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
# define LIT1 (!(LIT0))
# define LITm (u->tag1 == Literal ? True : LIT0 )
# define SZ8 (u->size == 8)
@ -564,6 +565,8 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
case SSE3a_MemRd: return LIT0 && SZsse && CCf && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3g_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3g1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
case SSE4: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
default:
@ -578,6 +581,7 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
}
# undef LIT0
# undef LIT1
# undef LIT8
# undef LITm
# undef SZ8
# undef SZ4
@ -881,6 +885,8 @@ Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
case SSE2a_MemRd: return "SSE2a_MRd";
case SSE3g_RegRd: return "SSE3g_RRd";
case SSE3g_RegWr: return "SSE3g_RWr";
case SSE3g1_RegWr: return "SSE3g1_RWr";
case SSE3g1_RegRd: return "SSE3g1_RRd";
case SSE3: return "SSE3";
case SSE4: return "SSE4";
case SSE3a_MemWr: return "SSE3a_MWr";
@ -1058,6 +1064,15 @@ void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
VG_(pp_UOperand)(u, 3, 4, True);
break;
case SSE3g1_RegWr:
case SSE3g1_RegRd:
VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
(u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
(u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
u->lit32 );
VG_(pp_UOperand)(u, 3, 4, True);
break;
case SSE3:
VG_(printf)("0x%x:0x%x:0x%x",
(u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
@ -1219,8 +1234,10 @@ Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
case SSE3a_MemWr:
case SSE3a_MemRd:
case SSE2a_MemWr:
case SSE3g1_RegRd:
case SSE2a_MemRd: RD(3); break;
case SSE3g1_RegWr:
case SSE3g_RegWr: WR(3); break;
case MMX2_RegRd: RD(2); break;
@ -1380,7 +1397,8 @@ Int maybe_uinstrReadsArchReg ( UInstr* u )
case MMX2_RegRd: case MMX2_RegWr:
case SSE2a_MemWr: case SSE2a_MemRd:
case SSE3a_MemWr: case SSE3a_MemRd:
case SSE3g_RegRd: case SSE3g_RegWr:
case SSE3g_RegRd: case SSE3g_RegWr:
case SSE3g1_RegWr: case SSE3g1_RegRd:
case SSE4: case SSE3:
case WIDEN:
/* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
@ -2249,7 +2267,7 @@ void VG_(translate) ( /*IN*/ ThreadState* tst,
UChar* final_code;
UCodeBlock* cb;
Bool notrace_until_done;
Int notrace_until_limit = 15000;
Int notrace_until_limit = 23500;
VGP_PUSHCC(VgpTranslate);
debugging_translation

View File

@ -635,6 +635,24 @@ typedef
*/
SSE3g_RegWr,
/* 5 bytes, writes an integer register. Insns of the form
bbbbbbbb:bbbbbbbb:bbbbbbbb: 11 ireg bbb :bbbbbbbb. Held in
val1[15:0] and val2[15:0] and lit32[7:0], and ireg is to be
replaced at codegen time by a reference to the relevant
RealReg. Transfer is always at size 4. Arg3 holds this
Temp/Real Reg.
*/
SSE3g1_RegWr,
/* 5 bytes, reads an integer register. Insns of the form
bbbbbbbb:bbbbbbbb:bbbbbbbb: 11 bbb ireg :bbbbbbbb. Held in
val1[15:0] and val2[15:0] and lit32[7:0], and ireg is to be
replaced at codegen time by a reference to the relevant
RealReg. Transfer is always at size 2. Arg3 holds this
Temp/Real Reg.
*/
SSE3g1_RegRd,
/* 4 bytes, reads an integer register. Insns of the form
bbbbbbbb:bbbbbbbb:bbbbbbbb:11 xmmreg ireg.
Held in val1[15:0] and val2[15:0], and ireg is to be replaced