From cafeef8e483d82fb1018eb9111c97782bf1a73c9 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Sun, 4 Jan 2004 23:30:55 +0000 Subject: [PATCH] Support for FXSAVE/FXRSTOR (Tom Hughes). Fixes #71180. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2183 --- addrcheck/ac_main.c | 6 +++--- cachegrind/cg_main.c | 24 +++++++++++++++++------- coregrind/vg_from_ucode.c | 2 +- coregrind/vg_to_ucode.c | 19 +++++++++++++++++++ coregrind/vg_translate.c | 36 +++++++++++++++++++----------------- include/vg_skin.h.base | 2 +- memcheck/mac_needs.c | 6 +++--- memcheck/mc_main.c | 4 ++-- memcheck/mc_translate.c | 4 ++-- 9 files changed, 67 insertions(+), 36 deletions(-) diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c index 74acac0cc..044f917d5 100644 --- a/addrcheck/ac_main.c +++ b/addrcheck/ac_main.c @@ -907,7 +907,7 @@ void ac_fpu_ACCESS_check ( Addr addr, Int size, Bool isWrite ) return; } - if (size == 16 || size == 10 || size == 28 || size == 108) { + if (size == 16 || size == 10 || size == 28 || size == 108 || size == 512) { PROF_EVENT(94); ac_fpu_ACCESS_check_SLOWLY ( addr, size, isWrite ); return; @@ -1055,8 +1055,8 @@ UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr) helper = (Addr)ac_fpu_WRITE_check; goto do_Access_ARG3; do_Access_ARG3: - sk_assert(u_in->size == 4 - || u_in->size == 8 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 8 + || u_in->size == 16 || u_in->size == 512); sk_assert(u_in->tag3 == TempReg); t_addr = u_in->val3; t_size = newTemp(cb); diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c index 0cbeaab0e..6264abfcb 100644 --- a/cachegrind/cg_main.c +++ b/cachegrind/cg_main.c @@ -544,7 +544,7 @@ static Int compute_BBCC_array_size(UCodeBlock* cb) case SSE2a_MemRd: case SSE2a1_MemRd: - sk_assert(u_in->size == 4 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512); t_read = u_in->val3; is_FPU_R = True; break; @@ -577,7 +577,7 @@ static Int compute_BBCC_array_size(UCodeBlock* cb) break; case SSE2a_MemWr: - sk_assert(u_in->size == 4 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512); t_write = u_in->val3; is_FPU_W = True; break; @@ -798,11 +798,16 @@ UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr) case SSE2a_MemRd: case SSE2a1_MemRd: - sk_assert(u_in->size == 4 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512); t_read = u_in->val3; t_read_addr = newTemp(cb); uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr); - data_size = u_in->size; + /* 512 B data-sized instructions will be done inaccurately + * but they're very rare and this avoids errors from + * hitting more than two cache lines in the simulation. */ + data_size = ( u_in->size <= MIN_LINE_SIZE + ? u_in->size + : MIN_LINE_SIZE); VG_(copy_UInstr)(cb, u_in); break; @@ -856,14 +861,19 @@ UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr) break; case SSE2a_MemWr: - sk_assert(u_in->size == 4 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512); /* fall through */ case SSE3a_MemWr: - sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512); t_write = u_in->val3; t_write_addr = newTemp(cb); uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr); - data_size = u_in->size; + /* 512 B data-sized instructions will be done inaccurately + * but they're very rare and this avoids errors from + * hitting more than two cache lines in the simulation. */ + data_size = ( u_in->size <= MIN_LINE_SIZE + ? u_in->size + : MIN_LINE_SIZE); VG_(copy_UInstr)(cb, u_in); break; diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c index d83862d4f..7be34b1a2 100644 --- a/coregrind/vg_from_ucode.c +++ b/coregrind/vg_from_ucode.c @@ -4075,7 +4075,7 @@ static void emitUInstr ( UCodeBlock* cb, Int i, case SSE2a_MemWr: case SSE2a_MemRd: - vg_assert(u->size == 4 || u->size == 16); + vg_assert(u->size == 4 || u->size == 16 || u->size == 512); vg_assert(u->tag1 == Lit16); vg_assert(u->tag2 == Lit16); vg_assert(u->tag3 == RealReg); diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c index 35d19117c..e7c48de5e 100644 --- a/coregrind/vg_to_ucode.c +++ b/coregrind/vg_to_ucode.c @@ -3545,8 +3545,27 @@ static Addr disInstr ( UCodeBlock* cb, Addr eip, Bool* isEnd ) if (VG_(have_ssestate)) { UChar* insn = (UChar*)eip; + /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && (!epartIsReg(insn[2])) + && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { + Bool store = gregOfRM(insn[2]) == 0; + vg_assert(sz == 4); + pair = disAMode ( cb, sorb, eip+2, dis?dis_buf:NULL ); + t1 = LOW24(pair); + eip += 2+HI8(pair); + uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, + Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], + Lit16, (UShort)insn[2], + TempReg, t1 ); + if (dis) + VG_(printf)("fx%s %s\n", store ? "save" : "rstor", dis_buf ); + goto decode_success; + } + /* STMXCSR/LDMXCSR m32 -- load/store the MXCSR register. */ if (insn[0] == 0x0F && insn[1] == 0xAE + && (!epartIsReg(insn[2])) && (gregOfRM(insn[2]) == 3 || gregOfRM(insn[2]) == 2) ) { Bool store = gregOfRM(insn[2]) == 3; vg_assert(sz == 4); diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c index 408a7079e..0a231c15d 100644 --- a/coregrind/vg_translate.c +++ b/coregrind/vg_translate.c @@ -414,7 +414,8 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u ) # define SZ42 (u->size == 4 || u->size == 2) # define SZ48 (u->size == 4 || u->size == 8) # define SZ416 (u->size == 4 || u->size == 16) -# define SZsse (u->size == 4 || u->size == 8 || u->size == 16) +# define SZsse2 (u->size == 4 || u->size == 16 || u->size == 512) +# define SZsse3 (u->size == 4 || u->size == 8 || u->size == 16) # define SZi (u->size == 4 || u->size == 2 || u->size == 1) # define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \ || u->size == 10 || u->size == 28 || u->size == 108) @@ -563,22 +564,22 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u ) case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER; /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */ - case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE2a_MemRd: return LIT0 && SZ416 && CCa && Ls1 && Ls2 && TR3 && XOTHER; - case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3a_MemRd: return LIT0 && SZsse && CCa && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3a1_MemRd: return LIT8 && SZ16 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; - case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER; - case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER; - case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER; + case SSE2a_MemWr: return LIT0 && SZsse2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE2a_MemRd: return LIT0 && SZsse2 && CCa && Ls1 && Ls2 && TR3 && XOTHER; + case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3a_MemWr: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3a_MemRd: return LIT0 && SZsse3 && CCa && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3a1_MemRd: return LIT8 && SZ16 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER; + case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER; + case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER; + case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER; case SSE3ag_MemRd_RegWr: - return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER; + return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER; default: if (VG_(needs).extended_UCode) return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u); @@ -602,7 +603,8 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u ) # undef SZ42 # undef SZ48 # undef SZ416 -# undef SZsse +# undef SZsse2 +# undef SZsse3 # undef SZi # undef SZf # undef SZ4m diff --git a/include/vg_skin.h.base b/include/vg_skin.h.base index 4ef7a638d..52fc2c858 100644 --- a/include/vg_skin.h.base +++ b/include/vg_skin.h.base @@ -960,7 +960,7 @@ typedef /* word 3 */ UShort val3; /* third operand */ UChar opcode; /* opcode */ - UChar size; /* data transfer size */ + UShort size; /* data transfer size */ /* word 4 */ FlagSet flags_r; /* :: FlagSet */ diff --git a/memcheck/mac_needs.c b/memcheck/mac_needs.c index 315847fc4..a3ab8232b 100644 --- a/memcheck/mac_needs.c +++ b/memcheck/mac_needs.c @@ -732,19 +732,19 @@ A 78 ACCESS1_SLOWLY 81 fpu_read aligned 4 82 fpu_read aligned 8 83 fpu_read 2 - 84 fpu_read 10/28/108 + 84 fpu_read 10/28/108/512 M 85 fpu_write M 86 fpu_write aligned 4 M 87 fpu_write aligned 8 M 88 fpu_write 2 -M 89 fpu_write 10/28/108 +M 89 fpu_write 10/28/108/512 90 fpu_access 91 fpu_access aligned 4 92 fpu_access aligned 8 93 fpu_access 2 - 94 fpu_access 10/28/108 + 94 fpu_access 10/28/108/512 100 fpu_access_check_SLOWLY 101 fpu_access_check_SLOWLY(byte loop) diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c index 7ee467c05..3cc44deab 100644 --- a/memcheck/mc_main.c +++ b/memcheck/mc_main.c @@ -1190,7 +1190,7 @@ void MC_(fpu_read_check) ( Addr addr, Int size ) } if (size == 16 /*SSE*/ - || size == 10 || size == 28 || size == 108) { + || size == 10 || size == 28 || size == 108 || size == 512) { PROF_EVENT(84); mc_fpu_read_check_SLOWLY ( addr, size ); return; @@ -1273,7 +1273,7 @@ void MC_(fpu_write_check) ( Addr addr, Int size ) } if (size == 16 /*SSE*/ - || size == 10 || size == 28 || size == 108) { + || size == 10 || size == 28 || size == 108 || size == 512) { PROF_EVENT(89); mc_fpu_write_check_SLOWLY ( addr, size ); return; diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 1b3599b7f..d21bb8647 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1112,8 +1112,8 @@ static UCodeBlock* memcheck_instrument ( UCodeBlock* cb_in ) Bool is_load; Int t_size; - sk_assert(u_in->size == 4 - || u_in->size == 8 || u_in->size == 16); + sk_assert(u_in->size == 4 || u_in->size == 8 + || u_in->size == 16 || u_in->size == 512); t_size = INVALID_TEMPREG; is_load = u_in->opcode==SSE2a_MemRd