Basics for 128-bit SIMD code.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@3189
Julian Seward 2004-12-10 14:58:26 +00:00
parent 96c2c41a93
commit 2af2c78613


@@ -179,7 +179,7 @@ static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
/* Shadow state is always accessed using integer types. This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type. The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */
static IRType shadowType ( IRType ty )
{
@@ -188,9 +188,10 @@ static IRType shadowType ( IRType ty )
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
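
/* For illustration: the mapping is size-preserving, e.g.

      shadowType(Ity_F32)  == Ity_I32
      shadowType(Ity_F64)  == Ity_I64
      shadowType(Ity_V128) == Ity_V128

   so a value's shadow always has exactly the same width as the value
   itself. */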
@@ -297,6 +298,12 @@ static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}
static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_Or128, a1, a2));
}
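
/* For illustration: undefinedness is encoded as 1-bits in the
   shadow, so Iop_Or128 computes the union of undefinedness.  E.g. if
   a1# marks bits 0..31 as undefined and a2# marks bits 96..127 as
   undefined, the result marks both ranges undefined and all other
   bits defined. */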
static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I16: return mkUifU16(mce, a1, a2);
@@ -840,6 +847,92 @@ IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
}
/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops. ---*/
/*------------------------------------------------------------*/
/* Here's a simple scheme capable of handling ops derived from SSE1
   code while only generating ops that can be efficiently implemented
   in SSE1. */
/* All-lanes versions are straightforward:

      binary32Fx4(x,y)   ==> PCast32x4(UifU128(x#,y#))
      unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

      binary32F0x4(x,y)  ==> Set128lo32(
                                x#,
                                PCast32(128to32(UifU128(x#,y#)))
                             )

   This is perhaps not so obvious.  In particular, it's faster to do
   a 128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand and
   doing a 32-bit UifU, basically because UifU is fast and chopping
   lanes off vector values is slow.

   Finally:

      unary32F0x4(x)     ==> Set128lo32(
                                x#,
                                PCast32(128to32(x#))
                             )

   Where:

      PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
      PCast32x4(v#) = CmpNEZ32x4(v#)
*/
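
/* A minimal sketch (illustrative only; the function name is made up)
   of PCast32 as defined above, written directly in IR-building
   terms.  The real code below instead uses the more general
   mkPCastTo helper defined elsewhere in this file:

      static IRAtom* pcast32_sketch ( MCEnv* mce, IRAtom* vbits )
      {
         // CmpNE32(v#,0) yields a Bit which is 1 iff any of the 32
         // shadow bits is 1, i.e. iff any bit of the value might be
         // undefined.
         IRAtom* any
            = assignNew(mce, Ity_Bit,
                        binop(Iop_CmpNE32, vbits,
                              IRExpr_Const(IRConst_U32(0))));
         // 1Sto32 smears that single bit across all 32 bits, giving
         // all-zeroes (defined) or all-ones (undefined).
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, any));
      }
*/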
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
   return at;
}
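
/* For illustration: for Iop_Add32Fx4 this expands to

      result# = CmpNEZ32x4(Or128(x#, y#))

   so each 32-bit lane of the result shadow is all-ones (undefined)
   iff any bit in the corresponding lane of either operand's shadow
   is undefined, and all-zeroes (defined) otherwise. */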
static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, vatomX));
   return at;
}
static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_Set128lo32, vatomX, at));
   return at;
}
static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_Set128lo32, vatomX, at));
   return at;
}
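
/* For illustration, binary32F0x4 above builds, step by step:

      t1 = Or128(x#, y#)          -- UifU128 of the whole vectors
      t2 = 128to32(t1)            -- keep only lane 0
      t3 = PCast32(t2)            -- all-or-nothing verdict for lane 0
      r  = Set128lo32(x#, t3)     -- splice back into x#

   Lanes 1..3 of r come unchanged from x#, matching the SSE1 scalar
   ops, which leave the upper three lanes of the destination register
   untouched. */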
/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs. ---*/
/*------------------------------------------------------------*/
@@ -865,6 +958,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 128-bit SIMD */
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      /* Scalar floating point */
      case Iop_RoundF64:
      case Iop_F64toI64:
      case Iop_I64toF64: