amd64 back end: generate 32-bit shift instructions for 32-bit IR shifts.

Until now these have been handled by widening the value to 64 bits where
necessary, followed by a 64-bit shift.  That wastes instructions and code
space.
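
As an illustration (not part of the commit message; the register choice and
byte counts are mine), a 32-bit unsigned shift right by 5 of a value in %eax
previously required widening followed by a 64-bit shift:

   movl %eax, %eax     # zero-extend to 64 bits: 89 C0       (2 bytes)
   shrq $5, %rax       # 64-bit shift:           48 C1 E8 05 (4 bytes)

whereas a 32-bit shift instruction does the same job in half the space:

   shrl $5, %eax       # 32-bit shift:           C1 E8 05    (3 bytes)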
commit 4eaa80103d
parent 7239439e84
Author: Julian Seward
Date:   2020-01-02 09:23:46 +01:00
3 changed files with 76 additions and 11 deletions

--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c

@@ -626,6 +626,14 @@ AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
    i->Ain.Sh64.dst = dst;
    return i;
 }
+AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp op, UInt src, HReg dst ) {
+   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
+   i->tag = Ain_Sh32;
+   i->Ain.Sh32.op = op;
+   i->Ain.Sh32.src = src;
+   i->Ain.Sh32.dst = dst;
+   return i;
+}
 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
    AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
    i->tag = Ain_Test64;
@@ -1090,6 +1098,14 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
             vex_printf("$%d,", (Int)i->Ain.Sh64.src);
          ppHRegAMD64(i->Ain.Sh64.dst);
          return;
+      case Ain_Sh32:
+         vex_printf("%sl ", showAMD64ShiftOp(i->Ain.Sh32.op));
+         if (i->Ain.Sh32.src == 0)
+            vex_printf("%%cl,");
+         else
+            vex_printf("$%d,", (Int)i->Ain.Sh32.src);
+         ppHRegAMD64_lo32(i->Ain.Sh32.dst);
+         return;
       case Ain_Test64:
          vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
          ppHRegAMD64(i->Ain.Test64.dst);
@@ -1471,6 +1487,11 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          if (i->Ain.Sh64.src == 0)
             addHRegUse(u, HRmRead, hregAMD64_RCX());
          return;
+      case Ain_Sh32:
+         addHRegUse(u, HRmModify, i->Ain.Sh32.dst);
+         if (i->Ain.Sh32.src == 0)
+            addHRegUse(u, HRmRead, hregAMD64_RCX());
+         return;
       case Ain_Test64:
          addHRegUse(u, HRmRead, i->Ain.Test64.dst);
          return;
@@ -1808,6 +1829,9 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
       case Ain_Sh64:
          mapReg(m, &i->Ain.Sh64.dst);
          return;
+      case Ain_Sh32:
+         mapReg(m, &i->Ain.Sh32.dst);
+         return;
       case Ain_Test64:
          mapReg(m, &i->Ain.Test64.dst);
          return;
@@ -2762,6 +2786,30 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       break;
+   case Ain_Sh32:
+      opc_cl = opc_imm = subopc = 0;
+      switch (i->Ain.Sh32.op) {
+         case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
+         case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
+         case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
+         default: goto bad;
+      }
+      if (i->Ain.Sh32.src == 0) {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_cl);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         goto done;
+      } else {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_imm);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         *p++ = (UChar)(i->Ain.Sh32.src);
+         goto done;
+      }
+      break;
    case Ain_Test64:
       /* testq sign-extend($imm32), %reg */
       *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
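
(A worked example of the Ain_Sh32 encoding path above, not taken from the
commit itself: for "shll $24, %eax", rexAMode_R_enc_reg yields 0x48, clearWBit
reduces it to 0x40, so no REX byte is emitted; the output is C1 E0 18 --
opc_imm 0xC1, ModRM 0xE0 with subopc 4 in the reg field, then the immediate
0x18.  For "shll $24, %r9d" the REX byte survives as 0x41, giving 41 C1 E1 18.)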

--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h

@@ -359,7 +359,8 @@ typedef
       Ain_Imm64,   /* Generate 64-bit literal to register */
       Ain_Alu64R,  /* 64-bit mov/arith/logical, dst=REG */
       Ain_Alu64M,  /* 64-bit mov/arith/logical, dst=MEM */
-      Ain_Sh64,    /* 64-bit shift, dst=REG or MEM */
+      Ain_Sh64,    /* 64-bit shift, dst=REG */
+      Ain_Sh32,    /* 32-bit shift, dst=REG */
       Ain_Test64,  /* 64-bit test (AND, set flags, discard result) */
       Ain_Unary64, /* 64-bit not and neg */
       Ain_Lea64,   /* 64-bit compute EA into a reg */
@@ -441,6 +442,11 @@ typedef
          UInt src; /* shift amount, or 0 means %cl */
          HReg dst;
       } Sh64;
+      struct {
+         AMD64ShiftOp op;
+         UInt src; /* shift amount, or 0 means %cl */
+         HReg dst;
+      } Sh32;
       struct {
          UInt imm32;
          HReg dst;
@@ -744,6 +750,7 @@ extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst );
 extern AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst );
 extern AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp, AMD64RMI*, HReg );
 extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, HReg );
+extern AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp, UInt, HReg );
 extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
 extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
 extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );

--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c

@@ -1030,9 +1030,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
          addInstr(env, mk_iMOVsd_RR(regL,dst));
-         /* Do any necessary widening for 32/16/8 bit operands */
+         /* Do any necessary widening for 16/8 bit operands.  Also decide on
+            the final width at which the shift is to be done. */
+         Bool shift64 = False;
          switch (e->Iex.Binop.op) {
             case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
+               shift64 = True;
                break;
             case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
                break;
@@ -1045,18 +1048,16 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
                              Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
                break;
             case Iop_Shr32:
-               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
                break;
             case Iop_Sar8:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 24, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 24, dst));
                break;
             case Iop_Sar16:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 16, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 16, dst));
                break;
             case Iop_Sar32:
-               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
                break;
             default:
                ppIROp(e->Iex.Binop.op);
@@ -1071,14 +1072,23 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
             vassert(nshift >= 0);
-            if (nshift > 0)
+            if (nshift > 0) {
                /* Can't allow nshift==0 since that means %cl */
-               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               if (shift64) {
+                  addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               } else {
+                  addInstr(env, AMD64Instr_Sh32(shOp, nshift, dst));
+               }
+            }
          } else {
             /* General case; we have to force the amount into %cl. */
             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
             addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
-            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            if (shift64) {
+               addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            } else {
+               addInstr(env, AMD64Instr_Sh32(shOp, 0/* %cl */, dst));
+            }
          }
          return dst;
       }
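
(Illustration of the net effect in the instruction selector, assuming the
value lives in %eax: Iop_Sar8 formerly selected "shlq $56, %rax ; sarq $56,
%rax", encoding as 48 C1 E0 38 48 C1 F8 38, 8 bytes; it now selects "shll $24,
%eax ; sarl $24, %eax", encoding as C1 E0 18 C1 F8 18, 6 bytes, the REX.W
prefixes having become unnecessary.)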