amd64 back end: generate 32-bit shift instructions for 32-bit IR shifts.

Until now these have been handled by widening the value to 64 bits where
necessary, followed by a 64-bit shift.  That wastes instructions and code
space.
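
As an illustration (not part of the commit message; the register choice and
byte counts are mine), a 32-bit unsigned shift right by 5 of a value in %eax
previously required widening followed by a 64-bit shift:

   movl %eax, %eax     # zero-extend to 64 bits: 89 C0       (2 bytes)
   shrq $5, %rax       # 64-bit shift:           48 C1 E8 05 (4 bytes)

whereas a 32-bit shift instruction does the same job in half the space:

   shrl $5, %eax       # 32-bit shift:           C1 E8 05    (3 bytes)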
commit 4eaa80103d
parent 7239439e84
Author: Julian Seward
Date:   2020-01-02 09:23:46 +01:00
3 changed files with 76 additions and 11 deletions

--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c

@@ -626,6 +626,14 @@ AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
    i->Ain.Sh64.dst = dst;
    return i;
 }
+AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp op, UInt src, HReg dst ) {
+   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
+   i->tag = Ain_Sh32;
+   i->Ain.Sh32.op = op;
+   i->Ain.Sh32.src = src;
+   i->Ain.Sh32.dst = dst;
+   return i;
+}
 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
    AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
    i->tag = Ain_Test64;
@@ -1090,6 +1098,14 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
             vex_printf("$%d,", (Int)i->Ain.Sh64.src);
          ppHRegAMD64(i->Ain.Sh64.dst);
          return;
+      case Ain_Sh32:
+         vex_printf("%sl ", showAMD64ShiftOp(i->Ain.Sh32.op));
+         if (i->Ain.Sh32.src == 0)
+            vex_printf("%%cl,");
+         else
+            vex_printf("$%d,", (Int)i->Ain.Sh32.src);
+         ppHRegAMD64_lo32(i->Ain.Sh32.dst);
+         return;
       case Ain_Test64:
          vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
          ppHRegAMD64(i->Ain.Test64.dst);
@@ -1471,6 +1487,11 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          if (i->Ain.Sh64.src == 0)
             addHRegUse(u, HRmRead, hregAMD64_RCX());
          return;
+      case Ain_Sh32:
+         addHRegUse(u, HRmModify, i->Ain.Sh32.dst);
+         if (i->Ain.Sh32.src == 0)
+            addHRegUse(u, HRmRead, hregAMD64_RCX());
+         return;
       case Ain_Test64:
          addHRegUse(u, HRmRead, i->Ain.Test64.dst);
          return;
@@ -1808,6 +1829,9 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
       case Ain_Sh64:
          mapReg(m, &i->Ain.Sh64.dst);
          return;
+      case Ain_Sh32:
+         mapReg(m, &i->Ain.Sh32.dst);
+         return;
       case Ain_Test64:
          mapReg(m, &i->Ain.Test64.dst);
          return;
@@ -2762,6 +2786,30 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       break;
+   case Ain_Sh32:
+      opc_cl = opc_imm = subopc = 0;
+      switch (i->Ain.Sh32.op) {
+         case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
+         case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
+         case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
+         default: goto bad;
+      }
+      if (i->Ain.Sh32.src == 0) {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_cl);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         goto done;
+      } else {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_imm);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         *p++ = (UChar)(i->Ain.Sh32.src);
+         goto done;
+      }
+      break;
    case Ain_Test64:
       /* testq sign-extend($imm32), %reg */
       *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
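
(A worked example of the Ain_Sh32 encoding path above, not taken from the
commit itself: for "shll $24, %eax", rexAMode_R_enc_reg yields 0x48, clearWBit
reduces it to 0x40, so no REX byte is emitted; the output is C1 E0 18 --
opc_imm 0xC1, ModRM 0xE0 with subopc 4 in the reg field, then the immediate
0x18.  For "shll $24, %r9d" the REX byte survives as 0x41, giving 41 C1 E1 18.)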

--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h

@@ -359,7 +359,8 @@ typedef
       Ain_Imm64,   /* Generate 64-bit literal to register */
       Ain_Alu64R,  /* 64-bit mov/arith/logical, dst=REG */
       Ain_Alu64M,  /* 64-bit mov/arith/logical, dst=MEM */
-      Ain_Sh64,    /* 64-bit shift, dst=REG or MEM */
+      Ain_Sh64,    /* 64-bit shift, dst=REG */
+      Ain_Sh32,    /* 32-bit shift, dst=REG */
       Ain_Test64,  /* 64-bit test (AND, set flags, discard result) */
       Ain_Unary64, /* 64-bit not and neg */
       Ain_Lea64,   /* 64-bit compute EA into a reg */
@@ -441,6 +442,11 @@ typedef
          UInt src; /* shift amount, or 0 means %cl */
          HReg dst;
       } Sh64;
+      struct {
+         AMD64ShiftOp op;
+         UInt src; /* shift amount, or 0 means %cl */
+         HReg dst;
+      } Sh32;
       struct {
          UInt imm32;
          HReg dst;
@@ -744,6 +750,7 @@ extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst );
 extern AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst );
 extern AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp, AMD64RMI*, HReg );
 extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, HReg );
+extern AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp, UInt, HReg );
 extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
 extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
 extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );

--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c

@@ -1030,9 +1030,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
          addInstr(env, mk_iMOVsd_RR(regL,dst));
-         /* Do any necessary widening for 32/16/8 bit operands */
+         /* Do any necessary widening for 16/8 bit operands.  Also decide on
+            the final width at which the shift is to be done. */
+         Bool shift64 = False;
          switch (e->Iex.Binop.op) {
             case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
+               shift64 = True;
                break;
             case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
                break;
@@ -1045,18 +1048,16 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
                              Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
                break;
             case Iop_Shr32:
-               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
                break;
             case Iop_Sar8:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 24, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 24, dst));
                break;
             case Iop_Sar16:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 16, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 16, dst));
                break;
             case Iop_Sar32:
-               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
                break;
             default:
                ppIROp(e->Iex.Binop.op);
@@ -1071,14 +1072,23 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
             vassert(nshift >= 0);
-            if (nshift > 0)
+            if (nshift > 0) {
                /* Can't allow nshift==0 since that means %cl */
-               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               if (shift64) {
+                  addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               } else {
+                  addInstr(env, AMD64Instr_Sh32(shOp, nshift, dst));
+               }
+            }
          } else {
             /* General case; we have to force the amount into %cl. */
             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
             addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
-            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            if (shift64) {
+               addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            } else {
+               addInstr(env, AMD64Instr_Sh32(shOp, 0/* %cl */, dst));
+            }
          }
          return dst;
       }
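
(Illustration of the net effect in the instruction selector, assuming the
value lives in %eax: Iop_Sar8 formerly selected "shlq $56, %rax ; sarq $56,
%rax", encoding as 48 C1 E0 38 48 C1 F8 38, 8 bytes; it now selects "shll $24,
%eax ; sarl $24, %eax", encoding as C1 E0 18 C1 F8 18, 6 bytes, the REX.W
prefixes having become unnecessary.)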