mips: optimize multiplication Iops

Optimize and refactor some of the mul* Iop code in VEX/priv/host_mips_*.

Patch from Aleksandar Rikalo.
This commit is contained in:
Petar Jovanovic 2017-09-28 19:29:51 +02:00
parent 8cdeee4ebb
commit eb18bd1b44
4 changed files with 168 additions and 96 deletions

View File

@ -811,21 +811,40 @@ MIPSInstr *MIPSInstr_Cmp(Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR,
return i;
}
/* multiply */
MIPSInstr *MIPSInstr_Mul(Bool syned, Bool wid, Bool sz32, HReg dst, HReg srcL,
HReg srcR)
/* mul */
MIPSInstr *MIPSInstr_Mul(HReg dst, HReg srcL, HReg srcR)
{
MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr));
i->tag = Min_Mul;
i->Min.Mul.syned = syned;
i->Min.Mul.widening = wid; /* widen=True else False */
i->Min.Mul.sz32 = sz32; /* True = 32 bits */
i->Min.Mul.dst = dst;
i->Min.Mul.srcL = srcL;
i->Min.Mul.srcR = srcR;
return i;
}
/* mult, multu / dmult, dmultu */
/* Build a Min_Mult instruction that multiplies srcL by srcR.
   syned is stored as given; it is the flag that later selects between the
   signed (mult/dmult) and unsigned (multu/dmultu) mnemonic.  No destination
   register is recorded in the instruction.
   Returns the newly allocated MIPSInstr. */
MIPSInstr *MIPSInstr_Mult(Bool syned, HReg srcL, HReg srcR)
{
MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr));
i->tag = Min_Mult;
i->Min.Mult.syned = syned;
i->Min.Mult.srcL = srcL;
i->Min.Mult.srcR = srcR;
return i;
}
/* ext / dext, dextm, dextu */
/* Build a Min_Ext (bit extract) instruction.
   dst  - register that receives the extracted field
   src  - register the field is taken from
   pos  - position operand of the extract
   size - size operand of the extract
   pos and size are stored unchecked here; their ranges are asserted where
   the instruction is printed and emitted.
   Returns the newly allocated MIPSInstr. */
MIPSInstr *MIPSInstr_Ext(HReg dst, HReg src, UInt pos, UInt size)
{
MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr));
i->tag = Min_Ext;
i->Min.Ext.dst = dst;
i->Min.Ext.src = src;
i->Min.Ext.pos = pos;
i->Min.Ext.size = size;
return i;
}
/* msub */
MIPSInstr *MIPSInstr_Msub(Bool syned, HReg srcL, HReg srcR)
{
@ -1228,26 +1247,35 @@ void ppMIPSInstr(const MIPSInstr * i, Bool mode64)
return;
}
case Min_Mul: {
switch (i->Min.Mul.widening) {
case False:
vex_printf("mul ");
ppHRegMIPS(i->Min.Mul.dst, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mul.srcL, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mul.srcR, mode64);
return;
case True:
vex_printf("%s%s ", i->Min.Mul.sz32 ? "mult" : "dmult",
i->Min.Mul.syned ? "" : "u");
ppHRegMIPS(i->Min.Mul.dst, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mul.srcL, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mul.srcR, mode64);
return;
}
break;
vex_printf("mul ");
ppHRegMIPS(i->Min.Mul.dst, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mul.srcL, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mul.srcR, mode64);
return;
}
case Min_Mult: {
/* The mnemonic is "dmult" when mode64 is set and "mult" otherwise; a "u"
   suffix is appended for the unsigned form.  Only the two source registers
   are printed, as the instruction carries no destination operand. */
vex_printf("%s%s ", mode64 ? "dmult" : "mult",
i->Min.Mult.syned ? "" : "u");
ppHRegMIPS(i->Min.Mult.srcL, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Mult.srcR, mode64);
return;
}
case Min_Ext: {
/* Only the mode64 form is accepted and it is always printed as "dext".
   The asserts restrict the operands to pos < 32, 0 < size <= 32 and
   0 < pos + size <= 63. */
vassert(mode64);
vassert(i->Min.Ext.pos < 32);
vassert(i->Min.Ext.size > 0);
vassert(i->Min.Ext.size <= 32);
vassert(i->Min.Ext.size + i->Min.Ext.pos > 0);
vassert(i->Min.Ext.size + i->Min.Ext.pos <= 63);
/* Output format: dext dst, src, pos, size */
vex_printf("dext ");
ppHRegMIPS(i->Min.Ext.dst, mode64);
vex_printf(", ");
ppHRegMIPS(i->Min.Ext.src, mode64);
vex_printf(", %u, %u", i->Min.Ext.pos, i->Min.Ext.size);
return;
}
case Min_Mthi: {
vex_printf("mthi ");
@ -1597,6 +1625,18 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64)
addHRegUse(u, HRmWrite, i->Min.Mul.dst);
addHRegUse(u, HRmRead, i->Min.Mul.srcL);
addHRegUse(u, HRmRead, i->Min.Mul.srcR);
addHRegUse(u, HRmWrite, hregMIPS_HI(mode64));
addHRegUse(u, HRmWrite, hregMIPS_LO(mode64));
return;
case Min_Mult:
/* Both sources are read.  There is no explicit destination operand;
   HI and LO are recorded as written instead. */
addHRegUse(u, HRmRead, i->Min.Mult.srcL);
addHRegUse(u, HRmRead, i->Min.Mult.srcR);
addHRegUse(u, HRmWrite, hregMIPS_HI(mode64));
addHRegUse(u, HRmWrite, hregMIPS_LO(mode64));
return;
case Min_Ext:
/* Bit extract: dst is written, src is read; pos and size are immediates
   and involve no registers. */
addHRegUse(u, HRmWrite, i->Min.Ext.dst);
addHRegUse(u, HRmRead, i->Min.Ext.src);
return;
case Min_Mthi:
case Min_Mtlo:
@ -1817,6 +1857,14 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64)
mapReg(m, &i->Min.Mul.srcL);
mapReg(m, &i->Min.Mul.srcR);
return;
case Min_Mult:
/* Only the two source operands are register fields of Min_Mult. */
mapReg(m, &i->Min.Mult.srcL);
mapReg(m, &i->Min.Mult.srcR);
return;
case Min_Ext:
/* Remap both register operands of the bit extract. */
mapReg(m, &i->Min.Ext.src);
mapReg(m, &i->Min.Ext.dst);
return;
case Min_Mthi:
case Min_Mtlo:
mapReg(m, &i->Min.MtHL.src);
@ -2808,38 +2856,52 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc,
}
case Min_Mul: {
Bool syned = i->Min.Mul.syned;
Bool widening = i->Min.Mul.widening;
Bool sz32 = i->Min.Mul.sz32;
UInt r_srcL = iregNo(i->Min.Mul.srcL, mode64);
UInt r_srcR = iregNo(i->Min.Mul.srcR, mode64);
UInt r_dst = iregNo(i->Min.Mul.dst, mode64);
if (widening) {
if (sz32) {
if (syned)
/* mult */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 24);
else
/* multu */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 25);
} else {
if (syned) /* DMULT r_dst,r_srcL,r_srcR */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 28);
else /* DMULTU r_dst,r_srcL,r_srcR */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 29);
}
} else {
if (sz32)
/* mul */
p = mkFormR(p, 28, r_srcL, r_srcR, r_dst, 0, 2);
else if (mode64 && !sz32)
p = mkFormR(p, 28, r_srcL, r_srcR, r_dst, 0, 2);
/* mul r_dst, r_srcL, r_srcR */
p = mkFormR(p, 28, r_srcL, r_srcR, r_dst, 0, 2);
goto done;
}
case Min_Mult: {
Bool syned = i->Min.Mult.syned;
UInt r_srcL = iregNo(i->Min.Mult.srcL, mode64);
UInt r_srcR = iregNo(i->Min.Mult.srcR, mode64);
if (mode64) {
if (syned)
/* dmult r_srcL, r_srcR */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 28);
else
goto bad;
/* dmultu r_srcL, r_srcR */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 29);
} else {
if (syned)
/* mult r_srcL, r_srcR */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 24);
else
/* multu r_srcL, r_srcR */
p = mkFormR(p, 0, r_srcL, r_srcR, 0, 0, 25);
}
goto done;
}
case Min_Ext: {
/* Emit the bit-extract instruction described by a Min_Ext. */
UInt r_src = iregNo(i->Min.Ext.src, mode64);
UInt r_dst = iregNo(i->Min.Ext.dst, mode64);
/* For now, only DEXT is implemented. */
vassert(mode64);
/* Operand limits checked below: pos < 32, 0 < size <= 32,
   0 < pos + size <= 63. */
vassert(i->Min.Ext.pos < 32);
vassert(i->Min.Ext.size > 0);
vassert(i->Min.Ext.size <= 32);
vassert(i->Min.Ext.size + i->Min.Ext.pos > 0);
vassert(i->Min.Ext.size + i->Min.Ext.pos <= 63);
/* DEXT r_dst, r_src, pos, size */
/* The size is handed to mkFormR as size - 1; pos is passed unchanged. */
p = mkFormR(p, 0x1F, r_src, r_dst,
i->Min.Ext.size - 1, i->Min.Ext.pos, 3);
goto done;
}
case Min_Macc: {
Bool syned = i->Min.Macc.syned;
UInt r_srcL = iregNo(i->Min.Macc.srcL, mode64);

View File

@ -276,10 +276,12 @@ typedef enum {
Min_Alu, /* word add/sub/and/or/xor/nor/others? */
Min_Shft, /* word sll/srl/sra */
Min_Unary, /* clo, clz, nop, neg */
Min_Ext, /* ext / dext, dextm, dextu */
Min_Cmp, /* word compare (fake insn) */
Min_Mul, /* widening/non-widening multiply */
Min_Mul, /* non-widening, 32-bit, signed multiply */
Min_Mult, /* widening multiply */
Min_Div, /* div */
Min_Call, /* call to address in register */
@ -415,6 +417,13 @@ typedef struct {
HReg dst;
HReg src;
} Unary;
/* Bit extract */
struct {
HReg dst;   /* register receiving the extracted field */
HReg src;   /* register the field is taken from */
UInt pos;   /* position operand */
UInt size;  /* size operand */
} Ext;
/* Word compare. Fake instruction, used for basic block ending */
struct {
Bool syned;
@ -433,6 +442,11 @@ typedef struct {
HReg srcL;
HReg srcR;
} Mul;
/* Widening multiply (mult, multu / dmult, dmultu).  Only the two sources
   are stored; there is no destination register field. */
struct {
Bool syned; /* signed/unsigned */
HReg srcL;
HReg srcR;
} Mult;
struct {
Bool syned; /* signed/unsigned - meaningless if widenind = False */
Bool sz32;
@ -615,10 +629,11 @@ extern MIPSInstr *MIPSInstr_LI(HReg, ULong);
extern MIPSInstr *MIPSInstr_Alu(MIPSAluOp, HReg, HReg, MIPSRH *);
extern MIPSInstr *MIPSInstr_Shft(MIPSShftOp, Bool sz32, HReg, HReg, MIPSRH *);
extern MIPSInstr *MIPSInstr_Unary(MIPSUnaryOp op, HReg dst, HReg src);
extern MIPSInstr *MIPSInstr_Ext(HReg, HReg, UInt, UInt);
extern MIPSInstr *MIPSInstr_Cmp(Bool, Bool, HReg, HReg, HReg, MIPSCondCode);
extern MIPSInstr *MIPSInstr_Mul(Bool syned, Bool hi32, Bool sz32, HReg,
HReg, HReg);
extern MIPSInstr *MIPSInstr_Mul(HReg, HReg, HReg);
extern MIPSInstr *MIPSInstr_Mult(Bool, HReg, HReg);
extern MIPSInstr *MIPSInstr_Div(Bool syned, Bool sz32, HReg, HReg);
extern MIPSInstr *MIPSInstr_Madd(Bool, HReg, HReg);
extern MIPSInstr *MIPSInstr_Msub(Bool, HReg, HReg);

View File

@ -55,6 +55,9 @@ static Bool mode64 = False;
/* Host CPU has FPU and 32 dbl. prec. FP registers. */
static Bool fp_mode64 = False;
/* Host hwcaps */
static UInt hwcaps_host = 0;
/* GPR register class for mips32/64 */
#define HRcGPR(_mode64) ((_mode64) ? HRcInt64 : HRcInt32)
@ -1058,52 +1061,46 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
return r_dst;
}
if (e->Iex.Binop.op == Iop_Mul32 || e->Iex.Binop.op == Iop_Mul64) {
Bool sz32 = (e->Iex.Binop.op == Iop_Mul32);
if (e->Iex.Binop.op == Iop_Mul32) {
HReg r_dst = newVRegI(env);
HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, MIPSInstr_Mul(False/*Unsigned or Signed */ ,
False /*widen */ ,
sz32 /*32bit or 64bit */,
r_dst, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mul(r_dst, r_srcL, r_srcR));
return r_dst;
}
if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
if (e->Iex.Binop.op == Iop_Mul64 ||
e->Iex.Binop.op == Iop_MullS32) {
vassert(mode64);
HReg r_dst = newVRegI(env);
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tLo_1 = newVRegI(env);
HReg tHi_1 = newVRegI(env);
HReg mask = newVRegI(env);
Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
Bool size = toBool(e->Iex.Binop.op == Iop_MullS32)
|| toBool(e->Iex.Binop.op == Iop_MullU32);
HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, MIPSInstr_Mul(syned /*Unsigned or Signed */ ,
True /*widen */ ,
size /*32bit or 64bit mul */ ,
r_dst, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mfhi(tHi));
addInstr(env, MIPSInstr_Mflo(tLo));
addInstr(env, MIPSInstr_Shft(Mshft_SLL, False, tHi_1,
tHi, MIPSRH_Imm(False, 32)));
addInstr(env, MIPSInstr_LI(mask, 0xffffffff));
addInstr(env, MIPSInstr_Alu(Malu_AND, tLo_1, tLo,
MIPSRH_Reg(mask)));
addInstr(env, MIPSInstr_Alu(Malu_OR, r_dst, tHi_1,
MIPSRH_Reg(tLo_1)));
addInstr(env, MIPSInstr_Mult(True, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mflo(r_dst));
return r_dst;
}
if (e->Iex.Binop.op == Iop_MullU32) {
/* Unsigned 32x32 multiply whose result is returned in a single register;
   this path asserts mode64.  Both operands are reduced to their low 32
   bits, multiplied with an unsigned Mult, and the result is read back with
   Mflo. */
vassert(mode64);
HReg r_tmpL = newVRegI(env);
HReg r_tmpR = newVRegI(env);
HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
if (VEX_MIPS_CPU_HAS_MIPS64R2(hwcaps_host)) {
/* MIPS64R2 host: one Ext (pos 0, size 32) per operand. */
addInstr(env, MIPSInstr_Ext(r_tmpL, r_srcL, 0, 32));
addInstr(env, MIPSInstr_Ext(r_tmpR, r_srcR, 0, 32));
} else {
/* Otherwise AND each operand with 0xFFFFFFFF.  r_tmpL holds the mask, so
   r_srcR is masked first; the second AND then overwrites the mask with
   the masked r_srcL. */
addInstr(env, MIPSInstr_LI(r_tmpL, 0xFFFFFFFF));
addInstr(env, MIPSInstr_Alu(Malu_AND, r_tmpR, r_srcR,
MIPSRH_Reg(r_tmpL)));
addInstr(env, MIPSInstr_Alu(Malu_AND, r_tmpL, r_srcL,
MIPSRH_Reg(r_tmpL)));
}
addInstr(env, MIPSInstr_Mult(False, r_tmpL, r_tmpR));
/* r_tmpR is no longer needed as an operand, so it is reused for the
   result. */
addInstr(env, MIPSInstr_Mflo(r_tmpR));
return r_tmpR;
}
if (e->Iex.Binop.op == Iop_CmpF64) {
HReg r_srcL, r_srcR;
if (mode64) {
@ -2198,11 +2195,9 @@ static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env,
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
HReg r_dst = newVRegI(env);
HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, MIPSInstr_Mul(syned, True, False /*64bit mul */ ,
r_dst, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mfhi(tHi));
addInstr(env, MIPSInstr_Mflo(tLo));
*rHi = tHi;
@ -2411,14 +2406,10 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
case Iop_MullS32: {
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
HReg r_dst = newVRegI(env);
Bool syned = toBool(op_binop == Iop_MullS32);
HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, MIPSInstr_Mul(syned /*Unsigned or Signed */,
True /*widen */ , True,
r_dst, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR));
addInstr(env, MIPSInstr_Mfhi(tHi));
addInstr(env, MIPSInstr_Mflo(tLo));
*rHi = tHi;
@ -4155,9 +4146,10 @@ HInstrArray *iselSB_MIPS ( const IRSB* bb,
Int i, j;
HReg hreg, hregHI;
ISelEnv* env;
UInt hwcaps_host = archinfo_host->hwcaps;
MIPSAMode *amCounter, *amFailAddr;
hwcaps_host = archinfo_host->hwcaps;
/* sanity ... */
vassert(arch_host == VexArchMIPS32 || arch_host == VexArchMIPS64);
vassert(VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(hwcaps_host)

View File

@ -246,6 +246,9 @@ typedef
/* Check if the processor supports MIPS32R2. */
#define VEX_MIPS_CPU_HAS_MIPS32R2(x) (VEX_MIPS_EX_INFO(x) & \
VEX_MIPS_CPU_ISA_M32R2)
/* Check if the processor supports MIPS64R2.
   Evaluates to non-zero when VEX_MIPS_EX_INFO(x) has the
   VEX_MIPS_CPU_ISA_M64R2 bit(s) set; x is a hwcaps value. */
#define VEX_MIPS_CPU_HAS_MIPS64R2(x) (VEX_MIPS_EX_INFO(x) & \
VEX_MIPS_CPU_ISA_M64R2)
/* Check if the processor supports DSP ASE Rev 2. */
#define VEX_MIPS_PROC_DSP2(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \
(VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_74K))