mirror of https://github.com/Zenithsiz/ftmemsim-valgrind.git (synced 2026-02-03 10:05:29 +00:00)
* Track introduction of IR ternary primops and rounding modes, at least
  to the extent needed to make ppc32 work.
* As a result, remove the replacements for glibc's floor/ceil fns on
  ppc32/64, since vex can now correctly simulate the real ones.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5605
parent d896e4d592
commit 5163ea2aed
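Editor's illustration (not part of this commit) of the rationale above: the floor/ceil intercepts removed below existed because recent glibc ppc builds compute floor/ceil via an fadd/fsub performed with the FPU rounding mode set toward -inf/+inf, which vex previously did not simulate. The sketch shows the general mechanism: adding and subtracting 2^52 rounds a double to an integer under whatever rounding mode is current. It assumes C99 <fenv.h>; the helper name round_via_fadd is made up for the example, and a fully conforming program would also use #pragma STDC FENV_ACCESS ON.

/* --- illustration only, not part of the commit --- */
#include <fenv.h>
#include <stdio.h>

/* Adding and then subtracting 2^52 forces a double with |x| < 2^52 to be
   rounded to an integer according to the current FPU rounding mode.
   'volatile' keeps the compiler from folding the arithmetic at compile
   time under the default (to-nearest) mode. */
static double round_via_fadd ( double x )
{
   volatile double big = 4503599627370496.0;   /* 2^52 */
   volatile double t   = x + big;
   return t - big;
}

int main ( void )
{
   fesetround(FE_DOWNWARD);                            /* round toward -inf */
   printf("downward: %.1f\n", round_via_fadd(2.5));    /* 2.0 == floor(2.5) */
   fesetround(FE_UPWARD);                              /* round toward +inf */
   printf("upward:   %.1f\n", round_via_fadd(2.5));    /* 3.0 == ceil(2.5)  */
   fesetround(FE_TONEAREST);
   return 0;
}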
@@ -401,7 +401,7 @@ run_innerloop_exit:
        /* This check avoidance may be removable if stfiwx is
           implemented. */
# if !defined(ENABLE_INNER)
# if 0 //!defined(ENABLE_INNER)
        /* Check FPSCR & 0xFF == 0 (lowest 8bits are controls) */
        mffs    4              /* fpscr -> fpr */
        li      5,48

@@ -66,183 +66,6 @@ void VG_NOTIFY_ON_LOAD(freeres)( void )
   *(int *)0 = 'x';
}

/* ---------------------------------------------------------------------
   Avoid glibc's floor/ceil functions on ppc32/64.  In recent glibcs
   (about 2.3.4 and after) these rely on doing fadd/fsub with
   round to +inf/-inf set, which vex does not currently handle
   correctly.  This just reroutes to the glibc default implementations.
   This is a really ugly hack.
   ------------------------------------------------------------------ */

#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * floor(x)
 * Return x rounded toward -inf to integral value
 * Method:
 *    Bit twiddling.
 * Exception:
 *    Inexact flag raised if x not equal to floor(x).
 */

typedef union
{
   double value;
   struct
   {
      /*u_int32_t*/ UInt msw;
      /*u_int32_t*/ UInt lsw;
   } parts;
} ieee_double_shape_type;

/* Get two 32 bit ints from a double.  */
#define EXTRACT_WORDS(ix0,ix1,d)      \
   do {                               \
      ieee_double_shape_type ew_u;    \
      ew_u.value = (d);               \
      (ix0) = ew_u.parts.msw;         \
      (ix1) = ew_u.parts.lsw;         \
   } while (0)

/* Set a double from two 32 bit ints.  */
#define INSERT_WORDS(d,ix0,ix1)       \
   do {                               \
      ieee_double_shape_type iw_u;    \
      iw_u.parts.msw = (ix0);         \
      iw_u.parts.lsw = (ix1);         \
      (d) = iw_u.value;               \
   } while (0)

static double bit_twiddling_floor ( double x )
{
   static const double huge = 1.0e300;
   /*int32_t*/ Int i0,i1,j0;
   /*u_int32_t*/ UInt i,j;
   EXTRACT_WORDS(i0,i1,x);
   j0 = ((i0>>20)&0x7ff)-0x3ff;
   if(j0<20) {
      if(j0<0) {          /* raise inexact if x != 0 */
         if(huge+x>0.0) { /* return 0*sign(x) if |x|<1 */
            if(i0>=0) {i0=i1=0;}
            else if(((i0&0x7fffffff)|i1)!=0)
               { i0=0xbff00000;i1=0;}
         }
      } else {
         i = (0x000fffff)>>j0;
         if(((i0&i)|i1)==0) return x; /* x is integral */
         if(huge+x>0.0) {             /* raise inexact flag */
            if(i0<0) i0 += (0x00100000)>>j0;
            i0 &= (~i); i1=0;
         }
      }
   } else if (j0>51) {
      if(j0==0x400) return x+x; /* inf or NaN */
      else return x;            /* x is integral */
   } else {
      i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
      if((i1&i)==0) return x;   /* x is integral */
      if(huge+x>0.0) {          /* raise inexact flag */
         if(i0<0) {
            if(j0==20) i0+=1;
            else {
               j = i1+(1<<(52-j0));
               if(j<i1) i0 +=1 ; /* got a carry */
               i1=j;
            }
         }
         i1 &= (~i);
      }
   }
   INSERT_WORDS(x,i0,i1);
   return x;
}

/* Catch libm.so.6:__floor */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZufloor)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZufloor)(double x) {
   return bit_twiddling_floor(x);
}

/* Catch libm.so.6:floor */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,floor)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,floor)(double x) {
   return bit_twiddling_floor(x);
}

/*
 * ceil(x)
 * Return x rounded toward +inf to integral value
 * Method:
 *    Bit twiddling.
 * Exception:
 *    Inexact flag raised if x not equal to ceil(x).
 */
static double bit_twiddling_ceil ( double x )
{
   static const double huge = 1.0e300;
   /*int32_t*/ Int i0,i1,j0;
   /*u_int32_t*/ UInt i,j;
   EXTRACT_WORDS(i0,i1,x);
   j0 = ((i0>>20)&0x7ff)-0x3ff;
   if(j0<20) {
      if(j0<0) {          /* raise inexact if x != 0 */
         if(huge+x>0.0) { /* return 0*sign(x) if |x|<1 */
            if(i0<0) {i0=0x80000000;i1=0;}
            else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
         }
      } else {
         i = (0x000fffff)>>j0;
         if(((i0&i)|i1)==0) return x; /* x is integral */
         if(huge+x>0.0) {             /* raise inexact flag */
            if(i0>0) i0 += (0x00100000)>>j0;
            i0 &= (~i); i1=0;
         }
      }
   } else if (j0>51) {
      if(j0==0x400) return x+x; /* inf or NaN */
      else return x;            /* x is integral */
   } else {
      i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
      if((i1&i)==0) return x;   /* x is integral */
      if(huge+x>0.0) {          /* raise inexact flag */
         if(i0>0) {
            if(j0==20) i0+=1;
            else {
               j = i1 + (1<<(52-j0));
               if(j<i1) i0+=1; /* got a carry */
               i1 = j;
            }
         }
         i1 &= (~i);
      }
   }
   INSERT_WORDS(x,i0,i1);
   return x;
}

/* Catch libm.so.6:__ceil */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZuceil)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZuceil)(double x) {
   return bit_twiddling_ceil(x);
}

/* Catch libm.so.6:ceil */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ceil)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ceil)(double x) {
   return bit_twiddling_ceil(x);
}

#endif

/*--------------------------------------------------------------------*/
/*--- end ---*/

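Editor's note on the Z-encoded names in the removed code above: VG_REPLACE_FUNCTION_ZZ takes names in Valgrind's Z-encoding, in which (as assumed here) the escape Zd stands for '.' and Zu for '_', so libmZdsoZd6 denotes libm.so.6 and ZuZufloor denotes __floor. A minimal decoder sketch covering just those two escapes; the function z_decode is illustrative and not Valgrind code.

/* --- illustration only, not part of the commit --- */
#include <stdio.h>

/* Decode the two Z-escapes used above: Zd -> '.' and Zu -> '_'.
   Valgrind's encoding has further escapes, but they are not needed here. */
static void z_decode ( const char* in, char* out )
{
   while (*in) {
      if (in[0] == 'Z' && in[1] == 'd')      { *out++ = '.'; in += 2; }
      else if (in[0] == 'Z' && in[1] == 'u') { *out++ = '_'; in += 2; }
      else                                   { *out++ = *in++; }
   }
   *out = 0;
}

int main ( void )
{
   char buf[64];
   z_decode("libmZdsoZd6", buf); printf("%s\n", buf);   /* libm.so.6 */
   z_decode("ZuZufloor",   buf); printf("%s\n", buf);   /* __floor   */
   return 0;
}
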
@@ -1106,6 +1106,61 @@ IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
}


/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->bb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy3 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(t3);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
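
Editor's sketch of what the mkLazy3 fast path above computes, using plain 64-bit words as stand-ins for memcheck's IR-level shadow (V-bit) values: a set bit means "undefined", UifU is a bitwise OR of undefinedness, and PCast smears any undefinedness across the whole word. The helper names pcast64, uifu64 and lazy3 are made up for the illustration.

/* --- illustration only, not part of the commit --- */
#include <stdint.h>
#include <stdio.h>

/* PCast: if any shadow bit is undefined, the whole result is undefined. */
static uint64_t pcast64 ( uint64_t v ) { return v ? ~0ULL : 0ULL; }

/* UifU: a bit is undefined if it is undefined in either input. */
static uint64_t uifu64 ( uint64_t a, uint64_t b ) { return a | b; }

/* The I32 x I64 x I64 -> I64 special case of mkLazy3: pcast the
   rounding-mode shadow up to 64 bits, fold in the two operand shadows,
   then pcast once more. */
static uint64_t lazy3 ( uint32_t rm_shadow, uint64_t a_shadow, uint64_t b_shadow )
{
   uint64_t at = pcast64(rm_shadow);
   at = uifu64(at, a_shadow);
   at = uifu64(at, b_shadow);
   return pcast64(at);
}

int main ( void )
{
   /* Fully defined inputs -> fully defined result (all shadow bits 0). */
   printf("%016llx\n", (unsigned long long) lazy3(0, 0, 0));
   /* One partially undefined operand -> fully undefined result. */
   printf("%016llx\n", (unsigned long long) lazy3(0, 1, 0));
   return 0;
}
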
@@ -1590,6 +1645,46 @@ IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
/*--- Generate shadow values from all kinds of IRExprs. ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRType and_or_ty;
   IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}


static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
@@ -1906,7 +2001,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,

      /* Scalar floating point */

      case Iop_RoundF64:
      case Iop_RoundF64toInt:
      case Iop_RoundF64toF32:
      case Iop_F64toI64:
      case Iop_I64toF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
@@ -1930,10 +2026,6 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
@@ -2185,7 +2277,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
      case Iop_Est8FRecip:
      case Iop_Est5FRSqrt:
      case Iop_Clz64:
      case Iop_Ctz64:
@@ -2193,6 +2284,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )

      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_1Uto64:
@@ -2428,6 +2520,13 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Triop:
         return expr2vbits_Triop(
                   mce,
                   e->Iex.Triop.op,
                   e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
                );

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
@@ -2931,6 +3030,10 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
      case Iex_Binop:
         return isBogusAtom(e->Iex.Binop.arg1)
                || isBogusAtom(e->Iex.Binop.arg2);
      case Iex_Triop:
         return isBogusAtom(e->Iex.Triop.arg1)
                || isBogusAtom(e->Iex.Triop.arg2)
                || isBogusAtom(e->Iex.Triop.arg3);
      case Iex_Mux0X:
         return isBogusAtom(e->Iex.Mux0X.cond)
                || isBogusAtom(e->Iex.Mux0X.expr0)