* Track introduction of IR ternary primops and rounding modes, at least
  to the extent needed to make ppc32 work.

* As a result, remove the replacements for glibc's floor/ceil fns on 
  ppc32/64, since vex can now correctly simulate the real ones.
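
For context: the new ternary FP primops take the IR rounding mode as an explicit first operand of type I32, as the memcheck changes below assume. A minimal sketch of how such an expression might be built with VEX's libvex_ir.h constructors; the value 0 for round-to-nearest and the temporaries t_a/t_b are illustrative assumptions, not taken from this commit.

#include "libvex_ir.h"

/* Sketch: build  AddF64(rm, a, b)  with the I32 rounding mode as the
   first operand.  0 == round-to-nearest is assumed here; t_a and t_b
   are hypothetical F64 temporaries. */
static IRExpr* mk_addF64_nearest ( IRTemp t_a, IRTemp t_b )
{
   IRExpr* rm = IRExpr_Const(IRConst_U32(0));
   return IRExpr_Triop(Iop_AddF64, rm, IRExpr_Tmp(t_a), IRExpr_Tmp(t_b));
}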



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5605
Julian Seward 2006-02-03 16:12:27 +00:00
parent d896e4d592
commit 5163ea2aed
3 changed files with 110 additions and 184 deletions

View File

@@ -401,7 +401,7 @@ run_innerloop_exit:
/* This check avoidance may be removable if stfiwx is
implemented. */
# if !defined(ENABLE_INNER)
# if 0 //!defined(ENABLE_INNER)
/* Check FPSCR & 0xFF == 0 (lowest 8bits are controls) */
mffs 4 /* fpscr -> fpr */
li 5,48

View File

@@ -66,183 +66,6 @@ void VG_NOTIFY_ON_LOAD(freeres)( void )
*(int *)0 = 'x';
}
/* ---------------------------------------------------------------------
Avoid glibc's floor/ceil functions on ppc32/64. In recent glibcs
(about 2.3.4 and after) these rely on doing fadd/fsub with
round to +inf/-inf set, which vex does not currently handle
correctly. This just reroutes to the glibc default implementations.
This is a really ugly hack.
------------------------------------------------------------------ */
#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* floor(x)
* Return x rounded toward -inf to integral value
* Method:
* Bit twiddling.
* Exception:
* Inexact flag raised if x not equal to floor(x).
*/
typedef union
{
double value;
struct
{
/*u_int32_t*/ UInt msw;
/*u_int32_t*/ UInt lsw;
} parts;
} ieee_double_shape_type;
/* Get two 32 bit ints from a double. */
#define EXTRACT_WORDS(ix0,ix1,d) \
do { \
ieee_double_shape_type ew_u; \
ew_u.value = (d); \
(ix0) = ew_u.parts.msw; \
(ix1) = ew_u.parts.lsw; \
} while (0)
/* Set a double from two 32 bit ints. */
#define INSERT_WORDS(d,ix0,ix1) \
do { \
ieee_double_shape_type iw_u; \
iw_u.parts.msw = (ix0); \
iw_u.parts.lsw = (ix1); \
(d) = iw_u.value; \
} while (0)
static double bit_twiddling_floor ( double x )
{
static const double huge = 1.0e300;
/*int32_t*/ Int i0,i1,j0;
/*u_int32_t*/ UInt i,j;
EXTRACT_WORDS(i0,i1,x);
j0 = ((i0>>20)&0x7ff)-0x3ff;
if(j0<20) {
if(j0<0) { /* raise inexact if x != 0 */
if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */
if(i0>=0) {i0=i1=0;}
else if(((i0&0x7fffffff)|i1)!=0)
{ i0=0xbff00000;i1=0;}
}
} else {
i = (0x000fffff)>>j0;
if(((i0&i)|i1)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0<0) i0 += (0x00100000)>>j0;
i0 &= (~i); i1=0;
}
}
} else if (j0>51) {
if(j0==0x400) return x+x; /* inf or NaN */
else return x; /* x is integral */
} else {
i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
if((i1&i)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0<0) {
if(j0==20) i0+=1;
else {
j = i1+(1<<(52-j0));
if(j<i1) i0 +=1 ; /* got a carry */
i1=j;
}
}
i1 &= (~i);
}
}
INSERT_WORDS(x,i0,i1);
return x;
}
/* Catch libm.so.6:__floor */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZufloor)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZufloor)(double x) {
return bit_twiddling_floor(x);
}
/* Catch libm.so.6:floor */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,floor)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,floor)(double x) {
return bit_twiddling_floor(x);
}
/*
* ceil(x)
* Return x rounded toward +inf to integral value
* Method:
* Bit twiddling.
* Exception:
* Inexact flag raised if x not equal to ceil(x).
*/
static double bit_twiddling_ceil ( double x )
{
static const double huge = 1.0e300;
/*int32_t*/ Int i0,i1,j0;
/*u_int32_t*/ UInt i,j;
EXTRACT_WORDS(i0,i1,x);
j0 = ((i0>>20)&0x7ff)-0x3ff;
if(j0<20) {
if(j0<0) { /* raise inexact if x != 0 */
if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */
if(i0<0) {i0=0x80000000;i1=0;}
else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
}
} else {
i = (0x000fffff)>>j0;
if(((i0&i)|i1)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0>0) i0 += (0x00100000)>>j0;
i0 &= (~i); i1=0;
}
}
} else if (j0>51) {
if(j0==0x400) return x+x; /* inf or NaN */
else return x; /* x is integral */
} else {
i = ((/*u_int32_t*/UInt)(0xffffffff))>>(j0-20);
if((i1&i)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0>0) {
if(j0==20) i0+=1;
else {
j = i1 + (1<<(52-j0));
if(j<i1) i0+=1; /* got a carry */
i1 = j;
}
}
i1 &= (~i);
}
}
INSERT_WORDS(x,i0,i1);
return x;
}
/* Catch libm.so.6:__ceil */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZuceil)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ZuZuceil)(double x) {
return bit_twiddling_ceil(x);
}
/* Catch libm.so.6:ceil */
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ceil)(double);
double VG_REPLACE_FUNCTION_ZZ(libmZdsoZd6,ceil)(double x) {
return bit_twiddling_ceil(x);
}
#endif
/*--------------------------------------------------------------------*/
/*--- end ---*/
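
The floor/ceil replacements removed above were needed because recent glibc computes floor and ceil by temporarily switching the FP rounding direction around an fadd/fsub, which vex previously simulated incorrectly. A minimal sketch of that general idiom, not glibc's actual code, assuming |x| < 2^52 and ignoring NaN/infinity handling:

#include <fenv.h>

/* Illustrative only: floor() via a round-toward-minus-infinity add/sub.
   Adding and then subtracting 2^52 forces the intermediate to a whole
   number; with FE_DOWNWARD in force that whole number is floor(x).
   (A real implementation would also guard against compiler reassociation,
   e.g. with #pragma STDC FENV_ACCESS ON.) */
static double floor_via_rounding_mode ( double x )
{
   int old = fegetround();
   fesetround(FE_DOWNWARD);
   double r = (x >= 0.0) ? (x + 0x1p52) - 0x1p52
                         : (x - 0x1p52) + 0x1p52;
   fesetround(old);
   return r;
}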

View File

@@ -1106,6 +1106,61 @@ IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
}
/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
IRAtom* at;
IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
IRType t3 = typeOfIRExpr(mce->bb->tyenv, va3);
tl_assert(isShadowAtom(mce,va1));
tl_assert(isShadowAtom(mce,va2));
tl_assert(isShadowAtom(mce,va3));
/* The general case is inefficient because PCast is an expensive
operation. Here are some special cases which use PCast only
twice rather than three times. */
/* I32 x I64 x I64 -> I64 */
/* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
&& finalVty == Ity_I64) {
if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
/* Widen 1st arg to I64. Since 1st arg is typically a rounding
mode indication which is fully defined, this should get
folded out later. */
at = mkPCastTo(mce, Ity_I64, va1);
/* Now fold in 2nd and 3rd args. */
at = mkUifU(mce, Ity_I64, at, va2);
at = mkUifU(mce, Ity_I64, at, va3);
/* and PCast once again. */
at = mkPCastTo(mce, Ity_I64, at);
return at;
}
if (0) {
VG_(printf)("mkLazy3 ");
ppIRType(t1);
VG_(printf)("_");
ppIRType(t2);
VG_(printf)("_");
ppIRType(t3);
VG_(printf)("_");
ppIRType(finalVty);
VG_(printf)("\n");
}
/* General case: force everything via 32-bit intermediaries. */
at = mkPCastTo(mce, Ity_I32, va1);
at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
at = mkPCastTo(mce, finalVty, at);
return at;
}
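
To make the combinator chain above concrete: a hypothetical scalar model of PCast and UifU on a 64-bit shadow word, using memcheck's convention that a 1 bit means undefined. The names below are illustrative and not part of this commit.

typedef unsigned long long V64;

/* PCast: if any bit is undefined, the whole word becomes undefined. */
static V64 pcast64 ( V64 v ) { return v ? ~0ULL : 0ULL; }

/* UifU: a bit is undefined if it is undefined in either input. */
static V64 uifu64 ( V64 a, V64 b ) { return a | b; }

/* What the rm x F64 x F64 -> F64 special case of mkLazy3 computes. */
static V64 lazy3_model ( V64 v_rm, V64 v_arg1, V64 v_arg2 )
{
   V64 at = pcast64(v_rm);     /* rounding mode is normally fully defined, so 0 */
   at = uifu64(at, v_arg1);
   at = uifu64(at, v_arg2);
   return pcast64(at);         /* result is all-defined or all-undefined */
}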
/* Do the lazy propagation game from a null-terminated vector of
atoms. This is presumably the arguments to a helper call, so the
IRCallee info is also supplied in order that we can know which
@@ -1590,6 +1645,46 @@ IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
/*--- Generate shadow values from all kinds of IRExprs. ---*/
/*------------------------------------------------------------*/
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
IROp op,
IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
IRType and_or_ty;
IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
IRAtom* vatom1 = expr2vbits( mce, atom1 );
IRAtom* vatom2 = expr2vbits( mce, atom2 );
IRAtom* vatom3 = expr2vbits( mce, atom3 );
tl_assert(isOriginalAtom(mce,atom1));
tl_assert(isOriginalAtom(mce,atom2));
tl_assert(isOriginalAtom(mce,atom3));
tl_assert(isShadowAtom(mce,vatom1));
tl_assert(isShadowAtom(mce,vatom2));
tl_assert(isShadowAtom(mce,vatom3));
tl_assert(sameKindedAtoms(atom1,vatom1));
tl_assert(sameKindedAtoms(atom2,vatom2));
tl_assert(sameKindedAtoms(atom3,vatom3));
switch (op) {
case Iop_AddF64:
case Iop_AddF64r32:
case Iop_SubF64:
case Iop_SubF64r32:
case Iop_MulF64:
case Iop_MulF64r32:
case Iop_DivF64:
case Iop_DivF64r32:
return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
default:
ppIROp(op);
VG_(tool_panic)("memcheck:expr2vbits_Triop");
}
}
static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
IROp op,
@@ -1906,7 +2001,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
/* Scalar floating point */
case Iop_RoundF64:
case Iop_RoundF64toInt:
case Iop_RoundF64toF32:
case Iop_F64toI64:
case Iop_I64toF64:
/* First arg is I32 (rounding mode), second is F64 or I64
@@ -1930,10 +2026,6 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
case Iop_PRemF64:
case Iop_PRem1F64:
case Iop_AtanF64:
case Iop_AddF64:
case Iop_DivF64:
case Iop_SubF64:
case Iop_MulF64:
return mkLazy2(mce, Ity_I64, vatom1, vatom2);
case Iop_CmpF64:
@@ -2185,7 +2277,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_SqrtF64:
case Iop_AbsF64:
case Iop_2xm1F64:
case Iop_Est8FRecip:
case Iop_Est5FRSqrt:
case Iop_Clz64:
case Iop_Ctz64:
@@ -2193,6 +2284,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_Clz32:
case Iop_Ctz32:
case Iop_TruncF64asF32:
return mkPCastTo(mce, Ity_I32, vatom);
case Iop_1Uto64:
@@ -2428,6 +2520,13 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
case Iex_Const:
return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
case Iex_Triop:
return expr2vbits_Triop(
mce,
e->Iex.Triop.op,
e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
);
case Iex_Binop:
return expr2vbits_Binop(
mce,
@@ -2931,6 +3030,10 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
case Iex_Binop:
return isBogusAtom(e->Iex.Binop.arg1)
|| isBogusAtom(e->Iex.Binop.arg2);
case Iex_Triop:
return isBogusAtom(e->Iex.Triop.arg1)
|| isBogusAtom(e->Iex.Triop.arg2)
|| isBogusAtom(e->Iex.Triop.arg3);
case Iex_Mux0X:
return isBogusAtom(e->Iex.Mux0X.cond)
|| isBogusAtom(e->Iex.Mux0X.expr0)