Some platforms such as x86 and amd64 have efficient unaligned access.

On these platforms, implement read_/write_<type> by doing a direct
access, rather than calling a function that will read or write
'byte per byte'.

For platforms that do not have efficient unaligned access,
or that do not support unaligned access at all, call the function
  readUAS_/writeUAS_<type>, which works as before.

Currently, direct access is activated only for x86 and amd64.
It is unclear which other platforms support unaligned access (efficiently).

On unwind-intensive code (such as perf/memrw on amd64), this patch
gives up to 5% improvement.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@15290
This commit is contained in:
Philippe Waroquiers 2015-05-25 20:15:25 +00:00
parent f4dde903ab
commit 5f5fa8adf2
2 changed files with 82 additions and 36 deletions

View File

@ -80,7 +80,7 @@ static inline Bool host_is_little_endian ( void ) {
return toBool(*p == 0x10);
}
Short ML_(read_Short)( const UChar* data ) {
Short ML_(readUAS_Short)( const UChar* data ) {
Short r = 0;
if (host_is_little_endian()) {
r = data[0]
@ -92,7 +92,7 @@ Short ML_(read_Short)( const UChar* data ) {
return r;
}
Int ML_(read_Int) ( const UChar* data ) {
Int ML_(readUAS_Int) ( const UChar* data ) {
Int r = 0;
if (host_is_little_endian()) {
r = data[0]
@ -108,7 +108,7 @@ Int ML_(read_Int) ( const UChar* data ) {
return r;
}
Long ML_(read_Long) ( const UChar* data ) {
Long ML_(readUAS_Long) ( const UChar* data ) {
Long r = 0;
if (host_is_little_endian()) {
r = data[0]
@ -132,7 +132,7 @@ Long ML_(read_Long) ( const UChar* data ) {
return r;
}
UShort ML_(read_UShort) ( const UChar* data ) {
UShort ML_(readUAS_UShort) ( const UChar* data ) {
UInt r = 0;
if (host_is_little_endian()) {
r = data[0]
@ -144,7 +144,7 @@ UShort ML_(read_UShort) ( const UChar* data ) {
return r;
}
UChar *ML_(write_UShort) ( UChar* ptr, UShort val ) {
UChar *ML_(writeUAS_UShort) ( UChar* ptr, UShort val ) {
if (host_is_little_endian()) {
ptr[0] = val & 0xff;
ptr[1] = ( val >> 8 ) & 0xff;
@ -155,7 +155,7 @@ UChar *ML_(write_UShort) ( UChar* ptr, UShort val ) {
return ptr + sizeof(UShort);
}
UWord ML_(read_UWord) ( const UChar* data ) {
UWord ML_(readUAS_UWord) ( const UChar* data ) {
if (sizeof(UWord) == sizeof(UInt)) {
return ML_(read_UInt)(data);
} else if (sizeof(UWord) == sizeof(ULong)) {
@ -165,7 +165,7 @@ UWord ML_(read_UWord) ( const UChar* data ) {
}
}
UInt ML_(read_UInt) ( const UChar* data ) {
UInt ML_(readUAS_UInt) ( const UChar* data ) {
UInt r = 0;
if (host_is_little_endian()) {
r = data[0]
@ -181,7 +181,7 @@ UInt ML_(read_UInt) ( const UChar* data ) {
return r;
}
UChar* ML_(write_UInt) ( UChar* ptr, UInt val ) {
UChar* ML_(writeUAS_UInt) ( UChar* ptr, UInt val ) {
if (host_is_little_endian()) {
ptr[0] = val & 0xff;
ptr[1] = ( val >> 8 ) & 0xff;
@ -196,7 +196,7 @@ UChar* ML_(write_UInt) ( UChar* ptr, UInt val ) {
return ptr + sizeof(UInt);
}
ULong ML_(read_ULong) ( const UChar* data ) {
ULong ML_(readUAS_ULong) ( const UChar* data ) {
ULong r = 0;
if (host_is_little_endian()) {
r = data[0]
@ -220,7 +220,7 @@ ULong ML_(read_ULong) ( const UChar* data ) {
return r;
}
UChar* ML_(write_ULong) ( UChar* ptr, ULong val ) {
UChar* ML_(writeUAS_ULong) ( UChar* ptr, ULong val ) {
if (host_is_little_endian()) {
ptr[0] = val & 0xff;
ptr[1] = ( val >> 8 ) & 0xff;
@ -243,16 +243,8 @@ UChar* ML_(write_ULong) ( UChar* ptr, ULong val ) {
return ptr + sizeof(ULong);
}
/* Return the byte found at the start of the buffer. */
UChar ML_(read_UChar) ( const UChar* data ) {
   return *data;
}
/* Store val into the byte at ptr; the result is ptr advanced by
   sizeof(UChar). */
UChar* ML_(write_UChar) ( UChar* ptr, UChar val ) {
   *ptr = val;
   return ptr + sizeof(UChar);
}
Addr ML_(read_Addr) ( const UChar* data ) {
Addr ML_(readUAS_Addr) ( const UChar* data ) {
if (sizeof(Addr) == sizeof(UInt)) {
return ML_(read_UInt)(data);
} else if (sizeof(Addr) == sizeof(ULong)) {
@ -262,7 +254,7 @@ Addr ML_(read_Addr) ( const UChar* data ) {
}
}
UChar* ML_(write_Addr) ( UChar* ptr, Addr val ) {
UChar* ML_(writeUAS_Addr) ( UChar* ptr, Addr val ) {
if (sizeof(Addr) == sizeof(UInt)) {
return ML_(write_UInt)(ptr, val);
} else if (sizeof(Addr) == sizeof(ULong)) {
@ -272,7 +264,6 @@ UChar* ML_(write_Addr) ( UChar* ptr, Addr val ) {
}
}
/*--------------------------------------------------------------------*/
/*--- end misc.c ---*/
/*--------------------------------------------------------------------*/

View File

@ -47,22 +47,77 @@ void* ML_(dinfo_memdup)( const HChar* cc, const void* str, SizeT nStr );
void* ML_(dinfo_realloc) ( const HChar* cc, void* ptr, SizeT new_size );
void ML_(dinfo_shrink_block)( void* ptr, SizeT szB );
/* Extract (possibly unaligned) data of various sizes from a buffer. */
Short ML_(read_Short)( const UChar* data );
Int ML_(read_Int)( const UChar* data );
Long ML_(read_Long)( const UChar* data );
UShort ML_(read_UShort)( const UChar* data );
UWord ML_(read_UWord)( const UChar* data );
UInt ML_(read_UInt)( const UChar* data );
ULong ML_(read_ULong)( const UChar* data );
UChar ML_(read_UChar)( const UChar* data );
Addr ML_(read_Addr)( const UChar* data );
/* Define functions to read/write types of various sizes from/to a
(potentially unaligned) UChar *data buffer.
Some archs can do efficient unaligned access. For these archs,
do the load/store directly. For others, call the UAS (Un Aligned Safe)
functions. */
#if defined(VGA_x86) || defined(VGA_amd64)
UChar* ML_(write_UShort)( UChar* ptr, UShort val );
UChar* ML_(write_UInt)( UChar* ptr, UInt val );
UChar* ML_(write_ULong)( UChar* ptr, ULong val );
UChar* ML_(write_UChar)( UChar* ptr, UChar val );
UChar* ML_(write_Addr)( UChar* ptr, Addr val );
/* DEF_READ(type), direct-access variant: this branch of the surrounding
   #if is taken when VGA_x86 or VGA_amd64 is defined.
   The expansion contains two things:
     - a static inline vgModuleLocal_read_<type> function that loads a
       'type' from 'data' through a cast pointer;
     - a prototype for vgModuleLocal_readUAS_<type>, left without its
       terminating ';' so that the use site supplies it, as in
       DEF_READ(Short);
   The function names are presumably built by VGAPPEND pasting its two
   arguments together; VGAPPEND is defined elsewhere -- TODO confirm.
   NOTE(review): the cast-and-dereference assumes the target tolerates
   misaligned loads, and it reads a UChar buffer through a different
   type, which the C aliasing rules do not guarantee to work -- confirm
   the build options make this safe. */
#define DEF_READ(type) \
static inline type VGAPPEND(vgModuleLocal_read_,type) ( const UChar* data ) \
{ \
return (*(const type*)(data)); \
} \
type VGAPPEND(vgModuleLocal_readUAS_,type) ( const UChar* data )
/* DEF_WRITE(type), direct-access variant: this branch of the surrounding
   #if is taken when VGA_x86 or VGA_amd64 is defined.
   The expansion contains two things:
     - a static inline vgModuleLocal_write_<type> function that stores
       'val' at 'ptr' through a cast pointer and returns
       ptr + sizeof(type);
     - a prototype for vgModuleLocal_writeUAS_<type>, left without its
       terminating ';' so that the use site supplies it, as in
       DEF_WRITE(UShort);
   NOTE(review): the store through the cast pointer assumes the target
   tolerates misaligned stores, and it writes a UChar buffer through a
   different type -- confirm the build options make this safe. */
#define DEF_WRITE(type) \
static inline UChar* VGAPPEND(vgModuleLocal_write_,type) ( UChar* ptr, type val ) \
{ \
(*(type*)(ptr)) = val; \
return ptr + sizeof(type); \
} \
UChar* VGAPPEND(vgModuleLocal_writeUAS_,type) ( UChar* ptr, type val )
#else
/* DEF_READ(type), fallback variant: used when the direct-access branch
   of the surrounding #if is not selected.
   The expansion first declares vgModuleLocal_readUAS_<type>, then
   defines a static inline vgModuleLocal_read_<type> that simply
   forwards 'data' to it and returns the result.
   NOTE(review): this expansion already ends in '}', so the ';' written
   at use sites such as DEF_READ(Short); is left over as a stray
   file-scope semicolon in this branch -- check whether the compilers
   in use warn about it. */
#define DEF_READ(type) \
type VGAPPEND(vgModuleLocal_readUAS_,type) ( const UChar* data ); \
static inline type VGAPPEND(vgModuleLocal_read_,type) ( const UChar* data ) \
{ \
return VGAPPEND(vgModuleLocal_readUAS_,type)(data); \
}
/* DEF_WRITE(type), fallback variant: used when the direct-access branch
   of the surrounding #if is not selected.
   The expansion first declares vgModuleLocal_writeUAS_<type>, then
   defines a static inline vgModuleLocal_write_<type> that forwards
   'ptr' and 'val' to it and returns whatever it returns.
   NOTE(review): this expansion already ends in '}', so the ';' written
   at use sites such as DEF_WRITE(UShort); is left over as a stray
   file-scope semicolon in this branch -- check whether the compilers
   in use warn about it. */
#define DEF_WRITE(type) \
UChar* VGAPPEND(vgModuleLocal_writeUAS_,type) ( UChar* ptr, type val ); \
static inline UChar* VGAPPEND(vgModuleLocal_write_,type) ( UChar* ptr, type val ) \
{ \
return VGAPPEND(vgModuleLocal_writeUAS_,type)(ptr,val); \
}
#endif
/* Defines a bunch of functions such as
Short ML_(read_Short)( const UChar* data );
Int ML_(read_Int)( const UChar* data );
... */
DEF_READ(Short);
DEF_READ(Int);
DEF_READ(Long);
DEF_READ(UShort);
DEF_READ(UWord);
DEF_READ(UInt);
DEF_READ(ULong);
DEF_READ(Addr);
/* Defines a bunch of functions such as
UChar* ML_(write_UShort)( UChar* ptr, UShort val );
UChar* ML_(write_UInt)( UChar* ptr, UInt val );
... */
DEF_WRITE(UShort);
DEF_WRITE(UInt);
DEF_WRITE(ULong);
DEF_WRITE(Addr);
/* Return the byte found at the start of the buffer.  A single byte has
   no alignment requirement, so no UAS variant is involved here. */
static inline UChar ML_(read_UChar)( const UChar* data )
{
   return *data;
}
/* Store val into the byte at ptr; the result is ptr advanced by
   sizeof(UChar). */
static inline UChar* ML_(write_UChar)( UChar* ptr, UChar val )
{
   *ptr = val;
   return ptr + sizeof(UChar);
}
/* A handy type, a la Haskell's Maybe type. Yes, I know, C sucks.
Been there. Done that. Seen the movie. Got the T-shirt. Etc. */