Replace adler32 by sdbm_hash in m_deduppoolalloc.c

adler32 is not very good as a hash function.
sdbm_hash gives more distinct keys than adler32,
and, in the large majority of cases, shorter chains.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@15142
This commit is contained in:
Philippe Waroquiers 2015-04-25 14:53:35 +00:00
parent 0543df0e12
commit f6ab65fc2a

View File

@ -231,6 +231,19 @@ void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa,
ddpa->ht_node_pa = NULL;
}
// sdbm hash (the function used by gawk and SDBM).
// Folds the 'len' bytes starting at 'buf' into a 32-bit value, one byte
// at a time:  hash = byte + hash * 65599,  with the multiplication spelled
// out as (hash<<6) + (hash<<16) - hash.  All arithmetic is unsigned, so
// overflow simply wraps.  Returns 0 when 'len' is 0 ('buf' is not read).
static UInt sdbm_hash (const UChar* buf, UInt len )
{
   UInt hash = 0;

   while (len > 0) {
      hash = (hash << 6) + (hash << 16) - hash + *buf;
      buf++;
      len--;
   }
   return hash;
}
const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB,
const void *elt)
{
@ -243,14 +256,7 @@ const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB,
ddpa->nr_alloc_calls++;
// Currently using adler32 as hash function.
// Many references tells adler32 is bad as a hash function.
// And effectively, some tests on dwarf debug string shows
// a lot of collisions (at least for short elements).
// (A lot can be 10% of the elements colliding, even on
// small nr of elements such as 10_000).
ht_elt.key = VG_(adler32) (0, NULL, 0);
ht_elt.key = VG_(adler32) (ht_elt.key, elt, eltSzB);
ht_elt.key = sdbm_hash (elt, eltSzB);
ht_elt.eltSzB = eltSzB;
ht_elt.elt = elt;