diff --git a/coregrind/m_deduppoolalloc.c b/coregrind/m_deduppoolalloc.c
index 2456e1b65..bf6d1e3c8 100644
--- a/coregrind/m_deduppoolalloc.c
+++ b/coregrind/m_deduppoolalloc.c
@@ -231,6 +231,19 @@ void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa,
    ddpa->ht_node_pa = NULL;
 }
 
+
+// SDBM hash (also used by gawk): folds each of the len bytes at buf into h as h = byte + h * 65599; returns h (0 when len is 0).
+static UInt sdbm_hash (const UChar* buf, UInt len )
+{
+  UInt h;
+  UInt i;
+
+  h = 0;
+  for (i = 0; i < len; i++)
+    h = *buf++ + (h<<6) + (h<<16) - h; // (h<<6) + (h<<16) - h == h * 65599 in UInt arithmetic
+  return h;
+}
+
 const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB,
                                   const void *elt)
 {
@@ -243,14 +256,7 @@ const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB,
 
    ddpa->nr_alloc_calls++;
 
-   // Currently using adler32 as hash function.
-   // Many references tells adler32 is bad as a hash function.
-   // And effectively, some tests on dwarf debug string shows
-   // a lot of collisions (at least for short elements).
-   // (A lot can be 10% of the elements colliding, even on
-   // small nr of elements such as 10_000).
-   ht_elt.key = VG_(adler32) (0, NULL, 0);
-   ht_elt.key = VG_(adler32) (ht_elt.key, elt, eltSzB);
+   ht_elt.key = sdbm_hash (elt, eltSzB); // replaces adler32 (collided a lot on short elts). NOTE(review): eltSzB is SizeT, len is UInt -- confirm narrowing is acceptable
    ht_elt.eltSzB = eltSzB;
    ht_elt.elt = elt;
 