diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am index 31ca33206..655a462c6 100644 --- a/coregrind/Makefile.am +++ b/coregrind/Makefile.am @@ -161,6 +161,7 @@ noinst_HEADERS = \ pub_core_commandline.h \ pub_core_coredump.h \ pub_core_cpuid.h \ + pub_core_deduppoolalloc.h \ pub_core_debuginfo.h \ pub_core_debugger.h \ pub_core_debuglog.h \ @@ -270,6 +271,7 @@ COREGRIND_SOURCES_COMMON = \ m_commandline.c \ m_clientstate.c \ m_cpuid.S \ + m_deduppoolalloc.c \ m_debugger.c \ m_debuglog.c \ m_errormgr.c \ diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c index 6373044db..a37c22884 100644 --- a/coregrind/m_debuginfo/debuginfo.c +++ b/coregrind/m_debuginfo/debuginfo.c @@ -204,7 +204,6 @@ DebugInfo* alloc_DebugInfo( const HChar* filename ) static void free_DebugInfo ( DebugInfo* di ) { Word i, j, n; - struct strchunk *chunk, *next; TyEnt* ent; GExpr* gexpr; @@ -230,10 +229,10 @@ static void free_DebugInfo ( DebugInfo* di ) ML_(dinfo_free)(di->symtab); } - for (chunk = di->strchunks; chunk != NULL; chunk = next) { - next = chunk->next; - ML_(dinfo_free)(chunk); - } + /* The string pool is created lazily by ML_(addStr), so it is still + NULL for a DebugInfo that never stored a string. */ + if (di->strpool) + VG_(deleteDedupPA) (di->strpool); /* Delete the two admin arrays. These lists exist primarily so that we can visit each object exactly once when we need to @@ -279,7 +278,7 @@ static void free_DebugInfo ( DebugInfo* di ) vg_assert(var); /* Nothing to free in var: all the pointer fields refer to stuff either on an admin list, or in - .strchunks */ + .strpool */ } VG_(deleteXA)(arange->vars); /* Don't free arange itself, as OSetGen_Destroy does diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h index 200949d55..0b5d6f113 100644 --- a/coregrind/m_debuginfo/priv_storage.h +++ b/coregrind/m_debuginfo/priv_storage.h @@ -45,6 +45,7 @@ #include "pub_core_basics.h" // Addr #include "pub_core_xarray.h" // XArray +#include "pub_core_deduppoolalloc.h" // DedupPoolAlloc #include "priv_d3basics.h" // GExpr et al. 
#include "priv_image.h" // DiCursor @@ -55,7 +56,7 @@ ::sec_names is NULL. If there are other names, these are stored in ::sec_names, which is a NULL terminated vector holding the names. The vector is allocated in VG_AR_DINFO, the names themselves live - in DebugInfo::strchunks. + in DebugInfo::strpool. From the point of view of ELF, the primary vs secondary distinction is artificial: they are all just names associated with the address, @@ -444,12 +445,12 @@ typedef typedef struct { - HChar* name; /* in DebugInfo.strchunks */ + HChar* name; /* in DebugInfo.strpool */ UWord typeR; /* a cuOff */ GExpr* gexpr; /* on DebugInfo.gexprs list */ GExpr* fbGX; /* SHARED. */ HChar* fileName; /* where declared; may be NULL. in - DebugInfo.strchunks */ + DebugInfo.strpool */ Int lineNo; /* where declared; may be zero. */ } DiVariable; @@ -512,8 +513,8 @@ struct _DebugInfoFSM }; -/* To do with the string table in struct _DebugInfo (::strchunks) */ -#define SEGINFO_STRCHUNKSIZE (64*1024) +/* To do with the string table in struct _DebugInfo (::strpool) */ +#define SEGINFO_STRPOOLSIZE (16*1024) /* We may encounter more than one .eh_frame section in an object -- @@ -809,13 +810,9 @@ struct _DebugInfo { Addr fpo_maxavma; Addr fpo_base_avma; - /* Expandable arrays of characters -- the string table. Pointers - into this are stable (the arrays are not reallocated). */ - struct strchunk { - UInt strtab_used; - struct strchunk* next; - HChar strtab[SEGINFO_STRCHUNKSIZE]; - } *strchunks; + /* Pool of strings -- the string table. Pointers + into this are stable (the memory is not reallocated). */ + DedupPoolAlloc *strpool; /* Variable scope information, as harvested from Dwarf3 files. 
diff --git a/coregrind/m_debuginfo/readdwarf3.c b/coregrind/m_debuginfo/readdwarf3.c index de491e93a..f34a8b306 100644 --- a/coregrind/m_debuginfo/readdwarf3.c +++ b/coregrind/m_debuginfo/readdwarf3.c @@ -1389,7 +1389,7 @@ void get_Form_contents ( /*OUT*/FormContents* cts, typedef struct _TempVar { - HChar* name; /* in DebugInfo's .strchunks */ + HChar* name; /* in DebugInfo's .strpool */ /* Represent ranges economically. nRanges is the number of ranges. Cases: 0: .rngOneMin .rngOneMax .manyRanges are all zero @@ -1449,7 +1449,7 @@ typedef GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB expr, else NULL */ /* The file name table. Is a mapping from integer index to the - (permanent) copy of the string in in DebugInfo's .strchunks. */ + (permanent) copy of the string in in DebugInfo's .strpool. */ XArray* /* of UChar* */ filenameTable; } D3VarParser; diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c index 69ccb4a42..4a836bf51 100644 --- a/coregrind/m_debuginfo/readelf.c +++ b/coregrind/m_debuginfo/readelf.c @@ -1449,7 +1449,7 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) vg_assert(!di->loctab); vg_assert(!di->cfsi); vg_assert(!di->cfsi_exprs); - vg_assert(!di->strchunks); + vg_assert(!di->strpool); vg_assert(!di->soname); { diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c index 0a4716fff..121c82691 100644 --- a/coregrind/m_debuginfo/storage.c +++ b/coregrind/m_debuginfo/storage.c @@ -45,6 +45,7 @@ #include "pub_core_libcprint.h" #include "pub_core_xarray.h" #include "pub_core_oset.h" +#include "pub_core_deduppoolalloc.h" #include "priv_misc.h" /* dinfo_zalloc/free/strdup */ #include "priv_image.h" @@ -231,8 +232,6 @@ void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si ) */ HChar* ML_(addStr) ( struct _DebugInfo* di, const HChar* str, Int len ) { - struct strchunk *chunk; - Int space_needed; HChar* p; if (len == -1) { @@ -240,25 +239,13 @@ HChar* ML_(addStr) ( struct 
_DebugInfo* di, const HChar* str, Int len ) } else { vg_assert(len >= 0); } - - space_needed = 1 + len; - - // Allocate a new strtab chunk if necessary - if (di->strchunks == NULL || - (di->strchunks->strtab_used - + space_needed) > SEGINFO_STRCHUNKSIZE) { - chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk)); - chunk->strtab_used = 0; - chunk->next = di->strchunks; - di->strchunks = chunk; - } - chunk = di->strchunks; - - p = &chunk->strtab[chunk->strtab_used]; - VG_(memcpy)(p, str, len); - chunk->strtab[chunk->strtab_used+len] = '\0'; - chunk->strtab_used += space_needed; - + if (UNLIKELY(di->strpool == NULL)) + di->strpool = VG_(newDedupPA)(SEGINFO_STRPOOLSIZE, + 1, + ML_(dinfo_zalloc), + "di.storage.addStr.1", + ML_(dinfo_free)); + p = VG_(allocEltDedupPA) (di->strpool, len+1, str); return p; } @@ -926,12 +913,12 @@ void ML_(addVar)( struct _DebugInfo* di, Int level, Addr aMin, Addr aMax, - HChar* name, /* in di's .strchunks */ + HChar* name, /* in di's .strpool */ UWord typeR, /* a cuOff */ GExpr* gexpr, GExpr* fbGX, HChar* fileName, /* where decl'd - may be NULL. - in di's .strchunks */ + in di's .strpool */ Int lineNo, /* where decl'd - may be zero */ Bool show ) { @@ -1216,7 +1203,9 @@ Bool preferName ( struct _DebugInfo* di, vg_assert(a_name); vg_assert(b_name); - vg_assert(a_name != b_name); + // vg_assert(a_name != b_name); + // ???? now the pointers can be equal but is that + // ???? not the indication of a latent bug ???? 
vlena = VG_(strlen)(a_name); vlenb = VG_(strlen)(b_name); @@ -1828,6 +1817,8 @@ void ML_(canonicaliseTables) ( struct _DebugInfo* di ) canonicaliseLoctab ( di ); ML_(canonicaliseCFI) ( di ); canonicaliseVarInfo ( di ); + if (di->strpool) + VG_(freezeDedupPA) (di->strpool); } diff --git a/coregrind/m_deduppoolalloc.c b/coregrind/m_deduppoolalloc.c new file mode 100644 index 000000000..5eb6feb52 --- /dev/null +++ b/coregrind/m_deduppoolalloc.c @@ -0,0 +1,237 @@ +/*--------------------------------------------------------------------*/ +/*--- A pool (memory) allocator that avoids duplicated copies. ---*/ +/*--- m_deduppoolalloc.c ---*/ +/*--------------------------------------------------------------------*/ +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2014-2014 Philippe Waroquiers philippe.waroquiers@skynet.be + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "pub_core_basics.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcassert.h" +#include "pub_core_xarray.h" +#include "pub_core_deduppoolalloc.h" /* self */ +#include "pub_core_hashtable.h" +#include "pub_core_poolalloc.h" +#include "pub_core_options.h" +#include "pub_core_mallocfree.h" +#include "pub_core_debuglog.h" + +struct _DedupPoolAlloc { + SizeT poolSzB; /* Minimum size of a pool. */ + SizeT eltAlign; + void* (*alloc)(const HChar*, SizeT); /* pool allocator */ + const HChar* cc; /* pool allocator's cc */ + void (*free)(void*); /* pool allocator's free-er */ + /* XArray of void* (pointers to pools). The pools themselves. + Each element is a pointer to a block of size at least PoolSzB bytes. */ + XArray *pools; + + /* hash table of pool elements, used to dedup. + If NULL, it means the DedupPoolAlloc is frozen. */ + VgHashTable ht_elements; + + /* Hash table nodes of pool_elements are allocated with a pool, to + decrease memory overhead during insertion in the DedupPoolAlloc. */ + PoolAlloc *ht_node_pa; + + UChar *curpool_free; /* Pos in current pool to allocate next elt. */ + UChar *curpool_limit; /* Last pos in current pool. */ + + /* Total nr of alloc calls, resulting in (we hope) a lot less + real (dedup) elements. 
*/ + ULong nr_alloc_calls; +}; + +typedef + struct _ht_node { + struct _ht_node *next; // Read/Write by hashtable (pub_tool_hashtable.h) + UWord key; // Read by hashtable (pub_tool_hashtable.h) + SizeT eltSzB; + void *elt; + } + ht_node; + +extern DedupPoolAlloc* VG_(newDedupPA) ( SizeT poolSzB, + SizeT eltAlign, + void* (*alloc)(const HChar*, SizeT), + const HChar* cc, + void (*free_fn)(void*) ) +{ + DedupPoolAlloc* ddpa; + vg_assert(poolSzB >= eltAlign); + vg_assert(poolSzB >= 100); /* let's say */ + vg_assert(poolSzB >= 10*eltAlign); /* let's say */ + vg_assert(alloc); + vg_assert(cc); + vg_assert(free_fn); + ddpa = alloc(cc, sizeof(*ddpa)); + vg_assert(ddpa); + VG_(memset)(ddpa, 0, sizeof(*ddpa)); + ddpa->poolSzB = poolSzB; + ddpa->eltAlign = eltAlign; + ddpa->alloc = alloc; + ddpa->cc = cc; + ddpa->free = free_fn; + ddpa->pools = VG_(newXA)( alloc, cc, free_fn, sizeof(void*) ); + + ddpa->ht_elements = VG_(HT_construct) (cc); + ddpa->ht_node_pa = VG_(newPA) ( sizeof(ht_node), + 1000, + alloc, + cc, + free_fn); + + ddpa->curpool_limit = NULL; + ddpa->curpool_free = ddpa->curpool_limit + 1; + vg_assert(ddpa->pools); + return ddpa; +} + +void VG_(deleteDedupPA) ( DedupPoolAlloc* ddpa) +{ + Word i; + if (ddpa->ht_elements) + VG_(freezeDedupPA) (ddpa); // Free data structures used for insertion. + for (i = 0; i < VG_(sizeXA) (ddpa->pools); i++) + ddpa->free (*(UWord **)VG_(indexXA) ( ddpa->pools, i )); + VG_(deleteXA) (ddpa->pools); + ddpa->free (ddpa); +} + +static __inline__ +void ddpa_align_curpool_free ( DedupPoolAlloc* ddpa ) +{ + ddpa->curpool_free = (UChar*)VG_ROUNDUP(ddpa->curpool_free, ddpa->eltAlign); +} + +/* No space. Allocate a new pool. 
*/ +__attribute__((noinline)) +static void ddpa_add_new_pool ( DedupPoolAlloc* ddpa ) +{ + vg_assert(ddpa); + ddpa->curpool_free = ddpa->alloc( ddpa->cc, ddpa->poolSzB); + vg_assert(ddpa->curpool_free); + ddpa->curpool_limit = ddpa->curpool_free + ddpa->poolSzB - 1; + /* add to our collection of pools */ + VG_(addToXA)( ddpa->pools, &ddpa->curpool_free ); + ddpa_align_curpool_free (ddpa); +} + +static Word cmp_pool_elt (const void* node1, const void* node2 ) +{ + const ht_node* hnode1 = node1; + const ht_node* hnode2 = node2; + + if (hnode1->key < hnode2->key) + return -1; + else if (hnode1->key > hnode2->key) + return 1; + else if (hnode1->eltSzB == hnode2->eltSzB) + return VG_(memcmp) (hnode1->elt, hnode2->elt, hnode1->eltSzB); + else if (hnode1->eltSzB < hnode2->eltSzB) + return -1; + else + return 1; +} + +/* Print some stats. */ +static void print_stats (DedupPoolAlloc *ddpa) +{ + VG_(message)(Vg_DebugMsg, + "dedupPA:%s %ld allocs (%d uniq)" + " %ld pools (%ld bytes free in last pool)\n", + ddpa->cc, + (long int) ddpa->nr_alloc_calls, + VG_(HT_count_nodes)(ddpa->ht_elements), + VG_(sizeXA)(ddpa->pools), + (long int) (ddpa->curpool_limit - ddpa->curpool_free + 1)); + VG_(HT_print_stats) (ddpa->ht_elements, cmp_pool_elt); +} + +/* Dummy free, as the ht elements are allocated in a pool, and + we will destroy the pool in one single operation. 
*/ +static void htelem_dummyfree(void* ht_elem) +{ +} + +void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa) +{ + if (VG_(clo_stats) + && (VG_(clo_verbosity) > 2 || VG_(debugLog_getLevel) () >= 2)) { + print_stats(ddpa); + } + VG_(HT_destruct) ( ddpa->ht_elements, htelem_dummyfree); + ddpa->ht_elements = NULL; + VG_(deletePA) (ddpa->ht_node_pa); + ddpa->ht_node_pa = NULL; +} + +void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB, const void *elt) +{ + ht_node ht_elt; + void* elt_ins; + ht_node *ht_ins; + vg_assert(ddpa); + vg_assert(ddpa->ht_elements); + vg_assert (eltSzB <= ddpa->poolSzB); + + ddpa->nr_alloc_calls++; + + // Currently using adler32 as hash function. + // Many references say adler32 is a poor hash function, + // and indeed some tests on DWARF debug strings show + // a lot of collisions (at least for short elements). + // (A lot can be 10% of the elements colliding, even on + // small nr of elements such as 10_000). + ht_elt.key = VG_(adler32) (0, NULL, 0); + ht_elt.key = VG_(adler32) (ht_elt.key, (UChar*)elt, eltSzB); + + ht_elt.eltSzB = eltSzB; + ht_elt.elt = (UChar*) elt; + + ht_ins = VG_(HT_gen_lookup) (ddpa->ht_elements, &ht_elt, cmp_pool_elt); + if (ht_ins) + return ht_ins->elt; + + /* Not found -> we need to allocate a new element from the pool + and insert it in the hash table of inserted elements. 
*/ + + // Add a new pool if there is not enough space in the current pool + if (UNLIKELY(ddpa->curpool_free + eltSzB - 1 > ddpa->curpool_limit)) { + ddpa_add_new_pool(ddpa); + } + + elt_ins = ddpa->curpool_free; + VG_(memcpy)(elt_ins, elt, eltSzB); + ddpa->curpool_free = ddpa->curpool_free + eltSzB; + ddpa_align_curpool_free (ddpa); + + ht_ins = VG_(allocEltPA) (ddpa->ht_node_pa); + ht_ins->key = ht_elt.key; + ht_ins->eltSzB = eltSzB; + ht_ins->elt = elt_ins; + VG_(HT_add_node)(ddpa->ht_elements, ht_ins); + return elt_ins; +} diff --git a/coregrind/m_hashtable.c b/coregrind/m_hashtable.c index d71e7598e..38eb810a4 100644 --- a/coregrind/m_hashtable.c +++ b/coregrind/m_hashtable.c @@ -32,6 +32,7 @@ #include "pub_core_debuglog.h" #include "pub_core_hashtable.h" #include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" #include "pub_core_mallocfree.h" /*--------------------------------------------------------------------*/ @@ -154,7 +155,7 @@ void VG_(HT_add_node) ( VgHashTable table, void* vnode ) table->iterOK = False; } -/* Looks up a VgHashNode in the table. Returns NULL if not found. */ +/* Looks up a VgHashNode by key in the table. Returns NULL if not found. */ void* VG_(HT_lookup) ( VgHashTable table, UWord key ) { VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ]; @@ -168,6 +169,22 @@ void* VG_(HT_lookup) ( VgHashTable table, UWord key ) return NULL; } +/* Looks up a VgHashNode by node in the table. Returns NULL if not found. + GEN!!! marks the lines that differ from VG_(HT_lookup). */ +void* VG_(HT_gen_lookup) ( VgHashTable table, void* node, HT_Cmp_t cmp ) +{ + VgHashNode* hnode = (VgHashNode*) node; // GEN!!! + VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!! + + while (curr) { + if (cmp (hnode, curr) == 0) { // GEN!!! + return curr; + } + curr = curr->next; + } + return NULL; +} + /* Removes a VgHashNode from the table. Returns NULL if not found. 
*/ void* VG_(HT_remove) ( VgHashTable table, UWord key ) { @@ -190,6 +207,123 @@ void* VG_(HT_remove) ( VgHashTable table, UWord key ) return NULL; } +/* Removes a VgHashNode by node from the table. Returns NULL if not found. + GEN!!! marks the lines that differ from VG_(HT_remove). */ +void* VG_(HT_gen_remove) ( VgHashTable table, void* node, HT_Cmp_t cmp ) +{ + VgHashNode* hnode = (VgHashNode*) node; // GEN!!! + UWord chain = CHAIN_NO(hnode->key, table); // GEN!!! + VgHashNode* curr = table->chains[chain]; + VgHashNode** prev_next_ptr = &(table->chains[chain]); + + /* Table has been modified; hence HT_Next should assert. */ + table->iterOK = False; + + while (curr) { + if (cmp(hnode, curr) == 0) { // GEN!!! + *prev_next_ptr = curr->next; + table->n_elements--; + return curr; + } + prev_next_ptr = &(curr->next); + curr = curr->next; + } + return NULL; +} + +/* Prints histograms of chain lengths, duplicated keys and duplicated + elements of table. If cmp is non-NULL it decides whether two nodes + are the same element; otherwise nodes are compared by key only. */ +void VG_(HT_print_stats) ( VgHashTable table, HT_Cmp_t cmp ) +{ + #define MAXOCCUR 20 + UInt elt_occurences[MAXOCCUR]; + UInt key_occurences[MAXOCCUR]; + UInt cno_occurences[MAXOCCUR]; + /* key_occurences : how many ht elements have the same key. + elt_occurences : how many elements are inserted multiple times. + cno_occurences : how many chains have that length. + The last entry in these arrays collects all occurrences >= MAXOCCUR-1. */ + #define INCOCCUR(occur,n) ((n) >= MAXOCCUR ? (occur)[MAXOCCUR-1]++ : (occur)[(n)]++) + UInt i; + UInt nkey, nelt, ncno; + VgHashNode *cnode, *node; + + for (i = 0; i < MAXOCCUR; i++) { + key_occurences[i] = 0; + elt_occurences[i] = 0; + cno_occurences[i] = 0; + } + + // Note that the below algorithm is quadratic in nr of elements in a chain + // but if that happens, the hash table/function is really bad and that + // should be fixed. + for (i = 0; i < table->n_chains; i++) { + ncno = 0; + for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) { + ncno++; + + nkey = 0; + // Is the same cnode->key existing before cnode ? 
+ for (node = table->chains[i]; node != cnode; node = node->next) { + if (node->key == cnode->key) + nkey++; + } + // If cnode->key not in a previous node, count occurrences of key. + if (nkey == 0) { + for (node = cnode; node != NULL; node = node->next) { + if (node->key == cnode->key) + nkey++; + } + INCOCCUR(key_occurences, nkey); + } + + nelt = 0; + // Is the same cnode element existing before cnode ? + for (node = table->chains[i]; node != cnode; node = node->next) { + if (cmp) { + if ((*cmp)(node, cnode) == 0) + nelt++; + } else + if (node->key == cnode->key) + nelt++; + } + // If cnode element not in a previous node, count occurrences of elt. + if (nelt == 0) { + for (node = cnode; node != NULL; node = node->next) { + if (cmp) { + if ((*cmp)(node, cnode) == 0) + nelt++; + } else + if (node->key == cnode->key) + nelt++; + } + INCOCCUR(elt_occurences, nelt); + } + } + INCOCCUR(cno_occurences, ncno); + } + + VG_(message)(Vg_DebugMsg, + "nr occurences of" + " chains of len N," + " N-plicated keys," + " N-plicated elts\n"); + nkey = nelt = ncno = 0; + for (i = 0; i < MAXOCCUR; i++) { + if (elt_occurences[i] > 0 || key_occurences[i] > 0 || cno_occurences[i] > 0) + VG_(message)(Vg_DebugMsg, + "N:%2d : nr chain %6d, nr keys %6d, nr elts %6d\n", + i, cno_occurences[i], key_occurences[i], elt_occurences[i]); + nkey += key_occurences[i]; + nelt += elt_occurences[i]; + ncno += cno_occurences[i]; + } + VG_(message)(Vg_DebugMsg, "total nr of unique chains: %6d, keys %6d, elts %6d\n", + ncno, nkey, nelt); +} + + /* Allocates a suitably-sized array, copies pointers to all the hashtable elements into it, then returns both the array and the size of it. The array must be freed with VG_(free). 
diff --git a/coregrind/pub_core_deduppoolalloc.h b/coregrind/pub_core_deduppoolalloc.h new file mode 100644 index 000000000..0803485fa --- /dev/null +++ b/coregrind/pub_core_deduppoolalloc.h @@ -0,0 +1,43 @@ + +/*--------------------------------------------------------------------*/ +/*--- A pool (memory) allocator that avoids duplicated copies. ---*/ +/*--- pub_core_deduppoolalloc.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2014-2014 Philippe Waroquiers philippe.waroquiers@skynet.be + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PUB_CORE_DEDUPPOOLALLOC_H +#define __PUB_CORE_DEDUPPOOLALLOC_H + +#include "pub_tool_deduppoolalloc.h" + +// No core-only exports; everything in this module is visible to both +// the core and tools. 
+ +#endif // __PUB_CORE_DEDUPPOOLALLOC_H + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/include/Makefile.am b/include/Makefile.am index 02e3287d9..2aef15c4b 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -9,6 +9,7 @@ nobase_pkginclude_HEADERS = \ pub_tool_aspacemgr.h \ pub_tool_clientstate.h \ pub_tool_clreq.h \ + pub_tool_deduppoolalloc.h \ pub_tool_debuginfo.h \ pub_tool_errormgr.h \ pub_tool_execontext.h \ diff --git a/include/pub_tool_deduppoolalloc.h b/include/pub_tool_deduppoolalloc.h new file mode 100644 index 000000000..040fc51e3 --- /dev/null +++ b/include/pub_tool_deduppoolalloc.h @@ -0,0 +1,90 @@ + +/*--------------------------------------------------------------------*/ +/*--- A pool (memory) allocator that avoids duplicated copies. ---*/ +/*--- pub_tool_deduppoolalloc.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2014-2014 Philippe Waroquiers philippe.waroquiers@skynet.be + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#ifndef __PUB_TOOL_DEDUPPOOLALLOC_H +#define __PUB_TOOL_DEDUPPOOLALLOC_H + +#include "pub_tool_basics.h" // UWord + +//----------------------------------------------------------------------------- +// PURPOSE: Provides a pool allocator for elements, storing identical +// elements only once. In other words, this can be considered a "dictionary" of elements. +// +// This pool allocator manages element allocation by allocating "pools" of +// many elements from a lower level allocator (typically pub_tool_mallocfree.h). +// Single elements are allocated from these pools. +// Currently, elements can only be allocated, elements cannot be freed +// individually. +// Once allocated, an element must not be modified anymore. +// +// A dedup pool allocator has significantly less memory overhead than +// calling pub_tool_mallocfree.h directly if the deduplication factor +// is big. However, allocating an element incurs a cost for searching +// if an identical element is already in the pool. +// +// Note: the elements of the pool cannot be freed (at least currently). +// The only way to free the elements is to delete the pool allocator. +//-------------------------------------------------------------------- + + +typedef struct _DedupPoolAlloc DedupPoolAlloc; + +/* Create new DedupPoolAlloc, using given allocation and free function. + Alloc fn must not fail (that is, if it returns it must have succeeded.) + poolSzB is the (minimum) size in bytes of the pool of elements allocated + with alloc. + eltAlign is the minimum required alignment for the elements allocated + from the DedupPoolAlloc. */ +extern DedupPoolAlloc* VG_(newDedupPA) ( SizeT poolSzB, + SizeT eltAlign, + void* (*alloc)(const HChar*, SizeT), + const HChar* cc, + void (*free_fn)(void*) ); + +/* Stores a copy of the eltSzB bytes at elt in ddpa and returns a pointer to it; if identical bytes are already stored, returns the existing copy instead. eltSzB must not exceed poolSzB. 
*/ +extern void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, + SizeT eltSzB, const void *elt); + + +/* The Dedup Pool Allocator must maintain a data structure to avoid + duplicates as long as new elements can be allocated from the pool. + Once no new elements will be allocated, this dedup data structure + can be released using VG_(freezeDedupPA). Once ddpa has been frozen, + it is an error to call VG_(allocEltDedupPA). */ +extern void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa); + +/* Free all memory associated with a DedupPoolAlloc. ddpa must not be NULL. */ +extern void VG_(deleteDedupPA) ( DedupPoolAlloc *ddpa); + +#endif // __PUB_TOOL_DEDUPPOOLALLOC_H + +/*--------------------------------------------------------------------*/ +/*--- end pub_tool_deduppoolalloc.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/include/pub_tool_hashtable.h b/include/pub_tool_hashtable.h index fc721ba8f..d640c39e4 100644 --- a/include/pub_tool_hashtable.h +++ b/include/pub_tool_hashtable.h @@ -63,14 +63,36 @@ extern Int VG_(HT_count_nodes) ( VgHashTable table ); /* Add a node to the table. Duplicate keys are permitted. */ extern void VG_(HT_add_node) ( VgHashTable t, void* node ); -/* Looks up a VgHashNode in the table. Returns NULL if not found. If entries +/* Looks up a VgHashNode by key in the table. + * Returns NULL if not found. If entries * with duplicate keys are present, the most recently-added of the dups will * be returned, but it's probably better to avoid dups altogether. */ extern void* VG_(HT_lookup) ( VgHashTable table, UWord key ); -/* Removes a VgHashNode from the table. Returns NULL if not found. */ +/* Removes a VgHashNode by key from the table. Returns NULL if not found. */ extern void* VG_(HT_remove) ( VgHashTable table, UWord key ); +typedef Word (*HT_Cmp_t) ( const void* node1, const void* node2 ); + +/* Same as VG_(HT_lookup) and VG_(HT_remove), but allowing a part of or + the full element to be compared for equality, not only the key. 
+ The typical use for the below function is to store a hash value of the + element in the key, and have the comparison function checking for equality + of the full element data. + Attention about the comparison function: + * It must *not* compare the 'next' pointer. + * when comparing the rest of the node, if the node data contains holes + between components, either the node memory should be fully initialised + (e.g. allocated using VG_(calloc)) or each component should be compared + individually. */ +extern void* VG_(HT_gen_lookup) ( VgHashTable table, void* node, HT_Cmp_t cmp ); +extern void* VG_(HT_gen_remove) ( VgHashTable table, void* node, HT_Cmp_t cmp ); + +/* Output detailed usage/collision statistics. + cmp will be used to verify if 2 elements with the same key are equal. + Use NULL cmp if the hash table elements are only to be compared by key. */ +extern void VG_(HT_print_stats) ( VgHashTable table, HT_Cmp_t cmp ); + /* Allocates a suitably-sized array, copies pointers to all the hashtable elements into it, then returns both the array and the size of it. The array must be freed with VG_(free). */ diff --git a/include/pub_tool_poolalloc.h b/include/pub_tool_poolalloc.h index ee173cd69..59c25d54a 100644 --- a/include/pub_tool_poolalloc.h +++ b/include/pub_tool_poolalloc.h @@ -53,11 +53,11 @@ typedef struct _PoolAlloc PoolAlloc; /* Create new PoolAlloc, using given allocation and free function, and for elements of the specified size. Alloc fn must not fail (that is, if it returns it must have succeeded.) */ -PoolAlloc* VG_(newPA) ( UWord elemSzB, - UWord nPerPool, - void* (*alloc)(const HChar*, SizeT), - const HChar* cc, - void (*free_fn)(void*) ); +extern PoolAlloc* VG_(newPA) ( UWord elemSzB, + UWord nPerPool, + void* (*alloc)(const HChar*, SizeT), + const HChar* cc, + void (*free_fn)(void*) ); /* Free all memory associated with a PoolAlloc. */