Various changes:

* remove flags --trace-addr= and --trace-level=.  These no longer
  have any effect, so there is no point in keeping them.

* add flag --show-conflicts=no|yes [yes], which makes it possible to
  disable the conflicting-access collection machinery.  This makes
  Helgrind run much faster.  This is perhaps useful in regression
  testing, where it is only necessary to establish that a race
  exists, not to collect enough information to diagnose it easily.
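
  As a usage sketch (hypothetical invocation; "./racey" stands in
  for any threaded test program):

     valgrind --tool=helgrind --show-conflicts=no ./racey

  Races are still reported, but each report carries only the stack
  trace of the current access, not the stack trace of the prior
  conflicting access.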

* add flag --conflict-cache-size= [1000000], which makes it possible
  to control how much memory is used for storage of information about
  historical (potentially-conflicting) accesses.
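
  For example (hypothetical invocation; the 10k..10 million limits
  are enforced in hg_process_cmd_line_option below):

     valgrind --tool=helgrind --conflict-cache-size=500000 ./racey

  N is the maximum number of old references retained; once exceeded,
  event_map_maybe_GC discards the oldest generations until roughly
  half the cache (per EVENT_MAP_GC_DISCARD_FRACTION) remains.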

* Update comments on the conflicting-access machinery to more closely
  reflect the code, including notes on the significance of the
  constant N_OLDREF_ACCS.  Increase that constant from 3 to 5.
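
  For orientation, a simplified sketch of the structures involved
  (field names follow the libhb_core.c hunks below; the real code
  additionally packs the R/W bit into .thr and the access size into
  the low 2 bits of .rcec):

     /* Sketch only: one binding of an accessing (thread, R/W, size)
        triple to a reference-counted stack trace (RCEC). */
     typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;

     typedef
        struct {
           UWord      magic;                /* OldRef_MAGIC */
           UWord      gen;                  /* generation, for approx-LRU GC */
           Thr_n_RCEC accs[N_OLDREF_ACCS];  /* MTF-ordered; now 5 entries */
        }
        OldRef;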

* Fix a bug in event_map_bind: when searching for an OldRef.accs[]
  entry that matches the current access, compare the access sizes
  too.  The old code compared only the thread identity and the
  read/writeness.
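
  In sketch form, the corrected search (cf. the event_map_bind hunk
  below; .thr already encodes thread identity plus read/writeness):

     for (i = 0; i < N_OLDREF_ACCS; i++) {
        if (ref->accs[i].thr != thr)
           continue;  /* different thread or R/W-ness */
        if (ptr_and_UWord(ref->accs[i].rcec, 3) != ptr_and_UWord(rcec, 3))
           continue;  /* size bits (lowest 2 bits of .rcec) differ */
        break;        /* full (thread, R/W, size) match */
     }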

* hg_main.c: disable Dwarf3 variable/type info reading by default.
  Mostly this provides little benefit and can cause Helgrind to use
  a lot more time and memory at startup.
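
  Users who want the variable/type information anyway can re-enable
  it with the core flag mentioned in the new comment in
  hg_pre_clo_init, e.g. (hypothetical invocation):

     valgrind --tool=helgrind --read-var-info=yes ./racey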



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8845
Julian Seward 2008-12-21 10:43:10 +00:00
parent 9168f4d31e
commit 28f3b05d68
4 changed files with 88 additions and 46 deletions

hg_basics.c

@@ -70,15 +70,15 @@ Char* HG_(strdup) ( HChar* cc, const Char* s )
 /* Description of these flags is in hg_basics.h. */
 
 Bool HG_(clo_track_lockorders) = True;
 
 Bool HG_(clo_cmp_race_err_addrs) = False;
 
-Addr HG_(clo_trace_addr) = 0;
-
-Word HG_(clo_trace_level) = 0;
+Bool HG_(clo_show_conflicts) = True;
+
+UWord HG_(clo_conflict_cache_size) = 1000000;
 
 Word HG_(clo_sanity_flags) = 0;
 
 /*--------------------------------------------------------------------*/

hg_basics.h

@@ -72,11 +72,16 @@ extern Bool HG_(clo_track_lockorders);
    (regtesting) this is sometimes important. */
 extern Bool HG_(clo_cmp_race_err_addrs);
 
-/* Tracing memory accesses, so we can see what's going on.
-   clo_trace_addr is the address to monitor.  clo_trace_level = 0 for
-   no tracing, 1 for summary, 2 for detailed. */
-extern Addr HG_(clo_trace_addr);
-extern Word HG_(clo_trace_level);
+/* Show conflicting accesses?  This involves collecting and storing
+   large numbers of call stacks just in case we might need to show
+   them later, and so is expensive (although very useful).  Hence
+   allow it to be optionally disabled. */
+extern Bool HG_(clo_show_conflicts);
+
+/* Size of the conflicting-access cache, measured in terms of the
+   maximum possible number of elements in the previous-access map.
+   Must be between 10k and 10 million.  Default is 1 million. */
+extern UWord HG_(clo_conflict_cache_size);
 
 /* Sanity check level.  This is an or-ing of
    SCE_{THREADS,LOCKS,BIGRANGE,ACCESS,LAOG}. */

hg_main.c

@@ -3986,12 +3986,15 @@ static Bool hg_process_cmd_line_option ( Char* arg )
    else if (VG_CLO_STREQ(arg, "--cmp-race-err-addrs=yes"))
       HG_(clo_cmp_race_err_addrs) = True;
 
-   else if (VG_CLO_STREQN(13, arg, "--trace-addr=")) {
-      HG_(clo_trace_addr) = VG_(atoll16)(&arg[13]);
-      if (HG_(clo_trace_level) == 0)
-         HG_(clo_trace_level) = 1;
-   }
-   else VG_BNUM_CLO(arg, "--trace-level", HG_(clo_trace_level), 0, 2)
+   else if (VG_CLO_STREQ(arg, "--show-conflicts=no"))
+      HG_(clo_show_conflicts) = False;
+   else if (VG_CLO_STREQ(arg, "--show-conflicts=yes"))
+      HG_(clo_show_conflicts) = True;
+
+   /* If you change the 10k/10mill limits, remember to also change
+      them in assertions at the top of event_map_maybe_GC. */
+   else VG_BNUM_CLO(arg, "--conflict-cache-size",
+                    HG_(clo_conflict_cache_size), 10*1000, 10*1000*1000)
 
    /* "stuvwx" --> stuvwx (binary) */
    else if (VG_CLO_STREQN(18, arg, "--hg-sanity-flags=")) {
@@ -4024,9 +4027,9 @@ static Bool hg_process_cmd_line_option ( Char* arg )
 static void hg_print_usage ( void )
 {
    VG_(printf)(
 "    --track-lockorders=no|yes  show lock ordering errors? [yes]\n"
-"    --trace-addr=0xXXYYZZ      show all state changes for address 0xXXYYZZ\n"
-"    --trace-level=0|1|2        verbosity level of --trace-addr [1]\n"
+"    --show-conflicts=no|yes    show both stack traces in a race? [yes]\n"
+"    --conflict-cache-size=N    size of conflict history cache [1000000]\n"
    );
    VG_(replacement_malloc_print_usage)();
 }
@@ -4036,10 +4039,9 @@ static void hg_print_debug_usage ( void )
    VG_(replacement_malloc_print_debug_usage)();
    VG_(printf)("    --cmp-race-err-addrs=no|yes  are data addresses in "
                "race errors significant? [no]\n");
-   VG_(printf)("    --hg-sanity-flags=<XXXXXX>   sanity check "
+   VG_(printf)("    --hg-sanity-flags=<XXXXXX>  sanity check "
                "  at events (X = 0|1) [000000]\n");
    VG_(printf)("    --hg-sanity-flags values:\n");
    VG_(printf)("       100000   crosscheck happens-before-graph searches\n");
    VG_(printf)("       010000   after changes to "
                "lock-order-acquisition-graph\n");
    VG_(printf)("       001000   at memory accesses (NB: not currently used)\n");
@@ -4198,7 +4200,11 @@ static void hg_pre_clo_init ( void )
                                    hg_cli__realloc,
                                    HG_CLI__MALLOC_REDZONE_SZB );
 
-   VG_(needs_var_info)(); /* optional */
+   /* 21 Dec 08: disabled this; it mostly causes H to start more
+      slowly and use significantly more memory, without very often
+      providing useful results.  The user can request to load this
+      information manually with --read-var-info=yes. */
+   if (0) VG_(needs_var_info)(); /* optional */
 
    VG_(track_new_mem_startup)     ( evh__new_mem_w_perms );
    VG_(track_new_mem_stack_signal)( evh__new_mem_w_tid );

libhb_core.c

@@ -2666,7 +2666,6 @@ inline static void gal_Free ( GroupAlloc* ga, void* p )
 //                                                     //
 /////////////////////////////////////////////////////////
 
-#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
 #define EVENT_MAP_GC_DISCARD_FRACTION 0.5
 
 /* This is in two parts:
@@ -2678,26 +2677,40 @@ inline static void gal_Free ( GroupAlloc* ga, void* p )
       only represent each one once.  The set is indexed/searched by
       ordering on the stack trace vectors.
 
-   2. An OSet of OldRefs.  These store information about each old ref
-      that we need to record.  It is indexed by address of the
+   2. A SparseWA of OldRefs.  These store information about each old
+      ref that we need to record.  It is indexed by address of the
       location for which the information is recorded.  For LRU
       purposes, each OldRef also contains a generation number,
       indicating when it was most recently accessed.
 
       The important part of an OldRef is, however, its accs[] array.
-      This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC.  This
-      allows us to collect the last access-traceback by up to
-      N_OLDREF_ACCS different threads for this location.  The accs[]
-      array is a MTF-array.  If a pair falls off the end, that's too
-      bad -- we will lose info about that thread's access to this
-      location.
+      This is an array of N_OLDREF_ACCS entries, each of which binds
+      a (thread, R/W, size) triple to an RCEC.  This allows us to
+      collect the last access-traceback for up to N_OLDREF_ACCS
+      different triples for this location.  The accs[] array is a
+      MTF-array.  If a binding falls off the end, that's too bad --
+      we will lose info about that triple's access to this location.
 
-      When this OSet becomes too big, we can throw away the entries
+      When the SparseWA becomes too big, we can throw away the OldRefs
       whose generation numbers are below some threshold; hence doing
       approximate LRU discarding.  For each discarded OldRef we must
       of course decrement the reference count on all the RCECs it
       refers to, in order that entries from (1) eventually get
       discarded too.
+
+      A major improvement in reliability of this mechanism would be
+      to have a dynamically sized OldRef.accs[] array, so that no
+      entries ever fall off the end.  Investigations (Dec 08) suggest
+      that a major cause of the non-availability of conflicting-access
+      traces in race reports is the fixed size of this array.  I
+      suspect that for most OldRefs only a few entries are used, but
+      for a minority of cases there is an overflow, leading to info
+      lossage.  Investigations also suggest this is very workload and
+      scheduling sensitive.  Therefore a dynamic sizing would be
+      better.
+
+      However, dynamic sizing would defeat the use of a GroupAllocator
+      for OldRef structures.  And that's important for performance.
+      So it's not straightforward to do.
 */
@@ -2916,7 +2929,7 @@ static RCEC* get_RCEC ( Thr* thr )
 */
 typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
 
-#define N_OLDREF_ACCS 3
+#define N_OLDREF_ACCS 5
 
 typedef
    struct {
@@ -3007,16 +3020,25 @@ static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
    if (b) {
 
       /* We already have a record for this address.  We now need to
-         see if we have a stack trace pertaining to this thread's
-         access. */
+         see if we have a stack trace pertaining to this (thread, R/W,
+         size) triple. */
       tl_assert(keyW == a);
       ref = (OldRef*)valW;
       tl_assert(ref->magic == OldRef_MAGIC);
       tl_assert(thr);
 
      for (i = 0; i < N_OLDREF_ACCS; i++) {
-         if (ref->accs[i].thr == thr)
-            break;
+         if (ref->accs[i].thr != thr)
+            continue;
+         /* since .thr encodes both the accessing thread and the
+            read/writeness, we know now that at least those features
+            of the access match this entry.  So we just need to check
+            the size indication.  Do this by inspecting the lowest 2
+            bits of .rcec, which contain the encoded size info. */
+         if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
+            continue;
+         /* else we have a match, so stop looking. */
+         break;
      }
 
      if (i < N_OLDREF_ACCS) {
@@ -3033,13 +3055,16 @@ static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
          ref->accs[i].rcec = rcec;
          tl_assert(ref->accs[i].thr == thr);
       } else {
-         /* No entry for this thread.  Shuffle all of them down one
-            slot, and put the new entry at the start of the array. */
+         /* No entry for this (thread, R/W, size) triple.  Shuffle all
+            of them down one slot, and put the new entry at the start
+            of the array. */
          if (ref->accs[N_OLDREF_ACCS-1].thr) {
             /* the last slot is in use.  We must dec the rc on the
                associated rcec. */
             tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
             stats__ctxt_rcdec2++;
+            if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
+               VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
             ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
          } else {
             tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
@@ -3070,9 +3095,9 @@ static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
       ref->gen = oldrefGen;
       ref->accs[0].rcec = rcec;
       ref->accs[0].thr = thr;
-      /* thr==NULL is used to signify an empty slot, so we can't
-         add a NULL thr. */
-      tl_assert(ptr_and_UWord(thr, ~3) != 0);
+      /* thr==NULL is used to signify an empty slot, so we can't add a
+         NULL thr. */
+      tl_assert(ptr_and_UWord(thr, ~3) != 0);
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].thr = NULL;
         ref->accs[j].rcec = NULL;
@@ -3305,12 +3330,17 @@ static void event_map_maybe_GC ( void )
    UWord genMap_min  = 0;
    UWord genMap_size = 0;
 
-   if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
+   if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
       return;
 
    if (0)
       VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
 
+   /* Check for sane command line params.  Limit values must match
+      those in hg_process_cmd_line_option. */
+   tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
+   tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
+
    /* Check our counting is sane (expensive) */
    if (CHECK_CEM)
       tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
@@ -3432,7 +3462,8 @@ static void event_map_maybe_GC ( void )
       tl_assert(keyW >= maxGen);
       tl_assert(retained >= valW);
       if (retained - valW
-          > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
+          > (UWord)(HG_(clo_conflict_cache_size)
+                    * EVENT_MAP_GC_DISCARD_FRACTION)) {
          retained -= valW;
          maxGen = keyW;
       } else {
@@ -3688,7 +3719,7 @@ static inline SVal msm_read ( SVal svOld,
       tl_assert(is_sane_SVal_C(svNew));
    }
    tl_assert(svNew != SVal_INVALID);
-   if (svNew != svOld) {
+   if (svNew != svOld && HG_(clo_show_conflicts)) {
       if (SVal__isC(svOld) && SVal__isC(svNew)) {
          event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
          stats__msm_read_change++;
@@ -3769,7 +3800,7 @@ static inline SVal msm_write ( SVal svOld,
       tl_assert(is_sane_SVal_C(svNew));
    }
    tl_assert(svNew != SVal_INVALID);
-   if (svNew != svOld) {
+   if (svNew != svOld && HG_(clo_show_conflicts)) {
       if (SVal__isC(svOld) && SVal__isC(svNew)) {
          event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
          stats__msm_write_change++;