From bf162724b924de3ab7dbd2b9e59370b9c5b07425 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sun, 25 Dec 2005 06:34:04 +0000 Subject: [PATCH] Merge in r5435 from COMPVBITS. Also added a note to docs/internals/performance.txt about it. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5438 --- coregrind/m_execontext.c | 58 +++++++++++++++++++--------------- coregrind/m_stacktrace.c | 5 +-- docs/internals/performance.txt | 3 ++ include/pub_tool_stacktrace.h | 3 +- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/coregrind/m_execontext.c b/coregrind/m_execontext.c index c5b5c9393..2b99e92e6 100644 --- a/coregrind/m_execontext.c +++ b/coregrind/m_execontext.c @@ -46,7 +46,8 @@ struct _ExeContext { struct _ExeContext * next; - /* Variable-length array. The size is VG_(clo_backtrace_size); at + UInt n_ips; + /* Variable-length array. The size is 'n_ips'; at least 1, at most VG_DEEPEST_BACKTRACE. [0] is the current IP, [1] is its caller, [2] is the caller of [1], etc. */ Addr ips[0]; @@ -126,38 +127,42 @@ void VG_(print_ExeContext_stats) ( void ) /* Print an ExeContext. */ void VG_(pp_ExeContext) ( ExeContext* ec ) { - VG_(pp_StackTrace)( ec->ips, VG_(clo_backtrace_size) ); + VG_(pp_StackTrace)( ec->ips, ec->n_ips ); } /* Compare two ExeContexts, comparing all callers. */ Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 ) { + Int i; + if (e1 == NULL || e2 == NULL) return False; + + // Must be at least one address in each trace. + tl_assert(e1->n_ips >= 1 && e2->n_ips >= 1); + switch (res) { case Vg_LowRes: /* Just compare the top two callers. */ ec_cmp2s++; - if (e1->ips[0] != e2->ips[0]) return False; - - if (VG_(clo_backtrace_size) < 2) return True; - if (e1->ips[1] != e2->ips[1]) return False; + for (i = 0; i < 2; i++) { + if ( (e1->n_ips <= i) && (e2->n_ips <= i)) return True; + if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False; + if (!(e1->n_ips <= i) && (e2->n_ips <= i)) return False; + if (e1->ips[i] != e2->ips[i]) return False; + } return True; case Vg_MedRes: /* Just compare the top four callers. */ ec_cmp4s++; - if (e1->ips[0] != e2->ips[0]) return False; - - if (VG_(clo_backtrace_size) < 2) return True; - if (e1->ips[1] != e2->ips[1]) return False; - - if (VG_(clo_backtrace_size) < 3) return True; - if (e1->ips[2] != e2->ips[2]) return False; - - if (VG_(clo_backtrace_size) < 4) return True; - if (e1->ips[3] != e2->ips[3]) return False; + for (i = 0; i < 4; i++) { + if ( (e1->n_ips <= i) && (e2->n_ips <= i)) return True; + if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False; + if (!(e1->n_ips <= i) && (e2->n_ips <= i)) return False; + if (e1->ips[i] != e2->ips[i]) return False; + } return True; case Vg_HighRes: @@ -188,18 +193,20 @@ ExeContext* VG_(record_ExeContext) ( ThreadId tid ) UWord hash; ExeContext* new_ec; ExeContext* list; + UInt n_ips; init_ExeContext_storage(); - vg_assert(VG_(clo_backtrace_size) >= 1 - && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE); + vg_assert(VG_(clo_backtrace_size) >= 1 && + VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE); - VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) ); + n_ips = VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) ); + tl_assert(n_ips >= 1); /* Now figure out if we've seen this one before. First hash it so as to determine the list number. */ hash = 0; - for (i = 0; i < VG_(clo_backtrace_size); i++) { + for (i = 0; i < n_ips; i++) { hash ^= ips[i]; hash = (hash << 29) | (hash >> 3); } @@ -215,7 +222,7 @@ ExeContext* VG_(record_ExeContext) ( ThreadId tid ) if (list == NULL) break; ec_searchcmps++; same = True; - for (i = 0; i < VG_(clo_backtrace_size); i++) { + for (i = 0; i < n_ips; i++) { if (list->ips[i] != ips[i]) { same = False; break; @@ -234,13 +241,14 @@ ExeContext* VG_(record_ExeContext) ( ThreadId tid ) ec_totstored++; new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, - sizeof(struct _ExeContext *) - + VG_(clo_backtrace_size) * sizeof(Addr) ); + sizeof(struct _ExeContext) + + n_ips * sizeof(Addr) ); - for (i = 0; i < VG_(clo_backtrace_size); i++) + for (i = 0; i < n_ips; i++) new_ec->ips[i] = ips[i]; - new_ec->next = ec_list[hash]; + new_ec->n_ips = n_ips; + new_ec->next = ec_list[hash]; ec_list[hash] = new_ec; return new_ec; diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c index 21bc0e936..a9ad21963 100644 --- a/coregrind/m_stacktrace.c +++ b/coregrind/m_stacktrace.c @@ -65,12 +65,9 @@ UInt VG_(get_StackTrace2) ( Addr* ips, UInt n_ips, vg_assert(sizeof(Addr) == sizeof(void*)); /* Snaffle IPs from the client's stack into ips[0 .. n_ips-1], - putting zeroes in when the trail goes cold, which we guess to be + stopping when the trail goes cold, which we guess to be when FP is not a reasonable stack location. */ - for (i = 0; i < n_ips; i++) - ips[i] = 0; - // JRS 2002-sep-17: hack, to round up fp_max to the end of the // current page, at least. Dunno if it helps. // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again diff --git a/docs/internals/performance.txt b/docs/internals/performance.txt index 3691ab8b5..8a12bfcd3 100644 --- a/docs/internals/performance.txt +++ b/docs/internals/performance.txt @@ -26,6 +26,9 @@ Post 3.1.0: - Nick reduced the iteration count of the loop in swizzle() from 20 to 5, which gave almost identical results while saving 2% in perf/tinycc and 10% in perf/heap on a 3GHz Prescott P4. +- Nick changed ExeContext gathering to not record/save extra zeroes at the + end. Saved 7% on perf/heap with --num-callers=50, and about 1% on + perf/tinycc. COMPVBITS branch: - Nick converted to compress V bits, initial version saved 0--5% on most diff --git a/include/pub_tool_stacktrace.h b/include/pub_tool_stacktrace.h index b332bd834..8ed9bf597 100644 --- a/include/pub_tool_stacktrace.h +++ b/include/pub_tool_stacktrace.h @@ -36,7 +36,8 @@ typedef Addr* StackTrace; // Walks the stack to get instruction pointers from the top stack frames for // thread 'tid'. Maximum of 'n_ips' addresses put into 'ips'; 0 is the top -// of the stack, 1 is its caller, etc. +// of the stack, 1 is its caller, etc. Everything from ips[n_ips] onwards +// is undefined and should not be read. extern UInt VG_(get_StackTrace) ( ThreadId tid, StackTrace ips, UInt n_ips ); // Apply a function to every element in the StackTrace. The parameter 'n'