Callgrind new feature: count global bus lock events "Ge"

To count global bus lock events, use "--collect-bus=yes".
For x86, this will count the number of executed instructions
with a lock prefix; for architectures with LL/SC, this will
count the number of executed SC instructions.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11167
This commit is contained in:
Josef Weidendorfer 2010-06-09 22:33:02 +00:00
parent c2abab1f04
commit c3b643bb35
5 changed files with 108 additions and 4 deletions

View File

@ -415,6 +415,8 @@ Bool CLG_(process_cmd_line_option)(Char* arg)
/* compatibility alias, deprecated option */
else if VG_BOOL_CLO(arg, "--trace-jump", CLG_(clo).collect_jumps) {}
else if VG_BOOL_CLO(arg, "--collect-bus", CLG_(clo).collect_bus) {}
else if VG_BOOL_CLO(arg, "--combine-dumps", CLG_(clo).combine_dumps) {}
else if VG_BOOL_CLO(arg, "--collect-atstart", CLG_(clo).collect_atstart) {}
@ -572,6 +574,7 @@ void CLG_(print_usage)(void)
" --collect-atstart=no|yes Collect at process/thread start [yes]\n"
" --toggle-collect=<func> Toggle collection on enter/leave function\n"
" --collect-jumps=no|yes Collect jumps? [no]\n"
" --collect-bus=no|yes Collect global bus events? [no]\n"
#if CLG_EXPERIMENTAL
" --collect-alloc=no|yes Collect memory allocation info? [no]\n"
#endif

View File

@ -353,10 +353,27 @@ callgrind.out.<emphasis>pid</emphasis>.<emphasis>part</emphasis>-<emphasis>threa
start event collection a few million instructions after you have enabled
instrumentation.</para>
</sect2>
<sect2 id="cl-manual.busevents" xreflabel="Counting global bus events">
<title>Counting global bus events</title>
<para>For access to shared data among threads in multithreaded
code, synchronization is required to avoid race conditions.
Synchronization primitives are usually implemented via atomic instructions.
However, excessive use of such instructions can lead to performance
issues.</para>
<para>To enable analysis of this problem, Callgrind can optionally count
the number of atomic instructions executed. More precisely, for x86/x86_64,
these are instructions using a lock prefix. For architectures supporting
LL/SC, these are the SC instructions executed. For both, the term
"global bus events" is used.</para>
<para>The short name of the event type used for global bus events is "Ge".
To count global bus events, use <option><xref linkend="opt.collect-bus"/></option>.
</para>
</sect2>
<sect2 id="cl-manual.cycles" xreflabel="Avoiding cycles">
<title>Avoiding cycles</title>
@ -762,6 +779,16 @@ Also see <xref linkend="cl-manual.limits"/>.</para>
</listitem>
</varlistentry>
<varlistentry id="opt.collect-bus" xreflabel="--collect-bus">
<term>
<option><![CDATA[--collect-bus=<no|yes> [default: no] ]]></option>
</term>
<listitem>
<para>This specifies whether the number of global bus events executed
should be collected. The event type "Ge" is used for these events.</para>
</listitem>
</varlistentry>
</variablelist>
<!-- end of xi:include in the manpage -->
</sect2>

View File

@ -87,6 +87,8 @@ struct _CommandLineOptions {
Bool collect_alloc; /* Collect size of allocated memory */
Bool collect_systime; /* Collect time for system calls */
Bool collect_bus; /* Collect global bus events */
/* Instrument options */
Bool instrument_atstart; /* Instrument at start? */
Bool simulate_cache; /* Call into cache simulator ? */
@ -679,8 +681,9 @@ extern ULong* CLG_(cost_base);
#define EG_IR 1
#define EG_DR 2
#define EG_DW 3
#define EG_ALLOC 4
#define EG_SYS 5
#define EG_BUS 4
#define EG_ALLOC 5
#define EG_SYS 6
struct event_sets {
EventSet *base, *full;

View File

@ -94,6 +94,30 @@ static void CLG_(init_statistics)(Statistics* s)
}
/*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator) ---*/
/*------------------------------------------------------------*/
/* Run-time helper for global bus events (event group EG_BUS, dumped
   under the name "Ge"); flushEvents() installs it for Ev_G events.

   ii: instruction info of the instruction that raised the event.

   The debug trace is emitted unconditionally.  Counters are touched
   only while CLG_(current_state).collect is set: the EG_BUS entry of
   the current state's cost array is bumped, and then one more counter,
   which is either the EG_BUS entry of the "skipped" costs of the
   current nonskipped context (if there is one) or the per-instruction
   EG_BUS counter located via ii->cost_offset and ii->eventset. */
VG_REGPARM(1)
static void log_global_event(InstrInfo* ii)
{
   ULong* bus_slot;

   CLG_DEBUG(0, "log_global_event: Ir %#lx/%u\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size);

   if (CLG_(current_state).collect) {
      CLG_(current_state).cost[ fullOffset(EG_BUS) ] += 1;

      if (!CLG_(current_state).nonskipped) {
         /* no nonskipped context: charge the instruction itself */
         bus_slot = CLG_(cost_base) + ii->cost_offset
                    + ii->eventset->offset[EG_BUS];
      }
      else {
         /* charge the skipped-cost array of the nonskipped context */
         bus_slot = CLG_(current_state).nonskipped->skipped
                    + fullOffset(EG_BUS);
      }

      *bus_slot += 1;
   }
}
/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling ---*/
/*------------------------------------------------------------*/
@ -137,6 +161,7 @@ typedef
Ev_Dr, // Data read
Ev_Dw, // Data write
Ev_Dm, // Data modify (read then write)
Ev_G // Global bus event
}
EventTag;
@ -159,6 +184,8 @@ typedef
IRAtom* ea;
Int szB;
} Dm;
struct {
} G;
} Ev;
}
Event;
@ -242,6 +269,9 @@ static void showEvent ( Event* ev )
ppIRExpr(ev->Ev.Dm.ea);
VG_(printf)("\n");
break;
case Ev_G:
VG_(printf)("G %p\n", ev->inode);
break;
default:
tl_assert(0);
break;
@ -286,6 +316,11 @@ static void flushEvents ( ClgState* clgs )
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
EG_DW);
break;
case Ev_G:
// extend event set by Bus counter
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
EG_BUS);
break;
default:
tl_assert(0);
}
@ -401,6 +436,14 @@ static void flushEvents ( ClgState* clgs )
regparms = 3;
inew = i+1;
break;
case Ev_G:
/* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
helperName = "log_global_event";
helperAddr = &log_global_event;
argv = mkIRExprVec_1( i_node_expr );
regparms = 1;
inew = i+1;
break;
default:
tl_assert(0);
}
@ -505,6 +548,21 @@ void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
clgs->events_used++;
}
/* Queue a global bus event (Ev_G) for instruction <inode> in the
   pending-event buffer of <clgs>.

   Does nothing unless bus event collection was requested
   (CLG_(clo).collect_bus).  If the buffer is already full, it is
   flushed first so that a free slot is available. */
static
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
{
   Event* slot;

   if (CLG_(clo).collect_bus) {
      /* make room if every slot is taken */
      if (clgs->events_used == N_EVENTS) {
         flushEvents(clgs);
      }
      tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);

      /* claim the next free slot and fill it in */
      slot = clgs->events + clgs->events_used;
      init_Event(slot);
      slot->tag   = Ev_G;
      slot->inode = inode;

      clgs->events_used += 1;
   }
}
/* Initialise or check (if already seen before) an InstrInfo for next insn.
We only can set instr_offset/instr_size here. The required event set and
resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
@ -840,6 +898,7 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
dataSize *= 2; /* since this is a doubleword-cas */
addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
addEvent_G( &clgs, curr_inode );
break;
}
@ -855,6 +914,12 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
addEvent_Dw( &clgs, curr_inode,
sizeofIRType(dataTy), st->Ist.LLSC.addr );
/* I don't know whether the global-bus-lock cost should
be attributed to the LL or the SC, but it doesn't
really matter since they always have to be used in
pairs anyway. Hence put it (quite arbitrarily) on
the SC. */
addEvent_G( &clgs, curr_inode );
}
break;
}

View File

@ -1782,6 +1782,9 @@ void CLG_(init_eventsets)()
CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw");
}
if (CLG_(clo).collect_bus)
CLG_(register_event_group)(EG_BUS, "Ge");
if (CLG_(clo).collect_alloc)
CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
@ -1793,6 +1796,7 @@ void CLG_(init_eventsets)()
// event set comprising all event groups, used for inclusive cost
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
CLG_DEBUGIF(1) {
@ -1819,6 +1823,7 @@ void CLG_(init_eventsets)()
CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
CLG_(append_event)(CLG_(dumpmap), "AcCost2");
CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
CLG_(append_event)(CLG_(dumpmap), "Ge");
CLG_(append_event)(CLG_(dumpmap), "allocCount");
CLG_(append_event)(CLG_(dumpmap), "allocSize");
CLG_(append_event)(CLG_(dumpmap), "sysCount");
@ -1832,7 +1837,8 @@ static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
{
if (!CLG_(clo).simulate_cache)
cost[ fullOffset(EG_IR) ] += exe_count;
else
if (ii->eventset)
CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
ii->eventset, bbcc->cost + ii->cost_offset);
}