mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-05 19:13:46 +00:00
Callgrind new feature: count global bus lock events "Ge"
To count global bus lock events, use "--collect-bus=yes". For x86, this will count the number of executed instructions with a lock prefix; for architectures with LL/SC, this will count the number of executed SC instructions. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11167
This commit is contained in:
parent
c2abab1f04
commit
c3b643bb35
@ -415,6 +415,8 @@ Bool CLG_(process_cmd_line_option)(Char* arg)
|
||||
/* compatibility alias, deprecated option */
|
||||
else if VG_BOOL_CLO(arg, "--trace-jump", CLG_(clo).collect_jumps) {}
|
||||
|
||||
else if VG_BOOL_CLO(arg, "--collect-bus", CLG_(clo).collect_bus) {}
|
||||
|
||||
else if VG_BOOL_CLO(arg, "--combine-dumps", CLG_(clo).combine_dumps) {}
|
||||
|
||||
else if VG_BOOL_CLO(arg, "--collect-atstart", CLG_(clo).collect_atstart) {}
|
||||
@ -572,6 +574,7 @@ void CLG_(print_usage)(void)
|
||||
" --collect-atstart=no|yes Collect at process/thread start [yes]\n"
|
||||
" --toggle-collect=<func> Toggle collection on enter/leave function\n"
|
||||
" --collect-jumps=no|yes Collect jumps? [no]\n"
|
||||
" --collect-bus=no|yes Collect global bus events? [no]\n"
|
||||
#if CLG_EXPERIMENTAL
|
||||
" --collect-alloc=no|yes Collect memory allocation info? [no]\n"
|
||||
#endif
|
||||
|
||||
@ -353,10 +353,27 @@ callgrind.out.<emphasis>pid</emphasis>.<emphasis>part</emphasis>-<emphasis>threa
|
||||
start event collection a few million instructions after you have enabled
|
||||
instrumentation.</para>
|
||||
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="cl-manual.busevents" xreflabel="Counting global bus events">
|
||||
<title>Counting global bus events</title>
|
||||
|
||||
<para>For access to shared data among threads in multithreaded
|
||||
code, synchronization is required to avoid race conditions.
|
||||
Synchronization primitives are usually implemented via atomic instructions.
|
||||
However, excessive use of such instructions can lead to performance
|
||||
issues.</para>
|
||||
|
||||
<para>To enable analysis of this problem, Callgrind optionally can count
|
||||
the number of atomic instructions executed. More precisely, for x86/x86_64,
|
||||
these are instructions using a lock prefix. For architectures supporting
|
||||
LL/SC, these are the SC instructions executed. For both, the term
|
||||
"global bus events" is used.</para>
|
||||
|
||||
<para>The short name of the event type used for global bus events is "Ge".
|
||||
To count global bus events, use <option><xref linkend="opt.collect-bus"/></option>.
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="cl-manual.cycles" xreflabel="Avoiding cycles">
|
||||
<title>Avoiding cycles</title>
|
||||
@ -762,6 +779,16 @@ Also see <xref linkend="cl-manual.limits"/>.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="opt.collect-bus" xreflabel="--collect-bus">
|
||||
<term>
|
||||
<option><![CDATA[--collect-bus=<no|yes> [default: no] ]]></option>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>This specifies whether the number of global bus events executed
|
||||
should be collected. The event type "Ge" is used for these events.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
<!-- end of xi:include in the manpage -->
|
||||
</sect2>
|
||||
|
||||
@ -87,6 +87,8 @@ struct _CommandLineOptions {
|
||||
Bool collect_alloc; /* Collect size of allocated memory */
|
||||
Bool collect_systime; /* Collect time for system calls */
|
||||
|
||||
Bool collect_bus; /* Collect global bus events */
|
||||
|
||||
/* Instrument options */
|
||||
Bool instrument_atstart; /* Instrument at start? */
|
||||
Bool simulate_cache; /* Call into cache simulator ? */
|
||||
@ -679,8 +681,9 @@ extern ULong* CLG_(cost_base);
|
||||
#define EG_IR 1
|
||||
#define EG_DR 2
|
||||
#define EG_DW 3
|
||||
#define EG_ALLOC 4
|
||||
#define EG_SYS 5
|
||||
#define EG_BUS 4
|
||||
#define EG_ALLOC 5
|
||||
#define EG_SYS 6
|
||||
|
||||
struct event_sets {
|
||||
EventSet *base, *full;
|
||||
|
||||
@ -94,6 +94,30 @@ static void CLG_(init_statistics)(Statistics* s)
|
||||
}
|
||||
|
||||
|
||||
/*------------------------------------------------------------*/
|
||||
/*--- Simple callbacks (not cache simulator) ---*/
|
||||
/*------------------------------------------------------------*/
|
||||
|
||||
/* Runtime helper that counts one global bus event (event group EG_BUS)
 * for the instruction described by 'ii'.
 *
 * flushEvents() installs this function as the helper for Ev_G events and
 * passes the instruction's InstrInfo as the single register parameter,
 * which is why it is declared VG_REGPARM(1).
 *
 * ii: per-instruction info; instr_offset/instr_size are used for the debug
 *     output, cost_offset and eventset->offset[EG_BUS] locate the
 *     per-instruction counter.
 */
VG_REGPARM(1)
|
||||
static void log_global_event(InstrInfo* ii)
|
||||
{
|
||||
ULong* cost_Bus;
|
||||
|
||||
CLG_DEBUG(0, "log_global_event: Ir %#lx/%u\n",
|
||||
CLG_(bb_base) + ii->instr_offset, ii->instr_size);
|
||||
|
||||
/* Nothing is counted while event collection is switched off. */
if (!CLG_(current_state).collect) return;
|
||||
|
||||
/* Bump the bus counter in the current state's cost array; the index is
 * the EG_BUS offset within the full event set. */
CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
|
||||
|
||||
/* Pick the second counter to bump: the 'skipped' cost array of
 * current_state.nonskipped when that pointer is set, otherwise this
 * instruction's own slot relative to CLG_(cost_base). */
if (CLG_(current_state).nonskipped)
|
||||
cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
|
||||
else
|
||||
cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
|
||||
cost_Bus[0]++;
|
||||
}
|
||||
|
||||
|
||||
/*------------------------------------------------------------*/
|
||||
/*--- Instrumentation structures and event queue handling ---*/
|
||||
/*------------------------------------------------------------*/
|
||||
@ -137,6 +161,7 @@ typedef
|
||||
Ev_Dr, // Data read
|
||||
Ev_Dw, // Data write
|
||||
Ev_Dm, // Data modify (read then write)
|
||||
Ev_G // Global bus event
|
||||
}
|
||||
EventTag;
|
||||
|
||||
@ -159,6 +184,8 @@ typedef
|
||||
IRAtom* ea;
|
||||
Int szB;
|
||||
} Dm;
|
||||
struct {
|
||||
} G;
|
||||
} Ev;
|
||||
}
|
||||
Event;
|
||||
@ -242,6 +269,9 @@ static void showEvent ( Event* ev )
|
||||
ppIRExpr(ev->Ev.Dm.ea);
|
||||
VG_(printf)("\n");
|
||||
break;
|
||||
case Ev_G:
|
||||
VG_(printf)("G %p\n", ev->inode);
|
||||
break;
|
||||
default:
|
||||
tl_assert(0);
|
||||
break;
|
||||
@ -286,6 +316,11 @@ static void flushEvents ( ClgState* clgs )
|
||||
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
|
||||
EG_DW);
|
||||
break;
|
||||
case Ev_G:
|
||||
// extend event set by Bus counter
|
||||
ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
|
||||
EG_BUS);
|
||||
break;
|
||||
default:
|
||||
tl_assert(0);
|
||||
}
|
||||
@ -401,6 +436,14 @@ static void flushEvents ( ClgState* clgs )
|
||||
regparms = 3;
|
||||
inew = i+1;
|
||||
break;
|
||||
case Ev_G:
|
||||
/* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
|
||||
helperName = "log_global_event";
|
||||
helperAddr = &log_global_event;
|
||||
argv = mkIRExprVec_1( i_node_expr );
|
||||
regparms = 1;
|
||||
inew = i+1;
|
||||
break;
|
||||
default:
|
||||
tl_assert(0);
|
||||
}
|
||||
@ -505,6 +548,21 @@ void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
|
||||
clgs->events_used++;
|
||||
}
|
||||
|
||||
/* Queue a global bus event (Ev_G) for instruction 'inode' in the pending
 * event list of 'clgs'.
 *
 * Does nothing unless the --collect-bus option (CLG_(clo).collect_bus) is
 * enabled.
 *
 * clgs:  instrumentation state holding the events[] queue and events_used.
 * inode: InstrInfo of the instruction the event is attributed to.
 */
static
|
||||
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
|
||||
{
|
||||
Event* evt;
|
||||
/* Bus events are only recorded when requested on the command line. */
if (!CLG_(clo).collect_bus) return;
|
||||
/* Queue full: flush first. NOTE(review): presumably flushEvents() resets
 * events_used; the assertion below depends on there being a free slot. */
if (clgs->events_used == N_EVENTS)
|
||||
flushEvents(clgs);
|
||||
tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
|
||||
/* Fill in the next free slot; an Ev_G event carries no payload besides
 * its tag and the owning instruction. */
evt = &clgs->events[clgs->events_used];
|
||||
init_Event(evt);
|
||||
evt->tag = Ev_G;
|
||||
evt->inode = inode;
|
||||
clgs->events_used++;
|
||||
}
|
||||
|
||||
/* Initialise or check (if already seen before) an InstrInfo for next insn.
|
||||
We only can set instr_offset/instr_size here. The required event set and
|
||||
resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
|
||||
@ -840,6 +898,7 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
|
||||
dataSize *= 2; /* since this is a doubleword-cas */
|
||||
addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
|
||||
addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
|
||||
addEvent_G( &clgs, curr_inode );
|
||||
break;
|
||||
}
|
||||
|
||||
@ -855,6 +914,12 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
|
||||
dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
|
||||
addEvent_Dw( &clgs, curr_inode,
|
||||
sizeofIRType(dataTy), st->Ist.LLSC.addr );
|
||||
/* I don't know whether the global-bus-lock cost should
|
||||
be attributed to the LL or the SC, but it doesn't
|
||||
really matter since they always have to be used in
|
||||
pairs anyway. Hence put it (quite arbitrarily) on
|
||||
the SC. */
|
||||
addEvent_G( &clgs, curr_inode );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1782,6 +1782,9 @@ void CLG_(init_eventsets)()
|
||||
CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw");
|
||||
}
|
||||
|
||||
if (CLG_(clo).collect_bus)
|
||||
CLG_(register_event_group)(EG_BUS, "Ge");
|
||||
|
||||
if (CLG_(clo).collect_alloc)
|
||||
CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
|
||||
|
||||
@ -1793,6 +1796,7 @@ void CLG_(init_eventsets)()
|
||||
|
||||
// event set comprising all event groups, used for inclusive cost
|
||||
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
|
||||
CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
|
||||
CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
|
||||
|
||||
CLG_DEBUGIF(1) {
|
||||
@ -1819,6 +1823,7 @@ void CLG_(init_eventsets)()
|
||||
CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
|
||||
CLG_(append_event)(CLG_(dumpmap), "AcCost2");
|
||||
CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
|
||||
CLG_(append_event)(CLG_(dumpmap), "Ge");
|
||||
CLG_(append_event)(CLG_(dumpmap), "allocCount");
|
||||
CLG_(append_event)(CLG_(dumpmap), "allocSize");
|
||||
CLG_(append_event)(CLG_(dumpmap), "sysCount");
|
||||
@ -1832,7 +1837,8 @@ static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
|
||||
{
|
||||
if (!CLG_(clo).simulate_cache)
|
||||
cost[ fullOffset(EG_IR) ] += exe_count;
|
||||
else
|
||||
|
||||
if (ii->eventset)
|
||||
CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
|
||||
ii->eventset, bbcc->cost + ii->cost_offset);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user