Enhance callgrind option --collect-system.

- The command option --collect-systime has been enhanced to specify
    the unit used to record the elapsed time spent during system calls.
    The command option now accepts the values no|yes|msec|usec|nsec,
    where yes is a synonym of msec.  When giving the value nsec, the
    system cpu time of system calls is also recorded.

Note that the nsec option is not supported on Darwin.
This commit is contained in:
Philippe Waroquiers 2019-10-10 21:42:01 +02:00
parent fe7ec1eca2
commit 83a535e431
9 changed files with 201 additions and 46 deletions

5
NEWS
View File

@ -32,6 +32,11 @@ support for X86/macOS 10.13 and AMD64/macOS 10.13.
* Cachegrind:
* Callgrind:
- The command option --collect-systime has been enhanced to specify
the unit used to record the elapsed time spent during system calls.
The command option now accepts the values no|yes|msec|usec|nsec,
where yes is a synonym of msec. When giving the value nsec, the
system cpu time of system calls is also recorded.
* Massif:

View File

@ -523,7 +523,23 @@ Bool CLG_(process_cmd_line_option)(const HChar* arg)
else if VG_INT_CLO( arg, "--dump-every-bb", CLG_(clo).dump_every_bb) {}
else if VG_BOOL_CLO(arg, "--collect-alloc", CLG_(clo).collect_alloc) {}
else if VG_BOOL_CLO(arg, "--collect-systime", CLG_(clo).collect_systime) {}
else if VG_XACT_CLO(arg, "--collect-systime=no",
CLG_(clo).collect_systime, systime_no) {}
else if VG_XACT_CLO(arg, "--collect-systime=msec",
CLG_(clo).collect_systime, systime_msec) {}
else if VG_XACT_CLO(arg, "--collect-systime=yes", /* backward compatibility. */
CLG_(clo).collect_systime, systime_msec) {}
else if VG_XACT_CLO(arg, "--collect-systime=usec",
CLG_(clo).collect_systime, systime_usec) {}
else if VG_XACT_CLO(arg, "--collect-systime=nsec",
CLG_(clo).collect_systime, systime_nsec) {
# if defined(VGO_darwin)
VG_(fmsg_bad_option)
(arg,
"--collect-systime=nsec not supported on darwin\n");
# endif
}
else if VG_BOOL_CLO(arg, "--collect-bus", CLG_(clo).collect_bus) {}
/* for option compatibility with cachegrind */
else if VG_BOOL_CLO(arg, "--cache-sim", CLG_(clo).simulate_cache) {}
@ -580,7 +596,11 @@ void CLG_(print_usage)(void)
#if CLG_EXPERIMENTAL
" --collect-alloc=no|yes Collect memory allocation info? [no]\n"
#endif
" --collect-systime=no|yes Collect system call time info? [no]\n"
" --collect-systime=no|yes|msec|usec|nsec Collect system call time info? [no]\n"
" no Do not collect system call time info.\n"
" msec|yes Collect syscount, syscall elapsed time (milli-seconds).\n"
" usec Collect syscount, syscall elapsed time (micro-seconds).\n"
" nsec Collect syscount, syscall elapsed and syscall cpu time (nano-seconds).\n"
"\n cost entity separation options:\n"
" --separate-threads=no|yes Separate data per thread [no]\n"
@ -646,7 +666,7 @@ void CLG_(set_clo_defaults)(void)
CLG_(clo).collect_atstart = True;
CLG_(clo).collect_jumps = False;
CLG_(clo).collect_alloc = False;
CLG_(clo).collect_systime = False;
CLG_(clo).collect_systime = systime_no;
CLG_(clo).collect_bus = False;
CLG_(clo).skip_plt = True;

View File

@ -800,11 +800,25 @@ Also see <xref linkend="cl-manual.limits"/>.</para>
<varlistentry id="opt.collect-systime" xreflabel="--collect-systime">
<term>
<option><![CDATA[--collect-systime=<no|yes> [default: no] ]]></option>
<option><![CDATA[--collect-systime=<no|yes|msec|usec|nsec> [default: no] ]]></option>
</term>
<listitem>
<para>This specifies whether information for system call times
should be collected.</para>
should be collected.</para>
<para>The value <computeroutput>no</computeroutput> indicates to record
no system call information.</para>
<para>The other values indicate to record the number of system calls
done (sysCount event) and the elapsed time (sysTime event) spent
in system calls.
The <computeroutput>--collect-systime</computeroutput> value gives
the unit used for sysTime : milli seconds, micro seconds or nano
seconds. With the value <computeroutput>nsec</computeroutput>,
callgrind also records the cpu time spent during system calls
(sysCpuTime).</para>
<para>The value <computeroutput>yes</computeroutput> is a synonym
of <computeroutput>msec</computeroutput>.
The value <computeroutput>nsec</computeroutput> is not supported
on Darwin.</para>
</listitem>
</varlistentry>

View File

@ -1305,6 +1305,21 @@ static VgFile *new_dumpfile(int tid, const HChar* trigger)
HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
VG_(fprintf)(fp, "events: %s\n", evmap);
VG_(free)(evmap);
switch (CLG_(clo).collect_systime) {
case systime_no: break;
case systime_msec:
VG_(fprintf)(fp, "event: sysTime : sysTime (elapsed ms)\n");
break;
case systime_usec:
VG_(fprintf)(fp, "event: sysTime : sysTime (elapsed us)\n");
break;
case systime_nsec:
VG_(fprintf)(fp, "event: sysTime : sysTime (elapsed ns)\n");
VG_(fprintf)(fp, "event: sysCpuTime : sysCpuTime (system cpu ns)\n");
break;
default:
tl_assert(0);
}
/* summary lines */
sum = CLG_(get_eventset_cost)( CLG_(sets).full );

View File

@ -58,11 +58,6 @@
/* Enable experimental features? */
#define CLG_EXPERIMENTAL 0
/* Syscall Timing in microseconds?
* (define to 0 if you get compile errors) */
#define CLG_MICROSYSTIME 0
/*------------------------------------------------------------*/
/*--- Command line options ---*/
@ -70,6 +65,19 @@
#define DEFAULT_OUTFORMAT "callgrind.out.%p"
/* If and how to collect syscall time.
systime_no : do not collect systime
systime_msec : collect syscount, systime elapsed, milli second precision.
systime_usec : collect syscount, systime elapsed, micro second precision.
systime_nsec : collect syscount, systime elapsed, systime cpu, nano second
precision. */
typedef enum {
systime_no,
systime_msec,
systime_usec,
systime_nsec
} Collect_Systime;
typedef struct _CommandLineOptions CommandLineOptions;
struct _CommandLineOptions {
@ -100,7 +108,7 @@ struct _CommandLineOptions {
Bool collect_jumps; /* Collect (cond.) jumps in functions ? */
Bool collect_alloc; /* Collect size of allocated memory */
Bool collect_systime; /* Collect time for system calls */
Collect_Systime collect_systime; /* Collect time for system calls */
Bool collect_bus; /* Collect global bus events */

View File

@ -1697,60 +1697,115 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
}
/* Syscall Timing */
/* Syscall Timing. syscalltime[tid] is the time at which thread tid last
started a syscall. */
/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
ULong *syscalltime;
#else
UInt *syscalltime;
#endif
/* struct vki_timespec syscalltime[VG_N_THREADS];
Whatever the syscall we use to measure the syscall time, we convert to
seconds and nanoseconds. */
struct vki_timespec *syscalltime;
struct vki_timespec *syscallcputime;
static
void collect_time (struct vki_timespec *systime, struct vki_timespec *syscputime)
{
switch (CLG_(clo).collect_systime) {
case systime_no: tl_assert (0);
case systime_msec: {
UInt ms_timer = VG_(read_millisecond_timer)();
systime->tv_sec = ms_timer / 1000;
systime->tv_nsec = (ms_timer % 1000) * 1000000L;
break;
}
case systime_usec: {
struct vki_timeval tv_now;
VG_(gettimeofday)(&tv_now, NULL);
systime->tv_sec = tv_now.tv_sec;
systime->tv_nsec = tv_now.tv_usec * 1000;
break;
}
case systime_nsec:
# if defined(VGO_linux) || defined(VGO_solaris)
VG_(clock_gettime)(systime, VKI_CLOCK_MONOTONIC);
VG_(clock_gettime)(syscputime, VKI_CLOCK_THREAD_CPUTIME_ID);
# elif defined(VGO_darwin)
tl_assert(0);
# else
# error "Unknown OS"
# endif
break;
}
}
static
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
UWord* args, UInt nArgs)
{
if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
struct vki_timeval tv_now;
VG_(gettimeofday)(&tv_now, NULL);
syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
collect_time(&syscalltime[tid],
CLG_(clo).collect_systime == systime_nsec ? &syscallcputime[tid] : NULL);
}
/* Returns "after - before" in the unit as specified by --collect-systime.
after is supposed to be >= before, and tv_nsec must be >= 0 and < One_Second_In_Nsec. */
static
ULong vki_timespec_diff (struct vki_timespec after, struct vki_timespec before)
{
vki_time_t diff_sec = after.tv_sec - before.tv_sec;
long diff_nsec = after.tv_nsec - before.tv_nsec;
ULong nsec_factor; // factor to convert the desired unit into nsec.
if (diff_nsec < 0) {
diff_sec--;
diff_nsec += 1000000000ULL;
}
switch (CLG_(clo).collect_systime) {
case systime_no: tl_assert (0);
case systime_msec: nsec_factor = 1000000ULL; break;
case systime_usec: nsec_factor = 1000ULL; break;
case systime_nsec: nsec_factor = 1ULL; break;
default: tl_assert(0);
}
return ((ULong) diff_sec * 1000000000ULL + diff_nsec) / nsec_factor;
}
static
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
UWord* args, UInt nArgs, SysRes res)
{
if (CLG_(clo).collect_systime &&
CLG_(current_state).bbcc) {
Int o;
#if CLG_MICROSYSTIME
struct vki_timeval tv_now;
if (CLG_(current_state).bbcc) {
Int o;
struct vki_timespec ts_now;
struct vki_timespec ts_cpunow;
ULong diff;
VG_(gettimeofday)(&tv_now, NULL);
diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif
/* offset o is for "SysCount", o+1 for "SysTime" */
collect_time(&ts_now,
CLG_(clo).collect_systime == systime_nsec ? &ts_cpunow : NULL);
diff = vki_timespec_diff (ts_now, syscalltime[tid]);
/* offset o is for "SysCount", o+1 for "SysTime",
o+2 is (optionally) "SysCpuTime". */
o = fullOffset(EG_SYS);
CLG_ASSERT(o>=0);
CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
(ULong)diff);
CLG_(current_state).cost[o] ++;
CLG_(current_state).cost[o+1] += diff;
diff);
if (!CLG_(current_state).bbcc->skipped)
CLG_(init_cost_lz)(CLG_(sets).full,
&(CLG_(current_state).bbcc->skipped));
CLG_(current_state).cost[o] ++;
CLG_(current_state).cost[o+1] += diff;
CLG_(current_state).bbcc->skipped[o] ++;
CLG_(current_state).bbcc->skipped[o+1] += diff;
if (CLG_(clo).collect_systime == systime_nsec) {
diff = vki_timespec_diff (ts_cpunow, syscallcputime[tid]);
CLG_DEBUG(0," SysCpuTime (Off %d) for Syscall %u: %llu\n", o+2, syscallno,
diff);
CLG_(current_state).cost[o+2] += diff;
CLG_(current_state).bbcc->skipped[o+2] += diff;
}
}
}
@ -1973,13 +2028,22 @@ void CLG_(post_clo_init)(void)
"sp-at-mem-access\n");
}
if (CLG_(clo).collect_systime) {
if (CLG_(clo).collect_systime != systime_no) {
VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
CLG_(post_syscalltime));
syscalltime = CLG_MALLOC("cl.main.pci.1",
VG_N_THREADS * sizeof syscalltime[0]);
for (UInt i = 0; i < VG_N_THREADS; ++i) {
syscalltime[i] = 0;
syscalltime[i].tv_sec = 0;
syscalltime[i].tv_nsec = 0;
}
if (CLG_(clo).collect_systime == systime_nsec) {
syscallcputime = CLG_MALLOC("cl.main.pci.2",
VG_N_THREADS * sizeof syscallcputime[0]);
for (UInt i = 0; i < VG_N_THREADS; ++i) {
syscallcputime[i].tv_sec = 0;
syscallcputime[i].tv_nsec = 0;
}
}
}

View File

@ -1625,8 +1625,12 @@ void CLG_(init_eventsets)()
if (CLG_(clo).collect_alloc)
CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
if (CLG_(clo).collect_systime)
CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
if (CLG_(clo).collect_systime != systime_no) {
if (CLG_(clo).collect_systime == systime_nsec)
CLG_(register_event_group3)(EG_SYS, "sysCount", "sysTime", "sysCpuTime");
else
CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
}
// event set used as base for instruction self cost
CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);
@ -1670,6 +1674,7 @@ void CLG_(init_eventsets)()
CLG_(append_event)(CLG_(dumpmap), "allocSize");
CLG_(append_event)(CLG_(dumpmap), "sysCount");
CLG_(append_event)(CLG_(dumpmap), "sysTime");
CLG_(append_event)(CLG_(dumpmap), "sysCpuTime");
}

View File

@ -962,6 +962,20 @@ UInt VG_(read_millisecond_timer) ( void )
return (now - base) / 1000;
}
# if defined(VGO_linux) || defined(VGO_solaris)
void VG_(clock_gettime) ( struct vki_timespec *ts, vki_clockid_t clk_id )
{
SysRes res;
res = VG_(do_syscall2)(__NR_clock_gettime, clk_id,
(UWord)ts);
vg_assert (sr_isError(res) == 0);
}
# elif defined(VGO_darwin)
/* See pub_tool_libcproc.h */
# else
# error "Unknown OS"
# endif
Int VG_(gettimeofday)(struct vki_timeval *tv, struct vki_timezone *tz)
{
SysRes res;

View File

@ -103,6 +103,16 @@ extern UInt VG_(read_millisecond_timer) ( void );
extern Int VG_(gettimeofday)(struct vki_timeval *tv, struct vki_timezone *tz);
# if defined(VGO_linux) || defined(VGO_solaris)
/* Get the clock value as specified by clk_id. Asserts if unsuccesful. */
extern void VG_(clock_gettime)(struct vki_timespec *ts, vki_clockid_t clk_id);
# elif defined(VGO_darwin)
/* It seems clock_gettime is only available on recent Darwin versions.
For the moment, let's assume it is not available. */
# else
# error "Unknown OS"
# endif
// Returns the number of milliseconds of user cpu time we have used,
// as reported by 'getrusage'.
extern UInt VG_(get_user_milliseconds)(void);