Mirror of https://github.com/Zenithsiz/ftmemsim-valgrind.git (synced 2026-02-03 10:05:29 +00:00)
Add support for copy and ad hoc profiling to DHAT.
Commit 8c08253b89, parent d2d54dbcc7
.gitignore (vendored): 2 lines changed
@@ -273,8 +273,10 @@
/dhat/tests/*.stdout.out
/dhat/tests/.deps
/dhat/tests/acc
/dhat/tests/ad-hoc
/dhat/tests/basic
/dhat/tests/big
/dhat/tests/copy
/dhat/tests/empty
/dhat/tests/sig
/dhat/tests/single
NEWS: 12 lines changed

@@ -16,6 +16,18 @@ support for X86/macOS 10.13, AMD64/macOS 10.13 and nanoMIPS/Linux.

* DHAT:

  - DHAT has been extended, with two new modes of operation. The new
    --mode=copy flag triggers copy profiling, which records calls to memcpy,
    strcpy, and similar functions. The new --mode=ad-hoc flag triggers ad hoc
    profiling, which records calls to the DHAT_AD_HOC_EVENT client request in
    the new dhat/dhat.h file. This is useful for learning more about hot code
    paths. See the user manual for more information about the new modes.

  - Because of these changes, DHAT's file format has changed. DHAT output
    files produced with earlier versions of DHAT will not work with this
    version of DHAT's viewer, and DHAT output files produced with this version
    of DHAT will not work with earlier versions of DHAT's viewer.
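    A minimal illustration of the new ad hoc mode (the function below is made
    up, and the exact include path depends on how Valgrind's headers are
    installed; see dhat/dhat.h and the user manual for details):

        #include "dhat.h"   /* the new header added by this change */

        static void hot_path(void)
        {
           /* Weight 1: just count how often this point is reached. */
           DHAT_AD_HOC_EVENT(1);
           /* ... the interesting work ... */
        }

        /* Run as: valgrind --tool=dhat --mode=ad-hoc ./prog
           Each DHAT_AD_HOC_EVENT call site appears as a program point,
           weighted by the value passed to the macro. */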
* Cachegrind:

* Callgrind:
@@ -92,29 +92,35 @@ SizeT VG_(malloc_effective_client_redzone_size)(void)
/*--- Useful functions ---*/
/*------------------------------------------------------------*/

void* VG_(cli_malloc) ( SizeT align, SizeT nbytes )
{
void* VG_(cli_malloc) ( SizeT align, SizeT nbytes )
{
   // 'align' should be valid (ie. big enough and a power of two) by now.
   // VG_(arena_memalign)() will abort if it's not.
   if (VG_MIN_MALLOC_SZB == align)
      return VG_(arena_malloc) ( VG_AR_CLIENT, "replacemalloc.cm.1",
                                 nbytes );
   else
      return VG_(arena_memalign) ( VG_AR_CLIENT, "replacemalloc.cm.2",
      return VG_(arena_malloc) ( VG_AR_CLIENT, "replacemalloc.cm.1",
                                 nbytes );
   else
      return VG_(arena_memalign) ( VG_AR_CLIENT, "replacemalloc.cm.2",
                                   align, nbytes );
}

void VG_(cli_free) ( void* p )
{
   VG_(arena_free) ( VG_AR_CLIENT, p );
}

// Useful for querying user blocks.
SizeT VG_(cli_malloc_usable_size) ( void* p )
{
void* VG_(cli_realloc) ( void* ptr, SizeT nbytes )
{
   return VG_(arena_realloc) ( VG_AR_CLIENT, "replacemalloc.cr.1",
                               ptr, nbytes );
}

void VG_(cli_free) ( void* p )
{
   VG_(arena_free) ( VG_AR_CLIENT, p );
}

// Useful for querying user blocks.
SizeT VG_(cli_malloc_usable_size) ( void* p )
{
   return VG_(arena_malloc_usable_size)(VG_AR_CLIENT, p);
}

}

Bool VG_(addr_is_in_block)( Addr a, Addr start, SizeT size, SizeT rz_szB )
{
   return ( start - rz_szB <= a && a < start + size + rz_szB );
@@ -1,13 +1,14 @@
include $(top_srcdir)/Makefile.tool.am

#SUBDIRS += perf

EXTRA_DIST = docs/dh-manual.xml dh_view.html dh_view.css dh_view.js

#----------------------------------------------------------------------------
# Headers, etc
#----------------------------------------------------------------------------

pkginclude_HEADERS = \
   dhat.h

# Ensure the viewer components get copied into the install tree.
dhatdir = $(pkglibexecdir)
dhat_DATA = dh_view.html dh_view.css dh_view.js
@@ -21,10 +22,10 @@ if VGCONF_HAVE_PLATFORM_SEC
noinst_PROGRAMS += dhat-@VGCONF_ARCH_SEC@-@VGCONF_OS@
endif

EXP_DHAT_SOURCES_COMMON = dh_main.c
DHAT_SOURCES_COMMON = dh_main.c

dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \
   $(EXP_DHAT_SOURCES_COMMON)
   $(DHAT_SOURCES_COMMON)
dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \
   $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \
@@ -45,7 +46,7 @@ dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LINK = \

if VGCONF_HAVE_PLATFORM_SEC
dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \
   $(EXP_DHAT_SOURCES_COMMON)
   $(DHAT_SOURCES_COMMON)
dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \
   $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \
@@ -78,11 +79,16 @@ if VGCONF_OS_IS_DARWIN
noinst_DSYMS = $(noinst_PROGRAMS)
endif

vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_SOURCES =
# dh_replace_strmem.c runs on the simulated CPU, and is built with
# AM_CFLAGS_PSO_* (see $(top_srcdir)/Makefile.all.am).
VGPRELOAD_DHAT_SOURCES_COMMON = dh_replace_strmem.c

vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_SOURCES = \
   $(VGPRELOAD_DHAT_SOURCES_COMMON)
vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_CPPFLAGS = \
   $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_CFLAGS = \
   $(AM_CFLAGS_PSO_@VGCONF_PLATFORM_PRI_CAPS@)
   $(AM_CFLAGS_PSO_@VGCONF_PLATFORM_PRI_CAPS@) -O2
vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_DEPENDENCIES = \
   $(LIBREPLACEMALLOC_@VGCONF_PLATFORM_PRI_CAPS@)
vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_LDFLAGS = \
@@ -90,11 +96,12 @@ vgpreload_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_LDFLAGS = \
   $(LIBREPLACEMALLOC_LDFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)

if VGCONF_HAVE_PLATFORM_SEC
vgpreload_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_SOURCES =
vgpreload_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_SOURCES = \
   $(VGPRELOAD_DHAT_SOURCES_COMMON)
vgpreload_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_CPPFLAGS = \
   $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
vgpreload_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_CFLAGS = \
   $(AM_CFLAGS_PSO_@VGCONF_PLATFORM_SEC_CAPS@)
   $(AM_CFLAGS_PSO_@VGCONF_PLATFORM_SEC_CAPS@) -O2
vgpreload_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_DEPENDENCIES = \
   $(LIBREPLACEMALLOC_@VGCONF_PLATFORM_SEC_CAPS@)
vgpreload_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_LDFLAGS = \
dhat/dh_main.c: 894 lines changed
File diff suppressed because it is too large.
dhat/dh_replace_strmem.c: 41 lines (new file)
@@ -0,0 +1,41 @@
/*--------------------------------------------------------------------*/
/*--- Replacements for memcpy(), which run on the                  ---*/
/*--- simulated CPU.                                               ---*/
/*---                                          dh_replace_strmem.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of DHAT, a Valgrind tool for profiling the
   heap usage of programs.

   Copyright (C) 2020-2020 Nicholas Nethercote

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "dhat.h"

#define RECORD_COPY(_qzz_len) \
   VALGRIND_DO_CLIENT_REQUEST_STMT(_VG_USERREQ__DHAT_COPY, \
                                   (_qzz_len), 0, 0, 0, 0)

#include "../shared/vg_replace_strmem.c"

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
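A sketch of how the pieces fit together (illustrative only; the real
replacement bodies come from the shared vg_replace_strmem.c included above,
and the function below is not part of the commit):

    /* Shape of a copy replacement: each intercepted copy issues a
       _VG_USERREQ__DHAT_COPY client request via RECORD_COPY, carrying the
       number of bytes copied, which DHAT attributes to the call site. */
    static void* sketch_memcpy(void* dst, const void* src, unsigned long len)
    {
       RECORD_COPY(len);             /* tell DHAT about a copy of len bytes */
       unsigned char* d = (unsigned char*)dst;
       const unsigned char* s = (const unsigned char*)src;
       while (len--) *d++ = *s++;    /* the actual copy */
       return dst;
    }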
dhat/dh_test.js: 1070 lines changed
File diff suppressed because it is too large.
dhat/dh_view.js: 414 lines changed
@ -51,7 +51,7 @@ let gHeaderDiv, gTestingDiv, gMainDiv, gLegendDiv, gTimingsDiv;
|
||||
let gFilename;
|
||||
|
||||
// The object extracted from the JSON input.
|
||||
let gData;
|
||||
let gData = {};
|
||||
|
||||
// The root of the radix tree build from gData. A radix tree is a
|
||||
// space-optimized prefix tree in which each node that is the only child is
|
||||
@ -64,62 +64,68 @@ let gRoot;
|
||||
// - label: Used in the drop-down menu.
|
||||
// - bolds: Which fields to highlight in the output.
|
||||
// - cmpField: Field used to sort the radix tree.
|
||||
// - enable: Function saying whether this option is enabled.
|
||||
// - sig: Significance function used to determine aggregate nodes.
|
||||
// - sigLabel: Significance threshold description function.
|
||||
//
|
||||
const gSelectData = [
|
||||
{
|
||||
label: "Total (bytes)",
|
||||
label: () => `Total (${bytesUnit()})`,
|
||||
bolds: { "totalTitle": 1, "totalBytes": 1 },
|
||||
cmpField: "_totalBytes",
|
||||
enable: (aBkLt, aBkAcc) => true,
|
||||
sig: (aT) => aT._totalBytes >= 0.01 * gRoot._totalBytes,
|
||||
sigLabel: () => `\
|
||||
total >= ${bytesAndPerc(0.01 * gRoot._totalBytes, gRoot._totalBytes)}`
|
||||
},
|
||||
{
|
||||
isDefault: true,
|
||||
label: "Total (blocks)",
|
||||
label: () => `Total (${blocksUnit()})`,
|
||||
bolds: { "totalTitle": 1, "totalBlocks": 1 },
|
||||
cmpField: "_totalBlocks",
|
||||
enable: (aBkLt, aBkAcc) => true,
|
||||
sig: (aT) => aT._totalBlocks >= 0.01 * gRoot._totalBlocks,
|
||||
sigLabel: () => `\
|
||||
total >= ${blocksAndPerc(0.01 * gRoot._totalBlocks, gRoot._totalBlocks)}`
|
||||
},
|
||||
// No "Total (bytes), tiny" because it's extremely unlikely that an AP with a
|
||||
// No "Total (bytes), tiny" because it's extremely unlikely that a PP with a
|
||||
// tiny average size will take up a significant number of bytes.
|
||||
{
|
||||
label: "Total (blocks), tiny",
|
||||
label: () => `Total (${blocksUnit()}), tiny`,
|
||||
bolds: { "totalTitle": 1, "totalBlocks": 1, "totalAvgSizeBytes": 1 },
|
||||
cmpField: "_totalBlocks",
|
||||
enable: (aBkLt, aBkAcc) => true,
|
||||
sig: (aT) => aT._totalBlocks >= 0.005 * gRoot._totalBlocks &&
|
||||
aT._totalAvgSizeBytes() <= 16,
|
||||
sigLabel: () => `\
|
||||
(total >= ${blocksAndPerc(0.005 * gRoot._totalBlocks, gRoot._totalBlocks)}) && \
|
||||
(total avg size <= ${bytes(16)})`
|
||||
(avg size <= ${bytes(16)})`
|
||||
},
|
||||
// No "Total (bytes), short-lived", because an AP with few large, short-lived
|
||||
// No "Total (bytes), short-lived", because a PP with few large, short-lived
|
||||
// blocks is unlikely. (In contrast, "Total (blocks), short-lived" is useful,
|
||||
// because an AP with many small, short-lived blocks *is* likely.) And if
|
||||
// such an AP existed, it'll probably show up in "Total (bytes), zero reads
|
||||
// because a PP with many small, short-lived blocks *is* likely.) And if
|
||||
// such a PP existed, it'll probably show up in "Total (bytes), zero reads
|
||||
// or zero writes" or "Total (bytes), low-access" anyway, because there's
|
||||
// little time for accesses in 500 instructions.
|
||||
// little time for accesses in a small number of instructions.
|
||||
{
|
||||
label: "Total (blocks), short-lived",
|
||||
bolds: { "totalTitle": 1, "totalBlocks": 1, "totalAvgLifetimeInstrs": 1 },
|
||||
label: () => "Total (blocks), short-lived",
|
||||
bolds: { "totalTitle": 1, "totalBlocks": 1, "totalAvgLifetime": 1 },
|
||||
cmpField: "_totalBlocks",
|
||||
enable: (aBkLt, aBkAcc) => aBkLt,
|
||||
sig: (aT) => aT._totalBlocks >= 0.005 * gRoot._totalBlocks &&
|
||||
aT._totalAvgLifetimeInstrs() <= 500,
|
||||
aT._totalAvgLifetimes() <= gData.tuth,
|
||||
sigLabel: () => `\
|
||||
(total >= ${blocksAndPerc(0.005 * gRoot._totalBlocks, gRoot._totalBlocks)}) && \
|
||||
(total avg lifetime <= ${instrs(500)})`
|
||||
(avg lifetime <= ${time(gData.tuth)})`
|
||||
},
|
||||
{
|
||||
label: "Total (bytes), zero reads or zero writes",
|
||||
label: () => "Total (bytes), zero reads or zero writes",
|
||||
bolds: { "totalTitle": 1, "totalBytes": 1,
|
||||
"readsTitle": 1, "readsBytes": 1,
|
||||
"writesTitle": 1, "writesBytes": 1,
|
||||
},
|
||||
cmpField: "_totalBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._totalBytes >= 0.005 * gRoot._totalBytes &&
|
||||
(aT._readsBytes === 0 || aT._writesBytes === 0),
|
||||
sigLabel: () => `\
|
||||
@ -127,12 +133,13 @@ total >= ${blocksAndPerc(0.01 * gRoot._totalBlocks, gRoot._totalBlocks)}`
|
||||
((reads == ${bytes(0)}) || (writes == ${bytes(0)}))`
|
||||
},
|
||||
{
|
||||
label: "Total (blocks), zero reads or zero writes",
|
||||
label: () => "Total (blocks), zero reads or zero writes",
|
||||
bolds: { "totalTitle": 1, "totalBlocks": 1,
|
||||
"readsTitle": 1, "readsBytes": 1,
|
||||
"writesTitle": 1, "writesBytes": 1,
|
||||
},
|
||||
cmpField: "_totalBlocks",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._totalBlocks >= 0.005 * gRoot._totalBlocks &&
|
||||
(aT._readsBytes === 0 || aT._writesBytes === 0),
|
||||
sigLabel: () => `\
|
||||
@ -140,12 +147,13 @@ total >= ${blocksAndPerc(0.01 * gRoot._totalBlocks, gRoot._totalBlocks)}`
|
||||
((reads == ${bytes(0)}) || (writes == ${bytes(0)}))`
|
||||
},
|
||||
{
|
||||
label: "Total (bytes), low-access",
|
||||
label: () => "Total (bytes), low-access",
|
||||
bolds: { "totalTitle": 1, "totalBytes": 1,
|
||||
"readsTitle": 1, "readsAvgPerByte": 1,
|
||||
"writesTitle": 1, "writesAvgPerByte": 1,
|
||||
},
|
||||
cmpField: "_totalBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._totalBytes >= 0.005 * gRoot._totalBytes &&
|
||||
aT._readsBytes !== 0 &&
|
||||
aT._writesBytes !== 0 &&
|
||||
@ -158,12 +166,13 @@ total >= ${blocksAndPerc(0.01 * gRoot._totalBlocks, gRoot._totalBlocks)}`
|
||||
((reads <= ${perByte(0.4)}) || (writes <= ${perByte(0.4)}))`
|
||||
},
|
||||
{
|
||||
label: "Total (blocks), low-access",
|
||||
label: () => "Total (blocks), low-access",
|
||||
bolds: { "totalTitle": 1, "totalBlocks": 1,
|
||||
"readsTitle": 1, "readsAvgPerByte": 1,
|
||||
"writesTitle": 1, "writesAvgPerByte": 1,
|
||||
},
|
||||
cmpField: "_totalBlocks",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._totalBlocks >= 0.005 * gRoot._totalBlocks &&
|
||||
aT._readsBytes !== 0 &&
|
||||
aT._writesBytes !== 0 &&
|
||||
@ -176,14 +185,15 @@ total >= ${blocksAndPerc(0.01 * gRoot._totalBlocks, gRoot._totalBlocks)}`
|
||||
((reads <= ${perByte(0.4)}) || (writes <= ${perByte(0.4)}))`
|
||||
},
|
||||
// No "Total (avg size bytes)": not interesting.
|
||||
// No "Total (avg lifetime instrs)": covered by "Total (blocks), short-lived".
|
||||
// No "Total (avg lifetime)": covered by "Total (blocks), short-lived".
|
||||
// No "Max (bytes)": not interesting, and unclear how to sort.
|
||||
// No "Max (blocks)": not interesting, and unclear how to sort.
|
||||
// No "Max (avg size bytes)": not interesting, and unclear how to sort.
|
||||
{
|
||||
label: "At t-gmax (bytes)",
|
||||
label: () => "At t-gmax (bytes)",
|
||||
bolds: { "atTGmaxTitle": 1, "atTGmaxBytes": 1 },
|
||||
cmpField: "_atTGmaxBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkLt,
|
||||
sig: (aT) => aT._atTGmaxBytes >= 0.01 * gRoot._atTGmaxBytes,
|
||||
sigLabel: () => `\
|
||||
at-t-gmax >= ${bytesAndPerc(0.01 * gRoot._atTGmaxBytes, gRoot._atTGmaxBytes)}`
|
||||
@ -191,9 +201,10 @@ at-t-gmax >= ${bytesAndPerc(0.01 * gRoot._atTGmaxBytes, gRoot._atTGmaxBytes)}`
|
||||
// No "At t-gmax (blocks)": not interesting.
|
||||
// No "At t-gmax (avg size bytes)": not interesting.
|
||||
{
|
||||
label: "At t-end (bytes)",
|
||||
label: () => "At t-end (bytes)",
|
||||
bolds: { "atTEndTitle": 1, "atTEndBytes": 1 },
|
||||
cmpField: "_atTEndBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkLt,
|
||||
sig: (aT) => aT._atTEndBytes >= 0.01 * gRoot._atTEndBytes,
|
||||
sigLabel: () => `\
|
||||
at-t-end >= ${bytesAndPerc(0.01 * gRoot._atTEndBytes, gRoot._atTEndBytes)}`
|
||||
@ -201,17 +212,19 @@ at-t-end >= ${bytesAndPerc(0.01 * gRoot._atTEndBytes, gRoot._atTEndBytes)}`
|
||||
// No "At t-end (blocks)": not interesting.
|
||||
// No "At t-end (avg size bytes)": not interesting.
|
||||
{
|
||||
label: "Reads (bytes)",
|
||||
label: () => "Reads (bytes)",
|
||||
bolds: { "readsTitle": 1, "readsBytes": 1 },
|
||||
cmpField: "_readsBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._readsBytes >= 0.01 * gRoot._readsBytes,
|
||||
sigLabel: () => `\
|
||||
reads >= ${bytesAndPerc(0.01 * gRoot._readsBytes, gRoot._readsBytes)}`
|
||||
},
|
||||
{
|
||||
label: "Reads (bytes), high-access",
|
||||
label: () => "Reads (bytes), high-access",
|
||||
bolds: { "readsTitle": 1, "readsBytes": 1, "readsAvgPerByte": 1 },
|
||||
cmpField: "_readsBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._readsBytes >= 0.005 * gRoot._readsBytes &&
|
||||
(aT._readsAvgPerByte() >= 1000 ||
|
||||
aT._writesAvgPerByte() >= 1000),
|
||||
@ -221,17 +234,19 @@ reads >= ${bytesAndPerc(0.01 * gRoot._readsBytes, gRoot._readsBytes)}`
|
||||
},
|
||||
// No "Reads (avg per byte)": covered by other access-related ones.
|
||||
{
|
||||
label: "Writes (bytes)",
|
||||
label: () => "Writes (bytes)",
|
||||
bolds: { "writesTitle": 1, "writesBytes": 1 },
|
||||
cmpField: "_writesBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._writesBytes >= 0.01 * gRoot._writesBytes,
|
||||
sigLabel: () => `\
|
||||
writes >= ${bytesAndPerc(0.01 * gRoot._writesBytes, gRoot._writesBytes)}`
|
||||
},
|
||||
{
|
||||
label: "Writes (bytes), high-access",
|
||||
label: () => "Writes (bytes), high-access",
|
||||
bolds: { "writesTitle": 1, "writesBytes": 1, "writesAvgPerByte": 1 },
|
||||
cmpField: "_writesBytes",
|
||||
enable: (aBkLt, aBkAcc) => aBkAcc,
|
||||
sig: (aT) => aT._writesBytes >= 0.005 * gRoot._writesBytes &&
|
||||
(aT._readsAvgPerByte() >= 1000 ||
|
||||
aT._writesAvgPerByte() >= 1000),
|
||||
@ -304,10 +319,10 @@ function TreeNode(aKind, aFrames) {
|
||||
this._totalBytes = 0;
|
||||
this._totalBlocks = 0;
|
||||
|
||||
this._totalLifetimesInstrs = 0;
|
||||
this._totalLifetimes = 0;
|
||||
|
||||
// These numbers only make sense for leaf nodes. Unlike total stats, which
|
||||
// can be summed, _maxBytes/_maxBlocks for two APs can't be easily combined
|
||||
// can be summed, _maxBytes/_maxBlocks for two PPs can't be easily combined
|
||||
// because the maxes may have occurred at different times.
|
||||
if (this._kind === kLeaf) {
|
||||
this._maxBytes = 0;
|
||||
@ -341,15 +356,20 @@ function TreeNode(aKind, aFrames) {
|
||||
}
|
||||
|
||||
TreeNode.prototype = {
|
||||
_add(aTotalBytes, aTotalBlocks, aTotalLifetimesInstrs, aMaxBytes,
|
||||
_add(aTotalBytes, aTotalBlocks, aTotalLifetimes, aMaxBytes,
|
||||
aMaxBlocks, aAtTGmaxBytes, aAtTGmaxBlocks, aAtTEndBytes,
|
||||
aAtTEndBlocks, aReadsBytes, aWritesBytes, aAccesses) {
|
||||
|
||||
// We ignore this._kind, this._frames, and this._kids.
|
||||
|
||||
// Note: if !gData.bklt and/or !gData.bkacc, some of these fields these
|
||||
// values come from will be missing in the input file, so the values will
|
||||
// be `undefined`, and the fields will end up as `NaN`. But this is ok
|
||||
// because we don't show them.
|
||||
|
||||
this._totalBytes += aTotalBytes;
|
||||
this._totalBlocks += aTotalBlocks;
|
||||
this._totalLifetimesInstrs += aTotalLifetimesInstrs;
|
||||
this._totalLifetimes += aTotalLifetimes;
|
||||
|
||||
if (this._kind === kLeaf) {
|
||||
// Leaf nodes should only be added to once, because DHAT currently
|
||||
@ -391,9 +411,9 @@ TreeNode.prototype = {
|
||||
}
|
||||
},
|
||||
|
||||
_addAP(aAP) {
|
||||
this._add(aAP.tb, aAP.tbk, aAP.tli, aAP.mb, aAP.mbk, aAP.gb, aAP.gbk,
|
||||
aAP.fb, aAP.fbk, aAP.rb, aAP.wb, aAP.acc);
|
||||
_addPP(aPP) {
|
||||
this._add(aPP.tb, aPP.tbk, aPP.tl, aPP.mb, aPP.mbk, aPP.gb, aPP.gbk,
|
||||
aPP.eb, aPP.ebk, aPP.rb, aPP.wb, aPP.acc);
|
||||
},
|
||||
|
||||
// This is called in two cases.
|
||||
@ -401,7 +421,7 @@ TreeNode.prototype = {
|
||||
// cloning a node).
|
||||
// - Aggregating multiple nodes.
|
||||
_addNode(aT) {
|
||||
this._add(aT._totalBytes, aT._totalBlocks, aT._totalLifetimesInstrs,
|
||||
this._add(aT._totalBytes, aT._totalBlocks, aT._totalLifetimes,
|
||||
aT._maxBytes, aT._maxBlocks, aT._atTGmaxBytes, aT._atTGmaxBlocks,
|
||||
aT._atTEndBytes, aT._atTEndBlocks,
|
||||
aT._readsBytes, aT._writesBytes, aT._accesses);
|
||||
@ -409,7 +429,7 @@ TreeNode.prototype = {
|
||||
|
||||
// Split the node after the aTi'th internal frame. The inheriting kid will
|
||||
// get the post-aTi frames; the new kid will get aNewFrames.
|
||||
_split(aTi, aAP, aNewFrames) {
|
||||
_split(aTi, aPP, aNewFrames) {
|
||||
// kid1 inherits t's kind and values.
|
||||
let inheritedFrames = this._frames.splice(aTi + 1);
|
||||
let kid1 = new TreeNode(this._kind, inheritedFrames);
|
||||
@ -420,7 +440,7 @@ TreeNode.prototype = {
|
||||
|
||||
// Put all remaining frames into kid2.
|
||||
let kid2 = new TreeNode(kLeaf, aNewFrames);
|
||||
kid2._addAP(aAP);
|
||||
kid2._addPP(aPP);
|
||||
|
||||
// Update this.
|
||||
if (this._kind === kLeaf) {
|
||||
@ -432,15 +452,15 @@ TreeNode.prototype = {
|
||||
delete this._maxBlocks;
|
||||
}
|
||||
this._kids = [kid1, kid2];
|
||||
this._addAP(aAP);
|
||||
this._addPP(aPP);
|
||||
},
|
||||
|
||||
_totalAvgSizeBytes() {
|
||||
return div(this._totalBytes, this._totalBlocks);
|
||||
},
|
||||
|
||||
_totalAvgLifetimeInstrs() {
|
||||
return div(this._totalLifetimesInstrs, this._totalBlocks);
|
||||
_totalAvgLifetimes() {
|
||||
return div(this._totalLifetimes, this._totalBlocks);
|
||||
},
|
||||
|
||||
_maxAvgSizeBytes() {
|
||||
@ -474,15 +494,15 @@ function checkFields(aObj, aFields) {
|
||||
}
|
||||
}
|
||||
|
||||
// Do basic checking of an AP read from file.
|
||||
function checkAP(aAP) {
|
||||
let fields = ["tb", "tbk", "tli",
|
||||
"mb", "mbk",
|
||||
"gb", "gbk",
|
||||
"fb", "fbk",
|
||||
"rb", "wb",
|
||||
"fs"];
|
||||
checkFields(aAP, fields);
|
||||
// Do basic checking of a PP read from file.
|
||||
function checkPP(aPP) {
|
||||
checkFields(aPP, ["tb", "tbk", "fs"]);
|
||||
if (gData.bklt) {
|
||||
checkFields(aPP, ["mb", "mbk", "gb", "gbk", "eb", "ebk"]);
|
||||
}
|
||||
if (gData.bkacc) {
|
||||
checkFields(aPP, ["rb", "wb"]);
|
||||
}
|
||||
}
|
||||
|
||||
// Access counts latch as 0xffff. Treating 0xffff as Infinity gives us exactly
|
||||
@ -497,51 +517,78 @@ function normalizeAccess(aAcc) {
|
||||
assert(false, "too-large access value");
|
||||
}
|
||||
|
||||
const kExpectedFileVersion = 1;
|
||||
const kExpectedFileVersion = 2;
|
||||
|
||||
// Build gRoot from gData.
|
||||
function buildTree() {
|
||||
// Check global values.
|
||||
let fields = ["dhatFileVersion",
|
||||
let fields = ["dhatFileVersion", "mode", "verb",
|
||||
"bklt", "bkacc",
|
||||
"tu", "Mtu",
|
||||
"cmd", "pid",
|
||||
"mi", "ei",
|
||||
"aps", "ftbl"];
|
||||
"te", "pps", "ftbl"];
|
||||
checkFields(gData, fields);
|
||||
if (gData.dhatFileVersion != kExpectedFileVersion) {
|
||||
throw Error(`data file has version number ${gData.dhatFileVersion}, ` +
|
||||
`expected version number ${kExpectedFileVersion}`);
|
||||
throw new Error(
|
||||
`data file has version number ${gData.dhatFileVersion}, ` +
|
||||
`expected version number ${kExpectedFileVersion}`);
|
||||
}
|
||||
|
||||
if (gData.bklt) {
|
||||
checkFields(gData, ["tg", "tuth"]);
|
||||
}
|
||||
|
||||
// Update sort metric labels, and disable sort metrics that aren't allowed
|
||||
// for this data.
|
||||
for (let [i, option] of gSelect.childNodes.entries()) {
|
||||
let data = gSelectData[i];
|
||||
option.label = data.label();
|
||||
option.disabled = !data.enable(gData.bklt, gData.bkacc);
|
||||
}
|
||||
|
||||
// If the selected sort metric was just disabled, switch the sort metric
|
||||
// back to the default (which is never disabled).
|
||||
let option = gSelect.childNodes[gSelect.selectedIndex];
|
||||
if (option.disabled) {
|
||||
for (let [i, data] of gSelectData.entries()) {
|
||||
let option = gSelect.childNodes[i];
|
||||
if (data.isDefault) {
|
||||
option.selected = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build the radix tree. Nodes are in no particular order to start with. The
|
||||
// algorithm is tricky because we need to use internal frames when possible.
|
||||
gRoot = new TreeNode(kLeaf, [0]); // Frame 0 is always "[root]".
|
||||
|
||||
for (let [i, ap] of gData.aps.entries()) {
|
||||
checkAP(ap);
|
||||
for (let [i, pp] of gData.pps.entries()) {
|
||||
checkPP(pp);
|
||||
|
||||
// Decompress the run-length encoding in `acc`, if present.
|
||||
if (ap.acc) {
|
||||
if (pp.acc) {
|
||||
let acc = [];
|
||||
for (let i = 0; i < ap.acc.length; i++) {
|
||||
if (ap.acc[i] < 0) {
|
||||
for (let i = 0; i < pp.acc.length; i++) {
|
||||
if (pp.acc[i] < 0) {
|
||||
// A negative number encodes a repeat count. The following entry has
|
||||
// the value to be repeated.
|
||||
let reps = -ap.acc[i++];
|
||||
let val = ap.acc[i];
|
||||
let reps = -pp.acc[i++];
|
||||
let val = pp.acc[i];
|
||||
for (let j = 0; j < reps; j++) {
|
||||
acc.push(normalizeAccess(val));
|
||||
}
|
||||
} else {
|
||||
acc.push(normalizeAccess(ap.acc[i]));
|
||||
acc.push(normalizeAccess(pp.acc[i]));
|
||||
}
|
||||
}
|
||||
ap.acc = acc;
|
||||
pp.acc = acc;
|
||||
}
|
||||
|
||||
// The first AP is a special case, because we have to build gRoot.
|
||||
// The first PP is a special case, because we have to build gRoot.
|
||||
if (i === 0) {
|
||||
gRoot._frames.push(...ap.fs);
|
||||
gRoot._addAP(ap);
|
||||
gRoot._frames.push(...pp.fs);
|
||||
gRoot._addPP(pp);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -553,8 +600,7 @@ function buildTree() {
|
||||
// `abcd` is a frame sequence (and `-` is an empty sequence), `N` is a node
|
||||
// value, and `Xs` are the node's children.
|
||||
|
||||
for (let [j, kidFrame] of ap.fs.entries()) {
|
||||
|
||||
for (let [j, kidFrame] of pp.fs.entries()) {
|
||||
// Search for kidFrame among internal frames.
|
||||
if (ti + 1 < t._frames.length) {
|
||||
// t has an internal frame at the right index.
|
||||
@ -566,7 +612,7 @@ function buildTree() {
|
||||
// The internal frame doesn't match. Split the node.
|
||||
//
|
||||
// E.g. abcd:20-[] + abef:10 => ab:30-[cd:20-[], ef:10-[]]
|
||||
t._split(ti, ap, ap.fs.slice(j));
|
||||
t._split(ti, pp, pp.fs.slice(j));
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
@ -580,12 +626,12 @@ function buildTree() {
|
||||
// get the leftover frames.
|
||||
//
|
||||
// E.g. ab:20-[] + abcd:10 => ab:30-[-:20-[], cd:10-[]]
|
||||
t._split(ti, ap, ap.fs.slice(j));
|
||||
t._split(ti, pp, pp.fs.slice(j));
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
||||
t._addAP(ap);
|
||||
t._addPP(pp);
|
||||
|
||||
// Search for the frame among the kids.
|
||||
let kid;
|
||||
@ -604,8 +650,8 @@ function buildTree() {
|
||||
//
|
||||
// E.g. ab:20-[c:10-Xs, d:10-Ys] + abef:10 =>
|
||||
// ab:30-[c:10-Xs, d:10-Ys, ef:10-[]]
|
||||
kid = new TreeNode(kLeaf, ap.fs.slice(j));
|
||||
kid._addAP(ap);
|
||||
kid = new TreeNode(kLeaf, pp.fs.slice(j));
|
||||
kid._addPP(pp);
|
||||
t._kids.push(kid);
|
||||
done = true;
|
||||
break;
|
||||
@ -615,9 +661,9 @@ function buildTree() {
|
||||
|
||||
if (!done) {
|
||||
// If we reach here, either:
|
||||
// - ap's frames match an existing frame sequence, in which case we
|
||||
// just need to _addAP(); or
|
||||
// - ap's frames are a subsequence of an existing sequence, in which
|
||||
// - pp's frames match an existing frame sequence, in which case we
|
||||
// just need to _addPP(); or
|
||||
// - pp's frames are a subsequence of an existing sequence, in which
|
||||
// case we must split.
|
||||
|
||||
if (ti + 1 < t._frames.length) {
|
||||
@ -625,20 +671,20 @@ function buildTree() {
|
||||
// frames. Split, creating an empty node.
|
||||
//
|
||||
// E.g. abcd:20-Xs + ab:10 => ab:30-[cd:20-Xs, -:10-[]]
|
||||
t._split(ti, ap, []);
|
||||
t._split(ti, pp, []);
|
||||
|
||||
} else if (!t._kids) {
|
||||
// This is impossible because DHAT currently produces records with
|
||||
// unique locations. If we remove addresses from frames in the future
|
||||
// then duplicate locations will occur, and the following code is how
|
||||
// it must be handled.
|
||||
throw Error(`data file contains a repeated location`);
|
||||
throw new Error(`data file contains a repeated location (1)`);
|
||||
|
||||
// Matches an existing sequence that doesn't end in node with empty
|
||||
// frames. Add the AP.
|
||||
// frames. Add the PP.
|
||||
//
|
||||
// E.g. ab:20-[] + ab:10 => ab:30-[]
|
||||
t._addAP(ap);
|
||||
t._addPP(pp);
|
||||
|
||||
} else {
|
||||
// Look for a kid with empty frames.
|
||||
@ -655,14 +701,14 @@ function buildTree() {
|
||||
// unique locations. If we remove addresses from frames in the future
|
||||
// then duplicate locations will occur, and the following code is how
|
||||
// it must be handled.
|
||||
throw Error(`data file contains a repeated location`);
|
||||
throw new Error(`data file contains a repeated location (2)`);
|
||||
|
||||
// Matches an existing sequence that ends in a node with empty
|
||||
// frames. Add the AP.
|
||||
// frames. Add the PP.
|
||||
//
|
||||
// E.g. ab:20-[c:10-Xs, -:10-[]] + ab:10 => ab:30-[c:10-Xs, -:20-[]]
|
||||
t._addAP(ap);
|
||||
emptyKid._addAP(ap);
|
||||
t._addPP(pp);
|
||||
emptyKid._addPP(pp);
|
||||
|
||||
} else {
|
||||
// A subsequence of an existing sequence that ends at the end of t's
|
||||
@ -671,14 +717,13 @@ function buildTree() {
|
||||
// E.g. ab:20-[c:10-Xs, d:10-Ys] + ab:10 =>
|
||||
// ab:30-[c:10-Xs, d:10-Ys, -:10-[]]
|
||||
let newKid = new TreeNode(kLeaf, []);
|
||||
newKid._addAP(ap);
|
||||
newKid._addPP(pp);
|
||||
|
||||
t._kids.push(newKid);
|
||||
t._addAP(ap);
|
||||
t._addPP(pp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -697,11 +742,23 @@ function perc(aNum, aDenom) {
|
||||
}
|
||||
|
||||
function perMinstr(aN) {
|
||||
return `${kDFormat.format(div(1000000 * aN, gData.ei))}/Minstr`;
|
||||
return `${kDFormat.format(div(1000000 * aN, gData.te))}/${gData.Mtu}`;
|
||||
}
|
||||
|
||||
function byteUnit() {
|
||||
return gData.hasOwnProperty("bu") ? gData.bsu : "byte";
|
||||
}
|
||||
|
||||
function bytesUnit() {
|
||||
return gData.hasOwnProperty("bsu") ? gData.bsu : "bytes";
|
||||
}
|
||||
|
||||
function blocksUnit() {
|
||||
return gData.hasOwnProperty("bksu") ? gData.bksu : "blocks";
|
||||
}
|
||||
|
||||
function bytes(aN) {
|
||||
return `${kDFormat.format(aN)} bytes`;
|
||||
return `${kDFormat.format(aN)} ${bytesUnit()}`;
|
||||
}
|
||||
|
||||
function bytesAndPerc(aN, aTotalN) {
|
||||
@ -713,7 +770,7 @@ function bytesAndPercAndRate(aN, aTotalN) {
|
||||
}
|
||||
|
||||
function blocks(aN) {
|
||||
return `${kDFormat.format(aN)} blocks`;
|
||||
return `${kDFormat.format(aN)} ${blocksUnit()}`;
|
||||
}
|
||||
|
||||
function blocksAndPerc(aN, aTotalN) {
|
||||
@ -729,15 +786,15 @@ function avgSizeBytes(aN) {
|
||||
}
|
||||
|
||||
function perByte(aN) {
|
||||
return `${kDFormat.format(aN)}/byte`;
|
||||
return `${kDFormat.format(aN)}/${byteUnit()}`;
|
||||
}
|
||||
|
||||
function instrs(aN) {
|
||||
return `${kDFormat.format(aN)} instrs`;
|
||||
function time(aN) {
|
||||
return `${kDFormat.format(aN)} ${gData.tu}`;
|
||||
}
|
||||
|
||||
function avgLifetimeInstrs(aN) {
|
||||
return `avg lifetime ${instrs(aN)}`;
|
||||
function avgLifetime(aN) {
|
||||
return `avg lifetime ${time(aN)}`;
|
||||
}
|
||||
|
||||
function accesses(aAccesses) {
|
||||
@ -817,6 +874,7 @@ function appendInvocationAndTimes(aP) {
|
||||
let v, v1, v2;
|
||||
|
||||
v = "Invocation {\n";
|
||||
v += ` Mode: ${gData.mode}\n`;
|
||||
v += ` Command: ${gData.cmd}\n`;
|
||||
v += ` PID: ${gData.pid}\n`;
|
||||
v += "}\n\n";
|
||||
@ -825,9 +883,11 @@ function appendInvocationAndTimes(aP) {
|
||||
|
||||
v = "Times {\n";
|
||||
|
||||
v1 = perc(gData.mi, gData.ei);
|
||||
v += ` t-gmax: ${instrs(gData.mi)} (${v1} of program duration)\n`;
|
||||
v += ` t-end: ${instrs(gData.ei)}\n`;
|
||||
v1 = perc(gData.tg, gData.te);
|
||||
if (gData.bklt) {
|
||||
v += ` t-gmax: ${time(gData.tg)} (${v1} of program duration)\n`;
|
||||
}
|
||||
v += ` t-end: ${time(gData.te)}\n`;
|
||||
|
||||
v += "}\n\n";
|
||||
|
||||
@ -1017,103 +1077,109 @@ function appendTreeInner(aT, aP, aBolds, aCmp, aPc, aSig, aNodeIdNums,
|
||||
|
||||
let v1, v2, v3, v4, v5;
|
||||
|
||||
// "AP" + node ID + kid count.
|
||||
// "PP" + node ID + kid count.
|
||||
v1 = aNodeIdNums.join('.');
|
||||
v2 = aNumSibs + 1;
|
||||
v3 = kids ? `(${kids.length} children) ` : "";
|
||||
fr(`AP ${v1}/${v2} ${v3}{`, true, false);
|
||||
fr(`PP ${v1}/${v2} ${v3}{`, true, false);
|
||||
nl(true);
|
||||
|
||||
// "Total".
|
||||
v1 = bytesAndPercAndRate(aT._totalBytes, gRoot._totalBytes);
|
||||
v2 = blocksAndPercAndRate(aT._totalBlocks, gRoot._totalBlocks);
|
||||
v3 = avgSizeBytes(aT._totalAvgSizeBytes());
|
||||
v4 = avgLifetimeInstrs(aT._totalAvgLifetimeInstrs());
|
||||
v5 = perc(aT._totalAvgLifetimeInstrs(), gData.ei);
|
||||
v4 = avgLifetime(aT._totalAvgLifetimes());
|
||||
v5 = perc(aT._totalAvgLifetimes(), gData.te);
|
||||
fr(" Total: ", aBolds.totalTitle);
|
||||
fr(v1, aBolds.totalBytes);
|
||||
fr(" in ");
|
||||
fr(v2, aBolds.totalBlocks);
|
||||
fr(", ", aBolds.totalAvgSizeBytes, false);
|
||||
fr(v3, aBolds.totalAvgSizeBytes);
|
||||
fr(", ", aBolds.totalAvgLifetimeInstrs, false);
|
||||
fr(`${v4} (${v5} of program duration)`, aBolds.totalAvgLifetimeInstrs);
|
||||
if (gData.bklt) {
|
||||
fr(", ", aBolds.totalAvgLifetime, false);
|
||||
fr(`${v4} (${v5} of program duration)`, aBolds.totalAvgLifetime);
|
||||
}
|
||||
nl(aBolds.totalTitle);
|
||||
|
||||
// "Max".
|
||||
if (aT !== gRoot && aT._kind === kLeaf) {
|
||||
assert(!kids, "leaf node has children");
|
||||
// These percentages are relative to the local totals, not the root
|
||||
// totals.
|
||||
v1 = bytes(aT._maxBytes);
|
||||
v2 = blocks(aT._maxBlocks);
|
||||
v3 = avgSizeBytes(aT._maxAvgSizeBytes());
|
||||
fr(` Max: ${v1} in ${v2}, ${v3}`);
|
||||
nl();
|
||||
if (gData.bklt) {
|
||||
// "Max".
|
||||
if (aT !== gRoot && aT._kind === kLeaf) {
|
||||
assert(!kids, "leaf node has children");
|
||||
// These percentages are relative to the local totals, not the root
|
||||
// totals.
|
||||
v1 = bytes(aT._maxBytes);
|
||||
v2 = blocks(aT._maxBlocks);
|
||||
v3 = avgSizeBytes(aT._maxAvgSizeBytes());
|
||||
fr(` Max: ${v1} in ${v2}, ${v3}`);
|
||||
nl();
|
||||
}
|
||||
|
||||
// "At t-gmax".
|
||||
v1 = bytesAndPerc(aT._atTGmaxBytes, gRoot._atTGmaxBytes);
|
||||
v2 = blocksAndPerc(aT._atTGmaxBlocks, gRoot._atTGmaxBlocks);
|
||||
v3 = avgSizeBytes(aT._atTGmaxAvgSizeBytes());
|
||||
fr(" At t-gmax: ", aBolds.atTGmaxTitle);
|
||||
fr(v1, aBolds.atTGmaxBytes);
|
||||
fr(` in ${v2}, ${v3}`);
|
||||
nl(aBolds.atTGmaxTitle);
|
||||
|
||||
// "At t-end".
|
||||
v1 = bytesAndPerc(aT._atTEndBytes, gRoot._atTEndBytes);
|
||||
v2 = blocksAndPerc(aT._atTEndBlocks, gRoot._atTEndBlocks);
|
||||
v3 = avgSizeBytes(aT._atTEndAvgSizeBytes());
|
||||
fr(" At t-end: ", aBolds.atTEndTitle);
|
||||
fr(v1, aBolds.atTEndBytes);
|
||||
fr(` in ${v2}, ${v3}`);
|
||||
nl(aBolds.atTEndTitle);
|
||||
}
|
||||
|
||||
// "At t-gmax".
|
||||
v1 = bytesAndPerc(aT._atTGmaxBytes, gRoot._atTGmaxBytes);
|
||||
v2 = blocksAndPerc(aT._atTGmaxBlocks, gRoot._atTGmaxBlocks);
|
||||
v3 = avgSizeBytes(aT._atTGmaxAvgSizeBytes());
|
||||
fr(" At t-gmax: ", aBolds.atTGmaxTitle);
|
||||
fr(v1, aBolds.atTGmaxBytes);
|
||||
fr(` in ${v2}, ${v3}`);
|
||||
nl(aBolds.atTGmaxTitle);
|
||||
if (gData.bkacc) {
|
||||
// "Reads".
|
||||
v1 = bytesAndPercAndRate(aT._readsBytes, gRoot._readsBytes);
|
||||
v2 = perByte(aT._readsAvgPerByte());
|
||||
fr(" Reads: ", aBolds.readsTitle);
|
||||
fr(v1, aBolds.readsBytes);
|
||||
fr(", ", aBolds.readsBytes && aBolds.readsAvgPerByte, false);
|
||||
fr(v2, aBolds.readsAvgPerByte);
|
||||
nl(aBolds.readsTitle);
|
||||
|
||||
// "At t-end".
|
||||
v1 = bytesAndPerc(aT._atTEndBytes, gRoot._atTEndBytes);
|
||||
v2 = blocksAndPerc(aT._atTEndBlocks, gRoot._atTEndBlocks);
|
||||
v3 = avgSizeBytes(aT._atTEndAvgSizeBytes());
|
||||
fr(" At t-end: ", aBolds.atTEndTitle);
|
||||
fr(v1, aBolds.atTEndBytes);
|
||||
fr(` in ${v2}, ${v3}`);
|
||||
nl(aBolds.atTEndTitle);
|
||||
// "Writes".
|
||||
v1 = bytesAndPercAndRate(aT._writesBytes, gRoot._writesBytes);
|
||||
v2 = perByte(aT._writesAvgPerByte());
|
||||
fr(" Writes: ", aBolds.writesTitle);
|
||||
fr(v1, aBolds.writesBytes);
|
||||
fr(", ", aBolds.writesBytes && aBolds.writesAvgPerByte, false);
|
||||
fr(v2, aBolds.writesAvgPerByte);
|
||||
nl(aBolds.writesTitle);
|
||||
|
||||
// "Reads".
|
||||
v1 = bytesAndPercAndRate(aT._readsBytes, gRoot._readsBytes);
|
||||
v2 = perByte(aT._readsAvgPerByte());
|
||||
fr(" Reads: ", aBolds.readsTitle);
|
||||
fr(v1, aBolds.readsBytes);
|
||||
fr(", ", aBolds.readsBytes && aBolds.readsAvgPerByte, false);
|
||||
fr(v2, aBolds.readsAvgPerByte);
|
||||
nl(aBolds.readsTitle);
|
||||
|
||||
// "Writes".
|
||||
v1 = bytesAndPercAndRate(aT._writesBytes, gRoot._writesBytes);
|
||||
v2 = perByte(aT._writesAvgPerByte());
|
||||
fr(" Writes: ", aBolds.writesTitle);
|
||||
fr(v1, aBolds.writesBytes);
|
||||
fr(", ", aBolds.writesBytes && aBolds.writesAvgPerByte, false);
|
||||
fr(v2, aBolds.writesAvgPerByte);
|
||||
nl(aBolds.writesTitle);
|
||||
|
||||
// "Accesses". We show 32 per line (but not on aggregate nodes).
|
||||
if (aT._accesses && aT._accesses.length > 0) {
|
||||
let v = " Accesses: {";
|
||||
let prevN;
|
||||
for (let [i, n] of aT._accesses.entries()) {
|
||||
if ((i % 32) === 0) {
|
||||
fr(v);
|
||||
nl();
|
||||
v1 = i.toString().padStart(3, ' ');
|
||||
v = ` [${v1}] `;
|
||||
v += `${accesses(n)} `;
|
||||
} else {
|
||||
// Use a ditto mark for repeats.
|
||||
v += (n === prevN && n !== 0) ? "〃 " : `${accesses(n)} `;
|
||||
// "Accesses". We show 32 per line (but not on aggregate nodes).
|
||||
if (aT._accesses && aT._accesses.length > 0) {
|
||||
let v = " Accesses: {";
|
||||
let prevN;
|
||||
for (let [i, n] of aT._accesses.entries()) {
|
||||
if ((i % 32) === 0) {
|
||||
fr(v);
|
||||
nl();
|
||||
v1 = i.toString().padStart(3, ' ');
|
||||
v = ` [${v1}] `;
|
||||
v += `${accesses(n)} `;
|
||||
} else {
|
||||
// Use a ditto mark for repeats.
|
||||
v += (n === prevN && n !== 0) ? "〃 " : `${accesses(n)} `;
|
||||
}
|
||||
prevN = n;
|
||||
}
|
||||
prevN = n;
|
||||
}
|
||||
fr(v);
|
||||
nl();
|
||||
fr(v);
|
||||
nl();
|
||||
|
||||
fr(" }");
|
||||
nl();
|
||||
fr(" }");
|
||||
nl();
|
||||
}
|
||||
}
|
||||
|
||||
// "Allocated at".
|
||||
fr(" Allocated at {", true, false);
|
||||
fr(` ${gData.verb} at {`, true, false);
|
||||
nl(true);
|
||||
if (aT._kind === kAgg) {
|
||||
// Don't print ancestor frames; just print the "insignificant" frame.
|
||||
@ -1219,7 +1285,7 @@ function appendTree(aP, aBolds, aCmp, aPc, aSig) {
|
||||
}
|
||||
|
||||
function appendSignificanceThreshold(aP, aSigLabel) {
|
||||
let v = `\nAP significance threshold: ${aSigLabel()}\n`;
|
||||
let v = `\nPP significance threshold: ${aSigLabel()}\n`;
|
||||
appendElementWithText(aP, "span", v, "threshold");
|
||||
}
|
||||
|
||||
@ -1287,7 +1353,7 @@ function displayTree(aTRead, aTParse, aTBuild) {
|
||||
// Get details relating to the chosen sort metrics.
|
||||
let data = gSelectData[gSelect.selectedIndex];
|
||||
let bolds = data.bolds;
|
||||
let label = data.label;
|
||||
let label = data.label();
|
||||
let cmpField = data.cmpField;
|
||||
let sig = data.sig;
|
||||
let sigLabel = data.sigLabel;
|
||||
@ -1397,7 +1463,7 @@ function onLoad() {
|
||||
gSelect = appendElement(selectDiv, "select");
|
||||
gSelect.onchange = changeSortMetric;
|
||||
for (let [i, data] of gSelectData.entries()) {
|
||||
let option = appendElementWithText(gSelect, "option", data.label);
|
||||
let option = appendElementWithText(gSelect, "option", data.label());
|
||||
option.value = i;
|
||||
if (data.isDefault) {
|
||||
option.selected = true;
|
||||
@ -1421,13 +1487,15 @@ function onLoad() {
|
||||
appendElementWithText(ul, "li", "'t-gmax': time of global heap maximum " +
|
||||
"(as measured in bytes)");
|
||||
appendElementWithText(ul, "li", "'t-end': time of program end");
|
||||
// The file may use different units (via the `tu` and `Mtu` fields), but
|
||||
// these are the standard units so mention them here.
|
||||
appendElementWithText(ul, "li", "'instrs': instructions");
|
||||
appendElementWithText(ul, "li", "'Minstr': mega-instruction, i.e. one " +
|
||||
"million instructions");
|
||||
appendElementWithText(ul, "li", "'AP': allocation point");
|
||||
appendElementWithText(ul, "li", "'PP': program point");
|
||||
appendElementWithText(ul, "li", "'avg': average");
|
||||
appendElementWithText(ul, "li", "'-' (in accesses): zero");
|
||||
appendElementWithText(ul, "li", "'∞' (in accesses): leaf AP counts max out " +
|
||||
appendElementWithText(ul, "li", "'∞' (in accesses): leaf PP counts max out " +
|
||||
"at 65534; larger counts are treated as " +
|
||||
"infinity");
|
||||
appendElementWithText(ul, "li", "'〃' (in accesses): same as previous entry");
|
||||
|
||||
dhat/dhat.h: 75 lines (new file)
@@ -0,0 +1,75 @@
|
||||
|
||||
/*
|
||||
----------------------------------------------------------------
|
||||
|
||||
Notice that the following BSD-style license applies to this one
|
||||
file (dhat.h) only. The rest of Valgrind is licensed under the
|
||||
terms of the GNU General Public License, version 2, unless
|
||||
otherwise indicated. See the COPYING file in the source
|
||||
distribution for details.
|
||||
|
||||
----------------------------------------------------------------
|
||||
|
||||
This file is part of DHAT, a Valgrind tool for profiling the
|
||||
heap usage of programs.
|
||||
|
||||
Copyright (C) 2020 Nicholas Nethercote. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. The origin of this software must not be misrepresented; you must
|
||||
not claim that you wrote the original software. If you use this
|
||||
software in a product, an acknowledgment in the product
|
||||
documentation would be appreciated but is not required.
|
||||
|
||||
3. Altered source versions must be plainly marked as such, and must
|
||||
not be misrepresented as being the original software.
|
||||
|
||||
4. The name of the author may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
----------------------------------------------------------------
|
||||
|
||||
Notice that the above BSD-style license applies to this one file
|
||||
(dhat.h) only. The entire rest of Valgrind is licensed under
|
||||
the terms of the GNU General Public License, version 2. See the
|
||||
COPYING file in the source distribution for details.
|
||||
|
||||
----------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "valgrind.h"
|
||||
|
||||
typedef
|
||||
enum {
|
||||
VG_USERREQ__DHAT_AD_HOC_EVENT = VG_USERREQ_TOOL_BASE('D', 'H'),
|
||||
|
||||
// This is just for DHAT's internal use. Don't use it.
|
||||
_VG_USERREQ__DHAT_COPY = VG_USERREQ_TOOL_BASE('D','H') + 256
|
||||
} Vg_DHATClientRequest;
|
||||
|
||||
// Record an ad hoc event. The meaning of the weight argument will depend on
|
||||
// what the event represents, which is up to the user. If no meaningful weight
|
||||
// argument exists, just use 1.
|
||||
#define DHAT_AD_HOC_EVENT(_qzz_weight) \
|
||||
VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DHAT_AD_HOC_EVENT, \
|
||||
(_qzz_weight), 0, 0, 0, 0)
|
||||
|
||||
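A usage sketch (illustrative; the caller below is made up): when a meaningful
weight exists, pass it instead of 1 so that, under --mode=ad-hoc, DHAT
aggregates a size per program point rather than just a count of events.

    static void process_message(const char* buf, unsigned long len)
    {
       DHAT_AD_HOC_EVENT(len);   /* weight = bytes handled by this event */
       /* ... parse buf ... */
    }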
@ -16,15 +16,15 @@
|
||||
<sect1 id="dh-manual.overview" xreflabel="Overview">
|
||||
<title>Overview</title>
|
||||
|
||||
<para>DHAT is a tool for examining how programs use their heap
|
||||
<para>DHAT is primarily a tool for examining how programs use their heap
|
||||
allocations.</para>
|
||||
|
||||
<para>It tracks the allocated blocks, and inspects every memory access
|
||||
to find which block, if any, it is to. It presents, on an allocation point
|
||||
to find which block, if any, it is to. It presents, on a program point
|
||||
basis, information about these blocks such as sizes, lifetimes, numbers of
|
||||
reads and writes, and read and write patterns.</para>
|
||||
|
||||
<para>Using this information it is possible to identify allocation points with
|
||||
<para>Using this information it is possible to identify program points with
|
||||
the following characteristics:</para>
|
||||
|
||||
<itemizedlist>
|
||||
@ -54,6 +54,9 @@ as instruction counts. This sounds a little odd at first, but it
|
||||
makes runs repeatable in a way which is not possible if CPU time is
|
||||
used.</para>
|
||||
|
||||
<para>DHAT also has support for copy profiling and ad hoc profiling. These are
|
||||
described below.</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
@ -155,11 +158,12 @@ because this can significantly reduce the size of DHAT's output files.</para>
|
||||
|
||||
<sect2 id="dh-output-header"><title>The Output Header</title>
|
||||
|
||||
<para>The first part of the output shows the program command and process ID.
|
||||
For example:</para>
|
||||
<para>The first part of the output shows the mode, program command and process
|
||||
ID. For example:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
Invocation {
|
||||
Mode: heap
|
||||
Command: /home/njn/moz/rust0/build/x86_64-unknown-linux-gnu/stage2/bin/rustc --crate-name tuple_stress src/main.rs
|
||||
PID: 18816
|
||||
}
|
||||
@ -179,18 +183,19 @@ Times {
|
||||
</sect2>
|
||||
|
||||
|
||||
<sect2 id="dh-ap-tree"><title>The AP Tree</title>
|
||||
<sect2 id="dh-ap-tree"><title>The PP Tree</title>
|
||||
|
||||
<para>The third part of the output is the largest and most interesting part,
|
||||
showing the allocation point (AP) tree.</para>
|
||||
showing the program point (PP) tree.</para>
|
||||
|
||||
|
||||
<sect3 id="dh-structure"><title>Structure</title>
|
||||
|
||||
<para>The following image shows a screenshot of part of an AP
|
||||
<para>The following image shows a screenshot of part of a PP
|
||||
tree. The font is very small because this screenshot is intended to
|
||||
demonstrate the high-level structure of the tree rather than the
|
||||
details within the text.</para>
|
||||
details within the text. (It is also slightly out-of-date, and doesn't quite
|
||||
match the current output produced by DHAT's viewer.)</para>
|
||||
|
||||
<graphic fileref="images/dh-tree.png" scalefit="1"/>
|
||||
|
||||
@ -228,7 +233,7 @@ email, bug report, etc.</para>
|
||||
<para>The root node looks like this:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
AP 1/1 (25 children) {
|
||||
PP 1/1 (25 children) {
|
||||
Total: 1,355,253,987 bytes (100%, 67,454.81/Minstr) in 5,943,417 blocks (100%, 295.82/Minstr), avg size 228.03 bytes, avg lifetime 3,134,692,250.67 instrs (15.6% of program duration)
|
||||
At t-gmax: 423,930,307 bytes (100%) in 1,575,682 blocks (100%), avg size 269.05 bytes
|
||||
At t-end: 258,002 bytes (100%) in 2,129 blocks (100%), avg size 121.18 bytes
|
||||
@ -250,11 +255,11 @@ next example will explain these in more detail.</para>
|
||||
|
||||
<sect3 id="dh-interior-nodes"><title>Interior Nodes</title>
|
||||
|
||||
<para>AP nodes further down the tree show information about a subset of
|
||||
<para>PP nodes further down the tree show information about a subset of
|
||||
allocations. For example:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
AP 1.1/25 (2 children) {
|
||||
PP 1.1/25 (2 children) {
|
||||
Total: 54,533,440 bytes (4.02%, 2,714.28/Minstr) in 458,839 blocks (7.72%, 22.84/Minstr), avg size 118.85 bytes, avg lifetime 1,127,259,403.64 instrs (5.61% of program duration)
|
||||
At t-gmax: 0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
|
||||
At t-end: 0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
|
||||
@ -288,7 +293,7 @@ stack trace that is shared by all the blocks covered by this node.</para>
|
||||
<para>The <computeroutput>Total</computeroutput> line shows that this node
|
||||
accounts for 4.02% of all bytes allocated during execution, and 7.72% of all
|
||||
blocks. These percentages are useful for comparing the significance of
|
||||
different nodes within a single profile; an AP that accounts for 10% of bytes
|
||||
different nodes within a single profile; a PP that accounts for 10% of bytes
|
||||
allocated is likely to be more interesting than one that accounts for
|
||||
2%.</para>
|
||||
|
||||
@ -301,16 +306,16 @@ different workloads.</para>
|
||||
average size and lifetimes of these blocks.</para>
|
||||
|
||||
<para>The <computeroutput>At t-gmax</computeroutput> line says shows that no
|
||||
blocks from this AP were alive when the global heap peak occurred. In other
|
||||
blocks from this PP were alive when the global heap peak occurred. In other
|
||||
words, these blocks do not contribute at all to the global heap peak.</para>
|
||||
|
||||
<para>The <computeroutput>At t-end</computeroutput> line shows that no blocks
|
||||
were from this AP were alive at shutdown. In other words, all those blocks were
|
||||
from this PP were alive at shutdown. In other words, all those blocks were
|
||||
explicitly freed before termination.</para>
|
||||
|
||||
<para>The <computeroutput>Reads</computeroutput> and
|
||||
<computeroutput>Writes</computeroutput> lines show how many bytes were read
|
||||
within this AP's blocks, the fraction this represents of all heap reads, and
|
||||
within this PP's blocks, the fraction this represents of all heap reads, and
|
||||
the read rate. Finally, it shows the read ratio, which is the number of reads
|
||||
per byte. In this case the number is 0.29, which is quite low -- if no byte was
|
||||
read twice, then only 29% of the allocated bytes, which means that at least 71%
|
||||
@ -336,7 +341,7 @@ vectors and hash tables, and isn't always fixable. </para>
|
||||
<para>This is a leaf node:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
AP 1.1.1.1/2 {
|
||||
PP 1.1.1.1/2 {
|
||||
Total: 31,460,928 bytes (2.32%, 1,565.9/Minstr) in 262,171 blocks (4.41%, 13.05/Minstr), avg size 120 bytes, avg lifetime 986,406,885.05 instrs (4.91% of program duration)
|
||||
Max: 16,779,136 bytes in 65,543 blocks, avg size 256 bytes
|
||||
At t-gmax: 0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
|
||||
@ -365,10 +370,10 @@ is a great-grandchild of the root; is the first grandchild of the node in the
|
||||
previous example; and has no children.</para>
|
||||
|
||||
<para>Leaf nodes contain an additional <computeroutput>Max</computeroutput>
|
||||
line, indicating the peak memory use for the blocks covered by this AP. (This
|
||||
line, indicating the peak memory use for the blocks covered by this PP. (This
|
||||
peak may have occurred at a time other than
|
||||
<computeroutput>t-gmax</computeroutput>.) In this case, 31,460,298 bytes were
|
||||
allocated from this AP, but the maximum size alive at once was 16,779,136
|
||||
allocated from this PP, but the maximum size alive at once was 16,779,136
|
||||
bytes.</para>
|
||||
|
||||
<para>Stack frames that begin with a <computeroutput>^</computeroutput> rather
|
||||
@ -383,7 +388,7 @@ This also means that each node makes complete sense on its own.</para>
|
||||
|
||||
<sect3 id="dh-access-counts"><title>Access Counts</title>
|
||||
|
||||
<para>If all blocks covered by an AP node have the same size, an additional
|
||||
<para>If all blocks covered by a PP node have the same size, an additional
|
||||
<computeroutput>Accesses</computeroutput> field will be present. It indicates
|
||||
how the reads and writes within these blocks were distributed. For
|
||||
example:</para>
|
||||
@ -399,7 +404,7 @@ Accesses: {
|
||||
}
|
||||
]]></programlisting>
|
||||
|
||||
<para>Every block covered by this AP was 32 bytes. Within all of those blocks,
|
||||
<para>Every block covered by this PP was 32 bytes. Within all of those blocks,
|
||||
byte 0 was accessed (read or written) 65,547 times, byte 1 was accessed 7
|
||||
times, byte 2 was accessed 8 times, and so on.</para>
|
||||
|
||||
@ -425,12 +430,12 @@ layout inefficiencies.</para>
|
||||
|
||||
<sect3 id="aggregate-nodes"><title>Aggregate Nodes</title>
|
||||
|
||||
<para>The AP tree is very large and many nodes represent tiny numbers of blocks
|
||||
<para>The PP tree is very large and many nodes represent tiny numbers of blocks
|
||||
and bytes. Therefore, DHAT's viewer aggregates insignificant nodes like
|
||||
this:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
AP 1.14.2/2 {
|
||||
PP 1.14.2/2 {
|
||||
Total: 5,175 blocks (0.09%, 0.26/Minstr)
|
||||
Allocated at {
|
||||
[5 insignificant]
|
||||
@ -449,15 +454,15 @@ case).</para>
|
||||
|
||||
<sect2 id="dh-output-footer"><title>The Output Footer</title>
|
||||
|
||||
<para>Below the AP tree is a line like this:</para>
|
||||
<para>Below the PP tree is a line like this:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
AP significance threshold: total >= 59,434.17 blocks (1%)
|
||||
PP significance threshold: total >= 59,434.17 blocks (1%)
|
||||
]]></programlisting>
|
||||
|
||||
<para>It shows the function used to determine if an AP node is significant. All
|
||||
<para>It shows the function used to determine if a PP node is significant. All
|
||||
nodes that don't satisfy this function are aggregated. It is occasionally
|
||||
useful if you don't understand why an AP node has been aggregated. The exact
|
||||
useful if you don't understand why a PP node has been aggregated. The exact
|
||||
threshold depends on the sort metric (see below).</para>
|
||||
|
||||
<para>Finally, the bottom of the page shows a legend that explains some of the
|
||||
@ -587,21 +592,21 @@ filtering, so that only nodes meeting a particular criteria are shown.</para>
|
||||
<para>The values within a node that represent the chosen sort metric are shown
|
||||
in bold, so they stand out.</para>
|
||||
|
||||
<para>Here is part of an AP node found with "Total (blocks), tiny", showing
|
||||
<para>Here is part of a PP node found with "Total (blocks), tiny", showing
|
||||
blocks with an average size of only 8.67 bytes:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
Total: 3,407,848 bytes (0.25%, 169.62/Minstr) in 393,214 blocks (6.62%, 19.57/Minstr), avg size 8.67 bytes, avg lifetime 1,167,795,629.1 instrs (5.81% of program duration)
|
||||
]]></programlisting>
|
||||
|
||||
<para>Here is part of an AP node found with "Total (blocks), short-lived",
|
||||
<para>Here is part of a PP node found with "Total (blocks), short-lived",
|
||||
showing blocks with an average lifetime of only 181.75 instructions:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
Total: 23,068,584 bytes (1.7%, 1,148.19/Minstr) in 262,143 blocks (4.41%, 13.05/Minstr), avg size 88 bytes, avg lifetime 181.75 instrs (0% of program duration)
|
||||
]]></programlisting>
|
||||
|
||||
<para>Here is an example of an AP identified with "Total (blocks), zero reads
|
||||
<para>Here is an example of a PP identified with "Total (blocks), zero reads
|
||||
or zero writes", showing blocks that are allocated but never touched:</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
@ -613,7 +618,7 @@ Reads: 0 bytes (0%, 0/Minstr), 0/byte
|
||||
Writes: 0 bytes (0%, 0/Minstr), 0/byte
|
||||
]]></programlisting>
|
||||
|
||||
<para>All the blocks identified by these APs are good candidates for
|
||||
<para>All the blocks identified by these PPs are good candidates for
|
||||
optimization.</para>
|
||||
|
||||
</sect2>
|
||||
@ -648,10 +653,10 @@ increasing the current heap size by 200 bytes and then decreasing it by 100
|
||||
bytes.) As a result, it can only increase the global heap peak (if indeed,
|
||||
this results in a new peak) by 100 bytes.</para>
|
||||
|
||||
<para>Finally, the allocation point assigned to the block allocated by the
|
||||
<para>Finally, the program point assigned to the block allocated by the
|
||||
<computeroutput>malloc(100)</computeroutput> call is retained once the block
|
||||
is reallocated. Which means that all 300 bytes are attributed to that
|
||||
allocation point, and no separate allocation point is created for the
|
||||
program point, and no separate program point is created for the
|
||||
<computeroutput>realloc(200)</computeroutput> call. This may be surprising,
|
||||
but it has one large benefit.</para>
|
||||
|
||||
@ -659,12 +664,84 @@ but it has one large benefit.</para>
|
||||
adds data to that buffer from numerous different points in the code,
|
||||
reallocating the buffer each time it gets full. (E.g. code generation in a
|
||||
compiler might work this way.) With the described approach, the first heap
|
||||
block and all subsequent heap blocks are attributed to the same allocation
|
||||
point. While this is something of a lie -- the first allocation point isn't
|
||||
actually responsible for the other allocations -- it is arguably better than
|
||||
having the allocation points spread around, in a distribution
|
||||
that unpredictably depends on whenever the reallocation points were
|
||||
triggered.</para>
|
||||
block and all subsequent heap blocks are attributed to the same program point.
|
||||
While this is something of a lie -- the first program point isn't actually
|
||||
responsible for the other allocations -- it is arguably better than having the
|
||||
program points spread around in a distribution that unpredictably depends on
|
||||
whenever the reallocations were triggered.</para>
|
||||
|
||||
</sect1>
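
As a minimal sketch of the attribution rule just described (hypothetical code,
not part of this commit), the malloc(100)/realloc(200) sequence from the text
would be accounted for like this:

#include <stdlib.h>

int main(void)
{
   // One program point (PP): this malloc call site. 100 bytes are
   // attributed to it.
   char* p = malloc(100);

   // Growing the block keeps the original PP: no separate PP is created for
   // this realloc call site, so all 300 bytes (100 + 200) are attributed to
   // the malloc PP. The global heap peak can rise by at most 100 bytes here,
   // because the 200-byte increase is paired with the 100-byte decrease from
   // releasing the old block.
   p = realloc(p, 200);

   free(p);
   return 0;
}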
<sect1 id="dh-manual.copy-profiling" xreflabel="Copy profiling">
<title>Copy profiling</title>

<para>If DHAT is invoked with <option>--mode=copy</option>, instead of
profiling heap operations (allocations and deallocations), it profiles copy
operations, such as <computeroutput>memcpy</computeroutput>,
<computeroutput>memmove</computeroutput>,
<computeroutput>strcpy</computeroutput>, and
<computeroutput>bcopy</computeroutput>. This can be useful for finding hot or
unnecessary copy operations.</para>

<para>Here is an example PP node from this mode:</para>

<programlisting><![CDATA[
PP 1.1.2/5 (4 children) {
Total: 1,210,925 bytes (10.03%, 4,358.66/Minstr) in 112,717 blocks (35.2%, 405.72/Minstr), avg size 10.74 bytes
Copied at {
^1: 0x4842524: memmove (vg_replace_strmem.c:1289)
#2: 0x1F0A0D: copy_nonoverlapping<u8> (intrinsics.rs:1858)
#3: 0x1F0A0D: copy_from_slice<u8> (mod.rs:2524)
#4: 0x1F0A0D: spec_extend<u8> (vec.rs:2227)
#5: 0x1F0A0D: extend_from_slice<u8> (vec.rs:1619)
#6: 0x1F0A0D: push_str (string.rs:821)
#7: 0x1F0A0D: write_str (string.rs:2418)
#8: 0x1F0A0D: <&mut W as core::fmt::Write>::write_str (mod.rs:195)
}
}
]]></programlisting>

<para>It is very similar to the PP nodes for heap profiling, but with less
information, because copy profiling doesn't involve any tracking of memory
regions with lifetimes.</para>

</sect1>


<sect1 id="dh-manual.ad-hoc-profiling" xreflabel="Ad hoc profiling">
<title>Ad hoc profiling</title>

<para>If DHAT is invoked with <option>--mode=ad-hoc</option>, instead of
profiling heap operations (allocations and deallocations), it profiles calls to
the <computeroutput>DHAT_AD_HOC_EVENT</computeroutput> client request, which is
declared in <filename>dhat/dhat.h</filename>.</para>

<para>Here is an example PP node from this mode:</para>

<programlisting><![CDATA[
PP 1.1.1.1/2 {
Total: 30 units (17.65%, 115.97/Minstr) in 1 events (14.29%, 3.87/Minstr), avg size 30 units
Occurred at {
^1: 0x109407: g (ad-hoc.c:4)
^2: 0x109425: f (ad-hoc.c:8)
#3: 0x109497: main (ad-hoc.c:14)
}
}
]]></programlisting>

<para>This kind of profiling is useful when you know a code path is hot but you
want to know more about it.</para>

<para>For example, you might want to know which callsites of a hot function
account for most of the calls. You could put a
<computeroutput>DHAT_AD_HOC_EVENT(1);</computeroutput> call at the start of
that function.</para>

<para>Alternatively, you might want to know the typical length of a vector in a
hot location. You could put a
<computeroutput>DHAT_AD_HOC_EVENT(len);</computeroutput> call at the
appropriate location, where <computeroutput>len</computeroutput> is the length
of the vector.</para>
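
As a minimal sketch of that second idiom (hypothetical code, not from this
commit; it assumes dhat/dhat.h is on the include path), the call simply passes
the length as the event's size:

#include "dhat/dhat.h"
#include <stddef.h>

// Hypothetical hot routine: record the length of each vector it is given, so
// that, when run with --mode=ad-hoc, the profile shows the total units,
// event count, and average size for this call site.
void process(const int* vec, size_t len)
{
   DHAT_AD_HOC_EVENT(len);
   (void)vec;   // the real work on vec[0..len-1] would go here
}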
</sect1>

@ -694,6 +771,17 @@ triggered.</para>
</listitem>
</varlistentry>

<varlistentry id="opt.mode" xreflabel="--mode">
<term>
<option><![CDATA[--mode=<heap|copy|ad-hoc> [default: heap] ]]></option>
</term>
<listitem>
<para>The profiling mode: heap profiling, copy profiling, or ad hoc
profiling.
</para>
</listitem>
</varlistentry>

</variablelist>

<para>Note that stacks by default have 12 frames. This may be more than

@ -5,16 +5,20 @@ dist_noinst_SCRIPTS = filter_stderr

EXTRA_DIST = \
acc.stderr.exp acc.vgtest \
ad-hoc.stderr.exp ad-hoc.vgtest \
basic.stderr.exp basic.vgtest \
big.stderr.exp big.vgtest \
copy.stderr.exp copy.vgtest \
empty.stderr.exp empty.vgtest \
sig.stderr.exp sig.vgtest \
single.stderr.exp single.vgtest

check_PROGRAMS = \
acc \
ad-hoc \
basic \
big \
copy \
empty \
sig \
single

27
dhat/tests/ad-hoc.c
Normal file
@ -0,0 +1,27 @@
#include "dhat/dhat.h"
#include <stdlib.h>
void g(void) {
DHAT_AD_HOC_EVENT(30);
}

void f(void) {
g();
DHAT_AD_HOC_EVENT(20);
g();
}

int main(void) {
f();
DHAT_AD_HOC_EVENT(10);
f();
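
// Expected ad hoc totals: each f() call records 30 + 20 + 30 = 80 units in 3
// events, so together with the 10-unit event above that is 170 units in 7
// events, matching ad-hoc.stderr.exp.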

// At one point malloc was broken with --mode=ad-hoc(!), and Valgrind was
// printing messages like "VG_USERREQ__CLIENT_CALL1: func=0x0" when malloc
// was called. So check that it's basically working...
char* p = malloc(100);
p = realloc(p, 200);
free(p);

return 0;
}

1
dhat/tests/ad-hoc.stderr.exp
Normal file
@ -0,0 +1 @@
Total: 170 units in 7 events
3
dhat/tests/ad-hoc.vgtest
Normal file
@ -0,0 +1,3 @@
prog: ad-hoc
vgopts: --mode=ad-hoc --dhat-out-file=dhat.out
cleanup: rm dhat.out
@ -3,6 +3,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "dhat/dhat.h"

int main(void)
{
@ -24,5 +25,9 @@ int main(void)

free(c);
// totals: 3008 read, 3516 write

// Should be ignored because we're not in ad hoc mode.
DHAT_AD_HOC_EVENT(100);

return 0;
}

60
dhat/tests/copy.c
Normal file
@ -0,0 +1,60 @@
// This tests --mode=copy with various copying functions.

#define _GNU_SOURCE // For mempcpy.
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

void f(char* a, char* b, wchar_t* wa, wchar_t* wb);
void test_malloc();

int main(void) {
char a[1000];
char b[1000];
for (int i = 0; i < 1000; i++) {
a[i] = 'a';
b[i] = 'b';
}
a[999] = '\0';
b[999] = '\0';

wchar_t wa[250];
wchar_t wb[250];
for (int i = 0; i < 250; i++) {
wa[i] = 'A';
wb[i] = 'B';
}
wa[249] = '\0';
wb[249] = '\0';
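
// 100 iterations of the 10 copy calls in f(), each copying ~1000 bytes,
// give roughly 1,000,000 bytes in 1,000 blocks; program startup adds a few
// more copies, which filter_copy's fuzzy matching absorbs.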
for (int i = 0; i < 100; i++) {
f(a, b, wa, wb);
}

test_malloc();
return 0;
}

void f(char* a, char* b, wchar_t* wa, wchar_t* wb) {
// The memcpy is duplicated so we have 10 calls, which makes for nice round
// numbers in the totals.
memcpy (a, b, 1000); // Redirects to memmove
memcpy (a, b, 1000); // Redirects to memmove
memmove(a, b, 1000);
mempcpy(a, b, 1000);
bcopy (a, b, 1000); // Redirects to memmove
strcpy (a, b);
strncpy(a, b, 1000);
stpcpy (a, b); // Redirects to strcpy
stpncpy(a, b, 1000);
wcscpy (wa, wb);
}

void test_malloc() {
// At one point malloc was broken with --mode=copy(!), and Valgrind was
// printing messages like "VG_USERREQ__CLIENT_CALL1: func=0x0" when malloc
// was called. So check that it's basically working...
char* p = malloc(100);
p = realloc(p, 200);
free(p);
}
1
dhat/tests/copy.stderr.exp
Normal file
@ -0,0 +1 @@
Total: 1,000,... bytes in 1,0.. blocks
4
dhat/tests/copy.vgtest
Normal file
@ -0,0 +1,4 @@
prog: copy
vgopts: --mode=copy --dhat-out-file=dhat.out
stderr_filter: filter_copy
cleanup: rm dhat.out
9
dhat/tests/filter_copy
Executable file
@ -0,0 +1,9 @@
#! /bin/sh

# It's impossible to get exact matches for copy counts because even trivial C
# programs do a few memcpy/strcpy calls, so we allow some fuzzy matching:
# anywhere from 1,000,000..1,009,999 bytes and 1,000..1,099 blocks is accepted.

./filter_stderr "$@" |
sed -e "s/1,00.,... bytes in 1,0.. blocks/1,000,... bytes in 1,0.. blocks/"

@ -21,8 +21,7 @@ sed "/^ file:\/\/\// d" |
sed "/^in a web browser/ d" |
sed "/^ \// d" | # This is pretty feeble, but I don't see
# how to do better
sed "/^Scroll to the end/ d" |
sed "/^explanation of some/ d" |
sed "/^The text at the bottom/ d" |

# and remove any blank lines in the output
sed "/^[[:space:]]*$/d"

@ -39,6 +39,7 @@
/* Can be called from VG_(tdict).malloc_malloc et al to do the actual
* alloc/freeing. */
extern void* VG_(cli_malloc) ( SizeT align, SizeT nbytes );
extern void* VG_(cli_realloc)( void* ptr, SizeT nbytes );
extern void VG_(cli_free) ( void* p );
// Returns the usable size of a heap-block. It's the asked-for size plus
// possibly some more due to rounding up.

@ -41,7 +41,7 @@ IRSB* nl_instrument ( VgCallbackClosure* closure,
const VexArchInfo* archinfo_host,
IRType gWordTy, IRType hWordTy )
{
return bb;
return bb;
}

static void nl_fini(Int exitcode)

@ -35,12 +35,13 @@
#include "pub_tool_clreq.h"

/* ---------------------------------------------------------------------
We have our own versions of these functions for two reasons:
We have our own versions of these functions for multiple reasons:
(a) it allows us to do overlap checking
(b) some of the normal versions are hyper-optimised, which fools
(b) it allows us to do copy tracking
(c) some of the normal versions are hyper-optimised, which fools
Memcheck and cause spurious value warnings. Our versions are
simpler.
(c) the glibc SSE-variants can read past the end of the input data
(d) the glibc SSE-variants can read past the end of the input data
ranges. This can cause false-positive Memcheck / Helgrind / DRD
reports.

@ -173,6 +174,15 @@ static inline void my_exit ( int x )
#ifndef RECORD_OVERLAP_ERROR
#define RECORD_OVERLAP_ERROR(s, src, dst, len) do { } while (0)
#endif

// Used for tools that record bulk copies: memcpy, strcpy, etc.
#ifndef RECORD_COPY
#define RECORD_COPY(len) do { } while (0)
#define FOR_COPY(x)
#else
#define FOR_COPY(x) x
#endif

#ifndef VALGRIND_CHECK_VALUE_IS_DEFINED
#define VALGRIND_CHECK_VALUE_IS_DEFINED(__lvalue) 1
#endif
@ -496,12 +506,14 @@ static inline void my_exit ( int x )
while (*src) *dst++ = *src++; \
*dst = 0; \
\
/* This checks for overlap after copying, unavoidable without */ \
/* This happens after copying, unavoidable without */ \
/* pre-counting length... should be ok */ \
SizeT srclen = (Addr)src-(Addr)src_orig+1; \
RECORD_COPY(srclen); \
if (is_overlap(dst_orig, \
src_orig, \
(Addr)dst-(Addr)dst_orig+1, \
(Addr)src-(Addr)src_orig+1)) \
srclen)) \
RECORD_OVERLAP_ERROR("strcpy", dst_orig, src_orig, 0); \
\
return dst_orig; \
@ -539,7 +551,9 @@ static inline void my_exit ( int x )
while (m < n && *src) { m++; *dst++ = *src++; } \
/* Check for overlap after copying; all n bytes of dst are relevant, */ \
/* but only m+1 bytes of src if terminator was found */ \
if (is_overlap(dst_orig, src_orig, n, (m < n) ? m+1 : n)) \
SizeT srclen = (m < n) ? m+1 : n; \
RECORD_COPY(srclen); \
if (is_overlap(dst_orig, src_orig, n, srclen)) \
RECORD_OVERLAP_ERROR("strncpy", dst, src, n); \
while (m++ < n) *dst++ = 0; /* must pad remainder with nulls */ \
\
@ -585,7 +599,9 @@ static inline void my_exit ( int x )
/* m non-nul bytes have now been copied, and m <= n-1. */ \
/* Check for overlap after copying; all n bytes of dst are relevant, */ \
/* but only m+1 bytes of src if terminator was found */ \
if (is_overlap(dst_orig, src_orig, n, (m < n) ? m+1 : n)) \
SizeT srclen = (m < n) ? m+1 : n; \
RECORD_COPY(srclen); \
if (is_overlap(dst_orig, src_orig, n, srclen)) \
RECORD_OVERLAP_ERROR("strlcpy", dst, src, n); \
/* Nul-terminate dst. */ \
if (n > 0) *dst = 0; \
@ -943,6 +959,7 @@ static inline void my_exit ( int x )
void* VG_REPLACE_FUNCTION_EZZ(becTag,soname,fnname) \
( void *dst, const void *src, SizeT len ) \
{ \
RECORD_COPY(len); \
if (do_ol_check && is_overlap(dst, src, len, len)) \
RECORD_OVERLAP_ERROR("memcpy", dst, src, len); \
\
@ -1034,6 +1051,7 @@ static inline void my_exit ( int x )
MEMCPY(VG_Z_LIBC_SONAME, memcpy) /* fallback case */
MEMCPY(VG_Z_LIBC_SONAME, __GI_memcpy)
MEMCPY(VG_Z_LIBC_SONAME, __memcpy_sse2)
MEMCPY(VG_Z_LIBC_SONAME, __memcpy_avx_unaligned_erms)
MEMCPY(VG_Z_LD_SO_1, memcpy) /* ld.so.1 */
MEMCPY(VG_Z_LD64_SO_1, memcpy) /* ld64.so.1 */
/* icc9 blats these around all over the place. Not only in the main
@ -1142,10 +1160,12 @@ static inline void my_exit ( int x )
\
/* This checks for overlap after copying, unavoidable without */ \
/* pre-counting length... should be ok */ \
SizeT srclen = (Addr)src-(Addr)src_orig+1; \
RECORD_COPY(srclen); \
if (is_overlap(dst_orig, \
src_orig, \
(Addr)dst-(Addr)dst_orig+1, \
(Addr)src-(Addr)src_orig+1)) \
srclen)) \
RECORD_OVERLAP_ERROR("stpcpy", dst_orig, src_orig, 0); \
\
return dst; \
@ -1185,7 +1205,9 @@ static inline void my_exit ( int x )
while (m < n && *src) { m++; *dst++ = *src++; } \
/* Check for overlap after copying; all n bytes of dst are relevant, */ \
/* but only m+1 bytes of src if terminator was found */ \
if (is_overlap(dst_str, src_orig, n, (m < n) ? m+1 : n)) \
SizeT srclen = (m < n) ? m+1 : n; \
RECORD_COPY(srclen); \
if (is_overlap(dst_str, src_orig, n, srclen)) \
RECORD_OVERLAP_ERROR("stpncpy", dst, src, n); \
dst_str = dst; \
while (m++ < n) *dst++ = 0; /* must pad remainder with nulls */ \
@ -1200,9 +1222,6 @@ static inline void my_exit ( int x )

/*---------------------- memset ----------------------*/

/* Why are we bothering to intercept this? It seems entirely
pointless. */

#define MEMSET(soname, fnname) \
void* VG_REPLACE_FUNCTION_EZZ(20210,soname,fnname) \
(void *s, Int c, SizeT n); \
@ -1301,6 +1320,7 @@ static inline void my_exit ( int x )
void VG_REPLACE_FUNCTION_EZU(20230,soname,fnname) \
(const void *srcV, void *dstV, SizeT n) \
{ \
RECORD_COPY(n); \
SizeT i; \
HChar* dst = dstV; \
const HChar* src = srcV; \
@ -1338,6 +1358,7 @@ static inline void my_exit ( int x )
void* VG_REPLACE_FUNCTION_EZU(20240,soname,fnname) \
(void *dstV, const void *srcV, SizeT n, SizeT destlen) \
{ \
RECORD_COPY(n); \
SizeT i; \
HChar* dst = dstV; \
const HChar* src = srcV; \
@ -1438,12 +1459,14 @@ static inline void my_exit ( int x )
char* VG_REPLACE_FUNCTION_EZU(20270,soname,fnname) \
(char* dst, const char* src, SizeT len) \
{ \
FOR_COPY(const HChar* src_orig = src); \
HChar* ret = dst; \
if (! len) \
goto badness; \
while ((*dst++ = *src++) != '\0') \
if (--len == 0) \
goto badness; \
RECORD_COPY((Addr)src-(Addr)src_orig); \
return ret; \
badness: \
VALGRIND_PRINTF_BACKTRACE( \
@ -1474,11 +1497,13 @@ static inline void my_exit ( int x )
char* VG_REPLACE_FUNCTION_EZU(20280,soname,fnname) \
(char* dst, const char* src, SizeT len) \
{ \
FOR_COPY(const HChar* src_orig = src); \
if (! len) \
goto badness; \
while ((*dst++ = *src++) != '\0') \
if (--len == 0) \
goto badness; \
RECORD_COPY((Addr)src-(Addr)src_orig); \
return dst - 1; \
badness: \
VALGRIND_PRINTF_BACKTRACE( \
@ -1508,6 +1533,7 @@ static inline void my_exit ( int x )
void* VG_REPLACE_FUNCTION_EZU(20290,soname,fnname) \
( void *dst, const void *src, SizeT len ) \
{ \
RECORD_COPY(len); \
SizeT len_saved = len; \
\
if (len == 0) \
@ -1557,15 +1583,13 @@ static inline void my_exit ( int x )
{ \
register HChar *d; \
register const HChar *s; \
\
if (dstlen < len) goto badness; \
\
if (dstlen < len) \
goto badness; \
RECORD_COPY(len); \
if (len == 0) \
return dst; \
\
if (is_overlap(dst, src, len, len)) \
RECORD_OVERLAP_ERROR("memcpy_chk", dst, src, len); \
\
if ( dst > src ) { \
d = (HChar *)dst + len - 1; \
s = (const HChar *)src + len - 1; \
@ -1977,11 +2001,14 @@ static inline void my_exit ( int x )
\
/* This checks for overlap after copying, unavoidable without */ \
/* pre-counting length... should be ok */ \
/* +4 because sizeof(wchar_t) == 4 */ \
SizeT srclen = (Addr)src-(Addr)src_orig+4; \
RECORD_COPY(srclen); \
if (is_overlap(dst_orig, \
src_orig, \
/* +4 because sizeof(wchar_t) == 4 */ \
(Addr)dst-(Addr)dst_orig+4, \
(Addr)src-(Addr)src_orig+4)) \
srclen)) \
RECORD_OVERLAP_ERROR("wcscpy", dst_orig, src_orig, 0); \
\
return dst_orig; \