mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-03 18:13:01 +00:00
10% reduction in debuginfo storage requirements for large applications on 32 bit platforms. This code had been present since the MacOSX port was merged but had been disabled. Remove equivalent code for shrinking the symbol tables since they are much (4 x) smaller than the line number tables, trimming them is hardly worth the effort. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12050
888 lines
32 KiB
C
888 lines
32 KiB
C
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- Format-neutral storage of and querying of info acquired from ---*/
|
|
/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
|
|
/*--- priv_storage.h ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of Valgrind, a dynamic binary instrumentation
|
|
framework.
|
|
|
|
Copyright (C) 2000-2010 Julian Seward
|
|
jseward@acm.org
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file COPYING.
|
|
*/
|
|
/*
|
|
Stabs reader greatly improved by Nick Nethercote, Apr 02.
|
|
This module was also extensively hacked on by Jeremy Fitzhardinge
|
|
and Tom Hughes.
|
|
*/
|
|
/* See comment at top of debuginfo.c for explanation of
|
|
the _svma / _avma / _image / _bias naming scheme.
|
|
*/
|
|
/* Note this is not freestanding; needs pub_core_xarray.h and
|
|
priv_tytypes.h to be included before it. */
|
|
|
|
#ifndef __PRIV_STORAGE_H
|
|
#define __PRIV_STORAGE_H
|
|
|
|
/* --------------------- SYMBOLS --------------------- */
|
|
|
|
/* A structure to hold an ELF/MachO symbol (very crudely). Usually
|
|
the symbol only has one name, which is stored in ::pri_name, and
|
|
::sec_names is NULL. If there are other names, these are stored in
|
|
::sec_names, which is a NULL terminated vector holding the names.
|
|
The vector is allocated in VG_AR_DINFO, the names themselves live
|
|
in DebugInfo::strchunks.
|
|
|
|
From the point of view of ELF, the primary vs secondary distinction
|
|
is artificial: they are all just names associated with the address,
|
|
none of which has higher precedence than any other. However, from
|
|
the point of view of mapping an address to a name to display to the
|
|
user, we need to choose one "preferred" name, and so that might as
|
|
well be installed as the pri_name, whilst all others can live in
|
|
sec_names[]. This has the convenient side effect that, in the
|
|
common case where there is only one name for the address,
|
|
sec_names[] does not need to be allocated.
|
|
*/
|
|
typedef
|
|
struct {
|
|
Addr addr; /* lowest address of entity */
|
|
Addr tocptr; /* ppc64-linux only: value that R2 should have */
|
|
UChar* pri_name; /* primary name, never NULL */
|
|
UChar** sec_names; /* NULL, or a NULL term'd array of other names */
|
|
// XXX: this could be shrunk (on 32-bit platforms) by using 30
|
|
// bits for the size and 1 bit each for isText and isIFunc. If you
|
|
// do this, make sure that all assignments to the latter two use
|
|
// 0 or 1 (or True or False), and that a positive number larger
|
|
// than 1 is never used to represent True.
|
|
UInt size; /* size in bytes */
|
|
Bool isText;
|
|
Bool isIFunc; /* symbol is an indirect function? */
|
|
}
|
|
DiSym;
|
|
|
|
/* --------------------- SRCLOCS --------------------- */
|
|
|
|
/* `struct nlist' in a.out.h keeps line numbers in a short, so the
   line count overflows when it reaches this value. */
#define LINENO_OVERFLOW     (1 << (8 * sizeof(short)))

/* Bit budget of the DiLoc bit-fields: LINENO_BITS for the line
   number, and whatever remains of 32 bits for the size. */
#define LINENO_BITS         20
#define LOC_SIZE_BITS       (32 - LINENO_BITS)

/* Largest line number that fits in LINENO_BITS. */
#define MAX_LINENO          ((1 << LINENO_BITS) - 1)

/* Largest size that fits in LOC_SIZE_BITS.  It is unlikely that any
   line has an instruction range of more than 4096 bytes. */
#define MAX_LOC_SIZE        ((1 << LOC_SIZE_BITS) - 1)

/* Threshold for spotting line number overflows: a line that is
   60000-odd smaller than its predecessor has probably overflowed. */
#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
|
|
|
|
/* A structure to hold addr-to-source info for a single line. There
|
|
can be a lot of these, hence the dense packing. */
|
|
typedef
|
|
struct {
|
|
/* Word 1 */
|
|
Addr addr; /* lowest address for this line */
|
|
/* Word 2 */
|
|
UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
|
|
UInt lineno:LINENO_BITS; /* source line number, or zero */
|
|
/* Word 3 */
|
|
UChar* filename; /* source filename */
|
|
/* Word 4 */
|
|
UChar* dirname; /* source directory name */
|
|
}
|
|
DiLoc;
|
|
|
|
/* --------------------- CF INFO --------------------- */
|
|
|
|
/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
|
|
address range [base .. base+len-1].
|
|
|
|
On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
|
|
some point and {e,r}ip is in the range [base .. base+len-1], it
|
|
tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
|
|
current frame and also ra, the return address of the current frame.
|
|
|
|
First off, calculate CFA, the Canonical Frame Address, thusly:
|
|
|
|
cfa = case cfa_how of
|
|
CFIC_IA_SPREL -> {e,r}sp + cfa_off
|
|
CFIC_IA_BPREL -> {e,r}bp + cfa_off
|
|
CFIC_IA_EXPR -> expr whose index is in cfa_off
|
|
|
|
Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
|
|
this frame's ra value can be calculated like this:
|
|
|
|
old_{e,r}sp/{e,r}bp/ra
|
|
= case {e,r}sp/{e,r}bp/ra_how of
|
|
CFIR_UNKNOWN -> we don't know, sorry
|
|
CFIR_SAME -> same as it was before (sp/fp only)
|
|
CFIR_CFAREL -> cfa + sp/bp/ra_off
|
|
CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
|
|
CFIR_EXPR -> expr whose index is in sp/bp/ra_off
|
|
|
|
On ARM it's pretty much the same, except we have more registers to
|
|
keep track of:
|
|
|
|
cfa = case cfa_how of
|
|
CFIC_ARM_R13REL -> r13 + cfa_off
CFIC_ARM_R12REL -> r12 + cfa_off
CFIC_ARM_R11REL -> r11 + cfa_off
CFIC_ARM_R7REL -> r7 + cfa_off
CFIC_EXPR -> expr whose index is in cfa_off
|
|
|
|
old_r14/r13/r12/r11/r7/ra
|
|
= case r14/r13/r12/r11/r7/ra_how of
|
|
CFIR_UNKNOWN -> we don't know, sorry
|
|
CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
|
|
CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
|
|
CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
|
|
CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
|
|
|
|
On s390x we have a similar logic as x86 or amd64. We need the stack pointer
|
|
(r15), the frame pointer r11 (like BP) and together with the instruction
|
|
address in the PSW we can calculate the previous values:
|
|
cfa = case cfa_how of
|
|
CFIC_IA_SPREL -> r15 + cfa_off
|
|
CFIC_IA_BPREL -> r11 + cfa_off
|
|
CFIC_IA_EXPR -> expr whose index is in cfa_off
|
|
|
|
old_sp/fp/ra
|
|
= case sp/fp/ra_how of
|
|
CFIR_UNKNOWN -> we don't know, sorry
|
|
CFIR_SAME -> same as it was before (sp/fp only)
|
|
CFIR_CFAREL -> cfa + sp/fp/ra_off
|
|
CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
|
|
CFIR_EXPR -> expr whose index is in sp/fp/ra_off
|
|
*/
|
|
|
|
/* CFIC_ codes: the values a DiCfSI's cfa_how field may take, i.e. how
   the CFA itself is to be computed. */
#define CFIC_IA_SPREL    ((UChar)1)
#define CFIC_IA_BPREL    ((UChar)2)
#define CFIC_IA_EXPR     ((UChar)3)
#define CFIC_ARM_R13REL  ((UChar)4)
#define CFIC_ARM_R12REL  ((UChar)5)
#define CFIC_ARM_R11REL  ((UChar)6)
#define CFIC_ARM_R7REL   ((UChar)7)
#define CFIC_EXPR        ((UChar)8)   /* all targets */

/* CFIR_ codes: the values the remaining ..._how fields of a DiCfSI
   may take, i.e. how a caller's register value is to be recovered. */
#define CFIR_UNKNOWN     ((UChar)64)
#define CFIR_SAME        ((UChar)65)
#define CFIR_CFAREL      ((UChar)66)
#define CFIR_MEMCFAREL   ((UChar)67)
#define CFIR_EXPR        ((UChar)68)
|
|
|
|
/* DiCfSI, one definition per architecture.  All variants carry base,
   len, cfa_how, ra_how, cfa_off and ra_off; the others add a
   (..._how, ..._off) pair for each further register they track. */
#if defined(VGA_x86) || defined(VGA_amd64)

typedef struct {
   Addr  base;
   UInt  len;
   UChar cfa_how;   /* a CFIC_IA value */
   UChar ra_how;    /* a CFIR_ value */
   UChar sp_how;    /* a CFIR_ value */
   UChar bp_how;    /* a CFIR_ value */
   Int   cfa_off;
   Int   ra_off;
   Int   sp_off;
   Int   bp_off;
} DiCfSI;

#elif defined(VGA_arm)

typedef struct {
   Addr  base;
   UInt  len;
   UChar cfa_how;   /* a CFIC_ value */
   UChar ra_how;    /* a CFIR_ value */
   UChar r14_how;   /* a CFIR_ value */
   UChar r13_how;   /* a CFIR_ value */
   UChar r12_how;   /* a CFIR_ value */
   UChar r11_how;   /* a CFIR_ value */
   UChar r7_how;    /* a CFIR_ value */
   Int   cfa_off;
   Int   ra_off;
   Int   r14_off;
   Int   r13_off;
   Int   r12_off;
   Int   r11_off;
   Int   r7_off;
} DiCfSI;

#elif defined(VGA_ppc32) || defined(VGA_ppc64)

/* Only the common fields are present here.  The struct exists so that
   code handling those common fields need not be ifdef'd against
   VGP_/VGA_ symbols.  At the moment these are not used in any way on
   ppc32/64-linux. */
typedef struct {
   Addr  base;
   UInt  len;
   UChar cfa_how;   /* a CFIC_ value */
   UChar ra_how;    /* a CFIR_ value */
   Int   cfa_off;
   Int   ra_off;
} DiCfSI;

#elif defined(VGA_s390x)

typedef struct {
   Addr  base;
   UInt  len;
   UChar cfa_how;   /* a CFIC_ value */
   UChar sp_how;    /* a CFIR_ value */
   UChar ra_how;    /* a CFIR_ value */
   UChar fp_how;    /* a CFIR_ value */
   Int   cfa_off;
   Int   sp_off;
   Int   ra_off;
   Int   fp_off;
} DiCfSI;

#else
#  error "Unknown arch"
#endif
|
|
|
|
|
|
/* Operator codes carried by a Cex_Binop CfiExpr node.  The values run
   consecutively from 0x321. */
typedef enum {
   Cop_Add = 0x321,
   Cop_Sub = 0x322,
   Cop_And = 0x323,
   Cop_Mul = 0x324,
   Cop_Shl = 0x325,
   Cop_Shr = 0x326,
   Cop_Eq  = 0x327,
   Cop_Ge  = 0x328,
   Cop_Gt  = 0x329,
   Cop_Le  = 0x32A,
   Cop_Lt  = 0x32B,
   Cop_Ne  = 0x32C
} CfiOp;
|
|
|
|
/* Register codes carried by a Cex_CfiReg CfiExpr node.  The values
   run consecutively from 0x213. */
typedef enum {
   Creg_IA_SP    = 0x213,
   Creg_IA_BP    = 0x214,
   Creg_IA_IP    = 0x215,
   Creg_ARM_R13  = 0x216,
   Creg_ARM_R12  = 0x217,
   Creg_ARM_R15  = 0x218,
   Creg_ARM_R14  = 0x219,
   Creg_S390_R14 = 0x21A
} CfiReg;
|
|
|
|
/* Discriminator for the union inside a CfiExpr.  The values run
   consecutively from 0x123. */
typedef enum {
   Cex_Undef  = 0x123,
   Cex_Deref  = 0x124,
   Cex_Const  = 0x125,
   Cex_Binop  = 0x126,
   Cex_CfiReg = 0x127,
   Cex_DwReg  = 0x128
} CfiExprTag;
|
|
|
|
typedef
|
|
struct {
|
|
CfiExprTag tag;
|
|
union {
|
|
struct {
|
|
} Undef;
|
|
struct {
|
|
Int ixAddr;
|
|
} Deref;
|
|
struct {
|
|
UWord con;
|
|
} Const;
|
|
struct {
|
|
CfiOp op;
|
|
Int ixL;
|
|
Int ixR;
|
|
} Binop;
|
|
struct {
|
|
CfiReg reg;
|
|
} CfiReg;
|
|
struct {
|
|
Int reg;
|
|
} DwReg;
|
|
}
|
|
Cex;
|
|
}
|
|
CfiExpr;
|
|
|
|
extern Int ML_(CfiExpr_Undef) ( XArray* dst );
|
|
extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
|
|
extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
|
|
extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR );
|
|
extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
|
|
extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
|
|
|
|
extern void ML_(ppCfiExpr)( XArray* src, Int ix );
|
|
|
|
/* ---------------- FPO INFO (Windows PE) -------------- */
|
|
|
|
/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
|
|
a primitive CFI */
|
|
typedef
|
|
struct _FPO_DATA { /* 16 bytes */
|
|
UInt ulOffStart; /* offset of 1st byte of function code */
|
|
UInt cbProcSize; /* # bytes in function */
|
|
UInt cdwLocals; /* # bytes/4 in locals */
|
|
UShort cdwParams; /* # bytes/4 in params */
|
|
UChar cbProlog; /* # bytes in prolog */
|
|
UChar cbRegs :3; /* # regs saved */
|
|
UChar fHasSEH:1; /* Structured Exception Handling */
|
|
UChar fUseBP :1; /* EBP has been used */
|
|
UChar reserved:1;
|
|
UChar cbFrame:2; /* frame type */
|
|
}
|
|
FPO_DATA;
|
|
|
|
/* PDB frame kinds.  NOTE(review): presumably these are the values
   held in FPO_DATA's 2-bit cbFrame ("frame type") field -- confirm
   against the PDB reader. */
#define PDB_FRAME_FPO   0
#define PDB_FRAME_TRAP  1
#define PDB_FRAME_TSS   2
|
|
|
|
/* --------------------- VARIABLES --------------------- */
|
|
|
|
typedef
|
|
struct {
|
|
Addr aMin;
|
|
Addr aMax;
|
|
XArray* /* of DiVariable */ vars;
|
|
}
|
|
DiAddrRange;
|
|
|
|
typedef
|
|
struct {
|
|
UChar* name; /* in DebugInfo.strchunks */
|
|
UWord typeR; /* a cuOff */
|
|
GExpr* gexpr; /* on DebugInfo.gexprs list */
|
|
GExpr* fbGX; /* SHARED. */
|
|
UChar* fileName; /* where declared; may be NULL. in
|
|
DebugInfo.strchunks */
|
|
Int lineNo; /* where declared; may be zero. */
|
|
}
|
|
DiVariable;
|
|
|
|
Word
|
|
ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
|
|
|
|
/* --------------------- DEBUGINFO --------------------- */
|
|
|
|
/* This is the top-level data type. It's a structure which contains
|
|
information pertaining to one mapped ELF object. This type is
|
|
exported only abstractly - in pub_tool_debuginfo.h. */
|
|
|
|
/* First though, here's an auxiliary data structure. It is only ever
|
|
used as part of a struct _DebugInfo. We use it to record
|
|
observations about mappings and permission changes to the
|
|
associated file, so as to decide when to read debug info. It's
|
|
essentially an ultra-trivial finite state machine which, when it
|
|
reaches an accept state, signals that we should now read debug info
|
|
from the object into the associated struct _DebugInfo. The accept
|
|
state is arrived at when have_rx_map and have_rw_map both become
|
|
true. The initial state is one in which we have no observations,
|
|
so have_rx_map and have_rw_map are both false.
|
|
|
|
This is all rather ad-hoc; for example it has no way to record more
|
|
than one rw or rx mapping for a given object, not because such
|
|
events have never been observed, but because we've never needed to
|
|
note more than the first one of any such in order to decide when to
|
|
read debug info. It may be that in future we need to track more
|
|
state in order to make the decision, so this struct would then get
|
|
expanded.
|
|
|
|
The normal sequence of events is one of
|
|
|
|
start --> r-x mapping --> rw- mapping --> accept
|
|
start --> rw- mapping --> r-x mapping --> accept
|
|
|
|
that is, take the first r-x and rw- mapping we see, and we're done.
|
|
|
|
On MacOSX 10.7, 32-bit, there appears to be a new variant:
|
|
|
|
start --> r-- mapping --> rw- mapping
|
|
--> upgrade r-- mapping to r-x mapping --> accept
|
|
|
|
where the upgrade is done by a call to vm_protect. Hence we
|
|
need to also track this possibility.
|
|
*/
|
|
struct _DebugInfoFSM
|
|
{
|
|
/* --- all targets --- */
|
|
UChar* filename; /* in mallocville (VG_AR_DINFO) */
|
|
|
|
Bool have_rx_map; /* did we see a r?x mapping yet for the file? */
|
|
Bool have_rw_map; /* did we see a rw? mapping yet for the file? */
|
|
|
|
Addr rx_map_avma; /* these fields record the file offset, length */
|
|
SizeT rx_map_size; /* and map address of the r?x mapping we believe */
|
|
OffT rx_map_foff; /* is the .text segment mapping */
|
|
|
|
Addr rw_map_avma; /* ditto, for the rw? mapping we believe is the */
|
|
SizeT rw_map_size; /* .data segment mapping */
|
|
OffT rw_map_foff;
|
|
|
|
/* --- OSX 10.7, 32-bit only --- */
|
|
Bool have_ro_map; /* did we see a r-- mapping yet for the file? */
|
|
|
|
Addr ro_map_avma; /* file offset, length, avma for said mapping */
|
|
SizeT ro_map_size;
|
|
OffT ro_map_foff;
|
|
};
|
|
|
|
|
|
/* Capacity, in bytes, of one chunk of the string table in
   struct _DebugInfo (::strchunks). */
#define SEGINFO_STRCHUNKSIZE (1024 * 64)


/* An object may contain more than one .eh_frame section -- unusual,
   but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
   This is how many of them we can record per object. */
#define N_EHFRAME_SECTS 2
|
|
|
|
|
|
/* So, the main structure for holding debug info for one object. */
|
|
|
|
struct _DebugInfo {
|
|
|
|
/* Admin stuff */
|
|
|
|
struct _DebugInfo* next; /* list of DebugInfos */
|
|
Bool mark; /* marked for deletion? */
|
|
|
|
/* An abstract handle, which can be used by entities outside of
|
|
m_debuginfo to (in an abstract datatype sense) refer to this
|
|
struct _DebugInfo. A .handle of zero is invalid; valid handles
|
|
are 1 and above. The same handle is never issued twice (in any
|
|
given run of Valgrind), so a handle becomes invalid when the
|
|
associated struct _DebugInfo is discarded, and remains invalid
|
|
forever thereafter. The .handle field is set as soon as this
|
|
structure is allocated. */
|
|
ULong handle;
|
|
|
|
/* Used for debugging only - indicate what stuff to dump whilst
|
|
reading stuff into the seginfo. Are computed as early in the
|
|
lifetime of the DebugInfo as possible -- at the point when it is
|
|
created. Use these when deciding what to spew out; do not use
|
|
the global VG_(clo_blah) flags. */
|
|
|
|
Bool trace_symtab; /* symbols, our style */
|
|
Bool trace_cfi; /* dwarf frame unwind, our style */
|
|
Bool ddump_syms; /* mimic /usr/bin/readelf --syms */
|
|
Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */
|
|
Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
|
|
|
|
/* The "decide when it is time to read debuginfo" state machine.
|
|
This structure must get filled in before we can start reading
|
|
anything from the ELF/MachO file. This structure is filled in
|
|
by VG_(di_notify_mmap) and its immediate helpers. */
|
|
struct _DebugInfoFSM fsm;
|
|
|
|
/* Once the ::fsm has reached an accept state -- typically, when
|
|
both a rw? and r?x mapping for .filename have been observed --
|
|
we can go on to read the symbol tables and debug info.
|
|
.have_dinfo changes from False to True when the debug info has
|
|
been completely read in and postprocessed (canonicalised) and is
|
|
now suitable for querying. */
|
|
/* If have_dinfo is False, then all fields below this point are
|
|
invalid and should not be consulted. */
|
|
Bool have_dinfo; /* initially False */
|
|
|
|
/* All the rest of the fields in this structure are filled in once
|
|
we have committed to reading the symbols and debug info (that
|
|
is, at the point where .have_dinfo is set to True). */
|
|
|
|
/* The file's soname. FIXME: ensure this is always allocated in
|
|
VG_AR_DINFO. */
|
|
UChar* soname;
|
|
|
|
/* Description of some important mapped segments. The presence or
|
|
absence of the mapping is denoted by the _present field, since
|
|
in some obscure circumstances (to do with data/sdata/bss) it is
|
|
possible for the mapping to be present but have zero size.
|
|
Certainly text_ is mandatory on all platforms; not sure about
|
|
the rest though.
|
|
|
|
--------------------------------------------------------
|
|
|
|
Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
|
|
|
|
either (rx_map_size == 0 && cfsi == NULL) (the degenerate case)
|
|
|
|
or the normal case, which is the AND of the following:
|
|
(0) rx_map_size > 0
|
|
(1) no two DebugInfos with rx_map_size > 0
|
|
have overlapping [rx_map_avma,+rx_map_size)
|
|
(2) [cfsi_minavma,cfsi_maxavma] does not extend
|
|
beyond [rx_map_avma,+rx_map_size); that is, the former is a
|
|
subrange or equal to the latter.
|
|
(3) all DiCfSI in the cfsi array all have ranges that fall within
|
|
[rx_map_avma,+rx_map_size).
|
|
(4) all DiCfSI in the cfsi array are non-overlapping
|
|
|
|
The cumulative effect of these restrictions is to ensure that
|
|
all the DiCfSI records in the entire system are non overlapping.
|
|
Hence any address falls into either exactly one DiCfSI record,
|
|
or none. Hence it is safe to cache the results of searches for
|
|
DiCfSI records. This is the whole point of these restrictions.
|
|
The caching of DiCfSI searches is done in VG_(use_CF_info). The
|
|
cache is flushed after any change to debugInfo_list. DiCfSI
|
|
searches are cached because they are central to stack unwinding
|
|
on amd64-linux.
|
|
|
|
Where are these invariants imposed and checked?
|
|
|
|
They are checked after a successful read of debuginfo into
|
|
a DebugInfo*, in check_CFSI_related_invariants.
|
|
|
|
(1) is not really imposed anywhere. We simply assume that the
|
|
kernel will not map the text segments from two different objects
|
|
into the same space. Sounds reasonable.
|
|
|
|
(2) follows from (4) and (3). It is ensured by canonicaliseCFI.
|
|
(3) is ensured by ML_(addDiCfSI).
|
|
(4) is ensured by canonicaliseCFI.
|
|
|
|
--------------------------------------------------------
|
|
|
|
Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
|
|
|
|
The _debug_{svma,bias} fields were added as part of a fix to
|
|
#185816. The problem encompassed in that bug report was that it
|
|
wasn't correct to use apply the bias values deduced for a
|
|
primary object to its associated debuginfo object, because the
|
|
debuginfo object (or the primary) could have been prelinked to a
|
|
different SVMA. Hence debuginfo and primary objects need to
|
|
have their own biases.
|
|
|
|
------ JRS: (referring to r9329): ------
|
|
Let me see if I understand the workings correctly. Initially
|
|
the _debug_ values are set to the same values as the "normal"
|
|
ones, as there's a bunch of bits of code like this (in
|
|
readelf.c)
|
|
|
|
di->text_svma = svma;
|
|
...
|
|
di->text_bias = rx_bias;
|
|
di->text_debug_svma = svma;
|
|
di->text_debug_bias = rx_bias;
|
|
|
|
If a debuginfo object subsequently shows up then the
|
|
_debug_svma/bias are set for the debuginfo object. Result is
|
|
that if there's no debuginfo object then the values are the same
|
|
as the primary-object values, and if there is a debuginfo object
|
|
then they will (or at least may) be different.
|
|
|
|
Then when we need to actually bias something, we'll have to
|
|
decide whether to use the primary bias or the debuginfo bias.
|
|
And the strategy is to use the primary bias for ELF symbols but
|
|
the debuginfo bias for anything pulled out of Dwarf.
|
|
|
|
------ THH: ------
|
|
Correct - the debug_svma and bias values apply to any address
|
|
read from the debug data regardless of where that debug data is
|
|
stored and the other values are used for addresses from other
|
|
places (primarily the symbol table).
|
|
|
|
------ JRS: ------
|
|
Ok; so this was my only area of concern. Are there any
|
|
corner-case scenarios where this wouldn't be right? It sounds
|
|
like we're assuming the ELF symbols come from the primary object
|
|
and, if there is a debug object, then all the Dwarf comes from
|
|
there. But what if (eg) both symbols and Dwarf come from the
|
|
debug object? Is that even possible or allowable?
|
|
|
|
------ THH: ------
|
|
You may have a point...
|
|
|
|
The current logic is to try and take any one set of data from
|
|
either the base object or the debug object. There are four sets
|
|
of data we consider:
|
|
|
|
- Symbol Table
|
|
- Stabs
|
|
- DWARF1
|
|
- DWARF2
|
|
|
|
If we see the primary section for a given set in the base object
|
|
then we ignore all sections relating to that set in the debug
|
|
object.
|
|
|
|
Now in principle if we saw a secondary section (like debug_line
|
|
say) in the base object, but not the main section (debug_info in
|
|
this case) then we would take debug_info from the debug object
|
|
but would use the debug_line from the base object unless we saw
|
|
a replacement copy in the debug object. That's probably unlikely
|
|
however.
|
|
|
|
A bigger issue might be, as you say, the symbol table as we will
|
|
pick that up from the debug object if it isn't in the base. The
|
|
dynamic symbol table will always have to be in the base object
|
|
though so we will have to be careful when processing symbols to
|
|
know which table we are reading in that case.
|
|
|
|
What we probably need to do is tell read_elf_symtab which object
|
|
the symbols it is being asked to read came from.
|
|
|
|
(A followup patch to deal with this was committed in r9469).
|
|
*/
|
|
/* .text */
|
|
Bool text_present;
|
|
Addr text_avma;
|
|
Addr text_svma;
|
|
SizeT text_size;
|
|
PtrdiffT text_bias;
|
|
Addr text_debug_svma;
|
|
PtrdiffT text_debug_bias;
|
|
/* .data */
|
|
Bool data_present;
|
|
Addr data_svma;
|
|
Addr data_avma;
|
|
SizeT data_size;
|
|
PtrdiffT data_bias;
|
|
Addr data_debug_svma;
|
|
PtrdiffT data_debug_bias;
|
|
/* .sdata */
|
|
Bool sdata_present;
|
|
Addr sdata_svma;
|
|
Addr sdata_avma;
|
|
SizeT sdata_size;
|
|
PtrdiffT sdata_bias;
|
|
Addr sdata_debug_svma;
|
|
PtrdiffT sdata_debug_bias;
|
|
/* .rodata */
|
|
Bool rodata_present;
|
|
Addr rodata_svma;
|
|
Addr rodata_avma;
|
|
SizeT rodata_size;
|
|
PtrdiffT rodata_bias;
|
|
Addr rodata_debug_svma;
|
|
PtrdiffT rodata_debug_bias;
|
|
/* .bss */
|
|
Bool bss_present;
|
|
Addr bss_svma;
|
|
Addr bss_avma;
|
|
SizeT bss_size;
|
|
PtrdiffT bss_bias;
|
|
Addr bss_debug_svma;
|
|
PtrdiffT bss_debug_bias;
|
|
/* .sbss */
|
|
Bool sbss_present;
|
|
Addr sbss_svma;
|
|
Addr sbss_avma;
|
|
SizeT sbss_size;
|
|
PtrdiffT sbss_bias;
|
|
Addr sbss_debug_svma;
|
|
PtrdiffT sbss_debug_bias;
|
|
/* .plt */
|
|
Bool plt_present;
|
|
Addr plt_avma;
|
|
SizeT plt_size;
|
|
/* .got */
|
|
Bool got_present;
|
|
Addr got_avma;
|
|
SizeT got_size;
|
|
/* .got.plt */
|
|
Bool gotplt_present;
|
|
Addr gotplt_avma;
|
|
SizeT gotplt_size;
|
|
/* .opd -- needed on ppc64-linux for finding symbols */
|
|
Bool opd_present;
|
|
Addr opd_avma;
|
|
SizeT opd_size;
|
|
/* .ehframe -- needed on amd64-linux for stack unwinding. We might
|
|
see more than one, hence the arrays. */
|
|
UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */
|
|
Addr ehframe_avma[N_EHFRAME_SECTS];
|
|
SizeT ehframe_size[N_EHFRAME_SECTS];
|
|
|
|
/* Sorted tables of stuff we snarfed from the file. This is the
|
|
eventual product of reading the debug info. All this stuff
|
|
lives in VG_AR_DINFO. */
|
|
|
|
/* An expandable array of symbols. */
|
|
DiSym* symtab;
|
|
UWord symtab_used;
|
|
UWord symtab_size;
|
|
/* An expandable array of locations. */
|
|
DiLoc* loctab;
|
|
UWord loctab_used;
|
|
UWord loctab_size;
|
|
/* An expandable array of CFI summary info records. Also includes
|
|
summary address bounds, showing the min and max address covered
|
|
by any of the records, as an aid to fast searching. And, if the
|
|
records require any expression nodes, they are stored in
|
|
cfsi_exprs. */
|
|
DiCfSI* cfsi;
|
|
UWord cfsi_used;
|
|
UWord cfsi_size;
|
|
Addr cfsi_minavma;
|
|
Addr cfsi_maxavma;
|
|
XArray* cfsi_exprs; /* XArray of CfiExpr */
|
|
|
|
/* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
|
|
data. Non-expandable array, hence .size == .used. */
|
|
FPO_DATA* fpo;
|
|
UWord fpo_size;
|
|
Addr fpo_minavma;
|
|
Addr fpo_maxavma;
|
|
|
|
/* Expandable arrays of characters -- the string table. Pointers
|
|
into this are stable (the arrays are not reallocated). */
|
|
struct strchunk {
|
|
UInt strtab_used;
|
|
struct strchunk* next;
|
|
UChar strtab[SEGINFO_STRCHUNKSIZE];
|
|
} *strchunks;
|
|
|
|
/* Variable scope information, as harvested from Dwarf3 files.
|
|
|
|
In short it's an
|
|
|
|
array of (array of PC address ranges and variables)
|
|
|
|
The outer array indexes over scopes, with Entry 0 containing
|
|
information on variables which exist for any value of the program
|
|
counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
|
|
etc contain information on increasinly deeply nested variables.
|
|
|
|
Each inner array is an array of (an address range, and a set
|
|
of variables that are in scope over that address range).
|
|
|
|
The address ranges may not overlap.
|
|
|
|
Since Entry 0 in the outer array holds information on variables
|
|
that exist for any value of the PC (that is, global vars), it
|
|
follows that Entry 0's inner array can only have one address
|
|
range pair, one that covers the entire address space.
|
|
*/
|
|
XArray* /* of OSet of DiAddrRange */varinfo;
|
|
|
|
/* These are arrays of the relevant typed objects, held here
|
|
partially for the purposes of visiting each object exactly once
|
|
when we need to delete them. */
|
|
|
|
/* An array of TyEnts. These are needed to make sense of any types
|
|
in the .varinfo. Also, when deleting this DebugInfo, we must
|
|
first traverse this array and throw away malloc'd stuff hanging
|
|
off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
|
|
XArray* /* of TyEnt */ admin_tyents;
|
|
|
|
/* An array of guarded DWARF3 expressions. */
|
|
XArray* admin_gexprs;
|
|
};
|
|
|
|
/* --------------------- functions --------------------- */
|
|
|
|
/* ------ Adding ------ */
|
|
|
|
/* Add a symbol to si's symbol table. The contents of 'sym' are
|
|
copied. It is assumed (and checked) that 'sym' only contains one
|
|
name, so there is no auxiliary ::sec_names vector to duplicate.
|
|
IOW, the copy is a shallow copy, and there are assertions in place
|
|
to ensure that's OK. */
|
|
extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
|
|
|
|
/* Add a line-number record to a DebugInfo. */
|
|
extern
|
|
void ML_(addLineInfo) ( struct _DebugInfo* di,
|
|
UChar* filename,
|
|
UChar* dirname, /* NULL is allowable */
|
|
Addr this, Addr next, Int lineno, Int entry);
|
|
|
|
/* Add a CFI summary record. The supplied DiCfSI is copied. */
|
|
extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
|
|
|
|
/* Add a string to the string table of a DebugInfo. If len==-1,
|
|
ML_(addStr) will itself measure the length of the string. */
|
|
extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len );
|
|
|
|
extern void ML_(addVar)( struct _DebugInfo* di,
|
|
Int level,
|
|
Addr aMin,
|
|
Addr aMax,
|
|
UChar* name,
|
|
UWord typeR, /* a cuOff */
|
|
GExpr* gexpr,
|
|
GExpr* fbGX, /* SHARED. */
|
|
UChar* fileName, /* where decl'd - may be NULL */
|
|
Int lineNo, /* where decl'd - may be zero */
|
|
Bool show );
|
|
|
|
/* Canonicalise the tables held by 'di', in preparation for use. Call
|
|
this after finishing adding entries to these tables. */
|
|
extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
|
|
|
|
/* Canonicalise the call-frame-info table held by 'di', in preparation
|
|
for use. This is called by ML_(canonicaliseTables) but can also be
|
|
called on it's own to sort just this table. */
|
|
extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
|
|
|
|
/* ------ Searching ------ */
|
|
|
|
/* Find a symbol-table index containing the specified pointer, or -1
|
|
if not found. Binary search. */
|
|
extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
|
|
Bool match_anywhere_in_sym,
|
|
Bool findText );
|
|
|
|
/* Find a location-table index containing the specified pointer, or -1
|
|
if not found. Binary search. */
|
|
extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
|
|
|
|
/* Find a CFI-table index containing the specified pointer, or -1 if
|
|
not found. Binary search. */
|
|
extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
|
|
|
|
/* Find a FPO-table index containing the specified pointer, or -1
|
|
if not found. Binary search. */
|
|
extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
|
|
|
|
/* ------ Misc ------ */
|
|
|
|
/* Show a non-fatal debug info reading error. Use vg_panic if
|
|
terminal. 'serious' errors are always shown, not 'serious' ones
|
|
are shown only at verbosity level 2 and above. */
|
|
extern
|
|
void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg );
|
|
|
|
/* Print a symbol. */
|
|
extern void ML_(ppSym) ( Int idx, DiSym* sym );
|
|
|
|
/* Print a call-frame-info summary. */
|
|
extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
|
|
|
|
|
|
/* Emit a formatted message through VG_(printf), but only when the
   trace_symtab flag is set.  The expansion refers to a variable named
   'di' (a pointer to something with a trace_symtab member), which
   must be in scope wherever the macro is used.

   The body is wrapped in do { ... } while (0) so that an invocation
   followed by ';' is exactly one statement.  With a bare
   "if (...) { ... }" expansion, using the macro as the unbraced body
   of an if that has an else clause would not compile, and without the
   trailing ';' a following 'else' would silently bind to the macro's
   own 'if'. */
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (di->trace_symtab) { VG_(printf)(format, ## args); } \
   } while (0)
|
|
|
|
|
|
#endif /* ndef __PRIV_STORAGE_H */
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end ---*/
|
|
/*--------------------------------------------------------------------*/
|