/*--------------------------------------------------------------------*/
/*--- Top level management of symbols and debugging information. ---*/
/*--- debuginfo.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2000-2006 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
/*
Stabs reader greatly improved by Nick Nethercote, Apr 02.
This module was also extensively hacked on by Jeremy Fitzhardinge
and Tom Hughes.
*/
#include "pub_core_basics.h"
#include "pub_core_threadstate.h"
#include "pub_core_debuginfo.h" /* self */
#include "pub_core_demangle.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_redir.h" // VG_(redir_notify_{new,delete}_SegInfo)
#include "pub_core_aspacemgr.h"
#include "priv_storage.h"
#include "priv_readdwarf.h"
#include "priv_readstabs.h"
#include "priv_readelf.h"
/*------------------------------------------------------------*/
/*--- Root structure ---*/
/*------------------------------------------------------------*/
/* The root structure for the entire symbol table system. It is a
singly linked list of SegInfos, chained through their 'next' fields;
NULL means no debug info is currently loaded. In this file,
acquire_syms_for_range prepends new entries at the head and
discard_SegInfo unlinks and frees them. Note that this entire
mechanism assumes that what we read from /proc/self/maps doesn't
contain overlapping address ranges, and as a result the SegInfos in
this list describe disjoint address ranges.
*/
static SegInfo* segInfo_list = NULL;
/*------------------------------------------------------------*/
/*--- Notification (acquire/discard) helpers ---*/
/*------------------------------------------------------------*/
/* Build a fresh SegInfo in the VG_AR_SYMTAB arena describing the
   mapping [start, start+size) found at file offset 'foffset'.  The
   record holds its own private copy of 'filename'.  No tables are
   attached here: every field not explicitly assigned below keeps
   the zero value left by VG_(arena_calloc). */
static
SegInfo* alloc_SegInfo(Addr start, SizeT size, OffT foffset,
                       const HChar* filename)
{
   SegInfo* fresh = VG_(arena_calloc)(VG_AR_SYMTAB, 1, sizeof(*fresh));

   fresh->start    = start;
   fresh->size     = size;
   fresh->foffset  = foffset;
   /* Duplicate the name so the caller may release its own string. */
   fresh->filename = VG_(arena_strdup)(VG_AR_SYMTAB, filename);
   return fresh;
}
/* Release 'si' together with everything it owns: the filename copy,
   the symbol, line and CFI tables, and the chain of string chunks.
   'si' must not be NULL.  The record is not unlinked from
   segInfo_list here; that is the caller's job. */
static void free_SegInfo ( SegInfo* si )
{
   struct strchunk* chunk;

   vg_assert(si != NULL);

   if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
   if (si->symtab)   VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
   if (si->loctab)   VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
   if (si->cfsi)     VG_(arena_free)(VG_AR_SYMTAB, si->cfsi);

   /* Walk the chunk chain, stepping to the successor before the
      current node is handed back to the arena. */
   chunk = si->strchunks;
   while (chunk != NULL) {
      struct strchunk* doomed = chunk;
      chunk = chunk->next;
      VG_(arena_free)(VG_AR_SYMTAB, doomed);
   }

   VG_(arena_free)(VG_AR_SYMTAB, si);
}
/* Remove 'si' from segInfo_list, tell m_redir that it is going away,
   and free all storage reachable from it.  'si' is expected to be a
   member of the list; if it is not, the list is left untouched and
   nothing is freed. */
static void discard_SegInfo ( SegInfo* si )
{
   SegInfo** prev_next_ptr;
   SegInfo*  curr;

   /* prev_next_ptr always addresses the link that points at curr, so
      unlinking needs no special case for the list head. */
   for (prev_next_ptr = &segInfo_list;
        (curr = *prev_next_ptr) != NULL;
        prev_next_ptr = &curr->next) {

      if (curr != si)
         continue;

      if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir))
         VG_(message)(Vg_DebugMsg,
                      "Discarding syms at %p-%p in %s due to munmap()",
                      si->start, si->start + si->size,
                      curr->filename ? curr->filename : (UChar*)"???");

      vg_assert(*prev_next_ptr == curr);
      /* Unlink first, then notify m_redir, and only then free. */
      *prev_next_ptr = curr->next;
      VG_(redir_notify_delete_SegInfo)( curr );
      free_SegInfo(curr);
      return;
   }
   /* Fell off the end: 'si' was not in the list. */
}
/* Discard every SegInfo whose address range intersects
   [start,start+length).  Each call to discard_SegInfo unlinks and
   frees a list node, so after every removal the scan restarts from
   the head of segInfo_list instead of continuing from a node that no
   longer exists.  The function returns once a full pass finds no
   intersecting entry. */
static void discard_syms_in_range ( Addr start, SizeT length )
{
   for (;;) {
      SegInfo* hit = segInfo_list;

      /* Skip entries lying wholly above or wholly below the range. */
      while (hit != NULL
             && (start+length-1 < hit->start
                 || hit->start+hit->size-1 < start)) {
         hit = hit->next;
      }

      if (hit == NULL)
         return;

      discard_SegInfo( hit );
   }
}
/* Create a SegInfo for the mapping described by seg_addr / seg_len /
   seg_offset and try to fill it from the ELF object 'seg_filename'.
   On success the record is prepended to segInfo_list, its tables are
   canonicalised, m_redir is notified, and the record is returned.
   If the object cannot be read the record is freed again and NULL is
   returned. */
static
SegInfo* acquire_syms_for_range( Addr seg_addr, SizeT seg_len,
                                 OffT seg_offset, const Char* seg_filename)
{
   SegInfo* fresh
      = alloc_SegInfo(seg_addr, seg_len, seg_offset, seg_filename);

   if (! ML_(read_elf_debug_info) ( fresh )) {
      /* Something went wrong (eg. bad ELF file). */
      free_SegInfo( fresh );
      return NULL;
   }

   /* Link in at the head of the list ... */
   fresh->next  = segInfo_list;
   segInfo_list = fresh;

   /* ... tidy up the tables, then let m_redir know about it. */
   ML_(canonicaliseTables) ( fresh );
   VG_(redir_notify_new_SegInfo)( fresh );

   return fresh;
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* The debug info system is driven by notifications that a text
segment has been mapped in, or unmapped. When that happens it
tries to acquire/discard whatever info is available for the
corresponding object. This section contains the notification
handlers. */
/* Notify the debuginfo system about a new mapping.  This is the way
   new debug information gets loaded.  'a' is an address inside the
   new mapping; the segment containing it is looked up and must
   exist.  Segments of kind SkFileC are always candidates.  Segments
   of kind SkFileV (mappings belonging to Valgrind) are candidates
   only when allow_SkFileV is True, which lets the caller control
   exactly when symbols get read from the Valgrind executable
   itself. */
void VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
{
   NSegment* seg;
   HChar*    filename;
   SizeT     seg_len;
   Bool      kind_ok;
   Bool      wanted;

   /* Symbols are read only if the mapping starts at offset zero of a
      named file, is of an acceptable kind, is readable and
      executable, and appears to hold an ELF object.

      Getting this heuristic right is critical.  On x86-linux,
      objects are typically mapped twice:

      1b8fb000-1b8ff000 r-xp 00000000 08:02 4471477 vgpreload_memcheck.so
      1b8ff000-1b900000 rw-p 00004000 08:02 4471477 vgpreload_memcheck.so

      whereas ppc32-linux mysteriously does this:

      118a6000-118ad000 r-xp 00000000 08:05 14209428 vgpreload_memcheck.so
      118ad000-118b6000 ---p 00007000 08:05 14209428 vgpreload_memcheck.so
      118b6000-118bd000 rwxp 00000000 08:05 14209428 vgpreload_memcheck.so

      The third mapping must not be treated as containing executable
      code.  Reading symbols from that rwx segment -- which overlaps
      the r-x segment in the file -- makes the redirection mechanism
      redirect to addresses inside it, which is wrong and causes
      crashes.  A test that copes with both layouts is therefore:
      r and x and NOT w.

      ------
      JRS 28 Dec 05: unfortunately icc 8.1 on x86 has been seen to
      produce executables with a single rwx segment rather than a
      (r-x,rw-) pair, so the rule actually applied is:

         x86-linux:   consider if r and x
         all others:  consider if r and x and NOT w
   */
#  if defined(VGP_x86_linux)
   Bool require_no_W = False;
#  else
   Bool require_no_W = True;
#  endif

   seg = VG_(am_find_nsegment)(a);
   vg_assert(seg);

   filename = VG_(am_get_filename)( seg );
   if (!filename)
      return;

   /* Work on a private copy of the name from here onwards. */
   filename = VG_(arena_strdup)( VG_AR_SYMTAB, filename );

   kind_ok = seg->kind == SkFileC
             || (allow_SkFileV && seg->kind == SkFileV);

   /* The ELF probe looks at the mapped bytes, so it comes last,
      after the segment is known to be readable. */
   wanted = kind_ok
            && seg->offset == 0
            && seg->fnIdx != -1
            && seg->hasR
            && seg->hasX
            && !(require_no_W && seg->hasW)
            && ML_(is_elf_object_file)( (const void*)seg->start );

   if (wanted) {
      seg_len = seg->end + 1 - seg->start;

      /* Dump any info previously associated with the range ... */
      discard_syms_in_range( seg->start, seg_len );

      /* ... and acquire new info.  acquire_syms_for_range keeps its
         own copy of the name (made in alloc_SegInfo). */
      acquire_syms_for_range( seg->start, seg_len, seg->offset, filename );
   }

   /* Our private copy is no longer needed on either path. */
   VG_(arena_free)(VG_AR_SYMTAB, filename);
}
/* Notify the debuginfo system that [a, a+len) has been unmapped.
Unmap is simpler than mmap: just throw away any SegInfos
intersecting that range, via discard_syms_in_range. */
void VG_(di_notify_munmap)( Addr a, SizeT len )
{
discard_syms_in_range(a, len);
}
/* Notification that the protection of [a, a+len) changed to 'prot'.
Uh, this doesn't do anything at all. IIRC glibc (or ld.so, I don't
remember) does a bunch of mprotects on itself, and if we follow
through here, it causes the debug info for that object to get
discarded. The discard below is therefore switched off by the
constant 0 in its condition; exe_ok is still computed but has no
effect. */
void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot )
{
/* Would the range still count as holding code? Executable always
does; on x86-linux merely readable does too. */
Bool exe_ok = toBool(prot & VKI_PROT_EXEC);
# if defined(VGP_x86_linux)
exe_ok = exe_ok || toBool(prot & VKI_PROT_READ);
# endif
/* Deliberately disabled -- see the comment above the function. */
if (0 && !exe_ok)
discard_syms_in_range(a, len);
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- TOP LEVEL: QUERYING EXISTING DEBUG INFO ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/*------------------------------------------------------------*/
/*--- Use of symbol table & location info to create ---*/
/*--- plausible-looking stack dumps. ---*/
/*------------------------------------------------------------*/
/* Locate 'ptr' in the symbol tables.  Only the first SegInfo whose
   address range contains 'ptr' is consulted.  On success *psi is set
   to that SegInfo and *symno to the index of the matching symtab
   entry.  On failure -- no SegInfo covers 'ptr', or its symtab has
   no suitable entry -- *psi is set to NULL and *symno is left
   untouched.  'match_anywhere_in_fun' is handed through unchanged
   to ML_(search_one_symtab). */
static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi,
                                 /*OUT*/Int* symno,
                                 Bool match_anywhere_in_fun )
{
   SegInfo* cand;

   for (cand = segInfo_list; cand != NULL; cand = cand->next) {
      Int idx;

      if (ptr < cand->start || ptr >= cand->start+cand->size)
         continue;

      idx = ML_(search_one_symtab) ( cand, ptr, match_anywhere_in_fun );
      if (idx == -1)
         break;   /* the covering segment has no answer; give up */

      *symno = idx;
      *psi   = cand;
      return;
   }

   *psi = NULL;
}
/* Locate 'ptr' in the line-number tables.  Only the first SegInfo
   whose address range contains 'ptr' is consulted.  On success *psi
   is set to that SegInfo and *locno to the index of the matching
   loctab entry.  On failure -- no SegInfo covers 'ptr', or its
   loctab has no entry for it -- *psi is set to NULL and *locno is
   left untouched. */
static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
                                 /*OUT*/Int* locno )
{
   SegInfo* cand;

   for (cand = segInfo_list; cand != NULL; cand = cand->next) {
      Int idx;

      if (ptr < cand->start || ptr >= cand->start+cand->size)
         continue;

      idx = ML_(search_one_loctab) ( cand, ptr );
      if (idx == -1)
         break;   /* the covering segment has no answer; give up */

      *locno = idx;
      *psi   = cand;
      return;
   }

   *psi = NULL;
}
/* The whole point of this whole big deal: map code address 'a' to a
   plausible symbol name, written into the caller-supplied buffer
   'buf' of 'nbuf' bytes.  Returns False if no symbol is known for
   'a', otherwise True.

   demangle              if False the raw symbol name is copied, no
                         matter what VG_(clo_demangle) says (the call
                         has probably come from
                         VG_(get_fnname_nodemangle)()); if True the
                         name goes through VG_(demangle) with C++
                         demangling requested.
   match_anywhere_in_fun passed on to the symbol table search.
   show_offset           if True and 'a' is not exactly the symbol's
                         address, "+N" or "-N" is appended to the
                         name, but only when the suffix and its
                         terminating NUL fit in what is left of
                         'buf'. */
static
Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
                  Bool match_anywhere_in_fun, Bool show_offset)
{
   SegInfo* owner;
   Int      idx;
   Int      offset;
   Int      len;
   Char     buf2[12];
   Char*    symend;
   Char*    end;

   search_all_symtabs ( a, &owner, &idx, match_anywhere_in_fun );
   if (owner == NULL)
      return False;

   if (!demangle) {
      VG_(strncpy_safely) ( buf, owner->symtab[idx].name, nbuf );
   } else {
      VG_(demangle) ( True/*do C++ demangle*/,
                      owner->symtab[idx].name, buf, nbuf );
   }

   offset = a - owner->symtab[idx].addr;
   if (!show_offset || offset == 0)
      return True;

   /* Format the suffix separately, then splice it onto the name only
      if it fits completely. */
   symend = buf + VG_(strlen)(buf);
   end    = buf + nbuf;
   len = VG_(sprintf)(buf2, "%c%d",
                      offset < 0 ? '-' : '+',
                      offset < 0 ? -offset : offset);
   vg_assert(len < (Int)sizeof(buf2));

   if (len < (end - symend))
      VG_(memcpy)(symend, buf2, len+1);   /* +1 carries the NUL */

   return True;
}
/* ppc64-linux only: find the TOC pointer (R2 value) that should be
   in force at the entry point of the function containing
   guest_code_addr.  The address may lie anywhere inside the
   function.  Returns the 'tocptr' recorded in the matching symbol
   table entry, or 0 if no symbol covers the address. */
Addr VG_(get_tocptr) ( Addr guest_code_addr )
{
   SegInfo* owner;
   Int      idx;

   search_all_symtabs ( guest_code_addr,
                        &owner, &idx, True/*match_anywhere_in_fun*/ );

   if (owner != NULL)
      return owner->symtab[idx].tocptr;

   return 0;
}
/* Available to tools.  Look up the function containing 'a' and put
   its name in buf[0 .. nbuf-1]: C++ names are always demangled, 'a'
   may be anywhere inside the function, and no offset is shown.
   Returns False if nothing is known about 'a'. */
Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
{
   const Bool demangle    = True;
   const Bool anywhere    = True;
   const Bool with_offset = False;

   return get_fnname ( demangle, a, buf, nbuf, anywhere, with_offset );
}
/* Available to tools.  Like VG_(get_fnname) -- C++ names always
   demangled, 'a' may be anywhere inside the function -- but the
   offset of 'a' from the symbol is appended when it is nonzero.
   Returns False if nothing is known about 'a'. */
Bool VG_(get_fnname_w_offset) ( Addr a, Char* buf, Int nbuf )
{
   const Bool demangle    = True;
   const Bool anywhere    = True;
   const Bool with_offset = True;

   return get_fnname ( demangle, a, buf, nbuf, anywhere, with_offset );
}
/* Available to tools.  C++ names are always demangled and no offset
   is shown, but the lookup only succeeds if 'a' matches the first
   instruction of a function.  Returns False otherwise. */
Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
{
   const Bool demangle    = True;
   const Bool anywhere    = False;
   const Bool with_offset = False;

   return get_fnname ( demangle, a, buf, nbuf, anywhere, with_offset );
}
/* Only available to core.  The raw symbol name is returned with no
   demangling at all; 'a' may be anywhere inside the function and no
   offset is shown.  Returns False if nothing is known about 'a'. */
Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
{
   const Bool demangle    = False;
   const Bool anywhere    = True;
   const Bool with_offset = False;

   return get_fnname ( demangle, a, buf, nbuf, anywhere, with_offset );
}
/* Only available to core.  Fetch the raw (not C++-demangled) name of
   the function containing 'a' -- matching anywhere in the function,
   no offset -- and then apply Z-demangling only, leaving the result
   in buf[0 .. nbuf-1].  'nbuf' must be greater than zero.  Returns
   False if no symbol is known for 'a'. */
Bool VG_(get_fnname_Z_demangle_only) ( Addr a, Char* buf, Int nbuf )
{
#  define N_TMPBUF 4096 /* arbitrary, 4096 == ERRTXT_LEN */
   Char tmpbuf[N_TMPBUF];

   vg_assert(nbuf > 0);

   /* Stage the raw name in a scratch buffer first. */
   if (!get_fnname ( /*demangle*/False, a, tmpbuf, N_TMPBUF,
                     /*match_anywhere_in_fun*/True,
                     /*show offset?*/False ))
      return False;
   tmpbuf[N_TMPBUF-1] = 0; /* paranoia */

   /* We have something, at least.  Try to Z-demangle it. */
   VG_(demangle)( False/*don't do C++ demangling*/, tmpbuf, buf, nbuf);
   buf[nbuf-1] = 0; /* paranoia */
   return True;
#  undef N_TMPBUF
}
/* Map a code address to the name of the shared object or executable
   whose SegInfo covers it, copying the name into the caller-supplied
   buf[0 .. nbuf-1].  Returns False if no SegInfo covers 'a',
   otherwise True.  Only the SegInfo's address range and filename are
   used, so the object needs no debug info. */
Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
{
   SegInfo* si = segInfo_list;

   while (si != NULL) {
      if (a >= si->start && a < si->start+si->size) {
         VG_(strncpy_safely)(buf, si->filename, nbuf);
         return True;
      }
      si = si->next;
   }
   return False;
}
/* Map a code address to the SegInfo whose address range contains it.
   Returns NULL if there is none.  Only the address range is checked,
   so the object needs no debug info. */
SegInfo* VG_(find_seginfo) ( Addr a )
{
   SegInfo* si = segInfo_list;

   /* Advance until a covering entry turns up or the list runs out. */
   while (si != NULL && !(si->start <= a && a < si->start+si->size))
      si = si->next;

   return si;
}
/* Map a code address to the source file name recorded for it in the
   line-number tables, copying the name into the caller-supplied
   filename[0 .. n_filename-1].  Returns True on success, False if no
   line info covers 'a' (in which case 'filename' is not written). */
Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
{
   SegInfo* owner;
   Int      idx;

   search_all_loctabs ( a, &owner, &idx );
   if (owner != NULL) {
      VG_(strncpy_safely)(filename, owner->loctab[idx].filename,
                          n_filename);
      return True;
   }
   return False;
}
/* Map a code address to the source line number recorded for it in
   the line-number tables, storing it in *lineno.  Returns True on
   success, False if no line info covers 'a' (in which case *lineno
   is not written). */
Bool VG_(get_linenum)( Addr a, UInt* lineno )
{
   SegInfo* owner;
   Int      idx;

   search_all_loctabs ( a, &owner, &idx );
   if (owner != NULL) {
      *lineno = owner->loctab[idx].lineno;
      return True;
   }
   return False;
}
/* Map a code address to file name, line number and, optionally,
   directory name.  See the prototype for the full contract.  As
   implemented here:
   - 'dirname' and 'dirname_available' must be both NULL or both
     non-NULL; when they are non-NULL, n_dirname must be > 0.
   - Returns False, writing no outputs, if no line info covers 'a'.
   - Otherwise fills 'filename' and *lineno and returns True.  If
     directory info was requested, *dirname_available says whether
     any exists; when none does, 'dirname' is set to the empty
     string. */
Bool VG_(get_filename_linenum) ( Addr a,
                                 /*OUT*/Char* filename, Int n_filename,
                                 /*OUT*/Char* dirname, Int n_dirname,
                                 /*OUT*/Bool* dirname_available,
                                 /*OUT*/UInt* lineno )
{
   SegInfo* si;
   Int      locno;

   vg_assert( (dirname == NULL && dirname_available == NULL)
              ||
              (dirname != NULL && dirname_available != NULL) );

   search_all_loctabs ( a, &si, &locno );
   if (si == NULL)
      return False;

   VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename);
   *lineno = si->loctab[locno].lineno;

   /* Caller has no interest in the directory: all done. */
   if (!dirname)
      return True;

   vg_assert(n_dirname > 0);
   if (!si->loctab[locno].dirname) {
      /* Directory wanted, but none was recorded. */
      *dirname_available = False;
      *dirname = 0;
   } else {
      /* Directory wanted and we have one. */
      *dirname_available = True;
      VG_(strncpy_safely)(dirname, si->loctab[locno].dirname,
                          n_dirname);
   }
   return True;
}
/* Print into buf info on code address, function name and filename */
/* Append 'str' to 'buf' starting at index 'n', where 'buf' holds
   'n_buf' bytes.  Copying stops at the end of 'str' or once index
   n_buf-1 is reached, whichever comes first; a terminating NUL is
   then stored at the final index.  Returns that index, i.e. the
   position at which the next append should start. */
static Int putStr ( Int n, Int n_buf, Char* buf, Char* str )
{
   while (n < n_buf-1 && *str != 0) {
      buf[n] = *str;
      n++;
      str++;
   }
   buf[n] = '\0';
   return n;
}
/* Like putStr, but escape the characters that are special in XML
   character data while appending 'str' to 'buf' at index 'n':
      '&'  becomes  "&amp;"
      '<'  becomes  "&lt;"
      '>'  becomes  "&gt;"
   Every other character is copied through unchanged.  Each piece is
   appended via putStr, so the n_buf limit and NUL termination follow
   putStr's rules; output that hits the limit is truncated, possibly
   in the middle of an entity.  Returns the index of the terminating
   NUL.  If 'str' is empty nothing is written and 'n' is returned
   as is. */
static Int putStrEsc ( Int n, Int n_buf, Char* buf, Char* str )
{
   Char alt[2];
   for (; *str != 0; str++) {
      switch (*str) {
         case '&': n = putStr( n, n_buf, buf, "&amp;"); break;
         case '<': n = putStr( n, n_buf, buf, "&lt;"); break;
         case '>': n = putStr( n, n_buf, buf, "&gt;"); break;
         default:
            /* Ordinary character: wrap it in a one-character string
               so that putStr can append it. */
            alt[0] = *str;
            alt[1] = 0;
            n = putStr( n, n_buf, buf, alt );
            break;
      }
   }
   return n;
}
Char* VG_(describe_IP)(Addr eip, Char* buf, Int n_buf)
{
# define APPEND(_str) \
n = putStr(n, n_buf, buf, _str);
# define APPEND_ESC(_str) \
n = putStrEsc(n, n_buf, buf, _str);
# define BUF_LEN 4096
UInt lineno;
UChar ibuf[50];
Int n = 0;
static UChar buf_fn[BUF_LEN];
static UChar buf_obj[BUF_LEN];
static UChar buf_srcloc[BUF_LEN];
static UChar buf_dirname[BUF_LEN];
Bool know_dirinfo = False;
Bool know_fnname = VG_(get_fnname) (eip, buf_fn, BUF_LEN);
Bool know_objname = VG_(get_objname)(eip, buf_obj, BUF_LEN);
Bool know_srcloc = VG_(get_filename_linenum)(
eip,
buf_srcloc, BUF_LEN,
buf_dirname, BUF_LEN, &know_dirinfo,
&lineno
);
if (VG_(clo_xml)) {
Bool human_readable = True;
HChar* maybe_newline = human_readable ? "\n " : "";
HChar* maybe_newline2 = human_readable ? "\n " : "";
/* Print in XML format, dumping in as much info as we know. */
APPEND("");
VG_(sprintf)(ibuf,"