Julian Seward 9377cf0ea5 Add limited support for DWARF3 'expressions' in stack-unwind (CFI)
info (DW_CFA_def_cfa_expression, DW_CFA_expression,
DW_CFA_val_expression).  Mechanism to support all of these is in place
although only DW_CFA_val_expression is currently connected up.

This is really nasty.  The basic idea is to partially evaluate each
expression at the debuginfo-reading time by running it on a stack
machine in which each stack element is an expression tree.  If the
expression can be 'run' successfully, the tree (dag, really) remaining
at the top of the stack is massaged and put into the DiCfSI record for
that address range.  At unwind time the tree is evaluated if needed.
Such cases are in fact extremely rare and so the vast majority of
unwindings use the same mechanism as before.

As a result of all this:

* some obscure cases in glibc-2.5's libpthread.so unwind when they
  didn't before

* --debug-dump=frames produces identical output to that of readelf
  for libc-2.5.so and associated libpthread.so

* All the action centers around the new type CfiExpr, which is a
  union expression-tree type in the same style as IRExpr et al

* Many dark corners of the CFI reader have been looked at and
  (re-)validated



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@6620
2007-02-27 16:52:23 +00:00

379 lines
13 KiB
C

/*--------------------------------------------------------------------*/
/*--- Read stabs debug info. readstabs.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2000-2007 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
/*
Stabs reader greatly improved by Nick Nethercote, Apr 02.
This module was also extensively hacked on by Jeremy Fitzhardinge
and Tom Hughes.
*/
#include "pub_core_basics.h"
#include "pub_core_debuginfo.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_xarray.h"
#include "priv_storage.h"
#include "priv_readstabs.h" /* self */
/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
#include <a.out.h> /* stabs defns */
/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
/*------------------------------------------------------------*/
/*--- Read STABS format debug info. ---*/
/*------------------------------------------------------------*/
/* Stabs entry types, from:
* The "stabs" debug format
* Menapace, Kingdon and MacKenzie
* Cygnus Support
*/
/* Stab type codes recognised by this reader.  The numeric values are
   the ones stored in the n_type field of each stab entry; they are
   written here in hexadecimal. */
typedef
   enum {
      N_UNDEF = 0x00,   /* undefined symbol; starts a new string table */
      N_GSYM  = 0x20,   /* global symbol */
      N_FUN   = 0x24,   /* start or end of a function */
      N_STSYM = 0x26,   /* file-scope variable in the data segment */
      N_LCSYM = 0x28,   /* file-scope variable in the BSS segment */
      N_RSYM  = 0x40,   /* variable held in a register */
      N_SLINE = 0x44,   /* source line number */
      N_SO    = 0x64,   /* path and name of a source file */
      N_LSYM  = 0x80,   /* stack variable, or a type */
      N_BINCL = 0x82,   /* an include file begins */
      N_SOL   = 0x84,   /* name of an include file */
      N_PSYM  = 0xA0,   /* parameter of a function */
      N_EINCL = 0xA2,   /* an include file ends */
      N_LBRAC = 0xC0,   /* a lexical block opens */
      N_EXCL  = 0xC2,   /* placeholder standing in for an include file */
      N_RBRAC = 0xE0    /* a lexical block closes */
   }
   stab_types;
/* Read stabs-format debug info. This is all rather horrible because
stabs is an underspecified, kludgy hack.
*/
/* Walk an array of stab entries and feed the line-number information
   found there into SI via ML_(addLineInfo).

   si            object the strings and line info are added to
   debug_offset  added to n_value of an N_FUN stab to form the
                 function's start address
   stabC         the stab entries, viewed as an array of struct nlist
   stab_sz       size of that array in bytes
   stabstr       string table; each stab's n_strx is an offset into it
   stabstr_sz    size of the string table in bytes; only consulted
                 when an N_UNDEF stab rebases the string table

   Only N_UNDEF, N_SO, N_SOL, N_SLINE and N_FUN have any effect; all
   other stab kinds are accepted and ignored. */
void ML_(read_debuginfo_stabs) ( SegInfo* si, OffT debug_offset,
                                 UChar* stabC,   Int stab_sz,
                                 UChar* stabstr, Int stabstr_sz )
{
   const Bool debug     = False;
   const Bool contdebug = False;
   Int    i;
   Int    n_stab_entries;
   struct nlist* stab = (struct nlist*)stabC;
   UChar *next_stabstr = NULL;

   /* state for various things */
   struct {
      Addr start;   /* start address */
      Addr end;     /* end address */
      Int  line;    /* first line */
   } func = { 0, 0, -1 };
   struct {
      Char *name;
      Bool  same;
   } file = { NULL, True };
   struct {
      Int  prev;    /* prev line */
      Int  no;      /* current line */
      Int  ovf;     /* line wrap */
      Addr addr;    /* start of this line */
      Bool first;   /* first line in function */
   } line = { 0, 0, 0, 0, False };

   /* Ok.  It all looks plausible.  Go on and read debug data.
         stab kinds: 100   N_SO     a source file name
                      68   N_SLINE  a source line number
                      36   N_FUN    start of a function

      In this loop, we maintain a current file name, updated as
      N_SO/N_SOLs appear, and a current function base address,
      updated as N_FUNs appear.  Based on that, address ranges for
      N_SLINEs are calculated, and stuffed into the line info table.

      Finding the instruction address range covered by an N_SLINE is
      complicated;  see the N_SLINE case below.
   */
   file.name = ML_(addStr)(si,"???", -1);

   n_stab_entries = stab_sz/(int)sizeof(struct nlist);

   for (i = 0; i < n_stab_entries; i++) {
      const struct nlist *st = &stab[i];
      Char *string;

      if (debug && 1) {
         VG_(printf) ( "%2d type=%d othr=%d desc=%d value=0x%x strx=%d %s\n", i,
                       st->n_type, st->n_other, st->n_desc,
                       (int)st->n_value,
                       (int)st->n_un.n_strx,
                       stabstr + st->n_un.n_strx );
      }

      /* handle continued string stabs */
      {
         Int   qbuflen = 0;
         Int   qidx = 0;
         Char* qbuf = NULL;
         Int   qlen;
         Bool  qcontinuing = False;
         UInt  qstringidx;

         qstringidx = st->n_un.n_strx;
         string = stabstr + qstringidx;
         qlen = VG_(strlen)(string);

         while (string
                && qlen > 0
                && (qcontinuing || string[qlen-1] == '\\')) {
            /* Gak, we have a continuation.  Skip forward through
               subsequent stabs to gather all the parts of the
               continuation.  Increment i, but keep st pointing at
               current stab. */

            qcontinuing = string[qlen-1] == '\\';

            /* remove trailing \ .  The length must be tested before
               the character is read: a string consisting only of
               backslashes drives qlen to 0, and string[qlen-1] would
               then read before the start of the string. */
            while (qlen > 0 && string[qlen-1] == '\\')
               qlen--;

            if (contdebug)
               VG_(printf)("found extension string: \"%s\" "
                           "len=%d(%c) idx=%d buflen=%d\n",
                           string, qlen,
                           /* nothing to show if everything was stripped */
                           qlen > 0 ? string[qlen-1] : ' ',
                           qidx, qbuflen);

            /* XXX this is silly.  The si->strtab should have a way of
               appending to the last added string... */
            if ((qidx + qlen) >= qbuflen) {
               Char *n;

               if (qbuflen == 0)
                  qbuflen = 16;
               while ((qidx + qlen) >= qbuflen)
                  qbuflen *= 2;
               n = VG_(arena_malloc)(VG_AR_SYMTAB, qbuflen);
               /* On the first growth there is no old buffer (and qidx
                  is still 0), so there is nothing to copy or free. */
               if (qbuf != NULL) {
                  VG_(memcpy)(n, qbuf, qidx);
                  VG_(arena_free)(VG_AR_SYMTAB, qbuf);
               }
               qbuf = n;
            }

            VG_(memcpy)(&qbuf[qidx], string, qlen);
            qidx += qlen;
            if (contdebug) {
               qbuf[qidx] = '\0';
               VG_(printf)("working buf=\"%s\"\n", qbuf);
            }

            i++;
            if (i >= n_stab_entries)
               break;

            if (stab[i].n_un.n_strx) {
               string = stabstr + stab[i].n_un.n_strx;
               qlen = VG_(strlen)(string);
            } else {
               string = NULL;
               qlen = 0;
            }
         }

         if (qbuf != NULL) {
            i--;                        /* overstepped */
            string = ML_(addStr)(si, qbuf, qidx);
            VG_(arena_free)(VG_AR_SYMTAB, qbuf);
            if (contdebug)
               VG_(printf)("made composite: \"%s\"\n", string);
         }
      }

      switch(st->n_type) {
         case N_UNDEF:
            /* new string table base */
            if (next_stabstr != NULL) {
               stabstr_sz -= next_stabstr - stabstr;
               stabstr = next_stabstr;
               if (stabstr_sz <= 0) {
                  VG_(printf)(" @@ bad stabstr size %d\n", stabstr_sz);
                  return;
               }
            }
            next_stabstr = stabstr + st->n_value;
            break;

         case N_BINCL:
         case N_EINCL:
         case N_EXCL:
            /* include-file bracketing: nothing to record */
            break;

         case N_SOL:     /* sub-source (include) file */
            if (line.ovf != 0)
               VG_(message)(Vg_UserMsg,
                            "Warning: file %s is very big (> 65535 lines) "
                            "Line numbers and annotation for this file might "
                            "be wrong.  Sorry",
                            file.name);
            /* FALLTHROUGH */

         case N_SO: {                /* new source file */
            UChar *nm = string;
            UInt len = VG_(strlen)(nm);
            Addr addr = func.start + st->n_value;

            if (line.addr != 0) {
               /* finish off previous line */
               ML_(addLineInfo)(si, file.name, NULL, line.addr,
                                addr, line.no + line.ovf * LINENO_OVERFLOW, i);
            }

            /* reset line state */
            line.ovf = 0;
            line.addr = 0;
            line.prev = 0;
            line.no = 0;

            /* A name ending in '/' leaves the current file name
               unchanged; an empty name is replaced by a placeholder. */
            if (len > 0 && nm[len-1] != '/') {
               file.name = ML_(addStr)(si, nm, -1);
               if (debug)
                  VG_(printf)("new source: %s\n", file.name);
            } else if (len == 0)
               file.name = ML_(addStr)(si, "?1\0", -1);

            break;
         }

         case N_SLINE: {        /* line info */
            Addr addr = func.start + st->n_value;

            if (line.addr != 0) {
               /* there was a previous */
               ML_(addLineInfo)(si, file.name, NULL, line.addr,
                                addr, line.no + line.ovf * LINENO_OVERFLOW, i);
            }

            line.addr = addr;
            line.prev = line.no;
            /* n_desc is reinterpreted as an unsigned 16-bit line number */
            line.no = (Int)((UShort)st->n_desc);

            /* A large backwards jump in line number is taken to mean
               the 16-bit line counter has wrapped. */
            if (line.prev > line.no + OVERFLOW_DIFFERENCE && file.same) {
               VG_(message)(Vg_DebugMsg,
                            "Line number overflow detected (%d --> %d) in %s",
                            line.prev, line.no, file.name);
               line.ovf++;
            }
            file.same = True;

            /* On the first N_SLINE after a function start, note the
               line number as the function's first line, provided we
               are actually inside a function. */
            if (line.first) {
               line.first = False;

               /* remember first line of function */
               if (func.start != 0) {
                  func.line = line.no;
               }
            }
            break;
         }

         case N_FUN: {                /* function start/end */
            Addr addr = 0;        /* end address for prev line/scope */

            /* if this is the end of the function or we haven't
               previously finished the previous function... */
            if (*string == '\0' || func.start != 0) {
               /* end of function */
               line.first = False;

               /* end line at end of function */
               addr = func.start + st->n_value;

               /* now between functions */
               func.start = 0;
            }

            if (*string != '\0') {
               /* new function */
               line.first = True;

               /* line ends at start of next function */
               addr = debug_offset + st->n_value;

               func.start = addr;
            }

            if (line.addr) {
               ML_(addLineInfo)(si, file.name, NULL, line.addr,
                                addr, line.no + line.ovf * LINENO_OVERFLOW, i);
               line.addr = 0;
            }
            break;
         }

         case N_LBRAC:   /* open new scope: ignored */
         case N_RBRAC:   /* close scope: ignored */
            break;

         case N_GSYM:    /* global variable */
         case N_STSYM:   /* static in data segment */
         case N_LCSYM:   /* static in bss segment */
         case N_PSYM:    /* function parameter */
         case N_LSYM:    /* stack variable */
         case N_RSYM:    /* register variable */
            break;

         default:
            /* any stab kind not listed above is skipped */
            break;
      }
   }
}
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/