mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 10:21:20 +00:00
Even without cache simulation, Callgrind maintains Ir costs. This is done in setup_bbcc() by incrementing an execution counter for last_bbcc (the cost centre of the BB executed previously in the current context) and the global cost counter. However, we forgot to increment any counter when the currently executing function is to be ignored. We still need to update the costs, and attribute them to a non-ignored call site (as given in CLG_(current_state).nonskipped). Before this fix, Ir costs differed between runs with and without cache simulation, because ignored functions (e.g. PLT code) contributed no cost when cache simulation was off.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13130
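A minimal sketch of the accounting this change adds, condensed from the CLG_(setup_bbcc)() hunk in the file below (`passed` is the index of the jump taken out of the last BB):

    /* last BB was inside an ignored (skipped) function */
    if (!CLG_(clo).simulate_cache) {
       /* Ir cost of the instructions executed in the last BB */
       UInt instr_count = last_bb->jmp[passed].instr + 1;
       /* global Ir counter: now also updated for ignored functions */
       CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
       /* and additionally attributed to the non-ignored call site */
       CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
          += instr_count;
    }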
907 lines
24 KiB
C
/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       bbcc.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call tracing.

   Copyright (C) 2002-2012, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"
#include "costs.h"

#include "pub_tool_threadstate.h"

/*------------------------------------------------------------*/
/*--- BBCC operations                                      ---*/
/*------------------------------------------------------------*/

#define N_BBCC_INITIAL_ENTRIES  10437

/* BBCC table (key is BB/Context), per thread, resizable */
bbcc_hash current_bbccs;

void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
{
   Int i;

   CLG_ASSERT(bbccs != 0);

   bbccs->size    = N_BBCC_INITIAL_ENTRIES;
   bbccs->entries = 0;
   bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
                                      bbccs->size * sizeof(BBCC*));

   for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
}

void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
{
   CLG_ASSERT(dst != 0);

   dst->size    = current_bbccs.size;
   dst->entries = current_bbccs.entries;
   dst->table   = current_bbccs.table;
}

bbcc_hash* CLG_(get_current_bbcc_hash)()
{
   return &current_bbccs;
}

void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
{
   CLG_ASSERT(h != 0);

   current_bbccs.size    = h->size;
   current_bbccs.entries = h->entries;
   current_bbccs.table   = h->table;
}

/*
 * Zero all costs of a BBCC
 */
void CLG_(zero_bbcc)(BBCC* bbcc)
{
   Int i;
   jCC* jcc;

   CLG_ASSERT(bbcc->cxt != 0);
   CLG_DEBUG(1, " zero_bbcc: BB %#lx, Cxt %d "
                "(fn '%s', rec %d)\n",
             bb_addr(bbcc->bb),
             bbcc->cxt->base_number + bbcc->rec_index,
             bbcc->cxt->fn[0]->name,
             bbcc->rec_index);

   if ((bbcc->ecounter_sum == 0) &&
       (bbcc->ret_counter == 0)) return;

   for(i=0; i<bbcc->bb->cost_count; i++)
      bbcc->cost[i] = 0;
   for(i=0; i <= bbcc->bb->cjmp_count; i++) {
      bbcc->jmp[i].ecounter = 0;
      for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from)
         CLG_(init_cost)( CLG_(sets).full, jcc->cost );
   }
   bbcc->ecounter_sum = 0;
   bbcc->ret_counter = 0;
}


void CLG_(forall_bbccs)(void (*func)(BBCC*))
{
   BBCC *bbcc, *bbcc2;
   int i, j;

   for (i = 0; i < current_bbccs.size; i++) {
      if ((bbcc=current_bbccs.table[i]) == NULL) continue;
      while (bbcc) {
         /* every bbcc should have a rec_array */
         CLG_ASSERT(bbcc->rec_array != 0);

         for(j=0; j<bbcc->cxt->fn[0]->separate_recursions; j++) {
            if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;

            (*func)(bbcc2);
         }
         bbcc = bbcc->next;
      }
   }
}


/* All BBCCs for recursion level 0 are inserted into a
 * thread-specific hash table with key
 * - address of BB structure (unique, as never freed)
 * - current context (includes caller chain)
 * BBCCs for other recursion levels are in bbcc->rec_array.
 *
 * The hash is used in setup_bbcc(), i.e. to find the cost
 * counters to be changed in the execution of a BB.
 */
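/* Note: both key components are stable pointers (BB structures are
 * never freed, and a given caller chain maps to a unique, shared
 * Context object), so the pointer sum computed in bbcc_hash_idx()
 * below is a well-defined hash key. */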

static __inline__
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
{
   CLG_ASSERT(bb != 0);
   CLG_ASSERT(cxt != 0);

   return ((Addr)bb + (Addr)cxt) % size;
}


/* Look up a BBCC in the hash.
 */
static
BBCC* lookup_bbcc(BB* bb, Context* cxt)
{
   BBCC* bbcc = bb->last_bbcc;
   UInt  idx;

   /* check LRU */
   if (bbcc->cxt == cxt) {
      if (!CLG_(clo).separate_threads) {
         /* if we don't dump threads separately, the tid does not have to match */
         return bbcc;
      }
      if (bbcc->tid == CLG_(current_tid)) return bbcc;
   }

   CLG_(stat).bbcc_lru_misses++;

   idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
   bbcc = current_bbccs.table[idx];
   while (bbcc &&
          (bb  != bbcc->bb ||
           cxt != bbcc->cxt)) {
      bbcc = bbcc->next;
   }

   CLG_DEBUG(2," lookup_bbcc(BB %#lx, Cxt %d, fn '%s'): %p (tid %d)\n",
             bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
             bbcc, bbcc ? bbcc->tid : 0);

   CLG_DEBUGIF(2)
      if (bbcc) CLG_(print_bbcc)(-2,bbcc);

   return bbcc;
}
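
/* lookup_bbcc() is the slow path: CLG_(setup_bbcc)() below only calls
 * it after both per-BB LRU caches (bb->last_bbcc, checked above, and
 * last_bbcc->lru_next_bbcc) failed to match the current context. */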


/* Double the size of the BBCC hash table (key BB/Context) */
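/* Growth policy: the new size 2*old+3 keeps the table size odd.
 * conflicts1/conflicts2 count rehashed chains reaching length 2
 * resp. 3, purely for the debug statistics printed below. */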
static void resize_bbcc_hash(void)
{
   Int i, new_size, conflicts1 = 0, conflicts2 = 0;
   BBCC** new_table;
   UInt new_idx;
   BBCC *curr_BBCC, *next_BBCC;

   new_size  = 2*current_bbccs.size+3;
   new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
                                   new_size * sizeof(BBCC*));

   if (!new_table) return;

   for (i = 0; i < new_size; i++)
      new_table[i] = NULL;

   for (i = 0; i < current_bbccs.size; i++) {
      if (current_bbccs.table[i] == NULL) continue;

      curr_BBCC = current_bbccs.table[i];
      while (NULL != curr_BBCC) {
         next_BBCC = curr_BBCC->next;

         new_idx = bbcc_hash_idx(curr_BBCC->bb,
                                 curr_BBCC->cxt,
                                 new_size);

         curr_BBCC->next = new_table[new_idx];
         new_table[new_idx] = curr_BBCC;
         if (curr_BBCC->next) {
            conflicts1++;
            if (curr_BBCC->next->next)
               conflicts2++;
         }

         curr_BBCC = next_BBCC;
      }
   }

   VG_(free)(current_bbccs.table);

   CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
             current_bbccs.size, new_size,
             current_bbccs.entries, conflicts1, conflicts2);

   current_bbccs.size  = new_size;
   current_bbccs.table = new_table;
   CLG_(stat).bbcc_hash_resizes++;
}


static __inline
BBCC** new_recursion(int size)
{
   BBCC** bbccs;
   int i;

   bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
   for(i=0; i<size; i++)
      bbccs[i] = 0;

   CLG_DEBUG(3,"  new_recursion(size %d): %p\n", size, bbccs);

   return bbccs;
}


/*
 * Allocate a new BBCC
 *
 * Uninitialized:
 * cxt, rec_index, rec_array, next_bbcc, next1, next2
 */
static __inline__
BBCC* new_bbcc(BB* bb)
{
   BBCC* bbcc;
   Int i;

   /* We need cjmp_count+1 JmpData structs:
    * the last is for the unconditional jump/call/ret at end of BB
    */
   bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
                            sizeof(BBCC) +
                            (bb->cjmp_count+1) * sizeof(JmpData));
   bbcc->bb  = bb;
   bbcc->tid = CLG_(current_tid);

   bbcc->ret_counter = 0;
   bbcc->skipped = 0;
   bbcc->cost = CLG_(get_costarray)(bb->cost_count);
   for(i=0; i<bb->cost_count; i++)
      bbcc->cost[i] = 0;
   for(i=0; i<=bb->cjmp_count; i++) {
      bbcc->jmp[i].ecounter = 0;
      bbcc->jmp[i].jcc_list = 0;
   }
   bbcc->ecounter_sum = 0;

   /* Init pointer caches (LRU) */
   bbcc->lru_next_bbcc = 0;
   bbcc->lru_from_jcc  = 0;
   bbcc->lru_to_jcc    = 0;

   CLG_(stat).distinct_bbccs++;

   CLG_DEBUG(3, "  new_bbcc(BB %#lx): %p (now %d)\n",
             bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);

   return bbcc;
}


/**
 * Inserts a new BBCC into hashes.
 * The following BBCC-specific items are used as hash keys and must
 * already be set:
 *  fn   : current function
 *  tid  : current thread ID
 *  from : position where current function is called from
 *
 * The recursion level does not need to be set, as it is not part of
 * the hash key: only BBCCs with recursion level 0 are in the hash.
 */
static
void insert_bbcc_into_hash(BBCC* bbcc)
{
   UInt idx;

   CLG_ASSERT(bbcc->cxt != 0);

   CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
             bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);

   /* check fill degree of hash and resize if needed (>90%) */
   current_bbccs.entries++;
   if (100 * current_bbccs.entries / current_bbccs.size > 90)
      resize_bbcc_hash();

   idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
   bbcc->next = current_bbccs.table[idx];
   current_bbccs.table[idx] = bbcc;

   CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n",
             current_bbccs.entries);
}

static const HChar* mangled_cxt(Context* cxt, int rec_index)
{
   static HChar mangled[FN_NAME_LEN];
   int i, p;

   if (!cxt) return "(no context)";

   p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
   if (rec_index >0)
      p += VG_(sprintf)(mangled+p, "'%d", rec_index +1);
   for(i=1; i<cxt->size; i++)
      p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);

   return mangled;
}


/* Create a new BBCC as a copy of an existing one,
 * but with costs set to 0 and jcc chains empty.
 *
 * This is needed when a BB is executed in another context than
 * the one at instrumentation time of the BB.
 *
 * Use cases:
 *  rec_index == 0: clone from a BBCC with differing tid/cxt
 *                  and insert into hashes
 *  rec_index >0  : clone from a BBCC with same tid/cxt and rec_index 0
 *                  don't insert into hashes
 */
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
{
   BBCC* bbcc;

   CLG_DEBUG(3,"+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
             bb_addr(orig->bb), rec_index, cxt->fn[0]->name);

   bbcc = new_bbcc(orig->bb);

   if (rec_index == 0) {

      /* hash insertion is only allowed if tid or cxt is different */
      CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
                 (orig->cxt != cxt));

      bbcc->rec_index = 0;
      bbcc->cxt = cxt;
      bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
   }
   else {
      if (CLG_(clo).separate_threads)
         CLG_ASSERT(orig->tid == CLG_(current_tid));

      CLG_ASSERT(orig->cxt == cxt);
      CLG_ASSERT(orig->rec_array);
      CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
      CLG_ASSERT(orig->rec_array[rec_index] ==0);

      /* new BBCC will only have differing recursion level */
      bbcc->rec_index = rec_index;
      bbcc->cxt = cxt;
      bbcc->rec_array = orig->rec_array;
      bbcc->rec_array[rec_index] = bbcc;
   }

   /* update list of BBCCs for same BB */
   bbcc->next_bbcc = orig->bb->bbcc_list;
   orig->bb->bbcc_list = bbcc;

   CLG_DEBUGIF(3)
      CLG_(print_bbcc)(-2, bbcc);

   // FIXME: mangled_cxt returns a pointer to a static buffer that
   // gets overwritten with each invocation.
   CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %#lx\n"
               " orig %s\n"
               " new  %s\n",
             orig, rec_index, bb_addr(orig->bb),
             mangled_cxt(orig->cxt, orig->rec_index),
             mangled_cxt(bbcc->cxt, bbcc->rec_index));

   CLG_(stat).bbcc_clones++;

   return bbcc;
}


/* Get a pointer to the cost centre structure for a given basic block.
 * If no BBCC exists yet for this BB, a fresh one is allocated and
 * chained into the BB's bbcc_list; its context is still unset (0) and
 * is assigned later in CLG_(setup_bbcc)(), which also inserts it into
 * the BBCC hash.
 */
BBCC* CLG_(get_bbcc)(BB* bb)
{
   BBCC* bbcc;

   CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));

   bbcc = bb->bbcc_list;

   if (!bbcc) {
      bbcc = new_bbcc(bb);

      /* initialize BBCC */
      bbcc->cxt       = 0;
      bbcc->rec_array = 0;
      bbcc->rec_index = 0;

      bbcc->next_bbcc = bb->bbcc_list;
      bb->bbcc_list = bbcc;
      bb->last_bbcc = bbcc;

      CLG_DEBUGIF(3)
         CLG_(print_bbcc)(-2, bbcc);
   }

   CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
             bb_addr(bb), bbcc);

   return bbcc;
}


/* Callgrind manages its own call stack for each thread.
 * When leaving a function, an underflow can happen when
 * Callgrind's tracing was switched on in the middle of
 * a run, i.e. when Callgrind was not able to trace the
 * call instruction.
 * This function tries to reconstruct the original call.
 * As we know the return address (the address following
 * the CALL instruction), we can detect the function
 * we return to, but the original call site is unknown.
 * We assume a call site at return address - 1.
 * (TODO: other heuristic: lookup info of instrumented BBs).
 */
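/* Example: if tracing is enabled while the program already executes
 * inside some function F, the later RET from F is seen without a
 * matching CALL. Given the return address A (start of this BB), we
 * fabricate a source BB at A-1 inside the returned-to function and
 * push a synthetic call entry for it. */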
static void handleUnderflow(BB* bb)
{
   /* RET at top of call stack */
   BBCC* source_bbcc;
   BB* source_bb;
   Bool seen_before;
   fn_node* caller;
   int fn_number;
   unsigned *pactive;
   call_entry* call_entry_up;

   CLG_DEBUG(1,"  Callstack underflow !\n");

   /* we emulate an old call from the function we return to
    * by using (<return address> -1) */
   source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
   source_bbcc = CLG_(get_bbcc)(source_bb);

   /* seen_before can be true if RET from a signal handler */
   if (!seen_before) {
      source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
   }
   else if (CLG_(current_state).collect)
      source_bbcc->ecounter_sum++;

   /* Force a new top context, will be set active by push_cxt() */
   CLG_(current_fn_stack).top--;
   CLG_(current_state).cxt = 0;
   caller = CLG_(get_fn_node)(bb);
   CLG_(push_cxt)( caller );

   if (!seen_before) {
      /* set rec array for source BBCC: this is at rec level 1 */
      source_bbcc->rec_array = new_recursion(caller->separate_recursions);
      source_bbcc->rec_array[0] = source_bbcc;

      CLG_ASSERT(source_bbcc->cxt == 0);
      source_bbcc->cxt = CLG_(current_state).cxt;
      insert_bbcc_into_hash(source_bbcc);
   }
   CLG_ASSERT(CLG_(current_state).bbcc);

   /* correct active counts */
   fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
   pactive = CLG_(get_fn_entry)(fn_number);
   (*pactive)--;

   /* This assertion is not correct for reentrant
    * signal handlers */
   /* CLG_ASSERT(*pactive == 0); */

   CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
   /* back to current context */
   CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
   CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
                         (Addr)-1, False);
   call_entry_up =
      &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
   /* assume this call is lasting since the last dump, or, for a
    * signal handler, since its call */
   if (CLG_(current_state).sig == 0)
      CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
                       CLG_(get_current_thread)()->lastdump_cost );
   else
      CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
}


/*
 * Helper function called at the start of each instrumented BB to set up
 * the pointer to the costs for the current thread/context/recursion level
 */
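/* Outline of the steps below:
 *  1. attribute costs of the BB executed last (including the fix for
 *     ignored functions, see commit message),
 *  2. classify the control transfer that led here (fall-through, jump,
 *     call, return), possibly rewriting jumps into calls,
 *  3. maintain Callgrind's shadow call stack and function context,
 *  4. select or create the BBCC for (BB, context, recursion level)
 *     and make it the current cost centre. */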

VG_REGPARM(1)
void CLG_(setup_bbcc)(BB* bb)
{
   BBCC *bbcc, *last_bbcc;
   Bool  call_emulation = False, delayed_push = False, skip = False;
   Addr sp;
   BB* last_bb;
   ThreadId tid;
   ClgJumpKind jmpkind;
   Bool isConditionalJump;
   Int passed = 0, csp;
   Bool ret_without_call = False;
   Int popcount_on_return = 1;

   CLG_DEBUG(3,"+ setup_bbcc(BB %#lx)\n", bb_addr(bb));

   /* This is needed because thread switches cannot reliably be tracked
    * with the callback CLG_(run_thread) alone: we otherwise have no way
    * to get the thread ID after a signal handler returns.
    * This could be removed again if that bug is fixed in Valgrind.
    * This is in the hot path but hopefully not too costly.
    */
   tid = VG_(get_running_tid)();
#if 1
   /* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid).
    * As this is on the hot path, we only call CLG_(switch_thread)(tid)
    * if tid differs from CLG_(current_tid).
    */
   if (UNLIKELY(tid != CLG_(current_tid)))
      CLG_(switch_thread)(tid);
#else
   CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
#endif

   sp = VG_(get_SP)(tid);
   last_bbcc = CLG_(current_state).bbcc;
   last_bb = last_bbcc ? last_bbcc->bb : 0;

   if (last_bb) {
      passed = CLG_(current_state).jmps_passed;
      CLG_ASSERT(passed <= last_bb->cjmp_count);
      jmpkind = last_bb->jmp[passed].jmpkind;
      isConditionalJump = (passed < last_bb->cjmp_count);

      if (CLG_(current_state).collect) {
         if (!CLG_(current_state).nonskipped) {
            last_bbcc->ecounter_sum++;
            last_bbcc->jmp[passed].ecounter++;
            if (!CLG_(clo).simulate_cache) {
               /* update Ir cost */
               UInt instr_count = last_bb->jmp[passed].instr+1;
               CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
            }
         }
         else {
            /* do not increment exe counter of BBs in skipped functions, as it
             * would fool dumping code */
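            /* Still update Ir costs, and additionally attribute them to
             * the non-ignored call site (nonskipped->skipped): otherwise,
             * e.g. PLT code would contribute no Ir cost when cache
             * simulation is off (see the commit message above). */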
            if (!CLG_(clo).simulate_cache) {
               /* update Ir cost */
               UInt instr_count = last_bb->jmp[passed].instr+1;
               CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
               CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
                  += instr_count;
            }
         }
      }

      CLG_DEBUGIF(4) {
         CLG_(print_execstate)(-2, &CLG_(current_state) );
         CLG_(print_bbcc_cost)(-2, last_bbcc);
      }
   }
   else {
      jmpkind = jk_None;
      isConditionalJump = False;
   }

   /* Manipulate JmpKind if needed, only using BB specific info */

   csp = CLG_(current_call_stack).sp;

   /* A return not matching the top call in our callstack is a jump */
   if ( (jmpkind == jk_Return) && (csp >0)) {
      Int csp_up = csp-1;
      call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);

      /* We have a real return if
       * - the stack pointer (SP) left the current stack frame, or
       * - SP has the same value as when reaching the current function
       *   and the address of this BB is the return address of the last
       *   call (we even allow to leave multiple frames if the SP stays
       *   the same and we find a matching return address)
       * The latter condition is needed because on PPC, SP can stay
       * the same over CALL=b(c)l / RET=b(c)lr boundaries
       */
      if (sp < top_ce->sp) popcount_on_return = 0;
      else if (top_ce->sp == sp) {
         while(1) {
            if (top_ce->ret_addr == bb_addr(bb)) break;
            if (csp_up>0) {
               csp_up--;
               top_ce = &(CLG_(current_call_stack).entry[csp_up]);
               if (top_ce->sp == sp) {
                  popcount_on_return++;
                  continue;
               }
            }
            popcount_on_return = 0;
            break;
         }
      }
      if (popcount_on_return == 0) {
         jmpkind = jk_Jump;
         ret_without_call = True;
      }
   }

   /* Should this jump be converted to call or pop/call ? */
   if (( jmpkind != jk_Return) &&
       ( jmpkind != jk_Call) && last_bb) {

      /* We simulate a JMP/Cont to be a CALL if
       * - the jump is into another ELF object or section kind, or
       * - the jump is to the first instruction of a function
       *   (tail recursion)
       */
      if (ret_without_call ||
          /* This is for detection of optimized tail recursion.
           * On PPC, this is only detected as a call when going to another
           * function. The problem is that on PPC it can go wrong
           * more easily (no stack frame setup needed)
           */
#if defined(VGA_ppc32)
          (bb->is_entry && (last_bb->fn != bb->fn)) ||
#else
          bb->is_entry ||
#endif
          (last_bb->sect_kind != bb->sect_kind) ||
          (last_bb->obj->number != bb->obj->number)) {

         CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n",
                   last_bb->fn->name, last_bb->obj->name,
                   bb->fn->name, bb->obj->name,
                   ret_without_call?" (RET w/o CALL)":"");

         if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) {

            call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);

            if (top_ce->jcc) {

               CLG_DEBUG(1," Pop on Jump!\n");

               /* change source for delayed push */
               CLG_(current_state).bbcc = top_ce->jcc->from;
               sp = top_ce->sp;
               passed = top_ce->jcc->jmp;
               CLG_(pop_call_stack)();
            }
            else {
               CLG_ASSERT(CLG_(current_state).nonskipped != 0);
            }
         }

         jmpkind = jk_Call;
         call_emulation = True;
      }
   }

   if (jmpkind == jk_Call)
      skip = CLG_(get_fn_node)(bb)->skip;

   CLG_DEBUGIF(1) {
      if (isConditionalJump)
         VG_(printf)("Cond-");
      switch(jmpkind) {
      case jk_None:   VG_(printf)("Fall-through"); break;
      case jk_Jump:   VG_(printf)("Jump"); break;
      case jk_Call:   VG_(printf)("Call"); break;
      case jk_Return: VG_(printf)("Return"); break;
      default:        tl_assert(0);
      }
      VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
                  last_bb ? bb_jmpaddr(last_bb) : 0,
                  bb_addr(bb), sp);
   }

   /* Handle CALL/RET and update context to get correct BBCC */

   if (jmpkind == jk_Return) {

      if ((csp == 0) ||
          ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
           ( *(CLG_(current_fn_stack).top-1)==0)) ) {

         /* On an empty call stack or at a signal separation marker,
          * a RETURN generates a call stack underflow.
          */
         handleUnderflow(bb);
         CLG_(pop_call_stack)();
      }
      else {
         CLG_ASSERT(popcount_on_return >0);
         CLG_(unwind_call_stack)(sp, popcount_on_return);
      }
   }
   else {
      Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
      if (unwind_count > 0) {
         /* if unwinding was done, this actually is a return */
         jmpkind = jk_Return;
      }

      if (jmpkind == jk_Call) {
         delayed_push = True;

         csp = CLG_(current_call_stack).sp;
         if (call_emulation && csp>0)
            sp = CLG_(current_call_stack).entry[csp-1].sp;

      }
   }

   /* Change to the new context if needed, taking delayed_push into account */
   if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
      CLG_(push_cxt)(CLG_(get_fn_node)(bb));
   }
   CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);

   /* If this BBCC was freshly instrumented, assign the current context */
   bbcc = CLG_(get_bbcc)(bb);
   if (bbcc->cxt == 0) {
      CLG_ASSERT(bbcc->rec_array == 0);

      bbcc->cxt = CLG_(current_state).cxt;
      bbcc->rec_array =
         new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
   }
   else {
      /* get BBCC with current context */

      /* first check LRU of last bbcc executed */

      if (last_bbcc) {
         bbcc = last_bbcc->lru_next_bbcc;
         if (bbcc &&
             ((bbcc->bb != bb) ||
              (bbcc->cxt != CLG_(current_state).cxt)))
            bbcc = 0;
      }
      else
         bbcc = 0;

      if (!bbcc)
         bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
      if (!bbcc)
         bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);

      bb->last_bbcc = bbcc;
   }

   /* save for fast lookup */
   if (last_bbcc)
      last_bbcc->lru_next_bbcc = bbcc;

   if ((*CLG_(current_fn_stack).top)->separate_recursions >1) {
      UInt level, idx;
      fn_node* top = *(CLG_(current_fn_stack).top);

      level = *CLG_(get_fn_entry)(top->number);

      if (delayed_push && !skip) {
         if (CLG_(clo).skip_direct_recursion) {
            /* a call was detected, which means that the source BB != 0 */
            CLG_ASSERT(CLG_(current_state).bbcc != 0);
            /* only increment rec. level if called from different function */
            if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
               level++;
         }
         else level++;
      }
      if (level > top->separate_recursions)
         level = top->separate_recursions;

      if (level == 0) {
         /* can only happen if instrumentation just was switched on */
         level = 1;
         *CLG_(get_fn_entry)(top->number) = 1;
      }

      idx = level -1;
      if (bbcc->rec_array[idx])
         bbcc = bbcc->rec_array[idx];
      else
         bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);

      CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
   }

   if (delayed_push) {
      if (!skip && CLG_(current_state).nonskipped) {
         /* a call from skipped to nonskipped */
         CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
         /* FIXME: take the real passed count from shadow stack */
         passed = CLG_(current_state).bbcc->bb->cjmp_count;
      }
      CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
                            bbcc, sp, skip);
   }

   if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {

      /* Handle conditional jumps followed, i.e. trace arcs.
       * This uses JCC structures, too */

      jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
      CLG_ASSERT(jcc != 0);
      // Change from default, and check if already changed
      if (jcc->jmpkind == jk_Call)
         jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
      else {
         // FIXME: Why can this fail?
         // CLG_ASSERT(jcc->jmpkind == jmpkind);
      }

      jcc->call_counter++;
      if (isConditionalJump)
         CLG_(stat).jcnd_counter++;
      else
         CLG_(stat).jump_counter++;
   }

   CLG_(current_state).bbcc = bbcc;
   // needed for log_* handlers called in this BB
   CLG_(bb_base)   = bb->obj->offset + bb->offset;
   CLG_(cost_base) = bbcc->cost;

   CLG_DEBUGIF(1) {
      VG_(printf)("  ");
      CLG_(print_bbcc_fn)(bbcc);
      VG_(printf)("\n");
   }

   CLG_DEBUG(3,"- setup_bbcc (BB %#lx): Cost %p (Len %d), Instrs %d (Len %d)\n",
             bb_addr(bb), bbcc->cost, bb->cost_count,
             bb->instr_count, bb->instr_len);
   CLG_DEBUGIF(3)
      CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
   CLG_DEBUG(3,"\n");

   CLG_(stat).bb_executions++;
}