mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 18:56:10 +00:00
(compute_BBCC_array_size): Assertion `!is_STORE && !is_FPU_R && !is_FPU_W' failed. I don't know if this is correct, but I can run kate having done so. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@171
1070 lines
35 KiB
C
1070 lines
35 KiB
C
/*--------------------------------------------------------------------*/
|
|
/*--- The cache simulation framework: instrumentation, recording ---*/
|
|
/*--- and results printing. ---*/
|
|
/*--- vg_cachesim.c ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of Valgrind, an x86 protected-mode emulator
|
|
designed for debugging and profiling binaries on x86-Unixes.
|
|
|
|
Copyright (C) 2000-2002 Julian Seward
|
|
jseward@acm.org
|
|
Julian_Seward@muraroa.demon.co.uk
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file LICENSE.
|
|
*/
|
|
|
|
#include <string.h>
|
|
|
|
#include "vg_include.h"
|
|
|
|
#include "vg_cachesim_L2.c"
|
|
#include "vg_cachesim_I1.c"
|
|
#include "vg_cachesim_D1.c"
|
|
|
|
|
|
/* Upper bound used when sanity-checking the size of a single x86
 * instruction; taken from the IA-32 Intel Architecture Software
 * Developer's Manual, Vol 2. */
#define MAX_x86_INSTR_SIZE  16

/* Capacities (in bytes) of the string buffers used in this file. */
#define FILENAME_LEN        256
#define FN_NAME_LEN         256
#define BUF_LEN             512
#define COMMIFY_BUF_LEN     128
#define RESULTS_BUF         128
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Output file related stuff ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* Name of the file that the cache simulation results are written to. */
#define OUT_FILE "cachegrind.out"
|
|
|
|
static void file_err()
|
|
{
|
|
VG_(message)(Vg_UserMsg,
|
|
"FATAL: can't open cache simulation output file `%s'",
|
|
OUT_FILE );
|
|
VG_(exit)(1);
|
|
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Cost center types, operations ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
typedef struct _CC CC;
|
|
struct _CC {
|
|
ULong a;
|
|
ULong m1;
|
|
ULong m2;
|
|
};
|
|
|
|
/* Zero all three counters of the cost centre `cc'. */
static __inline__ void initCC(CC* cc)
{
   cc->a = cc->m1 = cc->m2 = 0;
}
|
|
|
|
|
|
/* Value stored in the `tag' byte of a cost centre.  INSTR_CC marks an
 * instruction-only centre (iCC); the other three mark an instruction+data
 * centre (idCC) for a read, a write, or a read-modify-write. */
typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
|
|
|
|
/* Instruction-level cost-centres. The typedefs for these structs are in
|
|
* vg_include.c
|
|
*
|
|
* WARNING: the 'tag' field *must* be the first byte of both CC types.
|
|
* the 'instr_addr' *must* be the second word of both CC types.
|
|
*
|
|
* This is because we use them when we don't know what type of CC we're dealing
|
|
* with.
|
|
*/
|
|
struct _iCC {
|
|
/* word 1 */
|
|
UChar tag;
|
|
UChar instr_size;
|
|
|
|
/* words 2+ */
|
|
Addr instr_addr;
|
|
CC I;
|
|
};
|
|
|
|
struct _idCC {
|
|
/* word 1 */
|
|
UChar tag;
|
|
UChar instr_size;
|
|
UChar data_size;
|
|
|
|
/* words 2+ */
|
|
Addr instr_addr;
|
|
CC I;
|
|
CC D;
|
|
};
|
|
|
|
/* Set up `cc' as an instruction-only cost centre for the instruction at
 * `instr_addr' of size `instr_size', with all counters zeroed. */
static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
{
   initCC(&cc->I);
   cc->instr_addr = instr_addr;
   cc->instr_size = instr_size;
   cc->tag        = INSTR_CC;
}
|
|
|
|
/* Set up `cc' as an instruction+data cost centre of kind `X_CC' for the
 * instruction at `instr_addr', recording the instruction and data sizes
 * and zeroing both sets of counters. */
static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
                      UInt instr_size, UInt data_size)
{
   initCC(&cc->I);
   initCC(&cc->D);
   cc->instr_addr = instr_addr;
   cc->data_size  = data_size;
   cc->instr_size = instr_size;
   cc->tag        = X_CC;
}
|
|
|
|
/* Format one output line for an instruction-only cost centre into `buf':
 * the line number `ln' followed by the three instruction counters. */
static __inline__ void sprint_iCC(Char buf[BUF_LEN], UInt ln, iCC* cc)
{
   CC* instr = &cc->I;

   VG_(sprintf)(buf, "%u %llu %llu %llu\n",
                ln, instr->a, instr->m1, instr->m2);
}
|
|
|
|
static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], UInt ln,
|
|
idCC* cc)
|
|
{
|
|
VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu\n",
|
|
ln, cc->I.a, cc->I.m1, cc->I.m2,
|
|
cc->D.a, cc->D.m1, cc->D.m2/*, cc->instr_addr*/);
|
|
}
|
|
|
|
/* Format one output line for a write cost centre into `buf'.  The three
 * data-read columns are emitted as "." placeholders so that the data
 * counters land in the write columns. */
static __inline__ void sprint_write_CC(Char buf[BUF_LEN], UInt ln, idCC* cc)
{
   CC* instr = &cc->I;
   CC* data  = &cc->D;

   VG_(sprintf)(buf, "%u %llu %llu %llu . . . %llu %llu %llu\n",
                ln, instr->a, instr->m1, instr->m2,
                data->a, data->m1, data->m2);
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- BBCC hash table stuff ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* The BBCC table is a three-level structure:
 *    hash(filename, hash(fn_name, hash(BBCCs)))
 * Every level is a separately chained hash table.  The bucket counts
 * below work fairly well for Konqueror. */
#define N_FILE_ENTRIES  251
#define N_FN_ENTRIES    53
#define N_BBCC_ENTRIES  37
|
|
|
|
/* The cost centres for a basic block are stored in a contiguous array.
|
|
* They are distinguishable by their tag field. */
|
|
typedef struct _BBCC BBCC;
|
|
struct _BBCC {
|
|
Addr orig_addr;
|
|
UInt array_size; /* byte-size of variable length array */
|
|
BBCC* next;
|
|
Addr array[0]; /* variable length array */
|
|
};
|
|
|
|
typedef struct _fn_node fn_node;
|
|
struct _fn_node {
|
|
Char* fn_name;
|
|
BBCC* BBCCs[N_BBCC_ENTRIES];
|
|
fn_node* next;
|
|
};
|
|
|
|
typedef struct _file_node file_node;
|
|
struct _file_node {
|
|
Char* filename;
|
|
fn_node* fns[N_FN_ENTRIES];
|
|
file_node* next;
|
|
};
|
|
|
|
/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
|
|
file_node *BBCC_table[N_FILE_ENTRIES];
|
|
|
|
Int distinct_files = 0;
|
|
Int distinct_fns = 0;
|
|
|
|
Int distinct_instrs = 0;
|
|
Int full_debug_BBs = 0;
|
|
Int file_line_debug_BBs = 0;
|
|
Int fn_name_debug_BBs = 0;
|
|
Int no_debug_BBs = 0;
|
|
|
|
Int BB_retranslations = 0;
|
|
|
|
static void init_BBCC_table()
|
|
{
|
|
Int i;
|
|
for (i = 0; i < N_FILE_ENTRIES; i++)
|
|
BBCC_table[i] = NULL;
|
|
}
|
|
|
|
/* Look up the file name and function name for `instr_addr', storing them
 * in `filename' and `fn_name'.  Whichever of the two cannot be found is
 * set to "???".  One of the four *_debug_BBs counters is bumped on every
 * call, according to which of the names were found. */
static void get_file_fn_names(Addr instr_addr, Char filename[FILENAME_LEN],
                              Char fn_name[FN_NAME_LEN])
{
   UInt unused_line;
   Bool demangle_off = False;
   Bool have_file, have_fn;

   have_file = VG_(what_line_is_this)(instr_addr, filename,
                                      FILENAME_LEN, &unused_line);
   have_fn   = VG_(what_fn_is_this)(demangle_off, instr_addr,
                                    fn_name, FN_NAME_LEN);

   if (have_file) {
      if (have_fn) {
         full_debug_BBs++;
      } else {
         file_line_debug_BBs++;
         VG_(strcpy)(fn_name, "???");
      }
   } else {
      if (have_fn) {
         fn_name_debug_BBs++;
         VG_(strcpy)(filename, "???");
      } else {
         no_debug_BBs++;
         VG_(strcpy)(filename, "???");
         VG_(strcpy)(fn_name,  "???");
      }
   }
}
|
|
|
|
/* Forward declaration. */
|
|
static Int compute_BBCC_array_size(UCodeBlock* cb);
|
|
|
|
static __inline__
|
|
file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
|
|
{
|
|
Int i;
|
|
file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
|
|
new->filename = VG_(strdup)(VG_AR_PRIVATE, filename);
|
|
for (i = 0; i < N_FN_ENTRIES; i++) {
|
|
new->fns[i] = NULL;
|
|
}
|
|
new->next = next;
|
|
return new;
|
|
}
|
|
|
|
static __inline__
|
|
fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
|
|
{
|
|
Int i;
|
|
fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
|
|
new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
|
|
for (i = 0; i < N_BBCC_ENTRIES; i++) {
|
|
new->BBCCs[i] = NULL;
|
|
}
|
|
new->next = next;
|
|
return new;
|
|
}
|
|
|
|
static __inline__
|
|
BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
|
|
{
|
|
Int BBCC_array_size = compute_BBCC_array_size(cb);
|
|
BBCC* new;
|
|
|
|
new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
|
|
new->orig_addr = bb_orig_addr;
|
|
new->array_size = BBCC_array_size;
|
|
new->next = next;
|
|
|
|
return new;
|
|
}
|
|
|
|
#define HASH_CONSTANT 256
|
|
|
|
static UInt hash(Char *s, UInt table_size)
|
|
{
|
|
int hash_value = 0;
|
|
for ( ; *s; s++)
|
|
hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
|
|
return hash_value;
|
|
}
|
|
|
|
/* Do a three step traversal: by filename, then fn_name, then instr_addr.
|
|
* In all cases prepends new nodes to their chain. Returns a pointer to the
|
|
* cost centre. Also sets BB_seen_before by reference.
|
|
*/
|
|
static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
|
|
Bool *BB_seen_before)
|
|
{
|
|
file_node *curr_file_node;
|
|
fn_node *curr_fn_node;
|
|
BBCC *curr_BBCC;
|
|
Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
|
|
UInt filename_hash, fnname_hash, BBCC_hash;
|
|
|
|
get_file_fn_names(bb_orig_addr, filename, fn_name);
|
|
|
|
VGP_PUSHCC(VgpCacheGetBBCC);
|
|
filename_hash = hash(filename, N_FILE_ENTRIES);
|
|
curr_file_node = BBCC_table[filename_hash];
|
|
while (NULL != curr_file_node &&
|
|
strcmp(filename, curr_file_node->filename) != 0) {
|
|
curr_file_node = curr_file_node->next;
|
|
}
|
|
if (NULL == curr_file_node) {
|
|
BBCC_table[filename_hash] = curr_file_node =
|
|
new_file_node(filename, BBCC_table[filename_hash]);
|
|
distinct_files++;
|
|
}
|
|
|
|
fnname_hash = hash(fn_name, N_FN_ENTRIES);
|
|
curr_fn_node = curr_file_node->fns[fnname_hash];
|
|
while (NULL != curr_fn_node &&
|
|
strcmp(fn_name, curr_fn_node->fn_name) != 0) {
|
|
curr_fn_node = curr_fn_node->next;
|
|
}
|
|
if (NULL == curr_fn_node) {
|
|
curr_file_node->fns[fnname_hash] = curr_fn_node =
|
|
new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
|
|
distinct_fns++;
|
|
}
|
|
|
|
BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
|
|
curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
|
|
while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
|
|
curr_BBCC = curr_BBCC->next;
|
|
}
|
|
if (curr_BBCC == NULL) {
|
|
curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
|
|
new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
|
|
*BB_seen_before = False;
|
|
|
|
} else {
|
|
vg_assert(bb_orig_addr == curr_BBCC->orig_addr);
|
|
vg_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
|
|
if (VG_(clo_verbosity) > 1) {
|
|
VG_(message)(Vg_DebugMsg, "BB retranslation, retrieving from BBCC table");
|
|
}
|
|
*BB_seen_before = True;
|
|
BB_retranslations++;
|
|
}
|
|
VGP_POPCC;
|
|
return curr_BBCC;
|
|
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Cache simulation instrumentation phase ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* Short local aliases for the UCode construction helpers. */
#define uInstr1   VG_(newUInstr1)
#define uInstr2   VG_(newUInstr2)
#define uInstr3   VG_(newUInstr3)
#define dis       VG_(disassemble)
#define uLiteral  VG_(setLiteralField)
#define newTemp   VG_(getNewTemp)
|
|
|
|
/* Counting pass over the UCode of a basic block: return the number of
 * bytes needed to hold one cost centre per x86 instruction.
 *
 * An x86 instruction ends at an INCEIP or at an unconditional JMP
 * (conditional JMPs are skipped).  The instruction gets an idCC if any
 * LOAD, STORE, FPU_R or FPU_W was seen since the previous end marker,
 * otherwise an iCC. */
static Int compute_BBCC_array_size(UCodeBlock* cb)
{
   Bool saw_LOAD  = False, saw_STORE = False;
   Bool saw_FPU_R = False, saw_FPU_W = False;
   Int  total = 0;
   Int  i;

   for (i = 0; i < cb->used; i++) {
      UInstr* u = &cb->instrs[i];

      /* VG_(ppUInstr)(0, u); */

      switch (u->opcode) {

         case JMP:
            if (u->cond != CondAlways)
               break;
            /* unconditional JMP ends the instruction: fall through */

         case INCEIP:
            if (saw_LOAD || saw_STORE || saw_FPU_R || saw_FPU_W)
               total += sizeof(idCC);
            else
               total += sizeof(iCC);
            saw_LOAD = saw_STORE = saw_FPU_R = saw_FPU_W = False;
            break;

         case LOAD:
            /* Two LDBs are possible for a single instruction, and a
             * LOAD may follow a STORE, so only FPU refs are excluded. */
            vg_assert(!saw_FPU_R && !saw_FPU_W);
            saw_LOAD = True;
            break;

         case STORE:
            /* Multiple STOREs are possible for 'pushal' */
            vg_assert(!saw_FPU_R && !saw_FPU_W);
            saw_STORE = True;
            break;

         case FPU_R:
            vg_assert(!saw_LOAD && !saw_STORE && !saw_FPU_R && !saw_FPU_W);
            saw_FPU_R = True;
            break;

         case FPU_W:
            vg_assert(!saw_LOAD && !saw_STORE && !saw_FPU_R && !saw_FPU_W);
            saw_FPU_W = True;
            break;

         default:
            break;
      }
   }

   return total;
}
|
|
|
|
/* Sentinel meaning "no data access seen yet".  A large positive value
 * is used instead of e.g. -1 because it is stored in a UInt. */
#define INVALID_DATA_SIZE 999999
|
|
|
|
/* Instrument the UCode of the basic block `cb_in', whose original code
 * starts at `orig_addr', and return a new code block; `cb_in' is freed.
 *
 * What this is all about: we want to instrument each x86 instruction
 * translation.  The end of these are marked in three ways.  The three
 * ways, and the way we instrument them, are as follows:
 *
 *    1. UCode, INCEIP         --> UCode, Instrumentation, INCEIP
 *    2. UCode, Juncond        --> UCode, Instrumentation, Juncond
 *    3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
 *
 * The instrumentation goes before the jumps so that it is always
 * executed.  It need not go before the INCEIP, but does so for
 * consistency.
 *
 * Junconds are always the last instruction in a basic block.  Jconds are
 * always the 2nd last, and must be followed by a Juncond.  This is
 * checked with assertions.
 *
 * Note that in VG_(disBB) the `extra4b' field of the first occurring JMP
 * in a block was patched with the size of its x86 instruction.  That
 * value is used here.
 *
 * JIFZ needs no special treatment; unlike JMPs, JIFZ occurs in the
 * middle of a BB and gets an INCEIP after it.
 *
 * The instrumentation is just a call to the appropriate helper function,
 * passing it the address of the instruction's CC (and, for memory
 * instructions, the data address).
 */
UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
{
   UCodeBlock* cb;
   UInstr*     u_in;
   BBCC*       BBCC_node;
   Addr        BBCC_ptr0, BBCC_ptr;
   Addr        instr_addr = orig_addr;
   Int         i;
   Int         t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
   Int         CC_size = -1;       /* Shut gcc warnings up */
   Int         helper  = -1;       /* Shut gcc warnings up */
   UInt        instr_size, data_size = INVALID_DATA_SIZE;
   UInt        stack_used;
   Bool        BB_seen_before       = False;
   Bool        prev_instr_was_Jcond = False;

   /* Get the BBCC (creating it if necessary -- that requires a counting
    * pass over the BB the first time it is seen), and point to the start
    * of its cost-centre array. */
   BBCC_node = get_BBCC(orig_addr, cb_in, &BB_seen_before);
   BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);

   cb = VG_(allocCodeBlock)();
   cb->nextTemp = cb_in->nextTemp;

   t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;

   for (i = 0; i < cb_in->used; i++) {
      u_in = &cb_in->instrs[i];

      //VG_(ppUInstr)(0, u_in);

      if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);

      switch (u_in->opcode) {

         case INCEIP:
            instr_size = u_in->val1;
            goto end_of_x86_instr;

         case JMP:
            if (u_in->cond != CondAlways) {
               vg_assert(i+2 == cb_in->used);  /* 2nd last instr in block */
               prev_instr_was_Jcond = True;
            } else {
               vg_assert(i+1 == cb_in->used);

               /* Don't instrument if previous instr was a Jcond. */
               if (prev_instr_was_Jcond) {
                  vg_assert(0 == u_in->extra4b);
                  VG_(copyUInstr)(cb, u_in);
                  break;
               }
               prev_instr_was_Jcond = False;
            }

            /* Ah, the first JMP... instrument, please. */
            instr_size = u_in->extra4b;
            /* fall through into the shared end-of-instruction code */

         /* Shared code that is executed at the end of an x86 translation
          * block, marked by either an INCEIP or an unconditional JMP. */
         end_of_x86_instr: {
            Bool does_read  = (INVALID_TEMPREG != t_read_addr);
            Bool does_write = (INVALID_TEMPREG != t_write_addr);

            /* Initialise the CC in the BBCC array appropriately if it
             * hasn't been initialised before, then emit a call to the
             * appropriate sim function, passing it the CC address.
             * CALLM_S/CALLM_E aren't required here; by this point the
             * checking related to them has already happened. */
            stack_used = 0;

            vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
            vg_assert(0 != instr_addr);

            /* Save the caller-save registers before we push our args */
            uInstr1(cb, PUSH, 4, RealReg, R_EAX);
            uInstr1(cb, PUSH, 4, RealReg, R_ECX);
            uInstr1(cb, PUSH, 4, RealReg, R_EDX);

            if (!does_read && !does_write) {
               /* No memory reference: plain instruction CC. */
               iCC* CC_ptr = (iCC*)(BBCC_ptr);

               vg_assert(INVALID_DATA_SIZE == data_size);
               CC_size = sizeof(iCC);
               if (!BB_seen_before)
                  init_iCC(CC_ptr, instr_addr, instr_size);

               helper = VGOFF_(cachesim_log_non_mem_instr);

            } else {
               CC_type X_CC;
               idCC*   CC_ptr = (idCC*)(BBCC_ptr);

               vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
                         8 == data_size || 10 == data_size);

               CC_size = sizeof(idCC);
               helper  = VGOFF_(cachesim_log_mem_instr);

               if (does_read && does_write) {
                  X_CC        = MOD_CC;
                  t_data_addr = t_read_addr;
               } else if (does_read) {
                  X_CC        = READ_CC;
                  t_data_addr = t_read_addr;
               } else {
                  X_CC        = WRITE_CC;
                  t_data_addr = t_write_addr;
               }

               if (!BB_seen_before)
                  init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);

               /* 2nd arg: data addr */
               uInstr1(cb, PUSH, 4, TempReg, t_data_addr);
               stack_used += 4;
            }

            /* 1st arg: CC addr */
            t_CC_addr = newTemp(cb);
            uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
            uLiteral(cb, BBCC_ptr);
            uInstr1(cb, PUSH, 4, TempReg, t_CC_addr);
            stack_used += 4;

            /* Call function and return. */
            uInstr1(cb, CALLM, 0, Lit16, helper);
            uInstr1(cb, CLEAR, 0, Lit16, stack_used);

            /* Restore the caller-save registers now the call is done */
            uInstr1(cb, POP, 4, RealReg, R_EDX);
            uInstr1(cb, POP, 4, RealReg, R_ECX);
            uInstr1(cb, POP, 4, RealReg, R_EAX);

            VG_(copyUInstr)(cb, u_in);

            /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
            BBCC_ptr   += CC_size;
            instr_addr += instr_size;
            t_CC_addr = t_read_addr = t_write_addr =
                        t_data_addr = INVALID_TEMPREG;
            data_size = INVALID_DATA_SIZE;
            break;
         }

         /* For memory-ref instrs, copy the data_addr into a temporary to
          * be passed to the cachesim_log function at the end of the
          * instruction. */
         case LOAD:
            t_read_addr = newTemp(cb);
            uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
            data_size = u_in->size;
            VG_(copyUInstr)(cb, u_in);
            break;

         case FPU_R:
            t_read_addr = newTemp(cb);
            uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
            data_size = u_in->size;
            VG_(copyUInstr)(cb, u_in);
            break;

         /* t_write_addr must be set even for mod instructions; that is
          * how the code above determines whether the instruction does a
          * write.  Without it a mod instruction would look like a read.
          * For a mod instruction the MOV is redundant, but it's not
          * expensive and mod instructions are rare anyway. */
         case STORE:
         case FPU_W:
            t_write_addr = newTemp(cb);
            uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
            data_size = u_in->size;
            VG_(copyUInstr)(cb, u_in);
            break;

         /* These are not copied into the output block. */
         case NOP: case CALLM_E: case CALLM_S:
            break;

         default:
            VG_(copyUInstr)(cb, u_in);
            break;
      }
   }

   /* Just check everything looks ok */
   vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);

   VG_(freeCodeBlock)(cb_in);
   return cb;
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Cache simulation stuff ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* Total reads/writes/misses. Calculated during CC traversal at the end. */
|
|
static CC Ir_total;
|
|
static CC Dr_total;
|
|
static CC Dw_total;
|
|
|
|
void VG_(init_cachesim)(void)
|
|
{
|
|
/* Make sure the output file can be written. */
|
|
Int fd = VG_(open_write)(OUT_FILE);
|
|
if (-1 == fd) {
|
|
fd = VG_(create_and_write)(OUT_FILE);
|
|
if (-1 == fd) {
|
|
file_err();
|
|
}
|
|
}
|
|
VG_(close)(fd);
|
|
|
|
initCC(&Ir_total);
|
|
initCC(&Dr_total);
|
|
initCC(&Dw_total);
|
|
|
|
cachesim_I1_initcache();
|
|
cachesim_D1_initcache();
|
|
cachesim_L2_initcache();
|
|
|
|
init_BBCC_table();
|
|
}
|
|
|
|
/* Helper invoked by instrumented code for an instruction with no data
 * reference: simulate the instruction fetch and count one access in the
 * cost centre `cc'. */
void VG_(cachesim_log_non_mem_instr)(iCC* cc)
{
   CC* instr = &cc->I;

   //VG_(printf)("sim  I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
   //            cc, cc->instr_addr, cc->instr_size)
   VGP_PUSHCC(VgpCacheSimulate);
   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &instr->m1, &instr->m2);
   instr->a += 1;
   VGP_POPCC;
}
|
|
|
|
/* Helper invoked by instrumented code for an instruction that references
 * memory at `data_addr': simulate the instruction fetch and then the data
 * access, counting one access for each in the cost centre `cc'. */
void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
{
   CC* instr = &cc->I;
   CC* data  = &cc->D;

   //VG_(printf)("sim  D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
   VGP_PUSHCC(VgpCacheSimulate);

   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &instr->m1, &instr->m2);
   instr->a += 1;

   cachesim_D1_doref(data_addr, cc->data_size, &data->m1, &data->m2);
   data->a += 1;

   VGP_POPCC;
}
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- Printing of output file and summary stats ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/* Return the source line number for `instr_addr', or 0 if the lookup
 * fails.  The file name produced by the lookup is discarded. */
int get_line_num(Addr instr_addr)
{
   Char filename[FILENAME_LEN] = "???";
   UInt line_num;

   if (!VG_(what_line_is_this)(instr_addr, filename,
                               FILENAME_LEN, &line_num))
      return 0;

   return line_num;
}
|
|
|
|
/* Write every cost centre of `BBCC_node' to `fd', one line each, and add
 * its counters to the Ir/Dr/Dw grand totals.
 *
 * `first_instr_fl' and `first_instr_fn' are the file and function names
 * under which the block is filed.  If an instruction belongs to a
 * different file, an "fi=" line is written before it; if the block ends
 * in a different file from the one it is filed under, an "fe=" line
 * switches back afterwards.  distinct_instrs is bumped per cost centre. */
static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
                        Char *first_instr_fn)
{
   Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4];
   Addr base   = (Addr)(BBCC_node->array);
   Addr cursor = base;
   UInt line_num;

/* Add the three counters of cost centre `cc' to `total'. */
#define ADD_CC(total, cc)         \
   do {                           \
      (total).a  += (cc).a;       \
      (total).m1 += (cc).m1;      \
      (total).m2 += (cc).m2;      \
   } while (0)

   VG_(write)(fd, (void*)"\n", 1);

   VG_(strcpy)(curr_file, first_instr_fl);

   while (cursor - base < BBCC_node->array_size) {

      Char fl_buf[FILENAME_LEN];
      Char fn_buf[FN_NAME_LEN];

      /* The CC is treated as an iCC to read its tag and instr_addr.
       * That is fine because both CC types keep those fields at the
       * same position; once the tag is known we cast appropriately. */
      iCC*  as_iCC  = (iCC*)cursor;
      idCC* as_idCC = (idCC*)cursor;
      Addr  instr_addr = as_iCC->instr_addr;

      get_file_fn_names(instr_addr, fl_buf, fn_buf);

      /* Allow for filename switching in the middle of a BB; if this
       * happens, the new filename must be printed. */
      if (0 != strcmp(fl_buf, curr_file)) {
         VG_(strcpy)(curr_file, fl_buf);
         VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
         VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
      }

      switch (as_iCC->tag) {

         case INSTR_CC:
            line_num = get_line_num(as_iCC->instr_addr);
            sprint_iCC(buf, line_num, as_iCC);
            ADD_CC(Ir_total, as_iCC->I);
            cursor += sizeof(iCC);
            break;

         case READ_CC:
         case MOD_CC:
            line_num = get_line_num(as_idCC->instr_addr);
            sprint_read_or_mod_CC(buf, line_num, as_idCC);
            ADD_CC(Ir_total, as_idCC->I);
            ADD_CC(Dr_total, as_idCC->D);
            cursor += sizeof(idCC);
            break;

         case WRITE_CC:
            line_num = get_line_num(as_idCC->instr_addr);
            sprint_write_CC(buf, line_num, as_idCC);
            ADD_CC(Ir_total, as_idCC->I);
            ADD_CC(Dw_total, as_idCC->D);
            cursor += sizeof(idCC);
            break;

         default:
            VG_(panic)("Unknown CC type in fprint_BBCC()\n");
            break;
      }
      distinct_instrs++;

      /* If the function name for this instruction doesn't match that of
       * the first instruction in the BB, print out a warning. */
      if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
         VG_(printf)("Mismatched function names\n");
         VG_(printf)(" filenames: BB:%s, instr:%s; "
                     "fn_names: BB:%s, instr:%s; "
                     "line: %d\n",
                     first_instr_fl, fl_buf,
                     first_instr_fn, fn_buf,
                     line_num);
      }

      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   }

#undef ADD_CC

   /* If we switched filenames in the middle of the BB without switching
    * back, switch back now because the subsequent BB may be relying on
    * falling under the original file name. */
   if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
      VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
      VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
   }
   //VG_(write)(fd, (void*)"#}\n", 3);

   vg_assert(cursor - base == BBCC_node->array_size);
}
|
|
|
|
static void fprint_BBCC_table_and_calc_totals(Int client_argc,
|
|
Char** client_argv)
|
|
{
|
|
Int fd;
|
|
Char buf[BUF_LEN];
|
|
file_node *curr_file_node;
|
|
fn_node *curr_fn_node;
|
|
BBCC *curr_BBCC;
|
|
Int i,j,k;
|
|
|
|
VGP_PUSHCC(VgpCacheDump);
|
|
fd = VG_(open_write)(OUT_FILE);
|
|
if (-1 == fd) { file_err(); }
|
|
|
|
/* "desc:" lines (giving I1/D1/L2 cache configuration) */
|
|
VG_(write)(fd, (void*)I1_desc_line, VG_(strlen)(I1_desc_line));
|
|
VG_(write)(fd, (void*)D1_desc_line, VG_(strlen)(D1_desc_line));
|
|
VG_(write)(fd, (void*)L2_desc_line, VG_(strlen)(L2_desc_line));
|
|
|
|
/* "cmd:" line */
|
|
VG_(strcpy)(buf, "cmd:");
|
|
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
|
|
for (i = 0; i < client_argc; i++) {
|
|
VG_(sprintf)(buf, " %s", client_argv[i]);
|
|
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
|
|
}
|
|
/* "events:" line */
|
|
VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
|
|
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
|
|
|
|
/* Six loops here: three for the hash table arrays, and three for the
|
|
* chains hanging off the hash table arrays. */
|
|
for (i = 0; i < N_FILE_ENTRIES; i++) {
|
|
curr_file_node = BBCC_table[i];
|
|
while (curr_file_node != NULL) {
|
|
VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
|
|
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
|
|
|
|
for (j = 0; j < N_FN_ENTRIES; j++) {
|
|
curr_fn_node = curr_file_node->fns[j];
|
|
while (curr_fn_node != NULL) {
|
|
VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
|
|
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
|
|
|
|
for (k = 0; k < N_BBCC_ENTRIES; k++) {
|
|
curr_BBCC = curr_fn_node->BBCCs[k];
|
|
while (curr_BBCC != NULL) {
|
|
fprint_BBCC(fd, curr_BBCC,
|
|
|
|
curr_file_node->filename,
|
|
curr_fn_node->fn_name);
|
|
|
|
curr_BBCC = curr_BBCC->next;
|
|
}
|
|
}
|
|
curr_fn_node = curr_fn_node->next;
|
|
}
|
|
}
|
|
curr_file_node = curr_file_node->next;
|
|
}
|
|
}
|
|
|
|
/* Summary stats must come after rest of table, since we calculate them
|
|
* during traversal. */
|
|
VG_(sprintf)(buf, "summary: "
|
|
"%llu %llu %llu "
|
|
"%llu %llu %llu "
|
|
"%llu %llu %llu\n",
|
|
Ir_total.a, Ir_total.m1, Ir_total.m2,
|
|
Dr_total.a, Dr_total.m1, Dr_total.m2,
|
|
Dw_total.a, Dw_total.m1, Dw_total.m2);
|
|
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
|
|
VG_(close)(fd);
|
|
}
|
|
|
|
/* Adds commas to ULong, right justifying in a field field_width wide, returns
|
|
* the string in buf. */
|
|
Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
|
|
{
|
|
int len, n_commas, i, j, new_len, space;
|
|
|
|
VG_(sprintf)(buf, "%lu", n);
|
|
len = VG_(strlen)(buf);
|
|
n_commas = (len - 1) / 3;
|
|
new_len = len + n_commas;
|
|
space = field_width - new_len;
|
|
|
|
/* Allow for printing a number in a field_width smaller than it's size */
|
|
if (space < 0) space = 0;
|
|
|
|
/* Make j = -1 because we copy the '\0' before doing the numbers in groups
|
|
* of three. */
|
|
for (j = -1, i = len ; i >= 0; i--) {
|
|
buf[i + n_commas + space] = buf[i];
|
|
|
|
if (3 == ++j) {
|
|
j = 0;
|
|
n_commas--;
|
|
buf[i + n_commas + space] = ',';
|
|
}
|
|
}
|
|
/* Right justify in field. */
|
|
for (i = 0; i < space; i++) buf[i] = ' ';
|
|
return new_len;
|
|
}
|
|
|
|
/* Format the fixed-point value n/pow as a percentage ("<int>.<frac>%"),
 * right-justified in a field `field_width' characters wide, into `buf'.
 * The callers in this file pass pow = 10 or pow = 100, i.e. one or two
 * fractional digits.
 *
 * Two details matter for correctness:
 *  - The fractional part is zero-padded to the number of digits implied
 *    by `pow', so n = 105, pow = 100 gives "1.05%" and not "1.5%".
 *    Padding is only added for a non-negative remainder; otherwise the
 *    remainder is printed as-is.
 *  - If the text is wider than `field_width' no padding is added.
 *    A negative shift would make the justification loop write before the
 *    start of `buf'.
 */
void percentify(Int n, Int pow, Int field_width, char buf[])
{
   int i, len, space, frac, scale;

   /* Integer part and decimal point. */
   VG_(sprintf)(buf, "%d.", n / pow);
   len  = VG_(strlen)(buf);
   frac = n % pow;

   /* Leading zeros of the fractional part, one for every power of ten
    * below `pow' that the remainder does not reach. */
   for (scale = pow / 10; scale > 1 && frac >= 0 && frac < scale; scale /= 10)
      buf[len++] = '0';

   /* Remaining fractional digits and the percent sign. */
   VG_(sprintf)(buf + len, "%d%%", frac);
   len = VG_(strlen)(buf);

   space = field_width - len;
   if (space < 0) space = 0;     /* field narrower than the text */

   /* Right justify in field */
   for (i = len; i >= 0; i--) buf[i + space] = buf[i];
   for (i = 0; i < space; i++) buf[i] = ' ';
}
|
|
|
|
void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
|
|
{
|
|
CC D_total;
|
|
ULong L2_total_m, L2_total_mr, L2_total_mw;
|
|
char buf1[RESULTS_BUF],
|
|
buf2[RESULTS_BUF],
|
|
buf3[RESULTS_BUF];
|
|
Int l1, l2, l3;
|
|
Int p;
|
|
|
|
fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
|
|
|
|
/* I cache results. Use the I_refs value to determine the first column
|
|
* width. */
|
|
l1 = commify(Ir_total.a, 0, buf1);
|
|
VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
|
|
|
|
commify(Ir_total.m1, l1, buf1);
|
|
VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
|
|
|
|
commify(Ir_total.m2, l1, buf1);
|
|
VG_(message)(Vg_UserMsg, "L2 misses: %s", buf1);
|
|
|
|
p = 100;
|
|
|
|
percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
|
|
VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
|
|
|
|
percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
|
|
VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
|
|
VG_(message)(Vg_UserMsg, "");
|
|
|
|
/* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
|
|
* width of columns 2 & 3. */
|
|
D_total.a = Dr_total.a + Dw_total.a;
|
|
D_total.m1 = Dr_total.m1 + Dw_total.m1;
|
|
D_total.m2 = Dr_total.m2 + Dw_total.m2;
|
|
|
|
commify( D_total.a, 0, buf1);
|
|
l2 = commify(Dr_total.a, 0, buf2);
|
|
l3 = commify(Dw_total.a, 0, buf3);
|
|
VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
|
|
buf1, buf2, buf3);
|
|
|
|
commify( D_total.m1, l1, buf1);
|
|
commify(Dr_total.m1, l2, buf2);
|
|
commify(Dw_total.m1, l3, buf3);
|
|
VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
|
|
buf1, buf2, buf3);
|
|
|
|
commify( D_total.m2, l1, buf1);
|
|
commify(Dr_total.m2, l2, buf2);
|
|
commify(Dw_total.m2, l3, buf3);
|
|
VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
|
|
buf1, buf2, buf3);
|
|
|
|
p = 10;
|
|
|
|
percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
|
|
percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
|
|
percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
|
|
VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
|
|
|
|
percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
|
|
percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
|
|
percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
|
|
VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
|
|
VG_(message)(Vg_UserMsg, "");
|
|
|
|
/* L2 overall results */
|
|
L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
|
|
L2_total_mr = Dr_total.m2 + Ir_total.m2;
|
|
L2_total_mw = Dw_total.m2;
|
|
|
|
commify(L2_total_m, l1, buf1);
|
|
commify(L2_total_mr, l2, buf2);
|
|
commify(L2_total_mw, l3, buf3);
|
|
VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
|
|
buf1, buf2, buf3);
|
|
|
|
percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
|
|
percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
|
|
percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
|
|
VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
|
|
|
|
|
|
/* Hash table stats */
|
|
if (VG_(clo_verbosity) > 1) {
|
|
int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
|
|
file_line_debug_BBs + no_debug_BBs;
|
|
|
|
VG_(message)(Vg_DebugMsg, "");
|
|
VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
|
|
VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
|
|
VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
|
|
VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
|
|
full_debug_BBs * 100 / BB_lookups,
|
|
full_debug_BBs);
|
|
VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
|
|
file_line_debug_BBs * 100 / BB_lookups,
|
|
file_line_debug_BBs);
|
|
VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
|
|
fn_name_debug_BBs * 100 / BB_lookups,
|
|
fn_name_debug_BBs);
|
|
VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
|
|
no_debug_BBs * 100 / BB_lookups,
|
|
no_debug_BBs);
|
|
VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
|
|
VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
|
|
}
|
|
VGP_POPCC;
|
|
}
|
|
|