mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-03 18:13:01 +00:00
later gccs don't complain about these. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@9019
5026 lines
159 KiB
C
5026 lines
159 KiB
C
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- LibHB: a library for implementing and checking ---*/
|
|
/*--- the happens-before relationship in concurrent programs. ---*/
|
|
/*--- libhb_main.c ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of LibHB, a library for implementing and checking
|
|
the happens-before relationship in concurrent programs.
|
|
|
|
Copyright (C) 2008-2008 OpenWorks Ltd
|
|
info@open-works.co.uk
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file COPYING.
|
|
*/
|
|
|
|
#include "pub_tool_basics.h"
|
|
#include "pub_tool_libcassert.h"
|
|
#include "pub_tool_libcbase.h"
|
|
#include "pub_tool_libcprint.h"
|
|
#include "pub_tool_mallocfree.h"
|
|
#include "pub_tool_wordfm.h"
|
|
#include "pub_tool_sparsewa.h"
|
|
#include "pub_tool_xarray.h"
|
|
#include "pub_tool_oset.h"
|
|
#include "pub_tool_threadstate.h"
|
|
#include "pub_tool_aspacemgr.h"
|
|
#include "pub_tool_execontext.h"
|
|
#include "pub_tool_errormgr.h"
|
|
#include "pub_tool_options.h" // VG_(clo_verbosity)
|
|
#include "hg_basics.h"
|
|
#include "hg_wordset.h"
|
|
#include "hg_lock_n_thread.h"
|
|
#include "hg_errors.h"
|
|
|
|
#include "libhb.h"
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// Debugging #defines //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
/* Check the sanity of shadow values in the core memory state
|
|
machine. Change #if 0 to #if 1 to enable this. */
|
|
#if 0
|
|
# define CHECK_MSM 1
|
|
#else
|
|
# define CHECK_MSM 0
|
|
#endif
|
|
|
|
|
|
/* Check sanity (reference counts, etc) in the conflicting access
|
|
machinery. Change #if 0 to #if 1 to enable this. */
|
|
#if 0
|
|
# define CHECK_CEM 1
|
|
#else
|
|
# define CHECK_CEM 0
|
|
#endif
|
|
|
|
|
|
/* Check sanity in the compressed shadow memory machinery,
|
|
particularly in its caching innards. Unfortunately there's no
|
|
almost-zero-cost way to make them selectable at run time. Hence
|
|
set the #if 0 to #if 1 and rebuild if you want them. */
|
|
#if 0
|
|
# define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
|
|
# define inline __attribute__((noinline))
|
|
/* probably want to ditch -fomit-frame-pointer too */
|
|
#else
|
|
# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
|
|
#endif
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// Forward declarations //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
/* fwds for
|
|
Globals needed by other parts of the library. These are set
|
|
once at startup and then never changed. */
|
|
static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
|
|
static ExeContext* (*main_get_EC)( Thr* ) = NULL;
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION BEGIN compressed shadow memory //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef __HB_ZSM_H
|
|
#define __HB_ZSM_H
|
|
|
|
typedef ULong SVal;
|
|
|
|
/* This value has special significance to the implementation, and callers
|
|
may not store it in the shadow memory. */
|
|
#define SVal_INVALID (3ULL << 62)
|
|
|
|
/* This is the default value for shadow memory. Initially the shadow
|
|
memory contains no accessible areas and so all reads produce this
|
|
value. TODO: make this caller-defineable. */
|
|
#define SVal_NOACCESS (2ULL << 62)
|
|
|
|
/* Initialise the library. Once initialised, it will (or may) call
|
|
rcinc and rcdec in response to all the calls below, in order to
|
|
allow the user to do reference counting on the SVals stored herein.
|
|
It is important to understand, however, that due to internal
|
|
caching, the reference counts are in general inaccurate, and can be
|
|
both above or below the true reference count for an item. In
|
|
particular, the library may indicate that the reference count for
|
|
an item is zero, when in fact it is not.
|
|
|
|
To make the reference counting exact and therefore non-pointless,
|
|
call zsm_flush_cache. Immediately after it returns, the reference
|
|
counts for all items, as deduced by the caller by observing calls
|
|
to rcinc and rcdec, will be correct, and so any items with a zero
|
|
reference count may be freed (or at least considered to be
|
|
unreferenced by this library).
|
|
*/
|
|
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
|
|
|
|
static void zsm_set_range ( Addr, SizeT, SVal );
|
|
static SVal zsm_read8 ( Addr );
|
|
static void zsm_copy_range ( Addr, Addr, SizeT );
|
|
static void zsm_flush_cache ( void );
|
|
|
|
#endif /* ! __HB_ZSM_H */
|
|
|
|
|
|
/* Round a up to the next multiple of N. N must be a power of 2 */
|
|
#define ROUNDUP(a, N) ((a + N - 1) & ~(N-1))
|
|
/* Round a down to the next multiple of N. N must be a power of 2 */
|
|
#define ROUNDDN(a, N) ((a) & ~(N-1))
|
|
|
|
|
|
|
|
/* ------ User-supplied RC functions ------ */
|
|
static void(*rcinc)(SVal) = NULL;
|
|
static void(*rcdec)(SVal) = NULL;
|
|
|
|
|
|
/* ------ CacheLine ------ */
|
|
|
|
#define N_LINE_BITS 6 /* must be >= 3 */
|
|
#define N_LINE_ARANGE (1 << N_LINE_BITS)
|
|
#define N_LINE_TREES (N_LINE_ARANGE >> 3)
|
|
|
|
typedef
|
|
struct {
|
|
UShort descrs[N_LINE_TREES];
|
|
SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
|
|
}
|
|
CacheLine;
|
|
|
|
#define TREE_DESCR_16_0 (1<<0)
|
|
#define TREE_DESCR_32_0 (1<<1)
|
|
#define TREE_DESCR_16_1 (1<<2)
|
|
#define TREE_DESCR_64 (1<<3)
|
|
#define TREE_DESCR_16_2 (1<<4)
|
|
#define TREE_DESCR_32_1 (1<<5)
|
|
#define TREE_DESCR_16_3 (1<<6)
|
|
#define TREE_DESCR_8_0 (1<<7)
|
|
#define TREE_DESCR_8_1 (1<<8)
|
|
#define TREE_DESCR_8_2 (1<<9)
|
|
#define TREE_DESCR_8_3 (1<<10)
|
|
#define TREE_DESCR_8_4 (1<<11)
|
|
#define TREE_DESCR_8_5 (1<<12)
|
|
#define TREE_DESCR_8_6 (1<<13)
|
|
#define TREE_DESCR_8_7 (1<<14)
|
|
#define TREE_DESCR_DTY (1<<15)
|
|
|
|
typedef
|
|
struct {
|
|
SVal dict[4]; /* can represent up to 4 diff values in the line */
|
|
UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
|
|
dict indexes */
|
|
/* if dict[0] == SVal_INVALID then dict[1] is the index of the
|
|
LineF to use, and dict[2..] are also SVal_INVALID. */
|
|
}
|
|
LineZ; /* compressed rep for a cache line */
|
|
|
|
typedef
|
|
struct {
|
|
Bool inUse;
|
|
SVal w64s[N_LINE_ARANGE];
|
|
}
|
|
LineF; /* full rep for a cache line */
|
|
|
|
/* Shadow memory.
|
|
Primary map is a WordFM Addr SecMap*.
|
|
SecMaps cover some page-size-ish section of address space and hold
|
|
a compressed representation.
|
|
CacheLine-sized chunks of SecMaps are copied into a Cache, being
|
|
decompressed when moved into the cache and recompressed on the
|
|
way out. Because of this, the cache must operate as a writeback
|
|
cache, not a writethrough one.
|
|
|
|
Each SecMap must hold a power-of-2 number of CacheLines. Hence
|
|
N_SECMAP_BITS must >= N_LINE_BITS.
|
|
*/
|
|
#define N_SECMAP_BITS 13
|
|
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
|
|
|
|
// # CacheLines held by a SecMap
|
|
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
|
|
|
|
/* The data in the SecMap is held in the array of LineZs. Each LineZ
|
|
either carries the required data directly, in a compressed
|
|
representation, or it holds (in .dict[0]) an index to the LineF in
|
|
.linesF that holds the full representation.
|
|
|
|
Currently-unused LineF's have their .inUse bit set to zero.
|
|
Since each in-use LineF is referred to be exactly one LineZ,
|
|
the number of .linesZ[] that refer to .linesF should equal
|
|
the number of .linesF[] that have .inUse == True.
|
|
|
|
RC obligations: the RCs presented to the user include exactly
|
|
the values in:
|
|
* direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
|
|
* F reps that are in use (.inUse == True)
|
|
|
|
Hence the following actions at the following transitions are required:
|
|
|
|
F rep: .inUse==True -> .inUse==False -- rcdec_LineF
|
|
F rep: .inUse==False -> .inUse==True -- rcinc_LineF
|
|
Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
|
|
Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
|
|
*/
|
|
typedef
|
|
struct {
|
|
UInt magic;
|
|
LineZ linesZ[N_SECMAP_ZLINES];
|
|
LineF* linesF;
|
|
UInt linesF_size;
|
|
}
|
|
SecMap;
|
|
|
|
#define SecMap_MAGIC 0x571e58cbU
|
|
|
|
static inline Bool is_sane_SecMap ( SecMap* sm ) {
|
|
return sm != NULL && sm->magic == SecMap_MAGIC;
|
|
}
|
|
|
|
/* ------ Cache ------ */
|
|
|
|
#define N_WAY_BITS 16
|
|
#define N_WAY_NENT (1 << N_WAY_BITS)
|
|
|
|
/* Each tag is the address of the associated CacheLine, rounded down
|
|
to a CacheLine address boundary. A CacheLine size must be a power
|
|
of 2 and must be 8 or more. Hence an easy way to initialise the
|
|
cache so it is empty is to set all the tag values to any value % 8
|
|
!= 0, eg 1. This means all queries in the cache initially miss.
|
|
It does however require us to detect and not writeback, any line
|
|
with a bogus tag. */
|
|
typedef
|
|
struct {
|
|
CacheLine lyns0[N_WAY_NENT];
|
|
Addr tags0[N_WAY_NENT];
|
|
}
|
|
Cache;
|
|
|
|
static inline Bool is_valid_scache_tag ( Addr tag ) {
|
|
/* a valid tag should be naturally aligned to the start of
|
|
a CacheLine. */
|
|
return 0 == (tag & (N_LINE_ARANGE - 1));
|
|
}
|
|
|
|
|
|
/* --------- Primary data structures --------- */
|
|
|
|
/* Shadow memory primary map */
|
|
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
|
|
static Cache cache_shmem;
|
|
|
|
|
|
static UWord stats__secmaps_search = 0; // # SM finds
|
|
static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
|
|
static UWord stats__secmaps_allocd = 0; // # SecMaps issued
|
|
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
|
|
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
|
|
static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
|
|
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
|
|
static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
|
|
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
|
|
static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
|
|
static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
|
|
static UWord stats__cache_F_fetches = 0; // # F lines fetched
|
|
static UWord stats__cache_F_wbacks = 0; // # F lines written back
|
|
static UWord stats__cache_invals = 0; // # cache invals
|
|
static UWord stats__cache_flushes = 0; // # cache flushes
|
|
static UWord stats__cache_totrefs = 0; // # total accesses
|
|
static UWord stats__cache_totmisses = 0; // # misses
|
|
static ULong stats__cache_make_New_arange = 0; // total arange made New
|
|
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
|
|
static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
|
|
static UWord stats__cline_read64s = 0; // # calls to s_m_read64
|
|
static UWord stats__cline_read32s = 0; // # calls to s_m_read32
|
|
static UWord stats__cline_read16s = 0; // # calls to s_m_read16
|
|
static UWord stats__cline_read8s = 0; // # calls to s_m_read8
|
|
static UWord stats__cline_write64s = 0; // # calls to s_m_write64
|
|
static UWord stats__cline_write32s = 0; // # calls to s_m_write32
|
|
static UWord stats__cline_write16s = 0; // # calls to s_m_write16
|
|
static UWord stats__cline_write8s = 0; // # calls to s_m_write8
|
|
static UWord stats__cline_set64s = 0; // # calls to s_m_set64
|
|
static UWord stats__cline_set32s = 0; // # calls to s_m_set32
|
|
static UWord stats__cline_set16s = 0; // # calls to s_m_set16
|
|
static UWord stats__cline_set8s = 0; // # calls to s_m_set8
|
|
static UWord stats__cline_get8s = 0; // # calls to s_m_get8
|
|
static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
|
|
static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
|
|
static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
|
|
static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
|
|
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
|
|
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
|
|
static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
|
|
|
|
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
|
|
return a & ~(N_SECMAP_ARANGE - 1);
|
|
}
|
|
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
|
|
return a & (N_SECMAP_ARANGE - 1);
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------*/
|
|
/*--- map_shmem :: WordFM Addr SecMap ---*/
|
|
/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
|
|
/*----------------------------------------------------------------*/
|
|
|
|
/*--------------- SecMap allocation --------------- */
|
|
|
|
static HChar* shmem__bigchunk_next = NULL;
|
|
static HChar* shmem__bigchunk_end1 = NULL;
|
|
|
|
static void* shmem__bigchunk_alloc ( SizeT n )
|
|
{
|
|
const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
|
|
tl_assert(n > 0);
|
|
n = VG_ROUNDUP(n, 16);
|
|
tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
|
|
tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
|
|
<= (SSizeT)sHMEM__BIGCHUNK_SIZE);
|
|
if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
|
|
if (0)
|
|
VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
|
|
(Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
|
|
shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
|
|
if (shmem__bigchunk_next == NULL)
|
|
VG_(out_of_memory_NORETURN)(
|
|
"helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
|
|
shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
|
|
}
|
|
tl_assert(shmem__bigchunk_next);
|
|
tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
|
|
tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
|
|
shmem__bigchunk_next += n;
|
|
return shmem__bigchunk_next - n;
|
|
}
|
|
|
|
static SecMap* shmem__alloc_SecMap ( void )
|
|
{
|
|
Word i, j;
|
|
SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
|
|
if (0) VG_(printf)("alloc_SecMap %p\n",sm);
|
|
tl_assert(sm);
|
|
sm->magic = SecMap_MAGIC;
|
|
for (i = 0; i < N_SECMAP_ZLINES; i++) {
|
|
sm->linesZ[i].dict[0] = SVal_NOACCESS;
|
|
sm->linesZ[i].dict[1] = SVal_INVALID;
|
|
sm->linesZ[i].dict[2] = SVal_INVALID;
|
|
sm->linesZ[i].dict[3] = SVal_INVALID;
|
|
for (j = 0; j < N_LINE_ARANGE/4; j++)
|
|
sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
|
|
}
|
|
sm->linesF = NULL;
|
|
sm->linesF_size = 0;
|
|
stats__secmaps_allocd++;
|
|
stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
|
|
stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
|
|
stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
|
|
return sm;
|
|
}
|
|
|
|
typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
|
|
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
|
|
|
|
static SecMap* shmem__find_SecMap ( Addr ga )
|
|
{
|
|
SecMap* sm = NULL;
|
|
Addr gaKey = shmem__round_to_SecMap_base(ga);
|
|
// Cache
|
|
stats__secmaps_search++;
|
|
if (LIKELY(gaKey == smCache[0].gaKey))
|
|
return smCache[0].sm;
|
|
if (LIKELY(gaKey == smCache[1].gaKey)) {
|
|
SMCacheEnt tmp = smCache[0];
|
|
smCache[0] = smCache[1];
|
|
smCache[1] = tmp;
|
|
return smCache[0].sm;
|
|
}
|
|
if (gaKey == smCache[2].gaKey) {
|
|
SMCacheEnt tmp = smCache[1];
|
|
smCache[1] = smCache[2];
|
|
smCache[2] = tmp;
|
|
return smCache[1].sm;
|
|
}
|
|
// end Cache
|
|
stats__secmaps_search_slow++;
|
|
if (VG_(lookupFM)( map_shmem,
|
|
NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
|
|
tl_assert(sm != NULL);
|
|
smCache[2] = smCache[1];
|
|
smCache[1] = smCache[0];
|
|
smCache[0].gaKey = gaKey;
|
|
smCache[0].sm = sm;
|
|
} else {
|
|
tl_assert(sm == NULL);
|
|
}
|
|
return sm;
|
|
}
|
|
|
|
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
|
|
{
|
|
SecMap* sm = shmem__find_SecMap ( ga );
|
|
if (LIKELY(sm)) {
|
|
return sm;
|
|
} else {
|
|
/* create a new one */
|
|
Addr gaKey = shmem__round_to_SecMap_base(ga);
|
|
sm = shmem__alloc_SecMap();
|
|
tl_assert(sm);
|
|
VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
|
|
return sm;
|
|
}
|
|
}
|
|
|
|
|
|
/* ------------ LineF and LineZ related ------------ */
|
|
|
|
static void rcinc_LineF ( LineF* lineF ) {
|
|
UWord i;
|
|
tl_assert(lineF->inUse);
|
|
for (i = 0; i < N_LINE_ARANGE; i++)
|
|
rcinc(lineF->w64s[i]);
|
|
}
|
|
|
|
static void rcdec_LineF ( LineF* lineF ) {
|
|
UWord i;
|
|
tl_assert(lineF->inUse);
|
|
for (i = 0; i < N_LINE_ARANGE; i++)
|
|
rcdec(lineF->w64s[i]);
|
|
}
|
|
|
|
static void rcinc_LineZ ( LineZ* lineZ ) {
|
|
tl_assert(lineZ->dict[0] != SVal_INVALID);
|
|
rcinc(lineZ->dict[0]);
|
|
if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
|
|
if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
|
|
if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
|
|
}
|
|
|
|
static void rcdec_LineZ ( LineZ* lineZ ) {
|
|
tl_assert(lineZ->dict[0] != SVal_INVALID);
|
|
rcdec(lineZ->dict[0]);
|
|
if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
|
|
if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
|
|
if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
|
|
}
|
|
|
|
inline
|
|
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
|
|
Word bix, shft, mask, prep;
|
|
tl_assert(ix >= 0);
|
|
bix = ix >> 2;
|
|
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
|
|
mask = 3 << shft;
|
|
prep = b2 << shft;
|
|
arr[bix] = (arr[bix] & ~mask) | prep;
|
|
}
|
|
|
|
inline
|
|
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
|
|
Word bix, shft;
|
|
tl_assert(ix >= 0);
|
|
bix = ix >> 2;
|
|
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
|
|
return (arr[bix] >> shft) & 3;
|
|
}
|
|
|
|
/* Given address 'tag', find either the Z or F line containing relevant
|
|
data, so it can be read into the cache.
|
|
*/
|
|
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
|
|
/*OUT*/LineF** fp, Addr tag ) {
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
UWord zix;
|
|
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
|
|
UWord smoff = shmem__get_SecMap_offset(tag);
|
|
/* since smoff is derived from a valid tag, it should be
|
|
cacheline-aligned. */
|
|
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
|
|
zix = smoff >> N_LINE_BITS;
|
|
tl_assert(zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
lineF = NULL;
|
|
if (lineZ->dict[0] == SVal_INVALID) {
|
|
UInt fix = (UInt)lineZ->dict[1];
|
|
tl_assert(sm->linesF);
|
|
tl_assert(sm->linesF_size > 0);
|
|
tl_assert(fix >= 0 && fix < sm->linesF_size);
|
|
lineF = &sm->linesF[fix];
|
|
tl_assert(lineF->inUse);
|
|
lineZ = NULL;
|
|
}
|
|
*zp = lineZ;
|
|
*fp = lineF;
|
|
}
|
|
|
|
/* Given address 'tag', return the relevant SecMap and the index of
|
|
the LineZ within it, in the expectation that the line is to be
|
|
overwritten. Regardless of whether 'tag' is currently associated
|
|
with a Z or F representation, to rcdec on the current
|
|
representation, in recognition of the fact that the contents are
|
|
just about to be overwritten. */
|
|
static __attribute__((noinline))
|
|
void find_Z_for_writing ( /*OUT*/SecMap** smp,
|
|
/*OUT*/Word* zixp,
|
|
Addr tag ) {
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
UWord zix;
|
|
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
|
|
UWord smoff = shmem__get_SecMap_offset(tag);
|
|
/* since smoff is derived from a valid tag, it should be
|
|
cacheline-aligned. */
|
|
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
|
|
zix = smoff >> N_LINE_BITS;
|
|
tl_assert(zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
lineF = NULL;
|
|
/* re RCs, we are freeing up this LineZ/LineF so that new data can
|
|
be parked in it. Hence have to rcdec it accordingly. */
|
|
/* If lineZ has an associated lineF, free it up. */
|
|
if (lineZ->dict[0] == SVal_INVALID) {
|
|
UInt fix = (UInt)lineZ->dict[1];
|
|
tl_assert(sm->linesF);
|
|
tl_assert(sm->linesF_size > 0);
|
|
tl_assert(fix >= 0 && fix < sm->linesF_size);
|
|
lineF = &sm->linesF[fix];
|
|
tl_assert(lineF->inUse);
|
|
rcdec_LineF(lineF);
|
|
lineF->inUse = False;
|
|
} else {
|
|
rcdec_LineZ(lineZ);
|
|
}
|
|
*smp = sm;
|
|
*zixp = zix;
|
|
}
|
|
|
|
static __attribute__((noinline))
|
|
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
|
|
UInt i, new_size;
|
|
LineF* nyu;
|
|
|
|
if (sm->linesF) {
|
|
tl_assert(sm->linesF_size > 0);
|
|
} else {
|
|
tl_assert(sm->linesF_size == 0);
|
|
}
|
|
|
|
if (sm->linesF) {
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
if (!sm->linesF[i].inUse) {
|
|
*fixp = (Word)i;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* No free F line found. Expand existing array and try again. */
|
|
new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
|
|
nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
|
|
new_size * sizeof(LineF) );
|
|
tl_assert(nyu);
|
|
|
|
stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
|
|
stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
|
|
* sizeof(LineF);
|
|
|
|
if (0)
|
|
VG_(printf)("SM %p: expand F array from %d to %d\n",
|
|
sm, (Int)sm->linesF_size, new_size);
|
|
|
|
for (i = 0; i < new_size; i++)
|
|
nyu[i].inUse = False;
|
|
|
|
if (sm->linesF) {
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
tl_assert(sm->linesF[i].inUse);
|
|
nyu[i] = sm->linesF[i];
|
|
}
|
|
VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
|
|
HG_(free)(sm->linesF);
|
|
}
|
|
|
|
sm->linesF = nyu;
|
|
sm->linesF_size = new_size;
|
|
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
if (!sm->linesF[i].inUse) {
|
|
*fixp = (Word)i;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*NOTREACHED*/
|
|
tl_assert(0);
|
|
}
|
|
|
|
|
|
/* ------------ CacheLine and implicit-tree related ------------ */
|
|
|
|
__attribute__((unused))
|
|
static void pp_CacheLine ( CacheLine* cl ) {
|
|
Word i;
|
|
if (!cl) {
|
|
VG_(printf)("%s","pp_CacheLine(NULL)\n");
|
|
return;
|
|
}
|
|
for (i = 0; i < N_LINE_TREES; i++)
|
|
VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
|
|
for (i = 0; i < N_LINE_ARANGE; i++)
|
|
VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
|
|
}
|
|
|
|
static UChar descr_to_validbits ( UShort descr )
|
|
{
|
|
/* a.k.a Party Time for gcc's constant folder */
|
|
# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
|
|
b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
|
|
( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
|
|
( (b8_5) << 12) | ( (b8_4) << 11) | \
|
|
( (b8_3) << 10) | ( (b8_2) << 9) | \
|
|
( (b8_1) << 8) | ( (b8_0) << 7) | \
|
|
( (b16_3) << 6) | ( (b32_1) << 5) | \
|
|
( (b16_2) << 4) | ( (b64) << 3) | \
|
|
( (b16_1) << 2) | ( (b32_0) << 1) | \
|
|
( (b16_0) << 0) ) )
|
|
|
|
# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
|
|
( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
|
|
( (bit5) << 5) | ( (bit4) << 4) | \
|
|
( (bit3) << 3) | ( (bit2) << 2) | \
|
|
( (bit1) << 1) | ( (bit0) << 0) ) )
|
|
|
|
/* these should all get folded out at compile time */
|
|
tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
|
|
|
|
switch (descr) {
|
|
/*
|
|
+--------------------------------- TREE_DESCR_8_7
|
|
| +------------------- TREE_DESCR_8_0
|
|
| | +---------------- TREE_DESCR_16_3
|
|
| | | +-------------- TREE_DESCR_32_1
|
|
| | | | +------------ TREE_DESCR_16_2
|
|
| | | | | +--------- TREE_DESCR_64
|
|
| | | | | | +------ TREE_DESCR_16_1
|
|
| | | | | | | +---- TREE_DESCR_32_0
|
|
| | | | | | | | +-- TREE_DESCR_16_0
|
|
| | | | | | | | |
|
|
| | | | | | | | | GRANULARITY, 7 -> 0 */
|
|
case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
|
|
return BYTE(1,1,1,1,1,1,1,1);
|
|
case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
|
|
return BYTE(1,1,0,1,1,1,1,1);
|
|
case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
|
|
return BYTE(0,1,1,1,1,1,1,1);
|
|
case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
|
|
return BYTE(0,1,0,1,1,1,1,1);
|
|
|
|
case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
|
|
return BYTE(1,1,1,1,1,1,0,1);
|
|
case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
|
|
return BYTE(1,1,0,1,1,1,0,1);
|
|
case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
|
|
return BYTE(0,1,1,1,1,1,0,1);
|
|
case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
|
|
return BYTE(0,1,0,1,1,1,0,1);
|
|
|
|
case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
|
|
return BYTE(1,1,1,1,0,1,1,1);
|
|
case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
|
|
return BYTE(1,1,0,1,0,1,1,1);
|
|
case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
|
|
return BYTE(0,1,1,1,0,1,1,1);
|
|
case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
|
|
return BYTE(0,1,0,1,0,1,1,1);
|
|
|
|
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
|
|
return BYTE(1,1,1,1,0,1,0,1);
|
|
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
|
|
return BYTE(1,1,0,1,0,1,0,1);
|
|
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
|
|
return BYTE(0,1,1,1,0,1,0,1);
|
|
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
|
|
return BYTE(0,1,0,1,0,1,0,1);
|
|
|
|
case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
|
|
return BYTE(0,0,0,1,1,1,1,1);
|
|
case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
|
|
return BYTE(0,0,0,1,1,1,0,1);
|
|
case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
|
|
return BYTE(0,0,0,1,0,1,1,1);
|
|
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
|
|
return BYTE(0,0,0,1,0,1,0,1);
|
|
|
|
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
|
|
return BYTE(1,1,1,1,0,0,0,1);
|
|
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
|
|
return BYTE(1,1,0,1,0,0,0,1);
|
|
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
|
|
return BYTE(0,1,1,1,0,0,0,1);
|
|
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
|
|
return BYTE(0,1,0,1,0,0,0,1);
|
|
|
|
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
|
|
return BYTE(0,0,0,1,0,0,0,1);
|
|
|
|
case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
|
|
return BYTE(0,0,0,0,0,0,0,1);
|
|
|
|
default: return BYTE(0,0,0,0,0,0,0,0);
|
|
/* INVALID - any valid descr produces at least one
|
|
valid bit in tree[0..7]*/
|
|
}
|
|
/* NOTREACHED*/
|
|
tl_assert(0);
|
|
|
|
# undef DESCR
|
|
# undef BYTE
|
|
}
|
|
|
|
__attribute__((unused))
|
|
static Bool is_sane_Descr ( UShort descr ) {
|
|
return descr_to_validbits(descr) != 0;
|
|
}
|
|
|
|
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
|
|
VG_(sprintf)(dst,
|
|
"%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
|
|
(Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_64) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
|
|
);
|
|
}
|
|
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
|
|
VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
|
|
(Int)((byte & 128) ? 1 : 0),
|
|
(Int)((byte & 64) ? 1 : 0),
|
|
(Int)((byte & 32) ? 1 : 0),
|
|
(Int)((byte & 16) ? 1 : 0),
|
|
(Int)((byte & 8) ? 1 : 0),
|
|
(Int)((byte & 4) ? 1 : 0),
|
|
(Int)((byte & 2) ? 1 : 0),
|
|
(Int)((byte & 1) ? 1 : 0)
|
|
);
|
|
}
|
|
|
|
static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
|
|
Word i;
|
|
UChar validbits = descr_to_validbits(descr);
|
|
HChar buf[128], buf2[128];
|
|
if (validbits == 0)
|
|
goto bad;
|
|
for (i = 0; i < 8; i++) {
|
|
if (validbits & (1<<i)) {
|
|
if (tree[i] == SVal_INVALID)
|
|
goto bad;
|
|
} else {
|
|
if (tree[i] != SVal_INVALID)
|
|
goto bad;
|
|
}
|
|
}
|
|
return True;
|
|
bad:
|
|
sprintf_Descr( buf, descr );
|
|
sprintf_Byte( buf2, validbits );
|
|
VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
|
|
VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
|
|
VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
|
|
for (i = 0; i < 8; i++)
|
|
VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
|
|
VG_(printf)("%s","}\n");
|
|
return 0;
|
|
}
|
|
|
|
static Bool is_sane_CacheLine ( CacheLine* cl )
|
|
{
|
|
Word tno, cloff;
|
|
|
|
if (!cl) goto bad;
|
|
|
|
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
|
|
UShort descr = cl->descrs[tno];
|
|
SVal* tree = &cl->svals[cloff];
|
|
if (!is_sane_Descr_and_Tree(descr, tree))
|
|
goto bad;
|
|
}
|
|
tl_assert(cloff == N_LINE_ARANGE);
|
|
return True;
|
|
bad:
|
|
pp_CacheLine(cl);
|
|
return False;
|
|
}
|
|
|
|
static UShort normalise_tree ( /*MOD*/SVal* tree )
|
|
{
|
|
UShort descr;
|
|
/* pre: incoming tree[0..7] does not have any invalid shvals, in
|
|
particular no zeroes. */
|
|
if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
|
|
|| tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
|
|
|| tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
|
|
|| tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
|
|
tl_assert(0);
|
|
|
|
descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
|
|
| TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
|
|
| TREE_DESCR_8_1 | TREE_DESCR_8_0;
|
|
/* build 16-bit layer */
|
|
if (tree[1] == tree[0]) {
|
|
tree[1] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
|
|
descr |= TREE_DESCR_16_0;
|
|
}
|
|
if (tree[3] == tree[2]) {
|
|
tree[3] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
|
|
descr |= TREE_DESCR_16_1;
|
|
}
|
|
if (tree[5] == tree[4]) {
|
|
tree[5] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
|
|
descr |= TREE_DESCR_16_2;
|
|
}
|
|
if (tree[7] == tree[6]) {
|
|
tree[7] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
|
|
descr |= TREE_DESCR_16_3;
|
|
}
|
|
/* build 32-bit layer */
|
|
if (tree[2] == tree[0]
|
|
&& (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
|
|
tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
|
|
descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
|
|
descr |= TREE_DESCR_32_0;
|
|
}
|
|
if (tree[6] == tree[4]
|
|
&& (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
|
|
tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
|
|
descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
|
|
descr |= TREE_DESCR_32_1;
|
|
}
|
|
/* build 64-bit layer */
|
|
if (tree[4] == tree[0]
|
|
&& (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
|
|
tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
|
|
descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
|
|
descr |= TREE_DESCR_64;
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
/* This takes a cacheline where all the data is at the leaves
|
|
(w8[..]) and builds a correctly normalised tree. */
|
|
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
|
|
{
|
|
Word tno, cloff;
|
|
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
|
|
SVal* tree = &cl->svals[cloff];
|
|
cl->descrs[tno] = normalise_tree( tree );
|
|
}
|
|
tl_assert(cloff == N_LINE_ARANGE);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
stats__cline_normalises++;
|
|
}
|
|
|
|
|
|
typedef struct { UChar count; SVal sval; } CountedSVal;
|
|
|
|
static
|
|
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
|
|
/*OUT*/Word* dstUsedP,
|
|
Word nDst, CacheLine* src )
|
|
{
|
|
Word tno, cloff, dstUsed;
|
|
|
|
tl_assert(nDst == N_LINE_ARANGE);
|
|
dstUsed = 0;
|
|
|
|
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
|
|
UShort descr = src->descrs[tno];
|
|
SVal* tree = &src->svals[cloff];
|
|
|
|
/* sequentialise the tree described by (descr,tree). */
|
|
# define PUT(_n,_v) \
|
|
do { dst[dstUsed ].count = (_n); \
|
|
dst[dstUsed++].sval = (_v); \
|
|
} while (0)
|
|
|
|
/* byte 0 */
|
|
if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
|
|
if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
|
|
if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
|
|
if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
|
|
/* byte 1 */
|
|
if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
|
|
/* byte 2 */
|
|
if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
|
|
if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
|
|
/* byte 3 */
|
|
if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
|
|
/* byte 4 */
|
|
if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
|
|
if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
|
|
if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
|
|
/* byte 5 */
|
|
if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
|
|
/* byte 6 */
|
|
if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
|
|
if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
|
|
/* byte 7 */
|
|
if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
|
|
|
|
# undef PUT
|
|
/* END sequentialise the tree described by (descr,tree). */
|
|
|
|
}
|
|
tl_assert(cloff == N_LINE_ARANGE);
|
|
tl_assert(dstUsed <= nDst);
|
|
|
|
*dstUsedP = dstUsed;
|
|
}
|
|
|
|
/* Write the cacheline 'wix' to backing store. Where it ends up
|
|
is determined by its tag field. */
|
|
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
|
|
{
|
|
Word i, j, k, m;
|
|
Addr tag;
|
|
SecMap* sm;
|
|
CacheLine* cl;
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
Word zix, fix, csvalsUsed;
|
|
CountedSVal csvals[N_LINE_ARANGE];
|
|
SVal sv;
|
|
|
|
if (0)
|
|
VG_(printf)("scache wback line %d\n", (Int)wix);
|
|
|
|
tl_assert(wix >= 0 && wix < N_WAY_NENT);
|
|
|
|
tag = cache_shmem.tags0[wix];
|
|
cl = &cache_shmem.lyns0[wix];
|
|
|
|
/* The cache line may have been invalidated; if so, ignore it. */
|
|
if (!is_valid_scache_tag(tag))
|
|
return;
|
|
|
|
/* Where are we going to put it? */
|
|
sm = NULL;
|
|
lineZ = NULL;
|
|
lineF = NULL;
|
|
zix = fix = -1;
|
|
|
|
/* find the Z line to write in and rcdec it or the associated F
|
|
line. */
|
|
find_Z_for_writing( &sm, &zix, tag );
|
|
|
|
tl_assert(sm);
|
|
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
|
|
/* Generate the data to be stored */
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
|
|
csvalsUsed = -1;
|
|
sequentialise_CacheLine( csvals, &csvalsUsed,
|
|
N_LINE_ARANGE, cl );
|
|
tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
|
|
if (0) VG_(printf)("%lu ", csvalsUsed);
|
|
|
|
lineZ->dict[0] = lineZ->dict[1]
|
|
= lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
|
|
|
|
/* i indexes actual shadow values, k is cursor in csvals */
|
|
i = 0;
|
|
for (k = 0; k < csvalsUsed; k++) {
|
|
|
|
sv = csvals[k].sval;
|
|
if (CHECK_ZSM)
|
|
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
|
|
/* do we already have it? */
|
|
if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
|
|
if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
|
|
if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
|
|
if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
|
|
/* no. look for a free slot. */
|
|
if (CHECK_ZSM)
|
|
tl_assert(sv != SVal_INVALID);
|
|
if (lineZ->dict[0]
|
|
== SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
|
|
if (lineZ->dict[1]
|
|
== SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
|
|
if (lineZ->dict[2]
|
|
== SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
|
|
if (lineZ->dict[3]
|
|
== SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
|
|
break; /* we'll have to use the f rep */
|
|
dict_ok:
|
|
m = csvals[k].count;
|
|
if (m == 8) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
write_twobit_array( lineZ->ix2s, i+1, j );
|
|
write_twobit_array( lineZ->ix2s, i+2, j );
|
|
write_twobit_array( lineZ->ix2s, i+3, j );
|
|
write_twobit_array( lineZ->ix2s, i+4, j );
|
|
write_twobit_array( lineZ->ix2s, i+5, j );
|
|
write_twobit_array( lineZ->ix2s, i+6, j );
|
|
write_twobit_array( lineZ->ix2s, i+7, j );
|
|
i += 8;
|
|
}
|
|
else if (m == 4) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
write_twobit_array( lineZ->ix2s, i+1, j );
|
|
write_twobit_array( lineZ->ix2s, i+2, j );
|
|
write_twobit_array( lineZ->ix2s, i+3, j );
|
|
i += 4;
|
|
}
|
|
else if (m == 1) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
i += 1;
|
|
}
|
|
else if (m == 2) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
write_twobit_array( lineZ->ix2s, i+1, j );
|
|
i += 2;
|
|
}
|
|
else {
|
|
tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
|
|
}
|
|
|
|
}
|
|
|
|
if (LIKELY(i == N_LINE_ARANGE)) {
|
|
/* Construction of the compressed representation was
|
|
successful. */
|
|
rcinc_LineZ(lineZ);
|
|
stats__cache_Z_wbacks++;
|
|
} else {
|
|
/* Cannot use the compressed(z) representation. Use the full(f)
|
|
rep instead. */
|
|
tl_assert(i >= 0 && i < N_LINE_ARANGE);
|
|
alloc_F_for_writing( sm, &fix );
|
|
tl_assert(sm->linesF);
|
|
tl_assert(sm->linesF_size > 0);
|
|
tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
|
|
lineF = &sm->linesF[fix];
|
|
tl_assert(!lineF->inUse);
|
|
lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
|
|
lineZ->dict[1] = (SVal)fix;
|
|
lineF->inUse = True;
|
|
i = 0;
|
|
for (k = 0; k < csvalsUsed; k++) {
|
|
if (CHECK_ZSM)
|
|
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
|
|
sv = csvals[k].sval;
|
|
if (CHECK_ZSM)
|
|
tl_assert(sv != SVal_INVALID);
|
|
for (m = csvals[k].count; m > 0; m--) {
|
|
lineF->w64s[i] = sv;
|
|
i++;
|
|
}
|
|
}
|
|
tl_assert(i == N_LINE_ARANGE);
|
|
rcinc_LineF(lineF);
|
|
stats__cache_F_wbacks++;
|
|
}
|
|
}
|
|
|
|
/* Fetch the cacheline 'wix' from the backing store. The tag
|
|
associated with 'wix' is assumed to have already been filled in;
|
|
hence that is used to determine where in the backing store to read
|
|
from. */
|
|
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
|
|
{
|
|
Word i;
|
|
Addr tag;
|
|
CacheLine* cl;
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
|
|
if (0)
|
|
VG_(printf)("scache fetch line %d\n", (Int)wix);
|
|
|
|
tl_assert(wix >= 0 && wix < N_WAY_NENT);
|
|
|
|
tag = cache_shmem.tags0[wix];
|
|
cl = &cache_shmem.lyns0[wix];
|
|
|
|
/* reject nonsense requests */
|
|
tl_assert(is_valid_scache_tag(tag));
|
|
|
|
lineZ = NULL;
|
|
lineF = NULL;
|
|
find_ZF_for_reading( &lineZ, &lineF, tag );
|
|
tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
|
|
|
|
/* expand the data into the bottom layer of the tree, then get
|
|
cacheline_normalise to build the descriptor array. */
|
|
if (lineF) {
|
|
tl_assert(lineF->inUse);
|
|
for (i = 0; i < N_LINE_ARANGE; i++) {
|
|
cl->svals[i] = lineF->w64s[i];
|
|
}
|
|
stats__cache_F_fetches++;
|
|
} else {
|
|
for (i = 0; i < N_LINE_ARANGE; i++) {
|
|
SVal sv;
|
|
UWord ix = read_twobit_array( lineZ->ix2s, i );
|
|
/* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
|
|
sv = lineZ->dict[ix];
|
|
tl_assert(sv != SVal_INVALID);
|
|
cl->svals[i] = sv;
|
|
}
|
|
stats__cache_Z_fetches++;
|
|
}
|
|
normalise_CacheLine( cl );
|
|
}
|
|
|
|
static void shmem__invalidate_scache ( void ) {
|
|
Word wix;
|
|
if (0) VG_(printf)("%s","scache inval\n");
|
|
tl_assert(!is_valid_scache_tag(1));
|
|
for (wix = 0; wix < N_WAY_NENT; wix++) {
|
|
cache_shmem.tags0[wix] = 1/*INVALID*/;
|
|
}
|
|
stats__cache_invals++;
|
|
}
|
|
|
|
static void shmem__flush_and_invalidate_scache ( void ) {
|
|
Word wix;
|
|
Addr tag;
|
|
if (0) VG_(printf)("%s","scache flush and invalidate\n");
|
|
tl_assert(!is_valid_scache_tag(1));
|
|
for (wix = 0; wix < N_WAY_NENT; wix++) {
|
|
tag = cache_shmem.tags0[wix];
|
|
if (tag == 1/*INVALID*/) {
|
|
/* already invalid; nothing to do */
|
|
} else {
|
|
tl_assert(is_valid_scache_tag(tag));
|
|
cacheline_wback( wix );
|
|
}
|
|
cache_shmem.tags0[wix] = 1/*INVALID*/;
|
|
}
|
|
stats__cache_flushes++;
|
|
stats__cache_invals++;
|
|
}
|
|
|
|
|
|
static inline Bool aligned16 ( Addr a ) {
|
|
return 0 == (a & 1);
|
|
}
|
|
static inline Bool aligned32 ( Addr a ) {
|
|
return 0 == (a & 3);
|
|
}
|
|
static inline Bool aligned64 ( Addr a ) {
|
|
return 0 == (a & 7);
|
|
}
|
|
static inline UWord get_cacheline_offset ( Addr a ) {
|
|
return (UWord)(a & (N_LINE_ARANGE - 1));
|
|
}
|
|
static inline Addr cacheline_ROUNDUP ( Addr a ) {
|
|
return ROUNDUP(a, N_LINE_ARANGE);
|
|
}
|
|
static inline Addr cacheline_ROUNDDN ( Addr a ) {
|
|
return ROUNDDN(a, N_LINE_ARANGE);
|
|
}
|
|
static inline UWord get_treeno ( Addr a ) {
|
|
return get_cacheline_offset(a) >> 3;
|
|
}
|
|
static inline UWord get_tree_offset ( Addr a ) {
|
|
return a & 7;
|
|
}
|
|
|
|
static __attribute__((noinline))
|
|
CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
|
|
static inline CacheLine* get_cacheline ( Addr a )
|
|
{
|
|
/* tag is 'a' with the in-line offset masked out,
|
|
eg a[31]..a[4] 0000 */
|
|
Addr tag = a & ~(N_LINE_ARANGE - 1);
|
|
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
stats__cache_totrefs++;
|
|
if (LIKELY(tag == cache_shmem.tags0[wix])) {
|
|
return &cache_shmem.lyns0[wix];
|
|
} else {
|
|
return get_cacheline_MISS( a );
|
|
}
|
|
}
|
|
|
|
static __attribute__((noinline))
|
|
CacheLine* get_cacheline_MISS ( Addr a )
|
|
{
|
|
/* tag is 'a' with the in-line offset masked out,
|
|
eg a[31]..a[4] 0000 */
|
|
|
|
CacheLine* cl;
|
|
Addr* tag_old_p;
|
|
Addr tag = a & ~(N_LINE_ARANGE - 1);
|
|
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
|
|
tl_assert(tag != cache_shmem.tags0[wix]);
|
|
|
|
/* Dump the old line into the backing store. */
|
|
stats__cache_totmisses++;
|
|
|
|
cl = &cache_shmem.lyns0[wix];
|
|
tag_old_p = &cache_shmem.tags0[wix];
|
|
|
|
if (is_valid_scache_tag( *tag_old_p )) {
|
|
/* EXPENSIVE and REDUNDANT: callee does it */
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
cacheline_wback( wix );
|
|
}
|
|
/* and reload the new one */
|
|
*tag_old_p = tag;
|
|
cacheline_fetch( wix );
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
return cl;
|
|
}
|
|
|
|
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
|
|
stats__cline_64to32pulldown++;
|
|
switch (toff) {
|
|
case 0: case 4:
|
|
tl_assert(descr & TREE_DESCR_64);
|
|
tree[4] = tree[0];
|
|
descr &= ~TREE_DESCR_64;
|
|
descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
|
|
stats__cline_32to16pulldown++;
|
|
switch (toff) {
|
|
case 0: case 2:
|
|
if (!(descr & TREE_DESCR_32_0)) {
|
|
descr = pulldown_to_32(tree, 0, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_32_0);
|
|
tree[2] = tree[0];
|
|
descr &= ~TREE_DESCR_32_0;
|
|
descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
|
|
break;
|
|
case 4: case 6:
|
|
if (!(descr & TREE_DESCR_32_1)) {
|
|
descr = pulldown_to_32(tree, 4, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_32_1);
|
|
tree[6] = tree[4];
|
|
descr &= ~TREE_DESCR_32_1;
|
|
descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
|
|
stats__cline_16to8pulldown++;
|
|
switch (toff) {
|
|
case 0: case 1:
|
|
if (!(descr & TREE_DESCR_16_0)) {
|
|
descr = pulldown_to_16(tree, 0, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_0);
|
|
tree[1] = tree[0];
|
|
descr &= ~TREE_DESCR_16_0;
|
|
descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
|
|
break;
|
|
case 2: case 3:
|
|
if (!(descr & TREE_DESCR_16_1)) {
|
|
descr = pulldown_to_16(tree, 2, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_1);
|
|
tree[3] = tree[2];
|
|
descr &= ~TREE_DESCR_16_1;
|
|
descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
|
|
break;
|
|
case 4: case 5:
|
|
if (!(descr & TREE_DESCR_16_2)) {
|
|
descr = pulldown_to_16(tree, 4, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_2);
|
|
tree[5] = tree[4];
|
|
descr &= ~TREE_DESCR_16_2;
|
|
descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
|
|
break;
|
|
case 6: case 7:
|
|
if (!(descr & TREE_DESCR_16_3)) {
|
|
descr = pulldown_to_16(tree, 6, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_3);
|
|
tree[7] = tree[6];
|
|
descr &= ~TREE_DESCR_16_3;
|
|
descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
|
|
static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
|
|
UShort mask;
|
|
switch (toff) {
|
|
case 0:
|
|
mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
|
|
tl_assert( (descr & mask) == mask );
|
|
descr &= ~mask;
|
|
descr |= TREE_DESCR_16_0;
|
|
break;
|
|
case 2:
|
|
mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
|
|
tl_assert( (descr & mask) == mask );
|
|
descr &= ~mask;
|
|
descr |= TREE_DESCR_16_1;
|
|
break;
|
|
case 4:
|
|
mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
|
|
tl_assert( (descr & mask) == mask );
|
|
descr &= ~mask;
|
|
descr |= TREE_DESCR_16_2;
|
|
break;
|
|
case 6:
|
|
mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
|
|
tl_assert( (descr & mask) == mask );
|
|
descr &= ~mask;
|
|
descr |= TREE_DESCR_16_3;
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
|
|
UShort mask;
|
|
switch (toff) {
|
|
case 0:
|
|
if (!(descr & TREE_DESCR_16_0))
|
|
descr = pullup_descr_to_16(descr, 0);
|
|
if (!(descr & TREE_DESCR_16_1))
|
|
descr = pullup_descr_to_16(descr, 2);
|
|
mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
|
|
tl_assert( (descr & mask) == mask );
|
|
descr &= ~mask;
|
|
descr |= TREE_DESCR_32_0;
|
|
break;
|
|
case 4:
|
|
if (!(descr & TREE_DESCR_16_2))
|
|
descr = pullup_descr_to_16(descr, 4);
|
|
if (!(descr & TREE_DESCR_16_3))
|
|
descr = pullup_descr_to_16(descr, 6);
|
|
mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
|
|
tl_assert( (descr & mask) == mask );
|
|
descr &= ~mask;
|
|
descr |= TREE_DESCR_32_1;
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
|
|
switch (toff) {
|
|
case 0: case 4:
|
|
return 0 != (descr & TREE_DESCR_64);
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
}
|
|
|
|
static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
|
|
switch (toff) {
|
|
case 0:
|
|
return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
|
|
case 2:
|
|
return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
|
|
case 4:
|
|
return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
|
|
case 6:
|
|
return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
}
|
|
|
|
/* ------------ Cache management ------------ */
|
|
|
|
static void zsm_flush_cache ( void )
|
|
{
|
|
shmem__flush_and_invalidate_scache();
|
|
}
|
|
|
|
|
|
static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
|
|
{
|
|
tl_assert( sizeof(UWord) == sizeof(Addr) );
|
|
|
|
rcinc = p_rcinc;
|
|
rcdec = p_rcdec;
|
|
|
|
tl_assert(map_shmem == NULL);
|
|
map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
|
|
HG_(free),
|
|
NULL/*unboxed UWord cmp*/);
|
|
tl_assert(map_shmem != NULL);
|
|
shmem__invalidate_scache();
|
|
|
|
/* a SecMap must contain an integral number of CacheLines */
|
|
tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
|
|
/* also ... a CacheLine holds an integral number of trees */
|
|
tl_assert(0 == (N_LINE_ARANGE % 8));
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION END compressed shadow memory //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION BEGIN vts primitives //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef __HB_VTS_H
|
|
#define __HB_VTS_H
|
|
|
|
/* VtsIDs can't exceed 30 bits, since they have to be packed into the
|
|
lowest 30 bits of an SVal. */
|
|
typedef UInt VtsID;
|
|
#define VtsID_INVALID 0xFFFFFFFF
|
|
|
|
/* A VTS contains .ts, its vector clock, and also .id, a field to hold
|
|
a backlink for the caller's convenience. Since we have no idea
|
|
what to set that to in the library, it always gets set to
|
|
VtsID_INVALID. */
|
|
typedef
|
|
struct {
|
|
VtsID id;
|
|
XArray* ts; /* XArray* ScalarTS(abstract) */
|
|
}
|
|
VTS;
|
|
|
|
|
|
/* Create a new, empty VTS. */
|
|
VTS* VTS__new ( void );
|
|
|
|
/* Delete this VTS in its entirety. */
|
|
void VTS__delete ( VTS* vts );
|
|
|
|
/* Create a new singleton VTS. */
|
|
VTS* VTS__singleton ( Thr* thr, ULong tym );
|
|
|
|
/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
|
|
not modified. */
|
|
VTS* VTS__tick ( Thr* me, VTS* vts );
|
|
|
|
/* Return a new VTS constructed as the join (max) of the 2 args.
|
|
Neither arg is modified. */
|
|
VTS* VTS__join ( VTS* a, VTS* b );
|
|
|
|
/* Compute the partial ordering relation of the two args. */
|
|
typedef
|
|
enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
|
|
POrd;
|
|
|
|
POrd VTS__cmp ( VTS* a, VTS* b );
|
|
|
|
/* Compute an arbitrary structural (total) ordering on the two args,
|
|
based on their VCs, so they can be looked up in a table, tree, etc.
|
|
Returns -1, 0 or 1. */
|
|
Word VTS__cmp_structural ( VTS* a, VTS* b );
|
|
|
|
/* Debugging only. Display the given VTS in the buffer. */
|
|
void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
|
|
|
|
/* Debugging only. Return vts[index], so to speak. */
|
|
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
|
|
|
|
#endif /* ! __HB_VTS_H */
|
|
|
|
|
|
/*--------------- to do with Vector Timestamps ---------------*/
|
|
|
|
/* Scalar Timestamp */
|
|
typedef
|
|
struct {
|
|
Thr* thr;
|
|
ULong tym;
|
|
}
|
|
ScalarTS;
|
|
|
|
|
|
static Bool is_sane_VTS ( VTS* vts )
|
|
{
|
|
UWord i, n;
|
|
ScalarTS *st1, *st2;
|
|
if (!vts) return False;
|
|
if (!vts->ts) return False;
|
|
n = VG_(sizeXA)( vts->ts );
|
|
if (n >= 2) {
|
|
for (i = 0; i < n-1; i++) {
|
|
st1 = VG_(indexXA)( vts->ts, i );
|
|
st2 = VG_(indexXA)( vts->ts, i+1 );
|
|
if (st1->thr >= st2->thr)
|
|
return False;
|
|
if (st1->tym == 0 || st2->tym == 0)
|
|
return False;
|
|
}
|
|
}
|
|
return True;
|
|
}
|
|
|
|
|
|
/* Create a new, empty VTS.
|
|
*/
|
|
VTS* VTS__new ( void )
|
|
{
|
|
VTS* vts;
|
|
vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
|
|
tl_assert(vts);
|
|
vts->id = VtsID_INVALID;
|
|
vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
|
|
HG_(free), sizeof(ScalarTS) );
|
|
tl_assert(vts->ts);
|
|
return vts;
|
|
}
|
|
|
|
|
|
/* Delete this VTS in its entirety.
|
|
*/
|
|
void VTS__delete ( VTS* vts )
|
|
{
|
|
tl_assert(vts);
|
|
tl_assert(vts->ts);
|
|
VG_(deleteXA)( vts->ts );
|
|
HG_(free)(vts);
|
|
}
|
|
|
|
|
|
/* Create a new singleton VTS.
|
|
*/
|
|
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
|
|
ScalarTS st;
|
|
VTS* vts;
|
|
tl_assert(thr);
|
|
tl_assert(tym >= 1);
|
|
vts = VTS__new();
|
|
st.thr = thr;
|
|
st.tym = tym;
|
|
VG_(addToXA)( vts->ts, &st );
|
|
return vts;
|
|
}
|
|
|
|
|
|
/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
|
|
not modified.
|
|
*/
|
|
VTS* VTS__tick ( Thr* me, VTS* vts )
|
|
{
|
|
ScalarTS* here = NULL;
|
|
ScalarTS tmp;
|
|
VTS* res;
|
|
Word i, n;
|
|
tl_assert(me);
|
|
tl_assert(is_sane_VTS(vts));
|
|
//if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
|
|
// (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
|
|
res = VTS__new();
|
|
n = VG_(sizeXA)( vts->ts );
|
|
|
|
/* main loop doesn't handle zero-entry case correctly, so
|
|
special-case it. */
|
|
if (n == 0) {
|
|
tmp.thr = me;
|
|
tmp.tym = 1;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
tl_assert(is_sane_VTS(res));
|
|
return res;
|
|
}
|
|
|
|
for (i = 0; i < n; i++) {
|
|
here = VG_(indexXA)( vts->ts, i );
|
|
if (me < here->thr) {
|
|
/* We just went past 'me', without seeing it. */
|
|
tmp.thr = me;
|
|
tmp.tym = 1;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
tmp = *here;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
i++;
|
|
break;
|
|
}
|
|
else if (me == here->thr) {
|
|
tmp = *here;
|
|
tmp.tym++;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
i++;
|
|
break;
|
|
}
|
|
else /* me > here->thr */ {
|
|
tmp = *here;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
}
|
|
}
|
|
tl_assert(i >= 0 && i <= n);
|
|
if (i == n && here && here->thr < me) {
|
|
tmp.thr = me;
|
|
tmp.tym = 1;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
} else {
|
|
for (/*keepgoing*/; i < n; i++) {
|
|
here = VG_(indexXA)( vts->ts, i );
|
|
tmp = *here;
|
|
VG_(addToXA)( res->ts, &tmp );
|
|
}
|
|
}
|
|
tl_assert(is_sane_VTS(res));
|
|
//if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
|
|
// (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
|
|
return res;
|
|
}
|
|
|
|
|
|
/* Return a new VTS constructed as the join (max) of the 2 args.
|
|
Neither arg is modified.
|
|
*/
|
|
VTS* VTS__join ( VTS* a, VTS* b )
|
|
{
|
|
Word ia, ib, useda, usedb;
|
|
ULong tyma, tymb, tymMax;
|
|
Thr* thr;
|
|
VTS* res;
|
|
ScalarTS *tmpa, *tmpb;
|
|
|
|
tl_assert(a && a->ts);
|
|
tl_assert(b && b->ts);
|
|
useda = VG_(sizeXA)( a->ts );
|
|
usedb = VG_(sizeXA)( b->ts );
|
|
|
|
res = VTS__new();
|
|
ia = ib = 0;
|
|
|
|
while (1) {
|
|
|
|
/* This logic is to enumerate triples (thr, tyma, tymb) drawn
|
|
from a and b in order, where thr is the next Thr*
|
|
occurring in either a or b, and tyma/b are the relevant
|
|
scalar timestamps, taking into account implicit zeroes. */
|
|
tl_assert(ia >= 0 && ia <= useda);
|
|
tl_assert(ib >= 0 && ib <= usedb);
|
|
tmpa = tmpb = NULL;
|
|
|
|
if (ia == useda && ib == usedb) {
|
|
/* both empty - done */
|
|
break;
|
|
}
|
|
else
|
|
if (ia == useda && ib != usedb) {
|
|
/* a empty, use up b */
|
|
tmpb = VG_(indexXA)( b->ts, ib );
|
|
thr = tmpb->thr;
|
|
tyma = 0;
|
|
tymb = tmpb->tym;
|
|
ib++;
|
|
}
|
|
else
|
|
if (ia != useda && ib == usedb) {
|
|
/* b empty, use up a */
|
|
tmpa = VG_(indexXA)( a->ts, ia );
|
|
thr = tmpa->thr;
|
|
tyma = tmpa->tym;
|
|
tymb = 0;
|
|
ia++;
|
|
}
|
|
else {
|
|
/* both not empty; extract lowest-Thr*'d triple */
|
|
tmpa = VG_(indexXA)( a->ts, ia );
|
|
tmpb = VG_(indexXA)( b->ts, ib );
|
|
if (tmpa->thr < tmpb->thr) {
|
|
/* a has the lowest unconsidered Thr* */
|
|
thr = tmpa->thr;
|
|
tyma = tmpa->tym;
|
|
tymb = 0;
|
|
ia++;
|
|
}
|
|
else
|
|
if (tmpa->thr > tmpb->thr) {
|
|
/* b has the lowest unconsidered Thr* */
|
|
thr = tmpb->thr;
|
|
tyma = 0;
|
|
tymb = tmpb->tym;
|
|
ib++;
|
|
} else {
|
|
/* they both next mention the same Thr* */
|
|
tl_assert(tmpa->thr == tmpb->thr);
|
|
thr = tmpa->thr; /* == tmpb->thr */
|
|
tyma = tmpa->tym;
|
|
tymb = tmpb->tym;
|
|
ia++;
|
|
ib++;
|
|
}
|
|
}
|
|
|
|
/* having laboriously determined (thr, tyma, tymb), do something
|
|
useful with it. */
|
|
tymMax = tyma > tymb ? tyma : tymb;
|
|
if (tymMax > 0) {
|
|
ScalarTS st;
|
|
st.thr = thr;
|
|
st.tym = tymMax;
|
|
VG_(addToXA)( res->ts, &st );
|
|
}
|
|
|
|
}
|
|
|
|
tl_assert(is_sane_VTS( res ));
|
|
|
|
return res;
|
|
}
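
/* Worked example of the join above (illustrative only):
   VTS__join( [ S:2, T:5 ], [ T:3, U:1 ] ) == [ S:2, T:5, U:1 ],
   that is, the per-thread maximum, with a thread missing from one
   argument treated as having timestamp zero there. */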
|
|
|
|
|
|
/* Compute the partial ordering relation of the two args.
|
|
*/
|
|
POrd VTS__cmp ( VTS* a, VTS* b )
|
|
{
|
|
Word ia, ib, useda, usedb;
|
|
ULong tyma, tymb;
|
|
Thr* thr;
|
|
ScalarTS *tmpa, *tmpb;
|
|
|
|
Bool all_leq = True;
|
|
Bool all_geq = True;
|
|
|
|
tl_assert(a && a->ts);
|
|
tl_assert(b && b->ts);
|
|
useda = VG_(sizeXA)( a->ts );
|
|
usedb = VG_(sizeXA)( b->ts );
|
|
|
|
ia = ib = 0;
|
|
|
|
while (1) {
|
|
|
|
/* This logic is to enumerate triples (thr, tyma, tymb) drawn
|
|
from a and b in order, where thr is the next Thr*
|
|
occurring in either a or b, and tyma/b are the relevant
|
|
scalar timestamps, taking into account implicit zeroes. */
|
|
tl_assert(ia >= 0 && ia <= useda);
|
|
tl_assert(ib >= 0 && ib <= usedb);
|
|
tmpa = tmpb = NULL;
|
|
|
|
if (ia == useda && ib == usedb) {
|
|
/* both empty - done */
|
|
break;
|
|
}
|
|
else
|
|
if (ia == useda && ib != usedb) {
|
|
/* a empty, use up b */
|
|
tmpb = VG_(indexXA)( b->ts, ib );
|
|
thr = tmpb->thr;
|
|
tyma = 0;
|
|
tymb = tmpb->tym;
|
|
ib++;
|
|
}
|
|
else
|
|
if (ia != useda && ib == usedb) {
|
|
/* b empty, use up a */
|
|
tmpa = VG_(indexXA)( a->ts, ia );
|
|
thr = tmpa->thr;
|
|
tyma = tmpa->tym;
|
|
tymb = 0;
|
|
ia++;
|
|
}
|
|
else {
|
|
/* both not empty; extract lowest-Thr*'d triple */
|
|
tmpa = VG_(indexXA)( a->ts, ia );
|
|
tmpb = VG_(indexXA)( b->ts, ib );
|
|
if (tmpa->thr < tmpb->thr) {
|
|
/* a has the lowest unconsidered Thr* */
|
|
thr = tmpa->thr;
|
|
tyma = tmpa->tym;
|
|
tymb = 0;
|
|
ia++;
|
|
}
|
|
else
|
|
if (tmpa->thr > tmpb->thr) {
|
|
/* b has the lowest unconsidered Thr* */
|
|
thr = tmpb->thr;
|
|
tyma = 0;
|
|
tymb = tmpb->tym;
|
|
ib++;
|
|
} else {
|
|
/* they both next mention the same Thr* */
|
|
tl_assert(tmpa->thr == tmpb->thr);
|
|
thr = tmpa->thr; /* == tmpb->thr */
|
|
tyma = tmpa->tym;
|
|
tymb = tmpb->tym;
|
|
ia++;
|
|
ib++;
|
|
}
|
|
}
|
|
|
|
/* having laboriously determined (thr, tyma, tymb), do something
|
|
useful with it. */
|
|
if (tyma < tymb)
|
|
all_geq = False;
|
|
if (tyma > tymb)
|
|
all_leq = False;
|
|
}
|
|
|
|
if (all_leq && all_geq)
|
|
return POrd_EQ;
|
|
/* now we know they aren't equal, so either all_leq or all_geq or
|
|
both are false. */
|
|
if (all_leq)
|
|
return POrd_LT;
|
|
if (all_geq)
|
|
return POrd_GT;
|
|
/* hmm, neither all_geq or all_leq. This means unordered. */
|
|
return POrd_UN;
|
|
}
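
/* Worked examples of the partial ordering above (illustrative only):
   VTS__cmp( [ T:1 ],      [ T:2 ] ) == POrd_LT
   VTS__cmp( [ T:2, U:1 ], [ T:2 ] ) == POrd_GT   (U is implicitly 0 in b)
   VTS__cmp( [ T:1 ],      [ U:1 ] ) == POrd_UN   (neither dominates) */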
|
|
|
|
|
|
/* Compute an arbitrary structural (total) ordering on the two args,
|
|
based on their VCs, so they can be looked up in a table, tree, etc.
|
|
Returns -1, 0 or 1. (really just 'deriving Ord' :-)
|
|
*/
|
|
Word VTS__cmp_structural ( VTS* a, VTS* b )
|
|
{
|
|
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which exposes likely
      differences relatively quickly. */
|
|
Word i, useda, usedb;
|
|
ScalarTS *tmpa, *tmpb;
|
|
|
|
tl_assert(a && a->ts);
|
|
tl_assert(b && b->ts);
|
|
useda = VG_(sizeXA)( a->ts );
|
|
usedb = VG_(sizeXA)( b->ts );
|
|
|
|
if (useda < usedb) return -1;
|
|
if (useda > usedb) return 1;
|
|
|
|
/* Same length vectors, so let's step through them together. */
|
|
tl_assert(useda == usedb);
|
|
for (i = 0; i < useda; i++) {
|
|
tmpa = VG_(indexXA)( a->ts, i );
|
|
tmpb = VG_(indexXA)( b->ts, i );
|
|
if (tmpa->tym < tmpb->tym) return -1;
|
|
if (tmpa->tym > tmpb->tym) return 1;
|
|
if (tmpa->thr < tmpb->thr) return -1;
|
|
if (tmpa->thr > tmpb->thr) return 1;
|
|
}
|
|
|
|
/* They're identical. */
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Debugging only. Display the given VTS in the buffer.
|
|
*/
|
|
void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
|
|
ScalarTS* st;
|
|
HChar unit[64];
|
|
Word i, n;
|
|
Int avail = nBuf;
|
|
tl_assert(vts && vts->ts);
|
|
tl_assert(nBuf > 16);
|
|
buf[0] = '[';
|
|
buf[1] = 0;
|
|
n = VG_(sizeXA)( vts->ts );
|
|
for (i = 0; i < n; i++) {
|
|
tl_assert(avail >= 40);
|
|
st = VG_(indexXA)( vts->ts, i );
|
|
VG_(memset)(unit, 0, sizeof(unit));
|
|
VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
|
|
st->thr, st->tym);
|
|
if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
|
|
VG_(strcat)(buf, " ...]");
|
|
buf[nBuf-1] = 0;
|
|
return;
|
|
}
|
|
VG_(strcat)(buf, unit);
|
|
avail -= VG_(strlen)(unit);
|
|
}
|
|
VG_(strcat)(buf, "]");
|
|
buf[nBuf-1] = 0;
|
|
}
|
|
|
|
|
|
/* Debugging only. Return vts[index], so to speak.
|
|
*/
|
|
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
   UWord i, n;
   tl_assert(vts && vts->ts);
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      ScalarTS* st = VG_(indexXA)( vts->ts, i );
      if (st->thr == idx)
         return st->tym;
   }
   return 0;
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION END vts primitives //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION BEGIN main library //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// VTS set //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static WordFM* /* VTS* void void */ vts_set = NULL;
|
|
|
|
static void vts_set_init ( void )
|
|
{
|
|
tl_assert(!vts_set);
|
|
vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
|
|
HG_(free),
|
|
(Word(*)(UWord,UWord))VTS__cmp_structural );
|
|
tl_assert(vts_set);
|
|
}
|
|
|
|
/* Given a newly made VTS, look in vts_set to see if we already have
|
|
an identical one. If yes, free up this one and return instead a
|
|
pointer to the existing one. If no, add this one to the set and
|
|
return the same pointer. Caller differentiates the two cases by
|
|
comparing returned pointer with the supplied one (although that
|
|
does require that the supplied VTS is not already in the set).
|
|
*/
|
|
static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
|
|
{
|
|
UWord keyW, valW;
|
|
/* lookup cand (by value) */
|
|
if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
|
|
/* found it */
|
|
tl_assert(valW == 0);
|
|
/* if this fails, cand (by ref) was already present (!) */
|
|
tl_assert(keyW != (UWord)cand);
|
|
VTS__delete(cand);
|
|
return (VTS*)keyW;
|
|
} else {
|
|
/* not present. Add and return pointer to same. */
|
|
VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
|
|
return cand;
|
|
}
|
|
}
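
/* A minimal usage sketch of the interning scheme above (guarded out;
   illustrative only, and 'example_intern_tick' is a made-up name).
   The caller hands over ownership of the freshly built VTS and must
   carry on with whichever pointer comes back: */
#if 0
static VTS* example_intern_tick ( Thr* me, VTS* in )
{
   VTS* fresh  = VTS__tick( me, in );
   VTS* unique = vts_set__find_and_dealloc__or_add( fresh );
   /* 'fresh' may have been deallocated by now; use only 'unique'. */
   return unique;
}
#endif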
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// VTS table //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static void VtsID__invalidate_caches ( void ); /* fwds */
|
|
|
|
/* A type to hold VTS table entries. Invariants:
|
|
If .vts == NULL, then this entry is not in use, so:
|
|
- .rc == 0
|
|
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on the freelist, so .freelink == VtsID_INVALID
*/
|
|
typedef
|
|
struct {
|
|
VTS* vts; /* vts, in vts_set */
|
|
UWord rc; /* reference count - enough for entire aspace */
|
|
VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
|
|
}
|
|
VtsTE;
|
|
|
|
/* The VTS table. */
|
|
static XArray* /* of VtsTE */ vts_tab = NULL;
|
|
|
|
/* An index into the VTS table, indicating the start of the list of
|
|
free (available for use) entries. If the list is empty, this is
|
|
VtsID_INVALID. */
|
|
static VtsID vts_tab_freelist = VtsID_INVALID;
|
|
|
|
/* Do a GC of vts_tab when the freelist becomes empty AND the size of
|
|
vts_tab equals or exceeds this size. After GC, the value here is
|
|
set appropriately so as to check for the next GC point. */
|
|
static Word vts_next_GC_at = 1000;
|
|
|
|
static void vts_tab_init ( void )
|
|
{
|
|
vts_tab
|
|
= VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
|
|
HG_(free), sizeof(VtsTE) );
|
|
vts_tab_freelist
|
|
= VtsID_INVALID;
|
|
tl_assert(vts_tab);
|
|
}
|
|
|
|
/* Add ii to the free list, checking that it looks out-of-use. */
|
|
static void add_to_free_list ( VtsID ii )
|
|
{
|
|
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts == NULL);
|
|
tl_assert(ie->rc == 0);
|
|
tl_assert(ie->freelink == VtsID_INVALID);
|
|
ie->freelink = vts_tab_freelist;
|
|
vts_tab_freelist = ii;
|
|
}
|
|
|
|
/* Get an entry from the free list. This will return VtsID_INVALID if
|
|
the free list is empty. */
|
|
static VtsID get_from_free_list ( void )
|
|
{
|
|
VtsID ii;
|
|
VtsTE* ie;
|
|
if (vts_tab_freelist == VtsID_INVALID)
|
|
return VtsID_INVALID;
|
|
ii = vts_tab_freelist;
|
|
ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts == NULL);
|
|
tl_assert(ie->rc == 0);
|
|
vts_tab_freelist = ie->freelink;
|
|
return ii;
|
|
}
|
|
|
|
/* Produce a new VtsID that can be used, either by getting it from
|
|
the freelist, or, if that is empty, by expanding vts_tab. */
|
|
static VtsID get_new_VtsID ( void )
|
|
{
|
|
VtsID ii;
|
|
VtsTE te;
|
|
ii = get_from_free_list();
|
|
if (ii != VtsID_INVALID)
|
|
return ii;
|
|
te.vts = NULL;
|
|
te.rc = 0;
|
|
te.freelink = VtsID_INVALID;
|
|
ii = (VtsID)VG_(addToXA)( vts_tab, &te );
|
|
return ii;
|
|
}
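
/* Lifecycle sketch for the allocator above (illustrative only): a
   VtsID handed out by get_new_VtsID() is either recycled from
   vts_tab_freelist or freshly appended to vts_tab.  It only goes
   back onto the free list via add_to_free_list(), which the GC
   below calls once the entry's .vts has been deleted and set to
   NULL. */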
|
|
|
|
|
|
/* Indirect callback from lib_zsm. */
|
|
static void VtsID__rcinc ( VtsID ii )
|
|
{
|
|
VtsTE* ie;
|
|
/* VG_(indexXA) does a range check for us */
|
|
ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts); /* else it's not in use */
|
|
tl_assert(ie->rc < ~0UL); /* else we can't continue */
|
|
tl_assert(ie->vts->id == ii);
|
|
ie->rc++;
|
|
}
|
|
|
|
/* Indirect callback from lib_zsm. */
|
|
static void VtsID__rcdec ( VtsID ii )
|
|
{
|
|
VtsTE* ie;
|
|
/* VG_(indexXA) does a range check for us */
|
|
ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts); /* else it's not in use */
|
|
tl_assert(ie->rc > 0); /* else RC snafu */
|
|
tl_assert(ie->vts->id == ii);
|
|
ie->rc--;
|
|
}
|
|
|
|
|
|
/* Look up 'cand' in our collection of VTSs. If present, deallocate
|
|
it and return the VtsID for the pre-existing version. If not
|
|
present, add it to both vts_tab and vts_set, allocate a fresh VtsID
|
|
for it, and return that. */
|
|
static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
|
|
{
|
|
VTS* auld;
|
|
tl_assert(cand->id == VtsID_INVALID);
|
|
auld = vts_set__find_and_dealloc__or_add(cand);
|
|
if (auld != cand) {
|
|
/* We already have an Aulde one. Use that. */
|
|
VtsTE* ie;
|
|
tl_assert(auld->id != VtsID_INVALID);
|
|
ie = VG_(indexXA)( vts_tab, auld->id );
|
|
tl_assert(ie->vts == auld);
|
|
return auld->id;
|
|
} else {
|
|
VtsID ii = get_new_VtsID();
|
|
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
|
|
ie->vts = cand;
|
|
ie->rc = 0;
|
|
ie->freelink = VtsID_INVALID;
|
|
cand->id = ii;
|
|
return ii;
|
|
}
|
|
}
|
|
|
|
|
|
static void show_vts_stats ( HChar* caller )
|
|
{
|
|
UWord nSet, nTab, nLive;
|
|
ULong totrc;
|
|
UWord n, i;
|
|
nSet = VG_(sizeFM)( vts_set );
|
|
nTab = VG_(sizeXA)( vts_tab );
|
|
totrc = 0;
|
|
nLive = 0;
|
|
n = VG_(sizeXA)( vts_tab );
|
|
for (i = 0; i < n; i++) {
|
|
VtsTE* ie = VG_(indexXA)( vts_tab, i );
|
|
if (ie->vts) {
|
|
nLive++;
|
|
totrc += (ULong)ie->rc;
|
|
} else {
|
|
tl_assert(ie->rc == 0);
|
|
}
|
|
}
|
|
VG_(printf)(" show_vts_stats %s\n", caller);
|
|
VG_(printf)(" vts_tab size %4lu\n", nTab);
|
|
VG_(printf)(" vts_tab live %4lu\n", nLive);
|
|
VG_(printf)(" vts_set size %4lu\n", nSet);
|
|
VG_(printf)(" total rc %4llu\n", totrc);
|
|
}
|
|
|
|
/* NOT TO BE CALLED FROM WITHIN libzsm. */
|
|
__attribute__((noinline))
|
|
static void vts_tab__do_GC ( Bool show_stats )
|
|
{
|
|
UWord i, nTab, nLive, nFreed;
|
|
|
|
/* check this is actually necessary. */
|
|
tl_assert(vts_tab_freelist == VtsID_INVALID);
|
|
|
|
/* empty the caches for partial order checks and binary joins. We
|
|
could do better and prune out the entries to be deleted, but it
|
|
ain't worth the hassle. */
|
|
VtsID__invalidate_caches();
|
|
|
|
/* First, make the reference counts up to date. */
|
|
zsm_flush_cache();
|
|
|
|
nTab = VG_(sizeXA)( vts_tab );
|
|
|
|
if (show_stats) {
|
|
VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
|
|
show_vts_stats("before GC");
|
|
}
|
|
|
|
   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are no longer in use and can be put back on the free
      list, removed from vts_set, and deleted. */
|
|
nFreed = 0;
|
|
for (i = 0; i < nTab; i++) {
|
|
Bool present;
|
|
UWord oldK = 0, oldV = 0;
|
|
VtsTE* te = VG_(indexXA)( vts_tab, i );
|
|
if (te->vts == NULL) {
|
|
tl_assert(te->rc == 0);
|
|
continue; /* already on the free list (presumably) */
|
|
}
|
|
if (te->rc > 0)
|
|
continue; /* in use */
|
|
/* Ok, we got one we can free. */
|
|
tl_assert(te->vts->id == i);
|
|
/* first, remove it from vts_set. */
|
|
present = VG_(delFromFM)( vts_set,
|
|
&oldK, &oldV, (UWord)te->vts );
|
|
tl_assert(present); /* else it isn't in vts_set ?! */
|
|
tl_assert(oldV == 0); /* no info stored in vts_set val fields */
|
|
tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
|
|
/* now free the VTS itself */
|
|
VTS__delete(te->vts);
|
|
te->vts = NULL;
|
|
/* and finally put this entry on the free list */
|
|
tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
|
|
add_to_free_list( i );
|
|
nFreed++;
|
|
}
|
|
|
|
/* Now figure out when the next GC should be. We'll allow the
|
|
number of VTSs to double before GCing again. Except of course
|
|
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
|
|
tl_assert(nFreed <= nTab);
|
|
nLive = nTab - nFreed;
|
|
tl_assert(nLive >= 0 && nLive <= nTab);
|
|
vts_next_GC_at = 2 * nLive;
|
|
if (vts_next_GC_at < nTab)
|
|
vts_next_GC_at = nTab;
|
|
|
|
if (show_stats) {
|
|
show_vts_stats("after GC");
|
|
VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
|
|
}
|
|
|
|
if (VG_(clo_verbosity) > 1) {
|
|
static UInt ctr = 0;
|
|
tl_assert(nTab > 0);
|
|
VG_(message)(Vg_DebugMsg,
|
|
"libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
|
|
ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Vts IDs //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////
|
|
static ULong stats__getOrdering_queries = 0;
|
|
static ULong stats__getOrdering_misses = 0;
|
|
static ULong stats__join2_queries = 0;
|
|
static ULong stats__join2_misses = 0;
|
|
|
|
static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}
|
|
|
|
#define N_GETORDERING_CACHE 1023
|
|
static
|
|
struct { VtsID vi1; VtsID vi2; POrd ord; }
|
|
getOrdering_cache[N_GETORDERING_CACHE];
|
|
|
|
#define N_JOIN2_CACHE 1023
|
|
static
|
|
struct { VtsID vi1; VtsID vi2; VtsID res; }
|
|
join2_cache[N_JOIN2_CACHE];
|
|
|
|
static void VtsID__invalidate_caches ( void ) {
|
|
Int i;
|
|
for (i = 0; i < N_GETORDERING_CACHE; i++) {
|
|
getOrdering_cache[i].vi1 = VtsID_INVALID;
|
|
getOrdering_cache[i].vi2 = VtsID_INVALID;
|
|
getOrdering_cache[i].ord = 0; /* an invalid POrd value */
|
|
}
|
|
for (i = 0; i < N_JOIN2_CACHE; i++) {
|
|
join2_cache[i].vi1 = VtsID_INVALID;
|
|
join2_cache[i].vi2 = VtsID_INVALID;
|
|
join2_cache[i].res = VtsID_INVALID;
|
|
}
|
|
}
|
|
//////////////////////////
|
|
|
|
//static Bool VtsID__is_valid ( VtsID vi ) {
|
|
// VtsTE* ve;
|
|
// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
|
|
// return False;
|
|
// ve = VG_(indexXA)( vts_tab, vi );
|
|
// if (!ve->vts)
|
|
// return False;
|
|
// tl_assert(ve->vts->id == vi);
|
|
// return True;
|
|
//}
|
|
|
|
static VTS* VtsID__to_VTS ( VtsID vi ) {
|
|
VtsTE* te = VG_(indexXA)( vts_tab, vi );
|
|
tl_assert(te->vts);
|
|
return te->vts;
|
|
}
|
|
|
|
static void VtsID__pp ( VtsID vi ) {
|
|
HChar buf[100];
|
|
VTS* vts = VtsID__to_VTS(vi);
|
|
VTS__show( buf, sizeof(buf)-1, vts );
|
|
buf[sizeof(buf)-1] = 0;
|
|
VG_(printf)("%s", buf);
|
|
}
|
|
|
|
/* compute partial ordering relation of vi1 and vi2. */
|
|
__attribute__((noinline))
|
|
static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
|
|
UInt hash;
|
|
POrd ord;
|
|
VTS *v1, *v2;
|
|
//if (vi1 == vi2) return POrd_EQ;
|
|
tl_assert(vi1 != vi2);
|
|
////++
|
|
stats__getOrdering_queries++;
|
|
hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
|
|
if (getOrdering_cache[hash].vi1 == vi1
|
|
&& getOrdering_cache[hash].vi2 == vi2)
|
|
return getOrdering_cache[hash].ord;
|
|
stats__getOrdering_misses++;
|
|
////--
|
|
v1 = VtsID__to_VTS(vi1);
|
|
v2 = VtsID__to_VTS(vi2);
|
|
ord = VTS__cmp( v1, v2 );
|
|
////++
|
|
getOrdering_cache[hash].vi1 = vi1;
|
|
getOrdering_cache[hash].vi2 = vi2;
|
|
getOrdering_cache[hash].ord = ord;
|
|
////--
|
|
return ord;
|
|
}
|
|
static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
|
|
return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
|
|
}
|
|
|
|
/* compute binary join */
|
|
__attribute__((noinline))
|
|
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
|
|
UInt hash;
|
|
VtsID res;
|
|
VTS *vts1, *vts2, *nyu;
|
|
//if (vi1 == vi2) return vi1;
|
|
tl_assert(vi1 != vi2);
|
|
////++
|
|
stats__join2_queries++;
|
|
hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
|
|
if (join2_cache[hash].vi1 == vi1
|
|
&& join2_cache[hash].vi2 == vi2)
|
|
return join2_cache[hash].res;
|
|
stats__join2_misses++;
|
|
////--
|
|
vts1 = VtsID__to_VTS(vi1);
|
|
vts2 = VtsID__to_VTS(vi2);
|
|
nyu = VTS__join(vts1,vts2);
|
|
res = vts_tab__find_and_dealloc__or_add(nyu);
|
|
////++
|
|
join2_cache[hash].vi1 = vi1;
|
|
join2_cache[hash].vi2 = vi2;
|
|
join2_cache[hash].res = res;
|
|
////--
|
|
return res;
|
|
}
|
|
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
|
|
return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
|
|
}
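
/* Minimal usage sketch for the cached wrappers above (guarded out;
   illustrative only, 'example_join3' is a made-up name).  Identical
   ids take the fast path; distinct ids go through the direct-mapped
   join2_cache and, on a miss, through vts_tab/vts_set interning: */
#if 0
static VtsID example_join3 ( VtsID a, VtsID b, VtsID c )
{
   /* join is associative, so any bracketing produces the same VTS,
      and hence (because of interning) the same VtsID. */
   return VtsID__join2( VtsID__join2( a, b ), c );
}
#endif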
|
|
|
|
/* create a singleton VTS, namely [thr:1] */
|
|
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
|
|
VTS* nyu = VTS__singleton(thr,tym);
|
|
return vts_tab__find_and_dealloc__or_add(nyu);
|
|
}
|
|
|
|
/* tick operation, creates value 1 if specified index is absent */
|
|
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
|
|
VTS* vts = VtsID__to_VTS(vi);
|
|
VTS* nyu = VTS__tick(idx,vts);
|
|
return vts_tab__find_and_dealloc__or_add(nyu);
|
|
}
|
|
|
|
/* index into a VTS (only for assertions) */
|
|
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
|
|
VTS* vts = VtsID__to_VTS(vi);
|
|
return VTS__indexAt_SLOW( vts, idx );
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Threads //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
struct _Thr {
|
|
/* Current VTSs for this thread. They change as we go along. viR
|
|
is the VTS to be used for reads, viW for writes. Usually they
|
|
are the same, but can differ when we deal with reader-writer
|
|
      locks.  It is always the case that VtsID__getOrdering(viW,viR)
      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
      lag behind, viR. */
|
|
VtsID viR;
|
|
VtsID viW;
|
|
/* opaque (to us) data we hold on behalf of the library's user. */
|
|
void* opaque;
|
|
};
|
|
|
|
static Thr* Thr__new ( void ) {
|
|
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
|
|
thr->viR = VtsID_INVALID;
|
|
thr->viW = VtsID_INVALID;
|
|
return thr;
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Shadow Values //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
|
|
// hb_zsm.h. We have to do everything else here.
|
|
|
|
/* SVal is 64 bit unsigned int.
|
|
|
|
<---------30---------> <---------30--------->
|
|
00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
|
|
01 X--------------------X XX X--------------------X E(rror)
|
|
10 X--------------------X XX X--------------------X A: SVal_NOACCESS
|
|
11 X--------------------X XX X--------------------X I: SVal_INVALID
|
|
*/
|
|
#define SVAL_TAGMASK (3ULL << 62)
|
|
|
|
static inline Bool SVal__isC ( SVal s ) {
|
|
return (0ULL << 62) == (s & SVAL_TAGMASK);
|
|
}
|
|
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
|
|
//tl_assert(VtsID__is_valid(rmini));
|
|
//tl_assert(VtsID__is_valid(wmini));
|
|
return (((ULong)rmini) << 32) | ((ULong)wmini);
|
|
}
|
|
static inline VtsID SVal__unC_Rmin ( SVal s ) {
|
|
tl_assert(SVal__isC(s));
|
|
return (VtsID)(s >> 32);
|
|
}
|
|
static inline VtsID SVal__unC_Wmin ( SVal s ) {
|
|
tl_assert(SVal__isC(s));
|
|
return (VtsID)(s & 0xFFFFFFFFULL);
|
|
}
|
|
|
|
static Bool SVal__isE ( SVal s ) {
|
|
return (1ULL << 62) == (s & SVAL_TAGMASK);
|
|
}
|
|
static SVal SVal__mkE ( void ) {
|
|
return 1ULL << 62;
|
|
}
|
|
|
|
static Bool SVal__isA ( SVal s ) {
|
|
return (2ULL << 62) == (s & SVAL_TAGMASK);
|
|
}
|
|
static SVal SVal__mkA ( void ) {
|
|
return 2ULL << 62;
|
|
}
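
/* Round-trip sketch for the C-tagged encoding above (guarded out;
   illustrative only).  Assuming, as the layout comment says, that
   both VtsIDs fit in 30 bits, packing and unpacking are exact
   inverses: */
#if 0
static void example_sval_roundtrip ( VtsID rmini, VtsID wmini )
{
   SVal s = SVal__mkC( rmini, wmini );
   tl_assert(SVal__isC(s));
   tl_assert(SVal__unC_Rmin(s) == rmini);
   tl_assert(SVal__unC_Wmin(s) == wmini);
}
#endif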
|
|
|
|
/* Direct callback from lib_zsm. */
|
|
static void SVal__rcinc ( SVal s ) {
|
|
if (SVal__isC(s)) {
|
|
VtsID__rcinc( SVal__unC_Rmin(s) );
|
|
VtsID__rcinc( SVal__unC_Wmin(s) );
|
|
}
|
|
}
|
|
|
|
/* Direct callback from lib_zsm. */
|
|
static void SVal__rcdec ( SVal s ) {
|
|
if (SVal__isC(s)) {
|
|
VtsID__rcdec( SVal__unC_Rmin(s) );
|
|
VtsID__rcdec( SVal__unC_Wmin(s) );
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// A simple group (memory) allocator //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
//////////////// BEGIN general group allocator
|
|
typedef
|
|
struct {
|
|
UWord elemSzB; /* element size */
|
|
UWord nPerGroup; /* # elems per group */
|
|
void* (*alloc)(HChar*, SizeT); /* group allocator */
|
|
HChar* cc; /* group allocator's cc */
|
|
void (*free)(void*); /* group allocator's free-er (unused) */
|
|
/* XArray of void* (pointers to groups). The groups themselves.
|
|
Each element is a pointer to a block of size (elemSzB *
|
|
nPerGroup) bytes. */
|
|
XArray* groups;
|
|
/* next free element. Is a pointer to an element in one of the
|
|
groups pointed to by .groups. */
|
|
void* nextFree;
|
|
}
|
|
GroupAlloc;
|
|
|
|
static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
|
|
UWord elemSzB,
|
|
UWord nPerGroup,
|
|
void* (*alloc)(HChar*, SizeT),
|
|
HChar* cc,
|
|
void (*free)(void*) )
|
|
{
|
|
tl_assert(0 == (elemSzB % sizeof(UWord)));
|
|
tl_assert(elemSzB >= sizeof(UWord));
|
|
tl_assert(nPerGroup >= 100); /* let's say */
|
|
tl_assert(alloc);
|
|
tl_assert(cc);
|
|
tl_assert(free);
|
|
tl_assert(ga);
|
|
VG_(memset)(ga, 0, sizeof(*ga));
|
|
ga->elemSzB = elemSzB;
|
|
ga->nPerGroup = nPerGroup;
|
|
ga->groups = NULL;
|
|
ga->alloc = alloc;
|
|
ga->cc = cc;
|
|
ga->free = free;
|
|
ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
|
|
ga->nextFree = NULL;
|
|
tl_assert(ga->groups);
|
|
}
|
|
|
|
/* The freelist is empty. Allocate a new group and put all the new
|
|
elements in it onto the freelist. */
|
|
__attribute__((noinline))
|
|
static void gal_add_new_group ( GroupAlloc* ga )
|
|
{
|
|
Word i;
|
|
UWord* group;
|
|
tl_assert(ga);
|
|
tl_assert(ga->nextFree == NULL);
|
|
group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
|
|
tl_assert(group);
|
|
/* extend the freelist through the new group. Place the freelist
|
|
pointer in the first word of each element. That's why the
|
|
element size must be at least one word. */
|
|
for (i = ga->nPerGroup-1; i >= 0; i--) {
|
|
UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
|
|
UWord* elem = (UWord*)elemC;
|
|
tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
|
|
*elem = (UWord)ga->nextFree;
|
|
ga->nextFree = elem;
|
|
}
|
|
/* and add to our collection of groups */
|
|
VG_(addToXA)( ga->groups, &group );
|
|
}
|
|
|
|
inline static void* gal_Alloc ( GroupAlloc* ga )
|
|
{
|
|
UWord* elem;
|
|
if (UNLIKELY(ga->nextFree == NULL)) {
|
|
gal_add_new_group(ga);
|
|
}
|
|
elem = ga->nextFree;
|
|
ga->nextFree = (void*)*elem;
|
|
*elem = 0; /* unnecessary, but just to be on the safe side */
|
|
return elem;
|
|
}
|
|
|
|
inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
|
|
{
|
|
tl_assert(n == ga->elemSzB);
|
|
return gal_Alloc( ga );
|
|
}
|
|
|
|
inline static void gal_Free ( GroupAlloc* ga, void* p )
|
|
{
|
|
UWord* elem = (UWord*)p;
|
|
*elem = (UWord)ga->nextFree;
|
|
ga->nextFree = elem;
|
|
}
|
|
//////////////// END general group allocator
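
/* Minimal usage sketch for the group allocator (guarded out;
   illustrative only -- 'ExampleElem' and 'example_ga' are made-up
   names).  Elements must be at least one word in size so the
   freelist pointer fits in the first word: */
#if 0
typedef struct { UWord a; UWord b; } ExampleElem;
static GroupAlloc example_ga;

static void example_group_alloc_usage ( void )
{
   ExampleElem* e;
   init_GroupAlloc( &example_ga, sizeof(ExampleElem),
                    1000/*elems per group*/, HG_(zalloc),
                    "libhb.example_ga.1", HG_(free) );
   e = gal_Alloc( &example_ga );
   e->a = 1; e->b = 2;
   gal_Free( &example_ga, e ); /* back onto the freelist */
}
#endif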
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Change-event map2 //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
|
|
|
|
/* This is in two parts:
|
|
|
|
1. An OSet of RCECs. This is a set of reference-counted stack
|
|
traces. When the reference count of a stack trace becomes zero,
|
|
it is removed from the set and freed up. The intent is to have
|
|
a set of stack traces which can be referred to from (2), but to
|
|
only represent each one once. The set is indexed/searched by
|
|
ordering on the stack trace vectors.
|
|
|
|
2. A SparseWA of OldRefs. These store information about each old
|
|
ref that we need to record. It is indexed by address of the
|
|
location for which the information is recorded. For LRU
|
|
purposes, each OldRef also contains a generation number,
|
|
indicating when it was most recently accessed.
|
|
|
|
The important part of an OldRef is, however, its accs[] array.
|
|
This is an array of N_OLDREF_ACCS which binds (thread, R/W,
|
|
size) triples to RCECs. This allows us to collect the last
|
|
access-traceback by up to N_OLDREF_ACCS different triples for
|
|
this location. The accs[] array is a MTF-array. If a binding
|
|
falls off the end, that's too bad -- we will lose info about
|
|
that triple's access to this location.
|
|
|
|
When the SparseWA becomes too big, we can throw away the OldRefs
|
|
whose generation numbers are below some threshold; hence doing
|
|
approximate LRU discarding. For each discarded OldRef we must
|
|
of course decrement the reference count on the all RCECs it
|
|
refers to, in order that entries from (1) eventually get
|
|
discarded too.
|
|
|
|
A major improvement in reliability of this mechanism would be to
|
|
have a dynamically sized OldRef.accs[] array, so no entries ever
|
|
fall off the end. In investigations (Dec 08) it appears that a
|
|
major cause for the non-availability of conflicting-access traces
|
|
in race reports is caused by the fixed size of this array. I
|
|
suspect for most OldRefs, only a few entries are used, but for a
|
|
minority of cases there is an overflow, leading to info lossage.
|
|
Investigations also suggest this is very workload and scheduling
|
|
sensitive. Therefore a dynamic sizing would be better.
|
|
|
|
However, dynamic sizing would defeat the use of a GroupAllocator
|
|
for OldRef structures. And that's important for performance. So
|
|
it's not straightforward to do.
|
|
*/
|
|
|
|
|
|
static UWord stats__ctxt_rcdec1 = 0;
|
|
static UWord stats__ctxt_rcdec2 = 0;
|
|
static UWord stats__ctxt_rcdec3 = 0;
|
|
static UWord stats__ctxt_rcdec_calls = 0;
|
|
static UWord stats__ctxt_rcdec_discards = 0;
|
|
static UWord stats__ctxt_rcdec1_eq = 0;
|
|
|
|
static UWord stats__ctxt_tab_curr = 0;
|
|
static UWord stats__ctxt_tab_max = 0;
|
|
|
|
static UWord stats__ctxt_tab_qs = 0;
|
|
static UWord stats__ctxt_tab_cmps = 0;
|
|
|
|
|
|
///////////////////////////////////////////////////////
|
|
//// Part (1): An OSet of RCECs
|
|
///
|
|
|
|
#define N_FRAMES 8
|
|
|
|
// (UInt) `echo "Reference Counted Execution Context" | md5sum`
|
|
#define RCEC_MAGIC 0xab88abb2UL
|
|
|
|
//#define N_RCEC_TAB 98317 /* prime */
|
|
#define N_RCEC_TAB 196613 /* prime */
|
|
|
|
typedef
|
|
struct _RCEC {
|
|
UWord magic; /* sanity check only */
|
|
struct _RCEC* next;
|
|
UWord rc;
|
|
UWord rcX; /* used for crosschecking */
|
|
UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
|
|
}
|
|
RCEC;
|
|
|
|
static RCEC** contextTab = NULL; /* hash table of RCEC*s */
|
|
|
|
|
|
/* Gives an arbitrary total order on RCEC .frames fields */
|
|
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
|
|
Word i;
|
|
tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
|
|
tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
|
|
if (ec1->frames[0] < ec2->frames[0]) return -1;
|
|
if (ec1->frames[0] > ec2->frames[0]) return 1;
|
|
for (i = 1; i < 1 + N_FRAMES; i++) {
|
|
if (ec1->frames[i] < ec2->frames[i]) return -1;
|
|
if (ec1->frames[i] > ec2->frames[i]) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Dec the ref of this RCEC. */
|
|
static void ctxt__rcdec ( RCEC* ec )
|
|
{
|
|
stats__ctxt_rcdec_calls++;
|
|
tl_assert(ec && ec->magic == RCEC_MAGIC);
|
|
tl_assert(ec->rc > 0);
|
|
ec->rc--;
|
|
}
|
|
|
|
static void ctxt__rcinc ( RCEC* ec )
|
|
{
|
|
tl_assert(ec && ec->magic == RCEC_MAGIC);
|
|
ec->rc++;
|
|
}
|
|
|
|
|
|
//////////// BEGIN RCEC group allocator
|
|
static GroupAlloc rcec_group_allocator;
|
|
|
|
static RCEC* alloc_RCEC ( void ) {
|
|
return gal_Alloc ( &rcec_group_allocator );
|
|
}
|
|
|
|
static void free_RCEC ( RCEC* rcec ) {
|
|
tl_assert(rcec->magic == RCEC_MAGIC);
|
|
gal_Free( &rcec_group_allocator, rcec );
|
|
}
|
|
//////////// END RCEC group allocator
|
|
|
|
|
|
/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
|
|
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
|
|
{
|
|
RCEC *ec0, *ec1, *ec2;
|
|
if (ec == *headp)
|
|
tl_assert(0); /* already at head of list */
|
|
tl_assert(ec != NULL);
|
|
ec0 = *headp;
|
|
ec1 = NULL;
|
|
ec2 = NULL;
|
|
while (True) {
|
|
if (ec0 == NULL || ec0 == ec) break;
|
|
ec2 = ec1;
|
|
ec1 = ec0;
|
|
ec0 = ec0->next;
|
|
}
|
|
tl_assert(ec0 == ec);
|
|
if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
|
|
RCEC* tmp;
|
|
/* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
|
|
predecessor. Swap ec0 and ec1, that is, move ec0 one step
|
|
closer to the start of the list. */
|
|
tl_assert(ec2->next == ec1);
|
|
tl_assert(ec1->next == ec0);
|
|
tmp = ec0->next;
|
|
ec2->next = ec0;
|
|
ec0->next = ec1;
|
|
ec1->next = tmp;
|
|
}
|
|
else
|
|
if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
|
|
/* it's second in the list. */
|
|
tl_assert(*headp == ec1);
|
|
tl_assert(ec1->next == ec0);
|
|
ec1->next = ec0->next;
|
|
ec0->next = ec1;
|
|
*headp = ec0;
|
|
}
|
|
}
|
|
|
|
|
|
/* Find the given RCEC in the tree, and return a pointer to it. Or,
|
|
if not present, add the given one to the tree (by making a copy of
|
|
it, so the caller can immediately deallocate the original) and
|
|
return a pointer to the copy. The caller can safely have 'example'
|
|
on its stack, since we will always return a pointer to a copy of
|
|
   it, not to the original.  Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
|
|
__attribute__((noinline))
|
|
static RCEC* ctxt__find_or_add ( RCEC* example )
|
|
{
|
|
UWord hent;
|
|
RCEC* copy;
|
|
tl_assert(example && example->magic == RCEC_MAGIC);
|
|
tl_assert(example->rc == 0);
|
|
|
|
/* Search the hash table to see if we already have it. */
|
|
stats__ctxt_tab_qs++;
|
|
hent = example->frames[0] % N_RCEC_TAB;
|
|
copy = contextTab[hent];
|
|
while (1) {
|
|
if (!copy) break;
|
|
tl_assert(copy->magic == RCEC_MAGIC);
|
|
stats__ctxt_tab_cmps++;
|
|
if (0 == RCEC__cmp_by_frames(copy, example)) break;
|
|
copy = copy->next;
|
|
}
|
|
|
|
if (copy) {
|
|
tl_assert(copy != example);
|
|
/* optimisation: if it's not at the head of its list, move 1
|
|
step fwds, to make future searches cheaper */
|
|
if (copy != contextTab[hent]) {
|
|
move_RCEC_one_step_forward( &contextTab[hent], copy );
|
|
}
|
|
} else {
|
|
copy = alloc_RCEC();
|
|
tl_assert(copy != example);
|
|
*copy = *example;
|
|
copy->next = contextTab[hent];
|
|
contextTab[hent] = copy;
|
|
stats__ctxt_tab_curr++;
|
|
if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
|
|
stats__ctxt_tab_max = stats__ctxt_tab_curr;
|
|
}
|
|
return copy;
|
|
}
|
|
|
|
static inline UWord ROLW ( UWord w, Int n )
|
|
{
|
|
Int bpw = 8 * sizeof(UWord);
|
|
w = (w << n) | (w >> (bpw-n));
|
|
return w;
|
|
}
|
|
|
|
__attribute__((noinline))
|
|
static RCEC* get_RCEC ( Thr* thr )
|
|
{
|
|
UWord hash, i;
|
|
RCEC example;
|
|
example.magic = RCEC_MAGIC;
|
|
example.rc = 0;
|
|
example.rcX = 0;
|
|
main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
|
|
hash = 0;
|
|
for (i = 1; i < 1 + N_FRAMES; i++) {
|
|
hash ^= example.frames[i];
|
|
hash = ROLW(hash, 19);
|
|
}
|
|
example.frames[0] = hash;
|
|
return ctxt__find_or_add( &example );
|
|
}
|
|
|
|
///////////////////////////////////////////////////////
|
|
//// Part (2):
|
|
/// A SparseWA guest-addr -> OldRef, that refers to (1)
|
|
///
|
|
|
|
// (UInt) `echo "Old Reference Information" | md5sum`
|
|
#define OldRef_MAGIC 0x30b1f075UL
|
|
|
|
/* Records an access: a thread and a context. The size
|
|
(1,2,4,8) and read-or-writeness are also encoded as
|
|
follows: bottom bit of .thr is 1 if write, 0 if read
|
|
            bottom 2 bits of .rcec encode the size:
            00 = 1, 01 = 2, 10 = 4, 11 = 8
|
|
*/
|
|
typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
|
|
|
|
#define N_OLDREF_ACCS 5
|
|
|
|
typedef
|
|
struct {
|
|
UWord magic; /* sanity check only */
|
|
UWord gen; /* when most recently accessed */
|
|
/* or free list when not in use */
|
|
/* unused slots in this array have .thr == NULL */
|
|
Thr_n_RCEC accs[N_OLDREF_ACCS];
|
|
}
|
|
OldRef;
|
|
|
|
|
|
//////////// BEGIN OldRef group allocator
|
|
static GroupAlloc oldref_group_allocator;
|
|
|
|
static OldRef* alloc_OldRef ( void ) {
|
|
return gal_Alloc ( &oldref_group_allocator );
|
|
}
|
|
|
|
static void free_OldRef ( OldRef* r ) {
|
|
tl_assert(r->magic == OldRef_MAGIC);
|
|
gal_Free( &oldref_group_allocator, r );
|
|
}
|
|
//////////// END OldRef group allocator
|
|
|
|
|
|
static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
|
|
static UWord oldrefGen = 0; /* current LRU generation # */
|
|
static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
|
|
static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
|
|
|
|
inline static void* ptr_or_UWord ( void* p, UWord w ) {
   return (void*)( ((UWord)p) | ((UWord)w) );
}
inline static void* ptr_and_UWord ( void* p, UWord w ) {
   return (void*)( ((UWord)p) & ((UWord)w) );
}
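
/* Sketch of the low-bit packing these helpers support (guarded out;
   illustrative only, 'example_pack_unpack' is a made-up name).  Thr*
   and RCEC* values are word-aligned, so their bottom two bits are
   free to carry the is-write flag and the size code described in
   the Thr_n_RCEC comment above: */
#if 0
static void example_pack_unpack ( Thr* thr, RCEC* rcec, Bool isW )
{
   Thr*  packedThr  = ptr_or_UWord( thr,  isW ? 1 : 0 );
   RCEC* packedRcec = ptr_or_UWord( rcec, 2 );   /* 2 encodes szB == 4 */
   Bool  isW2       = (1 == (UWord)ptr_and_UWord( packedThr, 1 ));
   tl_assert( ptr_and_UWord( packedThr,  ~3 ) == (void*)thr );
   tl_assert( ptr_and_UWord( packedRcec, ~3 ) == (void*)rcec );
   tl_assert( isW2 == isW );
}
#endif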
|
|
|
|
inline static UInt min_UInt ( UInt a, UInt b ) {
|
|
return a < b ? a : b;
|
|
}
|
|
|
|
/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
|
|
first interval is lower, 1 if the first interval is higher, and 0
|
|
if there is any overlap. Redundant paranoia with casting is there
|
|
following what looked distinctly like a bug in gcc-4.1.2, in which
|
|
some of the comparisons were done signedly instead of
|
|
unsignedly. */
|
|
/* Copied from exp-ptrcheck/sg_main.c */
|
|
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
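
/* Worked examples of the interval comparison above (illustrative
   only): cmp_nonempty_intervals(0x1000, 4, 0x1004, 4) == -1, since
   the intervals are disjoint and the first lies below the second;
   cmp_nonempty_intervals(0x1000, 8, 0x1004, 2) == 0, since
   [0x1000,0x1008) and [0x1004,0x1006) overlap. */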
|
|
|
|
static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
|
|
{
|
|
OldRef* ref;
|
|
RCEC* rcec;
|
|
Word i, j;
|
|
UWord keyW, valW;
|
|
Bool b;
|
|
|
|
rcec = get_RCEC( thr );
|
|
ctxt__rcinc(rcec);
|
|
|
|
/* encode the size and writeness of the transaction in the bottom
|
|
two bits of thr and rcec. */
|
|
thr = ptr_or_UWord(thr, isW ? 1 : 0);
|
|
switch (szB) {
|
|
/* This doesn't look particularly branch-predictor friendly. */
|
|
case 1: rcec = ptr_or_UWord(rcec, 0); break;
|
|
case 2: rcec = ptr_or_UWord(rcec, 1); break;
|
|
case 4: rcec = ptr_or_UWord(rcec, 2); break;
|
|
case 8: rcec = ptr_or_UWord(rcec, 3); break;
|
|
default: tl_assert(0);
|
|
}
|
|
|
|
/* Look in the map to see if we already have this. */
|
|
b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
|
|
|
|
if (b) {
|
|
|
|
/* We already have a record for this address. We now need to
|
|
see if we have a stack trace pertaining to this (thread, R/W,
|
|
size) triple. */
|
|
tl_assert(keyW == a);
|
|
ref = (OldRef*)valW;
|
|
tl_assert(ref->magic == OldRef_MAGIC);
|
|
|
|
tl_assert(thr);
|
|
for (i = 0; i < N_OLDREF_ACCS; i++) {
|
|
if (ref->accs[i].thr != thr)
|
|
continue;
|
|
/* since .thr encodes both the accessing thread and the
|
|
read/writeness, we know now that at least those features
|
|
of the access match this entry. So we just need to check
|
|
the size indication. Do this by inspecting the lowest 2 bits of
|
|
.rcec, which contain the encoded size info. */
|
|
if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
|
|
continue;
|
|
/* else we have a match, so stop looking. */
|
|
break;
|
|
}
|
|
|
|
if (i < N_OLDREF_ACCS) {
|
|
/* thread 'thr' has an entry at index 'i'. Update it. */
|
|
if (i > 0) {
|
|
Thr_n_RCEC tmp = ref->accs[i-1];
|
|
ref->accs[i-1] = ref->accs[i];
|
|
ref->accs[i] = tmp;
|
|
i--;
|
|
}
|
|
if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
|
|
stats__ctxt_rcdec1++;
|
|
ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
|
|
ref->accs[i].rcec = rcec;
|
|
tl_assert(ref->accs[i].thr == thr);
|
|
} else {
|
|
/* No entry for this (thread, R/W, size) triple. Shuffle all
|
|
of them down one slot, and put the new entry at the start
|
|
of the array. */
|
|
if (ref->accs[N_OLDREF_ACCS-1].thr) {
|
|
/* the last slot is in use. We must dec the rc on the
|
|
associated rcec. */
|
|
tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
|
|
stats__ctxt_rcdec2++;
|
|
if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
|
|
VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
|
|
ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
|
|
} else {
|
|
tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
|
|
}
|
|
for (j = N_OLDREF_ACCS-1; j >= 1; j--)
|
|
ref->accs[j] = ref->accs[j-1];
|
|
ref->accs[0].thr = thr;
|
|
ref->accs[0].rcec = rcec;
|
|
/* thr==NULL is used to signify an empty slot, so we can't
|
|
add a NULL thr. */
|
|
tl_assert(ptr_and_UWord(thr, ~3) != 0);
|
|
}
|
|
|
|
ref->gen = oldrefGen;
|
|
|
|
} else {
|
|
|
|
/* We don't have a record for this address. Create a new one. */
|
|
if (oldrefTreeN >= oldrefGenIncAt) {
|
|
oldrefGen++;
|
|
oldrefGenIncAt = oldrefTreeN + 50000;
|
|
if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
|
|
oldrefGen, oldrefTreeN );
|
|
}
|
|
|
|
ref = alloc_OldRef();
|
|
ref->magic = OldRef_MAGIC;
|
|
ref->gen = oldrefGen;
|
|
ref->accs[0].rcec = rcec;
|
|
ref->accs[0].thr = thr;
|
|
/* thr==NULL is used to signify an empty slot, so we can't add a
|
|
NULL thr. */
|
|
tl_assert(ptr_and_UWord(thr, ~3) != 0);
|
|
for (j = 1; j < N_OLDREF_ACCS; j++) {
|
|
ref->accs[j].thr = NULL;
|
|
ref->accs[j].rcec = NULL;
|
|
}
|
|
VG_(addToSWA)( oldrefTree, a, (UWord)ref );
|
|
oldrefTreeN++;
|
|
|
|
}
|
|
}
|
|
|
|
|
|
Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
|
|
/*OUT*/Thr** resThr,
|
|
/*OUT*/SizeT* resSzB,
|
|
/*OUT*/Bool* resIsW,
|
|
Thr* thr, Addr a, SizeT szB, Bool isW )
|
|
{
|
|
Word i, j;
|
|
OldRef* ref;
|
|
UWord keyW, valW;
|
|
Bool b;
|
|
|
|
Thr* cand_thr;
|
|
RCEC* cand_rcec;
|
|
Bool cand_isW;
|
|
SizeT cand_szB;
|
|
Addr cand_a;
|
|
|
|
Addr toCheck[15];
|
|
Int nToCheck = 0;
|
|
|
|
tl_assert(thr);
|
|
tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
|
|
toCheck[nToCheck++] = a;
|
|
for (i = -7; i < (Word)szB; i++) {
|
|
if (i != 0)
|
|
toCheck[nToCheck++] = a + i;
|
|
}
|
|
tl_assert(nToCheck <= 15);
|
|
|
|
/* Now see if we can find a suitable matching event for
|
|
any of the addresses in toCheck[0 .. nToCheck-1]. */
|
|
for (j = 0; j < nToCheck; j++) {
|
|
|
|
cand_a = toCheck[j];
|
|
// VG_(printf)("test %ld %p\n", j, cand_a);
|
|
|
|
b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
|
|
if (!b)
|
|
continue;
|
|
|
|
ref = (OldRef*)valW;
|
|
tl_assert(keyW == cand_a);
|
|
tl_assert(ref->magic == OldRef_MAGIC);
|
|
tl_assert(ref->accs[0].thr); /* first slot must always be used */
|
|
|
|
cand_thr = NULL;
|
|
cand_rcec = NULL;
|
|
cand_isW = False;
|
|
cand_szB = 0;
|
|
|
|
for (i = 0; i < N_OLDREF_ACCS; i++) {
|
|
Thr_n_RCEC* cand = &ref->accs[i];
|
|
cand_thr = ptr_and_UWord(cand->thr, ~3);
|
|
cand_rcec = ptr_and_UWord(cand->rcec, ~3);
|
|
/* Decode the writeness from the bottom bit of .thr. */
|
|
cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
|
|
/* Decode the size from the bottom two bits of .rcec. */
|
|
switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
|
|
case 0: cand_szB = 1; break;
|
|
case 1: cand_szB = 2; break;
|
|
case 2: cand_szB = 4; break;
|
|
case 3: cand_szB = 8; break;
|
|
default: tl_assert(0);
|
|
}
|
|
|
|
if (cand_thr == NULL)
|
|
/* This slot isn't in use. Ignore it. */
|
|
continue;
|
|
|
|
if (cand_thr == thr)
|
|
/* This is an access by the same thread, but we're only
|
|
interested in accesses from other threads. Ignore. */
|
|
continue;
|
|
|
|
if ((!cand_isW) && (!isW))
|
|
/* We don't want to report a read racing against another
|
|
read; that's stupid. So in this case move on. */
|
|
continue;
|
|
|
|
if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
|
|
/* No overlap with the access we're asking about. Ignore. */
|
|
continue;
|
|
|
|
/* We have a match. Stop searching. */
|
|
break;
|
|
}
|
|
|
|
tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
|
|
|
|
if (i < N_OLDREF_ACCS) {
|
|
/* return with success */
|
|
tl_assert(cand_thr);
|
|
tl_assert(cand_rcec);
|
|
tl_assert(cand_rcec->magic == RCEC_MAGIC);
|
|
tl_assert(cand_szB >= 1);
|
|
*resEC = VG_(make_ExeContext_from_StackTrace)(
|
|
&cand_rcec->frames[1],
|
|
min_UInt(N_FRAMES, VG_(clo_backtrace_size))
|
|
);
|
|
*resThr = cand_thr;
|
|
*resSzB = cand_szB;
|
|
*resIsW = cand_isW;
|
|
return True;
|
|
}
|
|
|
|
/* consider next address in toCheck[] */
|
|
} /* for (j = 0; j < nToCheck; j++) */
|
|
|
|
/* really didn't find anything. */
|
|
return False;
|
|
}
|
|
|
|
static void event_map_init ( void )
|
|
{
|
|
Word i;
|
|
|
|
/* Context (RCEC) group allocator */
|
|
init_GroupAlloc ( &rcec_group_allocator,
|
|
sizeof(RCEC),
|
|
1000 /* RCECs per group */,
|
|
HG_(zalloc),
|
|
"libhb.event_map_init.1 (RCEC groups)",
|
|
HG_(free) );
|
|
|
|
/* Context table */
|
|
tl_assert(!contextTab);
|
|
contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
|
|
N_RCEC_TAB * sizeof(RCEC*) );
|
|
tl_assert(contextTab);
|
|
for (i = 0; i < N_RCEC_TAB; i++)
|
|
contextTab[i] = NULL;
|
|
|
|
/* Oldref group allocator */
|
|
init_GroupAlloc ( &oldref_group_allocator,
|
|
sizeof(OldRef),
|
|
1000 /* OldRefs per group */,
|
|
HG_(zalloc),
|
|
"libhb.event_map_init.3 (OldRef groups)",
|
|
HG_(free) );
|
|
|
|
/* Oldref tree */
|
|
tl_assert(!oldrefTree);
|
|
oldrefTree = VG_(newSWA)(
|
|
HG_(zalloc),
|
|
"libhb.event_map_init.4 (oldref tree)",
|
|
HG_(free)
|
|
);
|
|
tl_assert(oldrefTree);
|
|
|
|
oldrefGen = 0;
|
|
oldrefGenIncAt = 0;
|
|
oldrefTreeN = 0;
|
|
}
|
|
|
|
static void event_map__check_reference_counts ( Bool before )
|
|
{
|
|
RCEC* rcec;
|
|
OldRef* oldref;
|
|
Word i;
|
|
UWord nEnts = 0;
|
|
UWord keyW, valW;
|
|
|
|
/* Set the 'check' reference counts to zero. Also, optionally
|
|
check that the real reference counts are non-zero. We allow
|
|
these to fall to zero before a GC, but the GC must get rid of
|
|
all those that are zero, hence none should be zero after a
|
|
GC. */
|
|
for (i = 0; i < N_RCEC_TAB; i++) {
|
|
for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
|
|
nEnts++;
|
|
tl_assert(rcec);
|
|
tl_assert(rcec->magic == RCEC_MAGIC);
|
|
if (!before)
|
|
tl_assert(rcec->rc > 0);
|
|
rcec->rcX = 0;
|
|
}
|
|
}
|
|
|
|
/* check that the stats are sane */
|
|
tl_assert(nEnts == stats__ctxt_tab_curr);
|
|
tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
|
|
|
|
/* visit all the referencing points, inc check ref counts */
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
oldref = (OldRef*)valW;
|
|
tl_assert(oldref->magic == OldRef_MAGIC);
|
|
for (i = 0; i < N_OLDREF_ACCS; i++) {
|
|
Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
|
|
RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
|
|
if (aThr) {
|
|
tl_assert(aRef);
|
|
tl_assert(aRef->magic == RCEC_MAGIC);
|
|
aRef->rcX++;
|
|
} else {
|
|
tl_assert(!aRef);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* compare check ref counts with actual */
|
|
for (i = 0; i < N_RCEC_TAB; i++) {
|
|
for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
|
|
tl_assert(rcec->rc == rcec->rcX);
|
|
}
|
|
}
|
|
}
|
|
|
|
__attribute__((noinline))
|
|
static void event_map_maybe_GC ( void )
|
|
{
|
|
OldRef* oldref;
|
|
UWord keyW, valW, retained, maxGen;
|
|
XArray* refs2del;
|
|
Word i, j, n2del;
|
|
|
|
UWord* genMap = NULL;
|
|
UWord genMap_min = 0;
|
|
UWord genMap_size = 0;
|
|
|
|
if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
|
|
return;
|
|
|
|
if (0)
|
|
VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
|
|
|
|
/* Check for sane command line params. Limit values must match
|
|
those in hg_process_cmd_line_option. */
|
|
tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
|
|
tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
|
|
|
|
/* Check our counting is sane (expensive) */
|
|
if (CHECK_CEM)
|
|
tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
|
|
|
|
/* Check the reference counts (expensive) */
|
|
if (CHECK_CEM)
|
|
event_map__check_reference_counts( True/*before*/ );
|
|
|
|
/* Compute the distribution of generation values in the ref tree.
|
|
There are likely only to be a few different generation numbers
|
|
in the whole tree, but we don't know what they are. Hence use a
|
|
dynamically resized array of counters. The array is genMap[0
|
|
.. genMap_size-1], where genMap[0] is the count for the
|
|
generation number genMap_min, genMap[1] is the count for
|
|
genMap_min+1, etc. If a new number is seen outside the range
|
|
[genMap_min .. genMap_min + genMap_size - 1] then the array is
|
|
copied into a larger array, and genMap_min and genMap_size are
|
|
adjusted accordingly. */
|
|
|
|
/* genMap :: generation-number -> count-of-nodes-with-that-number */
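
   /* Worked example of the resizing scheme (illustrative only): if
      the generations seen so far were 7 and 9, then genMap_min == 7,
      genMap_size == 3 and genMap[] holds the counts for 7, 8 and 9;
      seeing generation 5 next grows the array at the front, giving
      genMap_min == 5 and genMap_size == 5. */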
|
|
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
|
|
UWord ea, key;
|
|
oldref = (OldRef*)valW;
|
|
key = oldref->gen;
|
|
|
|
/* BEGIN find 'ea', which is the index in genMap holding the
|
|
count for generation number 'key'. */
|
|
if (UNLIKELY(genMap == NULL)) {
|
|
/* deal with the first key to be seen, so that the following
|
|
cases don't need to handle the complexity of a NULL count
|
|
array. */
|
|
genMap_min = key;
|
|
genMap_size = 1;
|
|
genMap = HG_(zalloc)( "libhb.emmG.1a",
|
|
genMap_size * sizeof(UWord) );
|
|
ea = 0;
|
|
if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
|
|
key, genMap_min, genMap_min+genMap_size- 1 );
|
|
}
|
|
else
|
|
if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
|
|
/* this is the expected (almost-always-happens) case: 'key'
|
|
is already mapped in the array. */
|
|
ea = key - genMap_min;
|
|
}
|
|
else
|
|
if (key < genMap_min) {
|
|
/* 'key' appears before the start of the current array.
|
|
Extend the current array by allocating a larger one and
|
|
copying the current one to the upper end of it. */
|
|
Word more;
|
|
UWord* map2;
|
|
more = genMap_min - key;
|
|
tl_assert(more > 0);
|
|
map2 = HG_(zalloc)( "libhb.emmG.1b",
|
|
(genMap_size + more) * sizeof(UWord) );
|
|
VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
|
|
HG_(free)( genMap );
|
|
genMap = map2;
|
|
genMap_size += more;
|
|
genMap_min -= more;
|
|
ea = 0;
|
|
tl_assert(genMap_min == key);
|
|
if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
|
|
key, genMap_min, genMap_min+genMap_size- 1 );
|
|
}
|
|
else {
|
|
/* 'key' appears after the end of the current array. Extend
|
|
the current array by allocating a larger one and copying
|
|
the current one to the lower end of it. */
|
|
Word more;
|
|
UWord* map2;
|
|
tl_assert(key >= genMap_min + genMap_size);
|
|
more = key - (genMap_min + genMap_size) + 1;
|
|
tl_assert(more > 0);
|
|
map2 = HG_(zalloc)( "libhb.emmG.1c",
|
|
(genMap_size + more) * sizeof(UWord) );
|
|
VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
|
|
HG_(free)( genMap );
|
|
genMap = map2;
|
|
genMap_size += more;
|
|
         ea = genMap_size - 1;
|
|
tl_assert(genMap_min + genMap_size - 1 == key);
|
|
if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
|
|
key, genMap_min, genMap_min+genMap_size- 1 );
|
|
}
|
|
/* END find 'ea' from 'key' */
|
|
|
|
tl_assert(ea >= 0 && ea < genMap_size);
|
|
/* and the whole point of this elaborate computation of 'ea' is .. */
|
|
genMap[ea]++;
|
|
}
|
|
|
|
tl_assert(genMap);
|
|
tl_assert(genMap_size > 0);
|
|
|
|
/* Sanity check what we just computed */
|
|
{ UWord sum = 0;
|
|
for (i = 0; i < genMap_size; i++) {
|
|
if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
|
|
i + genMap_min, genMap[i] );
|
|
sum += genMap[i];
|
|
}
|
|
tl_assert(sum == oldrefTreeN);
|
|
}
|
|
|
|
/* Figure out how many generations to throw away */
|
|
retained = oldrefTreeN;
|
|
maxGen = 0;
|
|
|
|
for (i = 0; i < genMap_size; i++) {
|
|
keyW = i + genMap_min;
|
|
valW = genMap[i];
|
|
tl_assert(keyW > 0); /* can't allow a generation # 0 */
|
|
if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
|
|
tl_assert(keyW >= maxGen);
|
|
tl_assert(retained >= valW);
|
|
if (retained - valW
|
|
> (UWord)(HG_(clo_conflict_cache_size)
|
|
* EVENT_MAP_GC_DISCARD_FRACTION)) {
|
|
retained -= valW;
|
|
maxGen = keyW;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
HG_(free)(genMap);
|
|
|
|
tl_assert(retained >= 0 && retained <= oldrefTreeN);
|
|
|
|
/* Now make up a big list of the oldrefTree entries we want to
|
|
delete. We can't simultaneously traverse the tree and delete
|
|
stuff from it, so first we need to copy them off somewhere
|
|
else. (sigh) */
|
|
refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
|
|
HG_(free), sizeof(Addr) );
|
|
|
|
if (retained < oldrefTreeN) {
|
|
|
|
/* This is the normal (expected) case. We discard any ref whose
|
|
generation number <= maxGen. */
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
oldref = (OldRef*)valW;
|
|
tl_assert(oldref->magic == OldRef_MAGIC);
|
|
if (oldref->gen <= maxGen) {
|
|
VG_(addToXA)( refs2del, &keyW );
|
|
}
|
|
}
|
|
if (VG_(clo_verbosity) > 1) {
|
|
VG_(message)(Vg_DebugMsg,
|
|
"libhb: EvM GC: delete generations %lu and below, "
|
|
"retaining %lu entries",
|
|
maxGen, retained );
|
|
}
|
|
|
|
} else {
|
|
|
|
static UInt rand_seed = 0; /* leave as static */
|
|
|
|
/* Degenerate case: there's only one generation in the entire
|
|
tree, so we need to have some other way of deciding which
|
|
refs to throw away. Just throw out half of them randomly. */
|
|
tl_assert(retained == oldrefTreeN);
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
UInt n;
|
|
oldref = (OldRef*)valW;
|
|
tl_assert(oldref->magic == OldRef_MAGIC);
|
|
n = VG_(random)( &rand_seed );
|
|
if ((n & 0xFFF) < 0x800) {
|
|
VG_(addToXA)( refs2del, &keyW );
|
|
retained--;
|
|
}
|
|
}
|
|
if (VG_(clo_verbosity) > 1) {
|
|
VG_(message)(Vg_DebugMsg,
|
|
"libhb: EvM GC: randomly delete half the entries, "
|
|
"retaining %lu entries",
|
|
retained );
|
|
}
|
|
|
|
}
|
|
|
|
n2del = VG_(sizeXA)( refs2del );
|
|
tl_assert(n2del == (Word)(oldrefTreeN - retained));
|
|
|
|
if (0) VG_(printf)("%s","deleting entries\n");
|
|
for (i = 0; i < n2del; i++) {
|
|
Bool b;
|
|
Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
|
|
b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
|
|
tl_assert(b);
|
|
tl_assert(keyW == ga2del);
|
|
oldref = (OldRef*)valW;
|
|
for (j = 0; j < N_OLDREF_ACCS; j++) {
|
|
Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
|
|
RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
|
|
if (aRef) {
|
|
tl_assert(aThr);
|
|
stats__ctxt_rcdec3++;
|
|
ctxt__rcdec( aRef );
|
|
} else {
|
|
tl_assert(!aThr);
|
|
}
|
|
}
|
|
|
|
free_OldRef( oldref );
|
|
}
|
|
|
|
VG_(deleteXA)( refs2del );
|
|
|
|
tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
|
|
|
|
oldrefTreeN = retained;
|
|
oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
|
|
|
|
/* Throw away all RCECs with zero reference counts */
|
|
for (i = 0; i < N_RCEC_TAB; i++) {
|
|
RCEC** pp = &contextTab[i];
|
|
RCEC* p = *pp;
|
|
while (p) {
|
|
if (p->rc == 0) {
|
|
*pp = p->next;
|
|
free_RCEC(p);
|
|
p = *pp;
|
|
tl_assert(stats__ctxt_tab_curr > 0);
|
|
stats__ctxt_tab_curr--;
|
|
} else {
|
|
pp = &p->next;
|
|
p = p->next;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Check the reference counts (expensive) */
|
|
if (CHECK_CEM)
|
|
event_map__check_reference_counts( False/*after*/ );
|
|
|
|
//if (0)
|
|
//VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
|
|
// VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
|
|
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Core MSM //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/* Logic in msm_read/msm_write updated/verified after re-analysis,
|
|
19 Nov 08. */
|
|
|
|
/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
|
|
nonzero, the effect is that when a race is detected for a location,
|
|
that location is put into a special 'error' state and no further
|
|
checking of it is done until it returns to a 'normal' state, which
|
|
requires it to be deallocated and reallocated.
|
|
|
|
This is a bad idea, because of the interaction with suppressions.
|
|
Suppose there is a race on the location, but the error is
|
|
suppressed. The location now is marked as in-error. Now any
|
|
subsequent race -- including ones we want to see -- will never be
|
|
detected until the location is deallocated and reallocated.
|
|
|
|
Hence set MSM_RACE2ERR to zero. This causes raced-on locations to
|
|
remain in the normal 'C' (constrained) state, but places on them
|
|
the constraint that the next accesses happen-after both the
|
|
existing constraint and the relevant vector clock of the thread
|
|
doing the racing access.
|
|
*/
|
|
#define MSM_RACE2ERR 0
|
|
|
|
static ULong stats__msm_read = 0;
|
|
static ULong stats__msm_read_change = 0;
|
|
static ULong stats__msm_write = 0;
|
|
static ULong stats__msm_write_change = 0;
|
|
|
|
__attribute__((noinline))
|
|
static void record_race_info ( Thr* acc_thr,
|
|
Addr acc_addr, SizeT szB, Bool isWrite )
|
|
{
|
|
/* Call here to report a race. We just hand it onwards to
|
|
HG_(record_error_Race). If that in turn discovers that the
|
|
error is going to be collected, then that queries the
|
|
conflicting-event map. The alternative would be to query it
|
|
right here. But that causes a lot of pointless queries for
|
|
errors which will shortly be discarded as duplicates, and can
|
|
become a performance overhead; so we defer the query until we
|
|
know the error is not a duplicate. */
|
|
tl_assert(acc_thr->opaque);
|
|
HG_(record_error_Race)( acc_thr->opaque, acc_addr,
|
|
szB, isWrite, NULL/*mb_lastlock*/ );
|
|
}
|
|
|
|
static Bool is_sane_SVal_C ( SVal sv ) {
|
|
POrd ord;
|
|
if (!SVal__isC(sv)) return True;
|
|
ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
|
|
if (ord == POrd_EQ || ord == POrd_LT) return True;
|
|
return False;
|
|
}
|
|
|
|
|
|
/* Compute new state following a read */
|
|
static inline SVal msm_read ( SVal svOld,
|
|
/* The following are only needed for
|
|
creating error reports. */
|
|
Thr* acc_thr,
|
|
Addr acc_addr, SizeT szB )
|
|
{
|
|
SVal svNew = SVal_INVALID;
|
|
stats__msm_read++;
|
|
|
|
/* Redundant sanity check on the constraints */
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svOld));
|
|
}
|
|
|
|
if (SVal__isC(svOld)) {
|
|
POrd ord;
|
|
VtsID tviR = acc_thr->viR;
|
|
VtsID tviW = acc_thr->viW;
|
|
VtsID rmini = SVal__unC_Rmin(svOld);
|
|
VtsID wmini = SVal__unC_Wmin(svOld);
|
|
|
|
ord = VtsID__getOrdering(rmini,tviR);
|
|
if (ord == POrd_EQ || ord == POrd_LT) {
|
|
/* no race */
|
|
/* Note: RWLOCK subtlety: use tviW, not tviR */
|
|
svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
|
|
goto out;
|
|
} else {
|
|
/* assert on sanity of constraints. */
|
|
POrd ordxx = VtsID__getOrdering(rmini,wmini);
|
|
tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
|
|
svNew = MSM_RACE2ERR
|
|
? SVal__mkE()
|
|
/* see comments on corresponding fragment in
|
|
msm_write for explanation. */
|
|
/* aggressive setting: */
|
|
/*
|
|
: SVal__mkC( VtsID__join2(wmini,tviR),
|
|
VtsID__join2(wmini,tviW) );
|
|
*/
|
|
/* "consistent" setting: */
|
|
: SVal__mkC( VtsID__join2(rmini,tviR),
|
|
VtsID__join2(wmini,tviW) );
|
|
record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ );
|
|
goto out;
|
|
}
|
|
}
|
|
if (SVal__isA(svOld)) {
|
|
/* reading no-access memory (sigh); leave unchanged */
|
|
/* check for no pollution */
|
|
tl_assert(svOld == SVal_NOACCESS);
|
|
svNew = SVal_NOACCESS;
|
|
goto out;
|
|
}
|
|
if (SVal__isE(svOld)) {
|
|
/* no race, location is already "in error" */
|
|
svNew = SVal__mkE();
|
|
goto out;
|
|
}
|
|
VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
|
|
tl_assert(0);
|
|
|
|
out:
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svNew));
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
if (svNew != svOld && HG_(clo_show_conflicts)) {
|
|
if (SVal__isC(svOld) && SVal__isC(svNew)) {
|
|
event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
|
|
stats__msm_read_change++;
|
|
}
|
|
}
|
|
return svNew;
|
|
}
|
|
|
|
|
|
/* Compute new state following a write */
|
|
static inline SVal msm_write ( SVal svOld,
|
|
/* The following are only needed for
|
|
creating error reports. */
|
|
Thr* acc_thr,
|
|
Addr acc_addr, SizeT szB )
|
|
{
|
|
SVal svNew = SVal_INVALID;
|
|
stats__msm_write++;
|
|
|
|
/* Redundant sanity check on the constraints */
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svOld));
|
|
}
|
|
|
|
if (SVal__isC(svOld)) {
|
|
POrd ord;
|
|
VtsID tviW = acc_thr->viW;
|
|
VtsID wmini = SVal__unC_Wmin(svOld);
|
|
|
|
ord = VtsID__getOrdering(wmini,tviW);
|
|
if (ord == POrd_EQ || ord == POrd_LT) {
|
|
/* no race */
|
|
svNew = SVal__mkC( tviW, tviW );
|
|
goto out;
|
|
} else {
|
|
VtsID tviR = acc_thr->viR;
|
|
VtsID rmini = SVal__unC_Rmin(svOld);
|
|
/* assert on sanity of constraints. */
|
|
POrd ordxx = VtsID__getOrdering(rmini,wmini);
|
|
tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
|
|
svNew = MSM_RACE2ERR
|
|
? SVal__mkE()
|
|
/* One possibility is, after a race is seen, to
|
|
set the location's constraints as aggressively
|
|
(as far ahead) as possible. However, that just
|
|
causes lots more races to be reported, which is
|
|
very confusing. Hence don't do this. */
|
|
/*
|
|
: SVal__mkC( VtsID__join2(wmini,tviR),
|
|
VtsID__join2(wmini,tviW) );
|
|
*/
|
|
/* instead, re-set the constraints in a way which
|
|
is consistent with (ie, as they would have been
|
|
computed anyway) had no race been detected. */
|
|
: SVal__mkC( VtsID__join2(rmini,tviR),
|
|
VtsID__join2(wmini,tviW) );
|
|
record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ );
|
|
goto out;
|
|
}
|
|
}
|
|
if (SVal__isA(svOld)) {
|
|
/* writing no-access memory (sigh); leave unchanged */
|
|
/* check for no pollution */
|
|
tl_assert(svOld == SVal_NOACCESS);
|
|
svNew = SVal_NOACCESS;
|
|
goto out;
|
|
}
|
|
if (SVal__isE(svOld)) {
|
|
/* no race, location is already "in error" */
|
|
svNew = SVal__mkE();
|
|
goto out;
|
|
}
|
|
VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
|
|
tl_assert(0);
|
|
|
|
out:
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svNew));
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
if (svNew != svOld && HG_(clo_show_conflicts)) {
|
|
if (SVal__isC(svOld) && SVal__isC(svNew)) {
|
|
event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
|
|
stats__msm_write_change++;
|
|
}
|
|
}
|
|
return svNew;
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Apply core MSM to specific memory locations //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/*------------- ZSM accesses: 8 bit apply ------------- */
|
|
|
|
void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_read8s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_read( svOld, thr,a,1 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
}
|
|
|
|
void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_write8s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_write( svOld, thr,a,1 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
}
|
|
|
|
/*------------- ZSM accesses: 16 bit apply ------------- */
|
|
|
|
void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_read16s++;
|
|
if (UNLIKELY(!aligned16(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
|
|
if (valid_value_is_below_me_16(descr, toff)) {
|
|
goto slowcase;
|
|
} else {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_read( svOld, thr,a,2 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_16to8splits++;
|
|
zsm_apply8___msm_read( thr, a + 0 );
|
|
zsm_apply8___msm_read( thr, a + 1 );
|
|
}
|
|
|
|
void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_write16s++;
|
|
if (UNLIKELY(!aligned16(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
|
|
if (valid_value_is_below_me_16(descr, toff)) {
|
|
goto slowcase;
|
|
} else {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_write( svOld, thr,a,2 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_16to8splits++;
|
|
zsm_apply8___msm_write( thr, a + 0 );
|
|
zsm_apply8___msm_write( thr, a + 1 );
|
|
}
|
|
|
|
/*------------- ZSM accesses: 32 bit apply ------------- */
|
|
|
|
void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
stats__cline_read32s++;
if (UNLIKELY(!aligned32(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 or 4 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
|
|
if (valid_value_is_above_me_32(descr, toff)) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
|
|
} else {
|
|
goto slowcase;
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_read( svOld, thr,a,4 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_32to16splits++;
|
|
zsm_apply16___msm_read( thr, a + 0 );
|
|
zsm_apply16___msm_read( thr, a + 2 );
|
|
}
|
|
|
|
void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
stats__cline_write32s++;
if (UNLIKELY(!aligned32(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 or 4 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
|
|
if (valid_value_is_above_me_32(descr, toff)) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
|
|
} else {
|
|
goto slowcase;
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_write( svOld, thr,a,4 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_32to16splits++;
|
|
zsm_apply16___msm_write( thr, a + 0 );
|
|
zsm_apply16___msm_write( thr, a + 2 );
|
|
}
|
|
|
|
/*------------- ZSM accesses: 64 bit apply ------------- */
|
|
|
|
void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_read64s++;
|
|
if (UNLIKELY(!aligned64(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, unused */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
|
|
goto slowcase;
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_read( svOld, thr,a,8 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_64to32splits++;
|
|
zsm_apply32___msm_read( thr, a + 0 );
|
|
zsm_apply32___msm_read( thr, a + 4 );
|
|
}
|
|
|
|
void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_write64s++;
|
|
if (UNLIKELY(!aligned64(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, unused */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
|
|
goto slowcase;
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msm_write( svOld, thr,a,8 );
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_64to32splits++;
|
|
zsm_apply32___msm_write( thr, a + 0 );
|
|
zsm_apply32___msm_write( thr, a + 4 );
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 8 bit write --------------- */
|
|
|
|
static
|
|
void zsm_write8 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_set8s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 16 bit write --------------- */
|
|
|
|
static
|
|
void zsm_write16 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_set16s++;
|
|
if (UNLIKELY(!aligned16(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
|
|
if (valid_value_is_below_me_16(descr, toff)) {
|
|
/* Writing at this level. Need to fix up 'descr'. */
|
|
cl->descrs[tno] = pullup_descr_to_16(descr, toff);
|
|
/* At this point, the tree does not match cl->descr[tno] any
|
|
more. The assignments below will fix it up. */
|
|
} else {
|
|
/* We can't indiscriminately write on the w16 node as in the
|
|
w64 case, as that might make the node inconsistent with
|
|
its parent. So first, pull down to this level. */
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff + 0] = svNew;
|
|
cl->svals[cloff + 1] = SVal_INVALID;
|
|
return;
|
|
slowcase: /* misaligned */
|
|
stats__cline_16to8splits++;
|
|
zsm_write8( a + 0, svNew );
|
|
zsm_write8( a + 1, svNew );
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 32 bit write --------------- */
|
|
|
|
static
|
|
void zsm_write32 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_set32s++;
|
|
if (UNLIKELY(!aligned32(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 or 4 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
|
|
if (valid_value_is_above_me_32(descr, toff)) {
|
|
/* We can't indiscriminately write on the w32 node as in the
|
|
w64 case, as that might make the node inconsistent with
|
|
its parent. So first, pull down to this level. */
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
} else {
|
|
/* Writing at this level. Need to fix up 'descr'. */
|
|
cl->descrs[tno] = pullup_descr_to_32(descr, toff);
|
|
/* At this point, the tree does not match cl->descr[tno] any
|
|
more. The assignments below will fix it up. */
|
|
}
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff + 0] = svNew;
|
|
cl->svals[cloff + 1] = SVal_INVALID;
|
|
cl->svals[cloff + 2] = SVal_INVALID;
|
|
cl->svals[cloff + 3] = SVal_INVALID;
|
|
return;
|
|
slowcase: /* misaligned */
|
|
stats__cline_32to16splits++;
|
|
zsm_write16( a + 0, svNew );
|
|
zsm_write16( a + 2, svNew );
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 64 bit write --------------- */
|
|
|
|
static
|
|
void zsm_write64 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
stats__cline_set64s++;
|
|
if (UNLIKELY(!aligned64(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 */
|
|
cl->descrs[tno] = TREE_DESCR_64;
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff + 0] = svNew;
|
|
cl->svals[cloff + 1] = SVal_INVALID;
|
|
cl->svals[cloff + 2] = SVal_INVALID;
|
|
cl->svals[cloff + 3] = SVal_INVALID;
|
|
cl->svals[cloff + 4] = SVal_INVALID;
|
|
cl->svals[cloff + 5] = SVal_INVALID;
|
|
cl->svals[cloff + 6] = SVal_INVALID;
|
|
cl->svals[cloff + 7] = SVal_INVALID;
|
|
return;
|
|
slowcase: /* misaligned */
|
|
stats__cline_64to32splits++;
|
|
zsm_write32( a + 0, svNew );
|
|
zsm_write32( a + 4, svNew );
|
|
}
|
|
|
|
/*------------- ZSM accesses: 8 bit read/copy ------------- */
|
|
|
|
static
|
|
SVal zsm_read8 ( Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_get8s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
}
|
|
return cl->svals[cloff];
|
|
}
|
|
|
|
static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
|
|
SVal sv;
|
|
stats__cline_copy8s++;
|
|
sv = zsm_read8( src );
|
|
zsm_write8( dst, sv );
|
|
}
|
|
|
|
/* ------------ Shadow memory range setting ops ------------ */
|
|
|
|
void zsm_apply_range___msm_read ( Thr* thr,
|
|
Addr a, SizeT len )
|
|
{
|
|
/* fast track a couple of common cases */
|
|
if (len == 4 && aligned32(a)) {
|
|
zsm_apply32___msm_read( thr, a );
|
|
return;
|
|
}
|
|
if (len == 8 && aligned64(a)) {
|
|
zsm_apply64___msm_read( thr, a );
|
|
return;
|
|
}
|
|
|
|
/* be completely general (but as efficient as possible) */
|
|
if (len == 0) return;
|
|
|
|
if (!aligned16(a) && len >= 1) {
|
|
zsm_apply8___msm_read( thr, a );
|
|
a += 1;
|
|
len -= 1;
|
|
tl_assert(aligned16(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned32(a) && len >= 2) {
|
|
zsm_apply16___msm_read( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
tl_assert(aligned32(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned64(a) && len >= 4) {
|
|
zsm_apply32___msm_read( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 8) {
|
|
tl_assert(aligned64(a));
|
|
while (len >= 8) {
|
|
zsm_apply64___msm_read( thr, a );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 4)
|
|
tl_assert(aligned32(a));
|
|
if (len >= 4) {
|
|
zsm_apply32___msm_read( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 2)
|
|
tl_assert(aligned16(a));
|
|
if (len >= 2) {
|
|
zsm_apply16___msm_read( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 1) {
|
|
zsm_apply8___msm_read( thr, a );
|
|
a += 1;
|
|
len -= 1;
|
|
}
|
|
tl_assert(len == 0);
|
|
}
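
/* The function above (and its write twin just below) peels off
   leading 1-, 2- and 4-byte pieces until the address is 8-aligned,
   streams 8-byte chunks, then peels trailing pieces.  The sketch
   below expresses the same decomposition through a callback so the
   shape of the traversal is easier to see; 'ChunkFn' and
   'walk_range_aligned' are illustrative names only, and a real user
   would pass a dispatcher over zsm_apply{8,16,32,64}___msm_read or
   _write as 'f'.  Change #if 0 to #if 1 to experiment. */
#if 0
typedef void (*ChunkFn) ( Thr* thr, Addr a, SizeT szB /* 1,2,4,8 */ );

/* Visit [a, a+len) in naturally-aligned chunks, largest first where
   alignment permits, exactly as the two range functions do by hand. */
static void walk_range_aligned ( Thr* thr, Addr a, SizeT len, ChunkFn f )
{
   if (len >= 1 && !aligned16(a)) { f(thr,a,1); a += 1; len -= 1; }
   if (len >= 2 && !aligned32(a)) { f(thr,a,2); a += 2; len -= 2; }
   if (len >= 4 && !aligned64(a)) { f(thr,a,4); a += 4; len -= 4; }
   while (len >= 8)               { f(thr,a,8); a += 8; len -= 8; }
   if (len >= 4)                  { f(thr,a,4); a += 4; len -= 4; }
   if (len >= 2)                  { f(thr,a,2); a += 2; len -= 2; }
   if (len >= 1)                  { f(thr,a,1); a += 1; len -= 1; }
   tl_assert(len == 0);
}
#endif
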
void zsm_apply_range___msm_write ( Thr* thr,
|
|
Addr a, SizeT len )
|
|
{
|
|
/* fast track a couple of common cases */
|
|
if (len == 4 && aligned32(a)) {
|
|
zsm_apply32___msm_write( thr, a );
|
|
return;
|
|
}
|
|
if (len == 8 && aligned64(a)) {
|
|
zsm_apply64___msm_write( thr, a );
|
|
return;
|
|
}
|
|
|
|
/* be completely general (but as efficient as possible) */
|
|
if (len == 0) return;
|
|
|
|
if (!aligned16(a) && len >= 1) {
|
|
zsm_apply8___msm_write( thr, a );
|
|
a += 1;
|
|
len -= 1;
|
|
tl_assert(aligned16(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned32(a) && len >= 2) {
|
|
zsm_apply16___msm_write( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
tl_assert(aligned32(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned64(a) && len >= 4) {
|
|
zsm_apply32___msm_write( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 8) {
|
|
tl_assert(aligned64(a));
|
|
while (len >= 8) {
|
|
zsm_apply64___msm_write( thr, a );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 4)
|
|
tl_assert(aligned32(a));
|
|
if (len >= 4) {
|
|
zsm_apply32___msm_write( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 2)
|
|
tl_assert(aligned16(a));
|
|
if (len >= 2) {
|
|
zsm_apply16___msm_write( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 1) {
|
|
zsm_apply8___msm_write( thr, a );
|
|
a += 1;
|
|
len -= 1;
|
|
}
|
|
tl_assert(len == 0);
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Block-copy states (needed for implementing realloc()). */
|
|
|
|
static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
|
|
{
|
|
SizeT i;
|
|
if (len == 0)
|
|
return;
|
|
|
|
/* assert for non-overlappingness */
|
|
tl_assert(src+len <= dst || dst+len <= src);
|
|
|
|
/* To be simple, just copy byte by byte. But so as not to wreck
performance for later accesses to dst[0 .. len-1], normalise
destination lines as we finish with them, and also normalise the
lines containing the first and last addresses. */
|
|
for (i = 0; i < len; i++) {
|
|
Bool normalise
|
|
= get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
|
|
|| i == 0 /* first in range */
|
|
|| i == len-1; /* last in range */
|
|
zsm_copy8( src+i, dst+i, normalise );
|
|
}
|
|
}
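
/* zsm_copy_range is the primitive behind libhb_copy_shadow_state in
   the Top Level API further down.  Purely as an indication of the
   intended call pattern, here is a hypothetical client-side realloc
   handler (the function and parameter names are inventions; a real
   client would obtain 'thr' and the payload addresses from its own
   bookkeeping).  Note zsm_copy_range asserts that the two areas do
   not overlap, so this only applies when the block really moved. */
#if 0
static void example_on_realloc ( Thr* thr,
                                 Addr old_payload, SizeT old_szB,
                                 Addr new_payload, SizeT new_szB )
{
   SizeT common = old_szB < new_szB ? old_szB : new_szB;
   /* carry the shadow state of the surviving prefix across ... */
   libhb_copy_shadow_state( new_payload, old_payload, common );
   /* ... mark any newly exposed tail as fresh for this thread ... */
   if (new_szB > old_szB)
      libhb_range_new( thr, new_payload + common, new_szB - common );
   /* ... and retire the old block. */
   libhb_range_noaccess( thr, old_payload, old_szB );
}
#endif
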
/* For setting address ranges to a given value. Has considerable
|
|
sophistication so as to avoid generating large numbers of pointless
|
|
cache loads/writebacks for large ranges. */
|
|
|
|
/* Do small ranges in-cache, in the obvious way. */
|
|
static
|
|
void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
|
|
{
|
|
/* fast track a couple of common cases */
|
|
if (len == 4 && aligned32(a)) {
|
|
zsm_write32( a, svNew );
|
|
return;
|
|
}
|
|
if (len == 8 && aligned64(a)) {
|
|
zsm_write64( a, svNew );
|
|
return;
|
|
}
|
|
|
|
/* be completely general (but as efficient as possible) */
|
|
if (len == 0) return;
|
|
|
|
if (!aligned16(a) && len >= 1) {
|
|
zsm_write8( a, svNew );
|
|
a += 1;
|
|
len -= 1;
|
|
tl_assert(aligned16(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned32(a) && len >= 2) {
|
|
zsm_write16( a, svNew );
|
|
a += 2;
|
|
len -= 2;
|
|
tl_assert(aligned32(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned64(a) && len >= 4) {
|
|
zsm_write32( a, svNew );
|
|
a += 4;
|
|
len -= 4;
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 8) {
|
|
tl_assert(aligned64(a));
|
|
while (len >= 8) {
|
|
zsm_write64( a, svNew );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 4)
|
|
tl_assert(aligned32(a));
|
|
if (len >= 4) {
|
|
zsm_write32( a, svNew );
|
|
a += 4;
|
|
len -= 4;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 2)
|
|
tl_assert(aligned16(a));
|
|
if (len >= 2) {
|
|
zsm_write16( a, svNew );
|
|
a += 2;
|
|
len -= 2;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 1) {
|
|
zsm_write8( a, svNew );
|
|
a += 1;
|
|
len -= 1;
|
|
}
|
|
tl_assert(len == 0);
|
|
}
|
|
|
|
|
|
/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
|
|
for larger ranges, try to operate directly on the out-of-cache
|
|
representation, rather than dragging lines into the cache,
|
|
overwriting them, and forcing them out. This turns out to be an
|
|
important performance optimisation. */
|
|
|
|
static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
|
|
{
|
|
tl_assert(svNew != SVal_INVALID);
|
|
stats__cache_make_New_arange += (ULong)len;
|
|
|
|
if (0 && len > 500)
|
|
VG_(printf)("make New ( %#lx, %ld )\n", a, len );
|
|
|
|
if (0) {
|
|
static UWord n_New_in_cache = 0;
|
|
static UWord n_New_not_in_cache = 0;
|
|
/* tag is 'a' with the in-line offset masked out,
|
|
eg a[31]..a[4] 0000 */
|
|
Addr tag = a & ~(N_LINE_ARANGE - 1);
|
|
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
if (LIKELY(tag == cache_shmem.tags0[wix])) {
|
|
n_New_in_cache++;
|
|
} else {
|
|
n_New_not_in_cache++;
|
|
}
|
|
if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
|
|
VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
|
|
n_New_in_cache, n_New_not_in_cache );
|
|
}
|
|
|
|
if (LIKELY(len < 2 * N_LINE_ARANGE)) {
|
|
zsm_set_range_SMALL( a, len, svNew );
|
|
} else {
|
|
Addr before_start = a;
|
|
Addr aligned_start = cacheline_ROUNDUP(a);
|
|
Addr after_start = cacheline_ROUNDDN(a + len);
|
|
UWord before_len = aligned_start - before_start;
|
|
UWord aligned_len = after_start - aligned_start;
|
|
UWord after_len = a + len - after_start;
|
|
tl_assert(before_start <= aligned_start);
|
|
tl_assert(aligned_start <= after_start);
|
|
tl_assert(before_len < N_LINE_ARANGE);
|
|
tl_assert(after_len < N_LINE_ARANGE);
|
|
tl_assert(get_cacheline_offset(aligned_start) == 0);
|
|
if (get_cacheline_offset(a) == 0) {
|
|
tl_assert(before_len == 0);
|
|
tl_assert(a == aligned_start);
|
|
}
|
|
if (get_cacheline_offset(a+len) == 0) {
|
|
tl_assert(after_len == 0);
|
|
tl_assert(after_start == a+len);
|
|
}
|
|
if (before_len > 0) {
|
|
zsm_set_range_SMALL( before_start, before_len, svNew );
|
|
}
|
|
if (after_len > 0) {
|
|
zsm_set_range_SMALL( after_start, after_len, svNew );
|
|
}
|
|
stats__cache_make_New_inZrep += (ULong)aligned_len;
|
|
|
|
while (1) {
|
|
Addr tag;
|
|
UWord wix;
|
|
if (aligned_start >= after_start)
|
|
break;
|
|
tl_assert(get_cacheline_offset(aligned_start) == 0);
|
|
tag = aligned_start & ~(N_LINE_ARANGE - 1);
|
|
wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
if (tag == cache_shmem.tags0[wix]) {
|
|
UWord i;
|
|
for (i = 0; i < N_LINE_ARANGE / 8; i++)
|
|
zsm_write64( aligned_start + i * 8, svNew );
|
|
} else {
|
|
UWord i;
|
|
Word zix;
|
|
SecMap* sm;
|
|
LineZ* lineZ;
|
|
/* This line is not in the cache. Do not force it in; instead
|
|
modify it in-place. */
|
|
/* find the Z line to write in and rcdec it or the
|
|
associated F line. */
|
|
find_Z_for_writing( &sm, &zix, tag );
|
|
tl_assert(sm);
|
|
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
lineZ->dict[0] = svNew;
|
|
lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
|
|
for (i = 0; i < N_LINE_ARANGE/4; i++)
|
|
lineZ->ix2s[i] = 0; /* all refer to dict[0] */
|
|
rcinc_LineZ(lineZ);
|
|
}
|
|
aligned_start += N_LINE_ARANGE;
|
|
aligned_len -= N_LINE_ARANGE;
|
|
}
|
|
tl_assert(aligned_start == after_start);
|
|
tl_assert(aligned_len == 0);
|
|
}
|
|
}
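
/* Worked example of the head/body/tail split performed above, using
   an illustrative line size of 64 bytes purely for the arithmetic
   (the real value is N_LINE_ARANGE).  For a = 100, len = 300:
     aligned_start = ROUNDUP(100,64) = 128   head = [100,128), 28 bytes
     after_start   = ROUNDDN(400,64) = 384   body = [128,384), whole lines
                                             tail = [384,400), 16 bytes
   The head and tail go through zsm_set_range_SMALL (in-cache); the
   body is written directly into the LineZ representation.  Change
   #if 0 to #if 1 to check the arithmetic. */
#if 0
static void example_split ( void )
{
   const Addr line = 64;
   Addr  a   = 100;
   SizeT len = 300;
   Addr  aligned_start = (a + line - 1) & ~(line - 1);
   Addr  after_start   = (a + len) & ~(line - 1);
   tl_assert(aligned_start == 128);
   tl_assert(after_start   == 384);
   tl_assert(aligned_start - a     == 28);                /* head        */
   tl_assert(a + len - after_start == 16);                /* tail        */
   tl_assert((after_start - aligned_start) % line == 0);  /* whole lines */
}
#endif
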
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Synchronisation objects //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
// (UInt) `echo "Synchronisation object" | md5sum`
|
|
#define SO_MAGIC 0x56b3c5b0U
|
|
|
|
struct _SO {
|
|
VtsID viR; /* r-clock of sender */
|
|
VtsID viW; /* w-clock of sender */
|
|
UInt magic;
|
|
};
|
|
|
|
static SO* SO__Alloc ( void ) {
|
|
SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
|
|
so->viR = VtsID_INVALID;
|
|
so->viW = VtsID_INVALID;
|
|
so->magic = SO_MAGIC;
|
|
return so;
|
|
}
|
|
static void SO__Dealloc ( SO* so ) {
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
if (so->viR == VtsID_INVALID) {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
} else {
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
VtsID__rcdec(so->viR);
|
|
VtsID__rcdec(so->viW);
|
|
}
|
|
so->magic = 0;
|
|
HG_(free)( so );
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Top Level API //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static void show_thread_state ( HChar* str, Thr* t )
|
|
{
|
|
if (1) return;
|
|
if (t->viR == t->viW) {
|
|
VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
|
|
VtsID__pp( t->viR );
|
|
VG_(printf)("%s","\n");
|
|
} else {
|
|
VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
|
|
VtsID__pp( t->viR );
|
|
VG_(printf)(" viW %u==", t->viW);
|
|
VtsID__pp( t->viW );
|
|
VG_(printf)("%s","\n");
|
|
}
|
|
}
|
|
|
|
|
|
Thr* libhb_init (
|
|
void (*get_stacktrace)( Thr*, Addr*, UWord ),
|
|
ExeContext* (*get_EC)( Thr* )
|
|
)
|
|
{
|
|
Thr* thr;
|
|
VtsID vi;
|
|
tl_assert(get_stacktrace);
|
|
tl_assert(get_EC);
|
|
main_get_stacktrace = get_stacktrace;
|
|
main_get_EC = get_EC;
|
|
|
|
// No need to initialise hg_wordfm.
|
|
// No need to initialise hg_wordset.
|
|
|
|
vts_set_init();
|
|
vts_tab_init();
|
|
event_map_init();
|
|
VtsID__invalidate_caches();
|
|
|
|
// initialise shadow memory
|
|
zsm_init( SVal__rcinc, SVal__rcdec );
|
|
|
|
thr = Thr__new();
|
|
vi = VtsID__mk_Singleton( thr, 1 );
|
|
thr->viR = vi;
|
|
thr->viW = vi;
|
|
VtsID__rcinc(thr->viR);
|
|
VtsID__rcinc(thr->viW);
|
|
|
|
show_thread_state(" root", thr);
|
|
return thr;
|
|
}
|
|
|
|
Thr* libhb_create ( Thr* parent )
|
|
{
|
|
/* The child's VTSs are copies of the parent's VTSs, but ticked at
|
|
the child's index. Since the child's index is guaranteed
|
|
unique, it has never been seen before, so the implicit value
|
|
before the tick is zero and after that is one. */
|
|
Thr* child = Thr__new();
|
|
|
|
child->viR = VtsID__tick( parent->viR, child );
|
|
child->viW = VtsID__tick( parent->viW, child );
|
|
VtsID__rcinc(child->viR);
|
|
VtsID__rcinc(child->viW);
|
|
|
|
tl_assert(VtsID__indexAt( child->viR, child ) == 1);
|
|
tl_assert(VtsID__indexAt( child->viW, child ) == 1);
|
|
|
|
/* and the parent has to move along too */
|
|
VtsID__rcdec(parent->viR);
|
|
VtsID__rcdec(parent->viW);
|
|
parent->viR = VtsID__tick( parent->viR, parent );
|
|
parent->viW = VtsID__tick( parent->viW, parent );
|
|
VtsID__rcinc(parent->viR);
|
|
VtsID__rcinc(parent->viW);
|
|
|
|
show_thread_state(" child", child);
|
|
show_thread_state("parent", parent);
|
|
|
|
return child;
|
|
}
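
/* A worked example of the clock movements above, for a universe of
   just a parent P and a child C, writing a clock as {P,C}: if P's
   clocks are {3,0} before the create, the child starts with {3,1}
   (P's clock ticked at C's index) and P itself moves on to {4,0}.
   The array-based sketch below is illustrative only and not the
   library's VTS representation. */
#if 0
static void example_create_clocks ( void )
{
   UWord parent[2] = { 3, 0 };   /* {P,C} before libhb_create     */
   UWord child [2];
   child[0] = parent[0];         /* copy ...                      */
   child[1] = parent[1] + 1;     /* ... and tick at C's own index */
   parent[0] += 1;               /* the parent moves along too    */
   tl_assert(child [0] == 3 && child [1] == 1);
   tl_assert(parent[0] == 4 && parent[1] == 0);
}
#endif
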
/* Shut down the library, and print stats (in fact that's _all_
this is for). */
|
|
void libhb_shutdown ( Bool show_stats )
|
|
{
|
|
if (show_stats) {
|
|
VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
|
|
VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
|
|
stats__secmaps_allocd,
|
|
stats__secmap_ga_space_covered);
|
|
VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
|
|
stats__secmap_linesZ_allocd,
|
|
stats__secmap_linesZ_bytes);
|
|
VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
|
|
stats__secmap_linesF_allocd,
|
|
stats__secmap_linesF_bytes);
|
|
VG_(printf)(" secmaps: %'10lu iterator steppings\n",
|
|
stats__secmap_iterator_steppings);
|
|
VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
|
|
stats__secmaps_search, stats__secmaps_search_slow);
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
|
|
stats__cache_totrefs, stats__cache_totmisses );
|
|
VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
|
|
stats__cache_Z_fetches, stats__cache_F_fetches );
|
|
VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
|
|
stats__cache_Z_wbacks, stats__cache_F_wbacks );
|
|
VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
|
|
stats__cache_invals, stats__cache_flushes );
|
|
VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
|
|
stats__cache_make_New_arange,
|
|
stats__cache_make_New_inZrep);
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)(" cline: %'10lu normalises\n",
|
|
stats__cline_normalises );
|
|
VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
|
|
stats__cline_read64s,
|
|
stats__cline_read32s,
|
|
stats__cline_read16s,
|
|
stats__cline_read8s );
|
|
VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
|
|
stats__cline_write64s,
|
|
stats__cline_write32s,
|
|
stats__cline_write16s,
|
|
stats__cline_write8s );
|
|
VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
|
|
stats__cline_set64s,
|
|
stats__cline_set32s,
|
|
stats__cline_set16s,
|
|
stats__cline_set8s );
|
|
VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
|
|
stats__cline_get8s, stats__cline_copy8s );
|
|
VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
|
|
stats__cline_64to32splits,
|
|
stats__cline_32to16splits,
|
|
stats__cline_16to8splits );
|
|
VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
|
|
stats__cline_64to32pulldown,
|
|
stats__cline_32to16pulldown,
|
|
stats__cline_16to8pulldown );
|
|
if (0)
|
|
VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
|
|
(Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
|
|
|
|
VG_(printf)("%s","\n");
|
|
|
|
VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
|
|
stats__msm_read, stats__msm_read_change);
|
|
VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
|
|
stats__msm_write, stats__msm_write_change);
|
|
VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
|
|
stats__getOrdering_queries, stats__getOrdering_misses);
|
|
VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
|
|
stats__join2_queries, stats__join2_misses);
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)(
|
|
" libhb: %ld entries in vts_table (approximately %lu bytes)\n",
|
|
VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
|
|
);
|
|
VG_(printf)( " libhb: %lu entries in vts_set\n",
|
|
VG_(sizeFM)( vts_set ) );
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
|
|
stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
|
|
stats__ctxt_rcdec2,
|
|
stats__ctxt_rcdec3 );
|
|
VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
|
|
stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
|
|
VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
|
|
(UWord)N_RCEC_TAB,
|
|
stats__ctxt_tab_curr );
|
|
VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
|
|
stats__ctxt_tab_qs,
|
|
stats__ctxt_tab_cmps );
|
|
#if 0
|
|
VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
|
|
VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
|
|
VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
|
|
VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
|
|
VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
|
|
VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
|
|
VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
|
|
VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
|
|
VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
|
|
VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
|
|
VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
|
|
VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
|
|
VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
|
|
VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
|
|
|
|
VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
|
|
VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
|
|
VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
|
|
VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
|
|
#endif
|
|
|
|
VG_(printf)("%s","<<< END libhb stats >>>\n");
|
|
VG_(printf)("%s","\n");
|
|
|
|
}
|
|
}
|
|
|
|
void libhb_async_exit ( Thr* thr )
|
|
{
|
|
/* is there anything we need to do? */
|
|
}
|
|
|
|
/* Both Segs and SOs point to VTSs. However, there is no sharing, so
|
|
a Seg that points at a VTS is its one-and-only owner, and ditto for
|
|
a SO that points at a VTS. */
|
|
|
|
SO* libhb_so_alloc ( void )
|
|
{
|
|
return SO__Alloc();
|
|
}
|
|
|
|
void libhb_so_dealloc ( SO* so )
|
|
{
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
SO__Dealloc(so);
|
|
}
|
|
|
|
/* See comments in libhb.h for details on the meaning of
|
|
strong vs weak sends and strong vs weak receives. */
|
|
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
|
|
{
|
|
/* Copy the VTSs from 'thr' into the sync object, and then move
|
|
the thread along one step. */
|
|
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
|
|
/* stay sane .. a thread's read-clock must always lead or be the
|
|
same as its write-clock */
|
|
{ POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
|
|
tl_assert(ord == POrd_EQ || ord == POrd_LT);
|
|
}
|
|
|
|
/* since we're overwriting the VtsIDs in the SO, we need to drop
|
|
any references made by the previous contents thereof */
|
|
if (so->viR == VtsID_INVALID) {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
so->viR = thr->viR;
|
|
so->viW = thr->viW;
|
|
VtsID__rcinc(so->viR);
|
|
VtsID__rcinc(so->viW);
|
|
} else {
|
|
/* In a strong send, we dump any previous VC in the SO and
|
|
install the sending thread's VC instead. For a weak send we
|
|
must join2 with what's already there. */
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
VtsID__rcdec(so->viR);
|
|
VtsID__rcdec(so->viW);
|
|
so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
|
|
so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
|
|
VtsID__rcinc(so->viR);
|
|
VtsID__rcinc(so->viW);
|
|
}
|
|
|
|
/* move both parent clocks along */
|
|
VtsID__rcdec(thr->viR);
|
|
VtsID__rcdec(thr->viW);
|
|
thr->viR = VtsID__tick( thr->viR, thr );
|
|
thr->viW = VtsID__tick( thr->viW, thr );
|
|
VtsID__rcinc(thr->viR);
|
|
VtsID__rcinc(thr->viW);
|
|
if (strong_send)
|
|
show_thread_state("s-send", thr);
|
|
else
|
|
show_thread_state("w-send", thr);
|
|
}
|
|
|
|
void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
|
|
{
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
|
|
if (so->viR != VtsID_INVALID) {
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
|
|
/* Weak receive (basically, an R-acquisition of a R-W lock).
|
|
This advances the read-clock of the receiver, but not the
|
|
write-clock. */
|
|
VtsID__rcdec(thr->viR);
|
|
thr->viR = VtsID__join2( thr->viR, so->viR );
|
|
VtsID__rcinc(thr->viR);
|
|
|
|
/* For a strong receive, we also advance the receiver's write
|
|
clock, which means the receive as a whole is essentially
|
|
equivalent to a W-acquisition of a R-W lock. */
|
|
if (strong_recv) {
|
|
VtsID__rcdec(thr->viW);
|
|
thr->viW = VtsID__join2( thr->viW, so->viW );
|
|
VtsID__rcinc(thr->viW);
|
|
}
|
|
|
|
if (strong_recv)
|
|
show_thread_state("s-recv", thr);
|
|
else
|
|
show_thread_state("w-recv", thr);
|
|
|
|
} else {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
/* Deal with degenerate case: 'so' has no vts, so there has been
|
|
no message posted to it. Just ignore this case. */
|
|
show_thread_state("d-recv", thr);
|
|
}
|
|
}
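
/* The send/recv pair above is the whole synchronisation vocabulary a
   client needs.  Purely as an illustration (the helper names are
   hypothetical; a real client such as Helgrind drives these from its
   own lock and condition-variable events), a mutex can be modelled
   with one SO per lock: unlock publishes the holder's clocks into
   the SO, lock acquires them, so everything before the unlock
   happens-before everything after the lock. */
#if 0
static void example_mutex_unlock ( Thr* thr, SO* so_of_mutex ) {
   libhb_so_send( thr, so_of_mutex, True/*strong_send*/ );
}
static void example_mutex_lock ( Thr* thr, SO* so_of_mutex ) {
   libhb_so_recv( thr, so_of_mutex, True/*strong_recv*/ );
}
/* A reader-writer lock differs only in the receive strength: per the
   comments in libhb_so_recv, a read-acquisition is a weak receive
   (read-clock only) and a write-acquisition is a strong receive. */
#endif
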
Bool libhb_so_everSent ( SO* so )
|
|
{
|
|
if (so->viR == VtsID_INVALID) {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
return False;
|
|
} else {
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
return True;
|
|
}
|
|
}
|
|
|
|
#define XXX1 0 // 0x67a106c
|
|
#define XXX2 0
|
|
|
|
static Bool TRACEME(Addr a, SizeT szB) {
|
|
if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
|
|
if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
|
|
return False;
|
|
}
|
|
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
|
|
SVal sv = zsm_read8(a);
|
|
VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
|
|
show_thread_state("", thr);
|
|
VG_(printf)("%s","\n");
|
|
}
|
|
|
|
void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
|
|
{
|
|
SVal sv = SVal__mkC(thr->viW, thr->viW);
|
|
tl_assert(is_sane_SVal_C(sv));
|
|
if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
|
|
zsm_set_range( a, szB, sv );
|
|
if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
|
|
}
|
|
|
|
void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
|
|
{
|
|
if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
|
|
zsm_set_range( a, szB, SVal__mkA() );
|
|
if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
|
|
}
|
|
|
|
void* libhb_get_Thr_opaque ( Thr* thr ) {
|
|
tl_assert(thr);
|
|
return thr->opaque;
|
|
}
|
|
|
|
void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
|
|
tl_assert(thr);
|
|
thr->opaque = v;
|
|
}
|
|
|
|
void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
|
|
{
|
|
zsm_copy_range(dst, src, len);
|
|
}
|
|
|
|
void libhb_maybe_GC ( void )
|
|
{
|
|
event_map_maybe_GC();
|
|
/* If there are still freelist entries available, no need for a
|
|
GC. */
|
|
if (vts_tab_freelist != VtsID_INVALID)
|
|
return;
|
|
/* So all the table entries are full, and we're having to expand
the table. But did we hit the threshold point yet? */
|
|
if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
|
|
return;
|
|
vts_tab__do_GC( False/*don't show stats*/ );
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION END main library //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end libhb_main.c ---*/
|
|
/*--------------------------------------------------------------------*/
|