/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Debugging #defines                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: VtsID                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: SVal                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: ScalarTS                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
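
/* Illustrative sketch, compiled out and not part of the implementation:
   shows the 18/46 bit-field split packing a (ThrID, timestamp) pair into
   a single 64-bit ScalarTS, and that the fields round-trip.  The concrete
   values are arbitrary examples; the sizeof expectation assumes the
   bit-field layout produced by the compilers Valgrind supports. */
#if 0
static void example_ScalarTS_packing ( void )
{
   ScalarTS ts;
   tl_assert(sizeof(ScalarTS) == 8);  /* 18 + 46 bits pack into one ULong */
   ts.thrid = 1024;     /* first valid ThrID -- see NB2 above */
   ts.tym   = 12345;    /* an arbitrary timestamp tick count */
   tl_assert(ts.thrid == 1024 && ts.tym == 12345);
   tl_assert(ts.thrid <= ThrID_MAX_VALID);
}
#endif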


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: Filter                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
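
/* Illustrative sketch, compiled out: how a byte address 'a' is decomposed
   for a Filter lookup.  The tag and line number come straight from the
   macros above; the UShort index and the R/W bit positions are an
   assumption derived from the layout comment (2 bits per byte within an
   8-byte group, W in the lower bit of the pair, R in the higher bit). */
#if 0
static void example_Filter_decompose ( Addr a )
{
   Addr   tag    = FI_GET_TAG(a);                 /* base address of the line */
   UWord  lineno = FI_GET_LINENO(a);              /* index into .tags/.lines */
   UWord  u16ix  = (a & (FI_LINE_SZB - 1)) >> 3;  /* which UShort in the line */
   UWord  shift  = 2 * (a & 7);                   /* bit position of the W bit */
   UShort wmask  = (UShort)(1u << shift);         /* "W seen" for this byte */
   UShort rmask  = (UShort)(1u << (shift + 1));   /* "R seen" for this byte */
   /* Provided filter->tags[lineno] == tag, testing
      filter->lines[lineno].u16s[u16ix] against rmask/wmask tells whether a
      read/write of this byte has already been filtered out. */
   (void)tag; (void)u16ix; (void)wmask; (void)rmask;
}
#endif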
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// data decls: Thr, ULong_n_EC //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
// Records stacks for H1 history mechanism (DRD-style)
|
|
typedef
|
|
struct { ULong ull; ExeContext* ec; }
|
|
ULong_n_EC;
|
|
|
|
|
|
/* How many of the above records to collect for each thread? Older
|
|
ones are dumped when we run out of space. 62.5k requires 1MB per
|
|
thread, since each ULong_n_EC record is 16 bytes long. When more
|
|
than N_KWs_N_STACKs_PER_THREAD are present, the older half are
|
|
deleted to make space. Hence in the worst case we will be able to
|
|
produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
|
|
Kw transitions (segments in this thread). For the current setting
|
|
that gives a guaranteed stack for at least the last 31.25k
|
|
segments. */
|
|
#define N_KWs_N_STACKs_PER_THREAD 62500
|
|
|
|
|
|
struct _Thr {
|
|
/* Current VTSs for this thread. They change as we go along. viR
|
|
is the VTS to be used for reads, viW for writes. Usually they
|
|
are the same, but can differ when we deal with reader-writer
|
|
locks. It is always the case that
|
|
VtsID__cmpLEQ(viW,viR) == True
|
|
that is, viW must be the same, or lagging behind, viR. */
|
|
VtsID viR;
|
|
VtsID viW;
|
|
|
|
/* Is initially False, and is set to True after the thread really
|
|
has done a low-level exit. When True, we expect to never see
|
|
any more memory references done by this thread. */
|
|
Bool llexit_done;
|
|
|
|
/* Is initially False, and is set to True after the thread has been
|
|
joined with (reaped by some other thread). After this point, we
|
|
do not expect to see any uses of .viR or .viW, so it is safe to
|
|
set them to VtsID_INVALID. */
|
|
Bool joinedwith_done;
|
|
|
|
/* A small integer giving a unique identity to this Thr. See
|
|
comments on the definition of ScalarTS for details. */
|
|
ThrID thrid : SCALARTS_N_THRBITS;
|
|
|
|
/* A filter that removes references for which we believe that
|
|
msmcread/msmcwrite will not change the state, nor report a
|
|
race. */
|
|
Filter* filter;
|
|
|
|
/* A pointer back to the top level Thread structure. There is a
|
|
1-1 mapping between Thread and Thr structures -- each Thr points
|
|
at its corresponding Thread, and vice versa. Really, Thr and
|
|
Thread should be merged into a single structure. */
|
|
Thread* hgthread;
|
|
|
|
/* The ULongs (scalar Kws) in this accumulate in strictly
|
|
increasing order, without duplicates. This is important because
|
|
we need to be able to find a given scalar Kw in this array
|
|
later, by binary search. */
|
|
XArray* /* ULong_n_EC */ local_Kws_n_stacks;
|
|
};
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// data decls: SO //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
// (UInt) `echo "Synchronisation object" | md5sum`
|
|
#define SO_MAGIC 0x56b3c5b0U
|
|
|
|
struct _SO {
|
|
struct _SO* admin_prev;
|
|
struct _SO* admin_next;
|
|
VtsID viR; /* r-clock of sender */
|
|
VtsID viW; /* w-clock of sender */
|
|
UInt magic;
|
|
};
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// Forward declarations //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
/* fwds for
|
|
Globals needed by other parts of the library. These are set
|
|
once at startup and then never changed. */
|
|
static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
|
|
static ExeContext* (*main_get_EC)( Thr* ) = NULL;
|
|
|
|
/* misc fn and data fwdses */
|
|
static void VtsID__rcinc ( VtsID ii );
|
|
static void VtsID__rcdec ( VtsID ii );
|
|
|
|
static inline Bool SVal__isC ( SVal s );
|
|
static inline VtsID SVal__unC_Rmin ( SVal s );
|
|
static inline VtsID SVal__unC_Wmin ( SVal s );
|
|
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
|
|
|
|
/* A doubly-linked list of all the SO's. */
|
|
SO* admin_SO;
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION BEGIN compressed shadow memory //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
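
/* Illustrative sketch, compiled out: the reference-counting contract
   described above.  The callbacks and the flat counter array are
   hypothetical -- the real callers keep their counts elsewhere,
   presumably via the VtsID reference counting declared above -- but the
   ordering constraint is the one that matters: only after
   zsm_flush_cache() returns are zero counts trustworthy. */
#if 0
static UWord example_refcounts[1000];   /* hypothetical storage for counts */
static void example_rcinc ( SVal s ) { example_refcounts[(UWord)s % 1000]++; }
static void example_rcdec ( SVal s ) { example_refcounts[(UWord)s % 1000]--; }

static void example_zsm_usage ( void )
{
   zsm_init( example_rcinc, example_rcdec );
   /* ... calls to zsm_sset_range / zsm_scopy_range happen here; the counts
      in example_refcounts may now be temporarily wrong, because SVals
      parked in the internal cache are not yet accounted for ... */
   zsm_flush_cache();
   /* Only at this point may entries whose count is zero be treated as
      unreferenced by the library. */
}
#endif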

/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
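
/* Worked example of the rounding macros (sketch only): with N == 8,
   ROUNDUP(13, 8) == 16, ROUNDUP(16, 8) == 16, ROUNDDN(13, 8) == 8 and
   ROUNDDN(16, 8) == 16.  Both rely on N being a power of 2, so that
   ~(N-1) is a mask clearing the low log2(N) bits. */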

/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
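
/* Illustrative sketch, compiled out: how a tree descriptor encodes which
   nodes of the 8-byte implicit tree currently hold valid SVals.  A tree
   fully split into eight 1-byte leaves sets all eight 8_x bits; a tree
   holding one 64-bit value sets only TREE_DESCR_64. */
#if 0
static void example_tree_descrs ( void )
{
   UShort all_bytes = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
                      | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
                      | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   UShort one_u64   = TREE_DESCR_64;
   /* First 4 bytes held as one 32-bit value, last 4 as two 16-bit values: */
   UShort mixed     = TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_16_3;
   tl_assert(all_bytes == 0x7F80);
   tl_assert(one_u64   == 0x0008);
   tl_assert(mixed     == 0x0052);
}
#endif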

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
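
/* Illustrative sketch, compiled out: what the compressed LineZ
   representation buys, and how it decodes.  With N_LINE_ARANGE == 64 a
   full LineF carries 64 * 8 == 512 bytes of SVals, whereas a LineZ
   carries a 4-entry dictionary (32 bytes) plus 64 two-bit indexes
   (16 bytes).  The loop below mirrors the expansion done later by
   cacheline_fetch for lines that do have a Z representation. */
#if 0
static void example_decode_LineZ ( /*OUT*/SVal* out64, const LineZ* lineZ )
{
   Word i;
   for (i = 0; i < N_LINE_ARANGE; i++) {
      UWord ix = (lineZ->ix2s[i >> 2] >> (2 * (i & 3))) & 3;
      out64[i] = lineZ->dict[ix];
   }
}
#endif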

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
|
|
typedef
|
|
struct {
|
|
UInt magic;
|
|
LineZ linesZ[N_SECMAP_ZLINES];
|
|
LineF* linesF;
|
|
UInt linesF_size;
|
|
}
|
|
SecMap;
|
|
|
|
#define SecMap_MAGIC 0x571e58cbU
|
|
|
|
static inline Bool is_sane_SecMap ( SecMap* sm ) {
|
|
return sm != NULL && sm->magic == SecMap_MAGIC;
|
|
}
|
|
|
|
/* ------ Cache ------ */
|
|
|
|
#define N_WAY_BITS 16
|
|
#define N_WAY_NENT (1 << N_WAY_BITS)
|
|
|
|
/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not writeback, any line
   with a bogus tag. */
|
|
typedef
|
|
struct {
|
|
CacheLine lyns0[N_WAY_NENT];
|
|
Addr tags0[N_WAY_NENT];
|
|
}
|
|
Cache;
|
|
|
|
static inline Bool is_valid_scache_tag ( Addr tag ) {
|
|
/* a valid tag should be naturally aligned to the start of
|
|
a CacheLine. */
|
|
return 0 == (tag & (N_LINE_ARANGE - 1));
|
|
}
|
|
|
|
|
|
/* --------- Primary data structures --------- */
|
|
|
|
/* Shadow memory primary map */
|
|
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
|
|
static Cache cache_shmem;
|
|
|
|
|
|
static UWord stats__secmaps_search = 0; // # SM finds
|
|
static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
|
|
static UWord stats__secmaps_allocd = 0; // # SecMaps issued
|
|
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
|
|
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
|
|
static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
|
|
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
|
|
static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
|
|
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
|
|
static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
|
|
static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
|
|
static UWord stats__cache_F_fetches = 0; // # F lines fetched
|
|
static UWord stats__cache_F_wbacks = 0; // # F lines written back
|
|
static UWord stats__cache_invals = 0; // # cache invals
|
|
static UWord stats__cache_flushes = 0; // # cache flushes
|
|
static UWord stats__cache_totrefs = 0; // # total accesses
|
|
static UWord stats__cache_totmisses = 0; // # misses
|
|
static ULong stats__cache_make_New_arange = 0; // total arange made New
|
|
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
|
|
static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
|
|
static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
|
|
static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
|
|
static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
|
|
static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
|
|
static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
|
|
static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
|
|
static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
|
|
static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
|
|
static UWord stats__cline_sread08s  = 0; // # 8-bit shadow-value reads
static UWord stats__cline_swrite08s = 0; // # 8-bit shadow-value writes
static UWord stats__cline_swrite16s = 0; // # 16-bit shadow-value writes
static UWord stats__cline_swrite32s = 0; // # 32-bit shadow-value writes
static UWord stats__cline_swrite64s = 0; // # 64-bit shadow-value writes
|
|
static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8
|
|
static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
|
|
static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
|
|
static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
|
|
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
|
|
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
|
|
static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
|
|
static UWord stats__vts__tick = 0; // # calls to VTS__tick
|
|
static UWord stats__vts__join = 0; // # calls to VTS__join
|
|
static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
|
|
static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
|
|
|
|
// # calls to VTS__cmp_structural w/ slow case
|
|
static UWord stats__vts__cmp_structural_slow = 0;
|
|
|
|
// # calls to VTS__indexAt_SLOW
|
|
static UWord stats__vts__indexat_slow = 0;
|
|
|
|
// # calls to vts_set__find__or__clone_and_add
|
|
static UWord stats__vts_set__focaa = 0;
|
|
|
|
// # calls to vts_set__find__or__clone_and_add that lead to an
|
|
// allocation
|
|
static UWord stats__vts_set__focaa_a = 0;
|
|
|
|
|
|
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
|
|
return a & ~(N_SECMAP_ARANGE - 1);
|
|
}
|
|
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
|
|
return a & (N_SECMAP_ARANGE - 1);
|
|
}
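
/* Illustrative sketch, compiled out: how an address splits into its
   SecMap base and in-SecMap offset, and how that offset further splits
   into a LineZ index plus an in-line offset.  With N_SECMAP_BITS == 13
   and N_LINE_BITS == 6, a SecMap covers 8192 bytes holding 128 LineZs
   of 64 bytes each. */
#if 0
static void example_SecMap_decompose ( Addr a )
{
   Addr  base    = shmem__round_to_SecMap_base(a);
   UWord smoff   = shmem__get_SecMap_offset(a);
   UWord zix     = smoff >> N_LINE_BITS;            /* which LineZ in the SecMap */
   UWord lineoff = smoff & (N_LINE_ARANGE - 1);     /* byte within that line */
   tl_assert(base + smoff == a);
   tl_assert(zix < N_SECMAP_ZLINES);
   (void)lineoff;
}
#endif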
|
|
|
|
|
|
/*----------------------------------------------------------------*/
|
|
/*--- map_shmem :: WordFM Addr SecMap ---*/
|
|
/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
|
|
/*----------------------------------------------------------------*/
|
|
|
|
/*--------------- SecMap allocation --------------- */
|
|
|
|
static HChar* shmem__bigchunk_next = NULL;
|
|
static HChar* shmem__bigchunk_end1 = NULL;
|
|
|
|
static void* shmem__bigchunk_alloc ( SizeT n )
|
|
{
|
|
const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
|
|
tl_assert(n > 0);
|
|
n = VG_ROUNDUP(n, 16);
|
|
tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
|
|
tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
|
|
<= (SSizeT)sHMEM__BIGCHUNK_SIZE);
|
|
if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
|
|
if (0)
|
|
VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
|
|
(Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
|
|
shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
|
|
if (shmem__bigchunk_next == NULL)
|
|
VG_(out_of_memory_NORETURN)(
|
|
"helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
|
|
shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
|
|
}
|
|
tl_assert(shmem__bigchunk_next);
|
|
tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
|
|
tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
|
|
shmem__bigchunk_next += n;
|
|
return shmem__bigchunk_next - n;
|
|
}
|
|
|
|
static SecMap* shmem__alloc_SecMap ( void )
|
|
{
|
|
Word i, j;
|
|
SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
|
|
if (0) VG_(printf)("alloc_SecMap %p\n",sm);
|
|
tl_assert(sm);
|
|
sm->magic = SecMap_MAGIC;
|
|
for (i = 0; i < N_SECMAP_ZLINES; i++) {
|
|
sm->linesZ[i].dict[0] = SVal_NOACCESS;
|
|
sm->linesZ[i].dict[1] = SVal_INVALID;
|
|
sm->linesZ[i].dict[2] = SVal_INVALID;
|
|
sm->linesZ[i].dict[3] = SVal_INVALID;
|
|
for (j = 0; j < N_LINE_ARANGE/4; j++)
|
|
sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
|
|
}
|
|
sm->linesF = NULL;
|
|
sm->linesF_size = 0;
|
|
stats__secmaps_allocd++;
|
|
stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
|
|
stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
|
|
stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
|
|
return sm;
|
|
}
|
|
|
|
typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
|
|
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
|
|
|
|
static SecMap* shmem__find_SecMap ( Addr ga )
|
|
{
|
|
SecMap* sm = NULL;
|
|
Addr gaKey = shmem__round_to_SecMap_base(ga);
|
|
// Cache
|
|
stats__secmaps_search++;
|
|
if (LIKELY(gaKey == smCache[0].gaKey))
|
|
return smCache[0].sm;
|
|
if (LIKELY(gaKey == smCache[1].gaKey)) {
|
|
SMCacheEnt tmp = smCache[0];
|
|
smCache[0] = smCache[1];
|
|
smCache[1] = tmp;
|
|
return smCache[0].sm;
|
|
}
|
|
if (gaKey == smCache[2].gaKey) {
|
|
SMCacheEnt tmp = smCache[1];
|
|
smCache[1] = smCache[2];
|
|
smCache[2] = tmp;
|
|
return smCache[1].sm;
|
|
}
|
|
// end Cache
|
|
stats__secmaps_search_slow++;
|
|
if (VG_(lookupFM)( map_shmem,
|
|
NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
|
|
tl_assert(sm != NULL);
|
|
smCache[2] = smCache[1];
|
|
smCache[1] = smCache[0];
|
|
smCache[0].gaKey = gaKey;
|
|
smCache[0].sm = sm;
|
|
} else {
|
|
tl_assert(sm == NULL);
|
|
}
|
|
return sm;
|
|
}
|
|
|
|
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
|
|
{
|
|
SecMap* sm = shmem__find_SecMap ( ga );
|
|
if (LIKELY(sm)) {
|
|
return sm;
|
|
} else {
|
|
/* create a new one */
|
|
Addr gaKey = shmem__round_to_SecMap_base(ga);
|
|
sm = shmem__alloc_SecMap();
|
|
tl_assert(sm);
|
|
VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
|
|
return sm;
|
|
}
|
|
}
|
|
|
|
|
|
/* ------------ LineF and LineZ related ------------ */
|
|
|
|
static void rcinc_LineF ( LineF* lineF ) {
|
|
UWord i;
|
|
tl_assert(lineF->inUse);
|
|
for (i = 0; i < N_LINE_ARANGE; i++)
|
|
rcinc(lineF->w64s[i]);
|
|
}
|
|
|
|
static void rcdec_LineF ( LineF* lineF ) {
|
|
UWord i;
|
|
tl_assert(lineF->inUse);
|
|
for (i = 0; i < N_LINE_ARANGE; i++)
|
|
rcdec(lineF->w64s[i]);
|
|
}
|
|
|
|
static void rcinc_LineZ ( LineZ* lineZ ) {
|
|
tl_assert(lineZ->dict[0] != SVal_INVALID);
|
|
rcinc(lineZ->dict[0]);
|
|
if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
|
|
if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
|
|
if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
|
|
}
|
|
|
|
static void rcdec_LineZ ( LineZ* lineZ ) {
|
|
tl_assert(lineZ->dict[0] != SVal_INVALID);
|
|
rcdec(lineZ->dict[0]);
|
|
if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
|
|
if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
|
|
if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
|
|
}
|
|
|
|
inline
|
|
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
|
|
Word bix, shft, mask, prep;
|
|
tl_assert(ix >= 0);
|
|
bix = ix >> 2;
|
|
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
|
|
mask = 3 << shft;
|
|
prep = b2 << shft;
|
|
arr[bix] = (arr[bix] & ~mask) | prep;
|
|
}
|
|
|
|
inline
|
|
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
|
|
Word bix, shft;
|
|
tl_assert(ix >= 0);
|
|
bix = ix >> 2;
|
|
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
|
|
return (arr[bix] >> shft) & 3;
|
|
}
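
/* Illustrative sketch, compiled out: the two helpers above pack four
   2-bit dictionary indexes per byte, and writes round-trip through
   reads.  The index/value pairs below are arbitrary. */
#if 0
static void example_twobit_roundtrip ( void )
{
   UChar arr[N_LINE_ARANGE/4];
   VG_(memset)(arr, 0, sizeof(arr));
   write_twobit_array( arr, 0, 3 );    /* byte 0, bits 1..0 */
   write_twobit_array( arr, 5, 2 );    /* byte 1, bits 3..2 */
   write_twobit_array( arr, 63, 1 );   /* last index in a line */
   tl_assert(read_twobit_array(arr, 0)  == 3);
   tl_assert(read_twobit_array(arr, 5)  == 2);
   tl_assert(read_twobit_array(arr, 63) == 1);
   tl_assert(read_twobit_array(arr, 1)  == 0); /* untouched slots stay 0 */
}
#endif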
|
|
|
|
/* Given address 'tag', find either the Z or F line containing relevant
|
|
data, so it can be read into the cache.
|
|
*/
|
|
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
|
|
/*OUT*/LineF** fp, Addr tag ) {
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
UWord zix;
|
|
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
|
|
UWord smoff = shmem__get_SecMap_offset(tag);
|
|
/* since smoff is derived from a valid tag, it should be
|
|
cacheline-aligned. */
|
|
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
|
|
zix = smoff >> N_LINE_BITS;
|
|
tl_assert(zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
lineF = NULL;
|
|
if (lineZ->dict[0] == SVal_INVALID) {
|
|
UInt fix = (UInt)lineZ->dict[1];
|
|
tl_assert(sm->linesF);
|
|
tl_assert(sm->linesF_size > 0);
|
|
tl_assert(fix >= 0 && fix < sm->linesF_size);
|
|
lineF = &sm->linesF[fix];
|
|
tl_assert(lineF->inUse);
|
|
lineZ = NULL;
|
|
}
|
|
*zp = lineZ;
|
|
*fp = lineF;
|
|
}
|
|
|
|
/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
|
|
static __attribute__((noinline))
|
|
void find_Z_for_writing ( /*OUT*/SecMap** smp,
|
|
/*OUT*/Word* zixp,
|
|
Addr tag ) {
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
UWord zix;
|
|
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
|
|
UWord smoff = shmem__get_SecMap_offset(tag);
|
|
/* since smoff is derived from a valid tag, it should be
|
|
cacheline-aligned. */
|
|
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
|
|
zix = smoff >> N_LINE_BITS;
|
|
tl_assert(zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
lineF = NULL;
|
|
/* re RCs, we are freeing up this LineZ/LineF so that new data can
|
|
be parked in it. Hence have to rcdec it accordingly. */
|
|
/* If lineZ has an associated lineF, free it up. */
|
|
if (lineZ->dict[0] == SVal_INVALID) {
|
|
UInt fix = (UInt)lineZ->dict[1];
|
|
tl_assert(sm->linesF);
|
|
tl_assert(sm->linesF_size > 0);
|
|
tl_assert(fix >= 0 && fix < sm->linesF_size);
|
|
lineF = &sm->linesF[fix];
|
|
tl_assert(lineF->inUse);
|
|
rcdec_LineF(lineF);
|
|
lineF->inUse = False;
|
|
} else {
|
|
rcdec_LineZ(lineZ);
|
|
}
|
|
*smp = sm;
|
|
*zixp = zix;
|
|
}
|
|
|
|
static __attribute__((noinline))
|
|
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
|
|
UInt i, new_size;
|
|
LineF* nyu;
|
|
|
|
if (sm->linesF) {
|
|
tl_assert(sm->linesF_size > 0);
|
|
} else {
|
|
tl_assert(sm->linesF_size == 0);
|
|
}
|
|
|
|
if (sm->linesF) {
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
if (!sm->linesF[i].inUse) {
|
|
*fixp = (Word)i;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* No free F line found. Expand existing array and try again. */
|
|
new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
|
|
nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
|
|
new_size * sizeof(LineF) );
|
|
tl_assert(nyu);
|
|
|
|
stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
|
|
stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
|
|
* sizeof(LineF);
|
|
|
|
if (0)
|
|
VG_(printf)("SM %p: expand F array from %d to %d\n",
|
|
sm, (Int)sm->linesF_size, new_size);
|
|
|
|
for (i = 0; i < new_size; i++)
|
|
nyu[i].inUse = False;
|
|
|
|
if (sm->linesF) {
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
tl_assert(sm->linesF[i].inUse);
|
|
nyu[i] = sm->linesF[i];
|
|
}
|
|
VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
|
|
HG_(free)(sm->linesF);
|
|
}
|
|
|
|
sm->linesF = nyu;
|
|
sm->linesF_size = new_size;
|
|
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
if (!sm->linesF[i].inUse) {
|
|
*fixp = (Word)i;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*NOTREACHED*/
|
|
tl_assert(0);
|
|
}
|
|
|
|
|
|
/* ------------ CacheLine and implicit-tree related ------------ */
|
|
|
|
__attribute__((unused))
|
|
static void pp_CacheLine ( CacheLine* cl ) {
|
|
Word i;
|
|
if (!cl) {
|
|
VG_(printf)("%s","pp_CacheLine(NULL)\n");
|
|
return;
|
|
}
|
|
for (i = 0; i < N_LINE_TREES; i++)
|
|
VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
|
|
for (i = 0; i < N_LINE_ARANGE; i++)
|
|
VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
|
|
}
|
|
|
|
static UChar descr_to_validbits ( UShort descr )
|
|
{
|
|
/* a.k.a Party Time for gcc's constant folder */
|
|
# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
|
|
b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
|
|
( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
|
|
( (b8_5) << 12) | ( (b8_4) << 11) | \
|
|
( (b8_3) << 10) | ( (b8_2) << 9) | \
|
|
( (b8_1) << 8) | ( (b8_0) << 7) | \
|
|
( (b16_3) << 6) | ( (b32_1) << 5) | \
|
|
( (b16_2) << 4) | ( (b64) << 3) | \
|
|
( (b16_1) << 2) | ( (b32_0) << 1) | \
|
|
( (b16_0) << 0) ) )
|
|
|
|
# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
|
|
( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
|
|
( (bit5) << 5) | ( (bit4) << 4) | \
|
|
( (bit3) << 3) | ( (bit2) << 2) | \
|
|
( (bit1) << 1) | ( (bit0) << 0) ) )
|
|
|
|
/* these should all get folded out at compile time */
|
|
tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
|
|
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
|
|
|
|
switch (descr) {
|
|
/*
|
|
+--------------------------------- TREE_DESCR_8_7
|
|
| +------------------- TREE_DESCR_8_0
|
|
| | +---------------- TREE_DESCR_16_3
|
|
| | | +-------------- TREE_DESCR_32_1
|
|
| | | | +------------ TREE_DESCR_16_2
|
|
| | | | | +--------- TREE_DESCR_64
|
|
| | | | | | +------ TREE_DESCR_16_1
|
|
| | | | | | | +---- TREE_DESCR_32_0
|
|
| | | | | | | | +-- TREE_DESCR_16_0
|
|
| | | | | | | | |
|
|
| | | | | | | | | GRANULARITY, 7 -> 0 */
|
|
case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
|
|
return BYTE(1,1,1,1,1,1,1,1);
|
|
case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
|
|
return BYTE(1,1,0,1,1,1,1,1);
|
|
case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
|
|
return BYTE(0,1,1,1,1,1,1,1);
|
|
case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
|
|
return BYTE(0,1,0,1,1,1,1,1);
|
|
|
|
case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
|
|
return BYTE(1,1,1,1,1,1,0,1);
|
|
case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
|
|
return BYTE(1,1,0,1,1,1,0,1);
|
|
case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
|
|
return BYTE(0,1,1,1,1,1,0,1);
|
|
case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
|
|
return BYTE(0,1,0,1,1,1,0,1);
|
|
|
|
case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
|
|
return BYTE(1,1,1,1,0,1,1,1);
|
|
case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
|
|
return BYTE(1,1,0,1,0,1,1,1);
|
|
case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
|
|
return BYTE(0,1,1,1,0,1,1,1);
|
|
case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
|
|
return BYTE(0,1,0,1,0,1,1,1);
|
|
|
|
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
|
|
return BYTE(1,1,1,1,0,1,0,1);
|
|
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
|
|
return BYTE(1,1,0,1,0,1,0,1);
|
|
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
|
|
return BYTE(0,1,1,1,0,1,0,1);
|
|
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
|
|
return BYTE(0,1,0,1,0,1,0,1);
|
|
|
|
case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
|
|
return BYTE(0,0,0,1,1,1,1,1);
|
|
case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
|
|
return BYTE(0,0,0,1,1,1,0,1);
|
|
case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
|
|
return BYTE(0,0,0,1,0,1,1,1);
|
|
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
|
|
return BYTE(0,0,0,1,0,1,0,1);
|
|
|
|
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
|
|
return BYTE(1,1,1,1,0,0,0,1);
|
|
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
|
|
return BYTE(1,1,0,1,0,0,0,1);
|
|
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
|
|
return BYTE(0,1,1,1,0,0,0,1);
|
|
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
|
|
return BYTE(0,1,0,1,0,0,0,1);
|
|
|
|
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
|
|
return BYTE(0,0,0,1,0,0,0,1);
|
|
|
|
case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
|
|
return BYTE(0,0,0,0,0,0,0,1);
|
|
|
|
default: return BYTE(0,0,0,0,0,0,0,0);
|
|
/* INVALID - any valid descr produces at least one
|
|
valid bit in tree[0..7]*/
|
|
}
|
|
/* NOTREACHED*/
|
|
tl_assert(0);
|
|
|
|
# undef DESCR
|
|
# undef BYTE
|
|
}
|
|
|
|
__attribute__((unused))
|
|
static Bool is_sane_Descr ( UShort descr ) {
|
|
return descr_to_validbits(descr) != 0;
|
|
}
|
|
|
|
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
|
|
VG_(sprintf)(dst,
|
|
"%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
|
|
(Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_64) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
|
|
(Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
|
|
);
|
|
}
|
|
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
|
|
VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
|
|
(Int)((byte & 128) ? 1 : 0),
|
|
(Int)((byte & 64) ? 1 : 0),
|
|
(Int)((byte & 32) ? 1 : 0),
|
|
(Int)((byte & 16) ? 1 : 0),
|
|
(Int)((byte & 8) ? 1 : 0),
|
|
(Int)((byte & 4) ? 1 : 0),
|
|
(Int)((byte & 2) ? 1 : 0),
|
|
(Int)((byte & 1) ? 1 : 0)
|
|
);
|
|
}
|
|
|
|
static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
|
|
Word i;
|
|
UChar validbits = descr_to_validbits(descr);
|
|
HChar buf[128], buf2[128];
|
|
if (validbits == 0)
|
|
goto bad;
|
|
for (i = 0; i < 8; i++) {
|
|
if (validbits & (1<<i)) {
|
|
if (tree[i] == SVal_INVALID)
|
|
goto bad;
|
|
} else {
|
|
if (tree[i] != SVal_INVALID)
|
|
goto bad;
|
|
}
|
|
}
|
|
return True;
|
|
bad:
|
|
sprintf_Descr( buf, descr );
|
|
sprintf_Byte( buf2, validbits );
|
|
VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
|
|
VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
|
|
VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
|
|
for (i = 0; i < 8; i++)
|
|
VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
|
|
VG_(printf)("%s","}\n");
|
|
return False;
|
|
}
|
|
|
|
static Bool is_sane_CacheLine ( CacheLine* cl )
|
|
{
|
|
Word tno, cloff;
|
|
|
|
if (!cl) goto bad;
|
|
|
|
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
|
|
UShort descr = cl->descrs[tno];
|
|
SVal* tree = &cl->svals[cloff];
|
|
if (!is_sane_Descr_and_Tree(descr, tree))
|
|
goto bad;
|
|
}
|
|
tl_assert(cloff == N_LINE_ARANGE);
|
|
return True;
|
|
bad:
|
|
pp_CacheLine(cl);
|
|
return False;
|
|
}
|
|
|
|
static UShort normalise_tree ( /*MOD*/SVal* tree )
|
|
{
|
|
UShort descr;
|
|
/* pre: incoming tree[0..7] does not have any invalid shvals, in
|
|
particular no zeroes. */
|
|
if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
|
|
|| tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
|
|
|| tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
|
|
|| tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
|
|
tl_assert(0);
|
|
|
|
descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
|
|
| TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
|
|
| TREE_DESCR_8_1 | TREE_DESCR_8_0;
|
|
/* build 16-bit layer */
|
|
if (tree[1] == tree[0]) {
|
|
tree[1] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
|
|
descr |= TREE_DESCR_16_0;
|
|
}
|
|
if (tree[3] == tree[2]) {
|
|
tree[3] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
|
|
descr |= TREE_DESCR_16_1;
|
|
}
|
|
if (tree[5] == tree[4]) {
|
|
tree[5] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
|
|
descr |= TREE_DESCR_16_2;
|
|
}
|
|
if (tree[7] == tree[6]) {
|
|
tree[7] = SVal_INVALID;
|
|
descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
|
|
descr |= TREE_DESCR_16_3;
|
|
}
|
|
/* build 32-bit layer */
|
|
if (tree[2] == tree[0]
|
|
&& (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
|
|
tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
|
|
descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
|
|
descr |= TREE_DESCR_32_0;
|
|
}
|
|
if (tree[6] == tree[4]
|
|
&& (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
|
|
tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
|
|
descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
|
|
descr |= TREE_DESCR_32_1;
|
|
}
|
|
/* build 64-bit layer */
|
|
if (tree[4] == tree[0]
|
|
&& (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
|
|
tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
|
|
descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
|
|
descr |= TREE_DESCR_64;
|
|
}
|
|
return descr;
|
|
}
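
/* Illustrative sketch, compiled out: normalise_tree collapses equal
   neighbouring leaves into wider nodes.  A tree whose eight leaves all
   hold the same (valid) SVal -- SVal_NOACCESS here, though any
   non-INVALID value would do -- collapses into a single 64-bit node. */
#if 0
static void example_normalise_tree ( void )
{
   Word i;
   SVal tree[8];
   for (i = 0; i < 8; i++)
      tree[i] = SVal_NOACCESS;
   tl_assert( normalise_tree(tree) == TREE_DESCR_64 );
   /* After the call, tree[1..7] are SVal_INVALID; only tree[0] remains
      meaningful, as described by the returned descriptor. */
}
#endif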
|
|
|
|
/* This takes a cacheline where all the data is at the leaves
|
|
(w8[..]) and builds a correctly normalised tree. */
|
|
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
|
|
{
|
|
Word tno, cloff;
|
|
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
|
|
SVal* tree = &cl->svals[cloff];
|
|
cl->descrs[tno] = normalise_tree( tree );
|
|
}
|
|
tl_assert(cloff == N_LINE_ARANGE);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
stats__cline_normalises++;
|
|
}
|
|
|
|
|
|
typedef struct { UChar count; SVal sval; } CountedSVal;
|
|
|
|
static
|
|
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
|
|
/*OUT*/Word* dstUsedP,
|
|
Word nDst, CacheLine* src )
|
|
{
|
|
Word tno, cloff, dstUsed;
|
|
|
|
tl_assert(nDst == N_LINE_ARANGE);
|
|
dstUsed = 0;
|
|
|
|
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
|
|
UShort descr = src->descrs[tno];
|
|
SVal* tree = &src->svals[cloff];
|
|
|
|
/* sequentialise the tree described by (descr,tree). */
|
|
# define PUT(_n,_v) \
|
|
do { dst[dstUsed ].count = (_n); \
|
|
dst[dstUsed++].sval = (_v); \
|
|
} while (0)
|
|
|
|
/* byte 0 */
|
|
if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
|
|
if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
|
|
if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
|
|
if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
|
|
/* byte 1 */
|
|
if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
|
|
/* byte 2 */
|
|
if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
|
|
if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
|
|
/* byte 3 */
|
|
if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
|
|
/* byte 4 */
|
|
if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
|
|
if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
|
|
if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
|
|
/* byte 5 */
|
|
if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
|
|
/* byte 6 */
|
|
if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
|
|
if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
|
|
/* byte 7 */
|
|
if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
|
|
|
|
# undef PUT
|
|
/* END sequentialise the tree described by (descr,tree). */
|
|
|
|
}
|
|
tl_assert(cloff == N_LINE_ARANGE);
|
|
tl_assert(dstUsed <= nDst);
|
|
|
|
*dstUsedP = dstUsed;
|
|
}
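
/* Illustrative sketch, compiled out: the shape of the run-length output
   produced above.  A tree whose descriptor is TREE_DESCR_64 contributes
   one (count==8, sval) entry, whereas a fully split tree contributes
   eight (count==1) entries; for a sane CacheLine every entry has a count
   of 8, 4, 2 or 1 and the counts sum to N_LINE_ARANGE.  This is the
   property cacheline_wback depends on when it checks csvalsUsed. */
#if 0
static void example_sequentialise ( CacheLine* cl )
{
   CountedSVal csvals[N_LINE_ARANGE];
   Word        used = 0;
   sequentialise_CacheLine( csvals, &used, N_LINE_ARANGE, cl );
   tl_assert(used >= 1 && used <= N_LINE_ARANGE);
}
#endif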
|
|
|
|
/* Write the cacheline 'wix' to backing store. Where it ends up
|
|
is determined by its tag field. */
|
|
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
|
|
{
|
|
Word i, j, k, m;
|
|
Addr tag;
|
|
SecMap* sm;
|
|
CacheLine* cl;
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
Word zix, fix, csvalsUsed;
|
|
CountedSVal csvals[N_LINE_ARANGE];
|
|
SVal sv;
|
|
|
|
if (0)
|
|
VG_(printf)("scache wback line %d\n", (Int)wix);
|
|
|
|
tl_assert(wix >= 0 && wix < N_WAY_NENT);
|
|
|
|
tag = cache_shmem.tags0[wix];
|
|
cl = &cache_shmem.lyns0[wix];
|
|
|
|
/* The cache line may have been invalidated; if so, ignore it. */
|
|
if (!is_valid_scache_tag(tag))
|
|
return;
|
|
|
|
/* Where are we going to put it? */
|
|
sm = NULL;
|
|
lineZ = NULL;
|
|
lineF = NULL;
|
|
zix = fix = -1;
|
|
|
|
/* find the Z line to write in and rcdec it or the associated F
|
|
line. */
|
|
find_Z_for_writing( &sm, &zix, tag );
|
|
|
|
tl_assert(sm);
|
|
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
|
|
/* Generate the data to be stored */
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
|
|
csvalsUsed = -1;
|
|
sequentialise_CacheLine( csvals, &csvalsUsed,
|
|
N_LINE_ARANGE, cl );
|
|
tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
|
|
if (0) VG_(printf)("%lu ", csvalsUsed);
|
|
|
|
lineZ->dict[0] = lineZ->dict[1]
|
|
= lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
|
|
|
|
/* i indexes actual shadow values, k is cursor in csvals */
|
|
i = 0;
|
|
for (k = 0; k < csvalsUsed; k++) {
|
|
|
|
sv = csvals[k].sval;
|
|
if (CHECK_ZSM)
|
|
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
|
|
/* do we already have it? */
|
|
if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
|
|
if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
|
|
if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
|
|
if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
|
|
/* no. look for a free slot. */
|
|
if (CHECK_ZSM)
|
|
tl_assert(sv != SVal_INVALID);
|
|
if (lineZ->dict[0]
|
|
== SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
|
|
if (lineZ->dict[1]
|
|
== SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
|
|
if (lineZ->dict[2]
|
|
== SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
|
|
if (lineZ->dict[3]
|
|
== SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
|
|
break; /* we'll have to use the f rep */
|
|
dict_ok:
|
|
m = csvals[k].count;
|
|
if (m == 8) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
write_twobit_array( lineZ->ix2s, i+1, j );
|
|
write_twobit_array( lineZ->ix2s, i+2, j );
|
|
write_twobit_array( lineZ->ix2s, i+3, j );
|
|
write_twobit_array( lineZ->ix2s, i+4, j );
|
|
write_twobit_array( lineZ->ix2s, i+5, j );
|
|
write_twobit_array( lineZ->ix2s, i+6, j );
|
|
write_twobit_array( lineZ->ix2s, i+7, j );
|
|
i += 8;
|
|
}
|
|
else if (m == 4) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
write_twobit_array( lineZ->ix2s, i+1, j );
|
|
write_twobit_array( lineZ->ix2s, i+2, j );
|
|
write_twobit_array( lineZ->ix2s, i+3, j );
|
|
i += 4;
|
|
}
|
|
else if (m == 1) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
i += 1;
|
|
}
|
|
else if (m == 2) {
|
|
write_twobit_array( lineZ->ix2s, i+0, j );
|
|
write_twobit_array( lineZ->ix2s, i+1, j );
|
|
i += 2;
|
|
}
|
|
else {
|
|
tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
|
|
}
|
|
|
|
}
|
|
|
|
if (LIKELY(i == N_LINE_ARANGE)) {
|
|
/* Construction of the compressed representation was
|
|
successful. */
|
|
rcinc_LineZ(lineZ);
|
|
stats__cache_Z_wbacks++;
|
|
} else {
|
|
/* Cannot use the compressed(z) representation. Use the full(f)
|
|
rep instead. */
|
|
tl_assert(i >= 0 && i < N_LINE_ARANGE);
|
|
alloc_F_for_writing( sm, &fix );
|
|
tl_assert(sm->linesF);
|
|
tl_assert(sm->linesF_size > 0);
|
|
tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
|
|
lineF = &sm->linesF[fix];
|
|
tl_assert(!lineF->inUse);
|
|
lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
|
|
lineZ->dict[1] = (SVal)fix;
|
|
lineF->inUse = True;
|
|
i = 0;
|
|
for (k = 0; k < csvalsUsed; k++) {
|
|
if (CHECK_ZSM)
|
|
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
|
|
sv = csvals[k].sval;
|
|
if (CHECK_ZSM)
|
|
tl_assert(sv != SVal_INVALID);
|
|
for (m = csvals[k].count; m > 0; m--) {
|
|
lineF->w64s[i] = sv;
|
|
i++;
|
|
}
|
|
}
|
|
tl_assert(i == N_LINE_ARANGE);
|
|
rcinc_LineF(lineF);
|
|
stats__cache_F_wbacks++;
|
|
}
|
|
}
|
|
|
|
/* Fetch the cacheline 'wix' from the backing store. The tag
|
|
associated with 'wix' is assumed to have already been filled in;
|
|
hence that is used to determine where in the backing store to read
|
|
from. */
|
|
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
|
|
{
|
|
Word i;
|
|
Addr tag;
|
|
CacheLine* cl;
|
|
LineZ* lineZ;
|
|
LineF* lineF;
|
|
|
|
if (0)
|
|
VG_(printf)("scache fetch line %d\n", (Int)wix);
|
|
|
|
tl_assert(wix >= 0 && wix < N_WAY_NENT);
|
|
|
|
tag = cache_shmem.tags0[wix];
|
|
cl = &cache_shmem.lyns0[wix];
|
|
|
|
/* reject nonsense requests */
|
|
tl_assert(is_valid_scache_tag(tag));
|
|
|
|
lineZ = NULL;
|
|
lineF = NULL;
|
|
find_ZF_for_reading( &lineZ, &lineF, tag );
|
|
tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
|
|
|
|
/* expand the data into the bottom layer of the tree, then get
|
|
cacheline_normalise to build the descriptor array. */
|
|
if (lineF) {
|
|
tl_assert(lineF->inUse);
|
|
for (i = 0; i < N_LINE_ARANGE; i++) {
|
|
cl->svals[i] = lineF->w64s[i];
|
|
}
|
|
stats__cache_F_fetches++;
|
|
} else {
|
|
for (i = 0; i < N_LINE_ARANGE; i++) {
|
|
SVal sv;
|
|
UWord ix = read_twobit_array( lineZ->ix2s, i );
|
|
/* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
|
|
sv = lineZ->dict[ix];
|
|
tl_assert(sv != SVal_INVALID);
|
|
cl->svals[i] = sv;
|
|
}
|
|
stats__cache_Z_fetches++;
|
|
}
|
|
normalise_CacheLine( cl );
|
|
}
|
|
|
|
static void shmem__invalidate_scache ( void ) {
|
|
Word wix;
|
|
if (0) VG_(printf)("%s","scache inval\n");
|
|
tl_assert(!is_valid_scache_tag(1));
|
|
for (wix = 0; wix < N_WAY_NENT; wix++) {
|
|
cache_shmem.tags0[wix] = 1/*INVALID*/;
|
|
}
|
|
stats__cache_invals++;
|
|
}
|
|
|
|
static void shmem__flush_and_invalidate_scache ( void ) {
|
|
Word wix;
|
|
Addr tag;
|
|
if (0) VG_(printf)("%s","scache flush and invalidate\n");
|
|
tl_assert(!is_valid_scache_tag(1));
|
|
for (wix = 0; wix < N_WAY_NENT; wix++) {
|
|
tag = cache_shmem.tags0[wix];
|
|
if (tag == 1/*INVALID*/) {
|
|
/* already invalid; nothing to do */
|
|
} else {
|
|
tl_assert(is_valid_scache_tag(tag));
|
|
cacheline_wback( wix );
|
|
}
|
|
cache_shmem.tags0[wix] = 1/*INVALID*/;
|
|
}
|
|
stats__cache_flushes++;
|
|
stats__cache_invals++;
|
|
}
|
|
|
|
|
|
static inline Bool aligned16 ( Addr a ) {
|
|
return 0 == (a & 1);
|
|
}
|
|
static inline Bool aligned32 ( Addr a ) {
|
|
return 0 == (a & 3);
|
|
}
|
|
static inline Bool aligned64 ( Addr a ) {
|
|
return 0 == (a & 7);
|
|
}
|
|
static inline UWord get_cacheline_offset ( Addr a ) {
|
|
return (UWord)(a & (N_LINE_ARANGE - 1));
|
|
}
|
|
static inline Addr cacheline_ROUNDUP ( Addr a ) {
|
|
return ROUNDUP(a, N_LINE_ARANGE);
|
|
}
|
|
static inline Addr cacheline_ROUNDDN ( Addr a ) {
|
|
return ROUNDDN(a, N_LINE_ARANGE);
|
|
}
|
|
static inline UWord get_treeno ( Addr a ) {
|
|
return get_cacheline_offset(a) >> 3;
|
|
}
|
|
static inline UWord get_tree_offset ( Addr a ) {
|
|
return a & 7;
|
|
}
|
|
|
|
static __attribute__((noinline))
|
|
CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
|
|
static inline CacheLine* get_cacheline ( Addr a )
|
|
{
|
|
/* tag is 'a' with the in-line offset masked out,
|
|
eg a[31]..a[4] 0000 */
|
|
Addr tag = a & ~(N_LINE_ARANGE - 1);
|
|
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
stats__cache_totrefs++;
|
|
if (LIKELY(tag == cache_shmem.tags0[wix])) {
|
|
return &cache_shmem.lyns0[wix];
|
|
} else {
|
|
return get_cacheline_MISS( a );
|
|
}
|
|
}
|
|
|
|
static __attribute__((noinline))
|
|
CacheLine* get_cacheline_MISS ( Addr a )
|
|
{
|
|
/* tag is 'a' with the in-line offset masked out,
|
|
eg a[31]..a[4] 0000 */
|
|
|
|
CacheLine* cl;
|
|
Addr* tag_old_p;
|
|
Addr tag = a & ~(N_LINE_ARANGE - 1);
|
|
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
|
|
tl_assert(tag != cache_shmem.tags0[wix]);
|
|
|
|
/* Dump the old line into the backing store. */
|
|
stats__cache_totmisses++;
|
|
|
|
cl = &cache_shmem.lyns0[wix];
|
|
tag_old_p = &cache_shmem.tags0[wix];
|
|
|
|
if (is_valid_scache_tag( *tag_old_p )) {
|
|
/* EXPENSIVE and REDUNDANT: callee does it */
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
cacheline_wback( wix );
|
|
}
|
|
/* and reload the new one */
|
|
*tag_old_p = tag;
|
|
cacheline_fetch( wix );
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
return cl;
|
|
}
|
|
|
|
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
|
|
stats__cline_64to32pulldown++;
|
|
switch (toff) {
|
|
case 0: case 4:
|
|
tl_assert(descr & TREE_DESCR_64);
|
|
tree[4] = tree[0];
|
|
descr &= ~TREE_DESCR_64;
|
|
descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
|
|
stats__cline_32to16pulldown++;
|
|
switch (toff) {
|
|
case 0: case 2:
|
|
if (!(descr & TREE_DESCR_32_0)) {
|
|
descr = pulldown_to_32(tree, 0, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_32_0);
|
|
tree[2] = tree[0];
|
|
descr &= ~TREE_DESCR_32_0;
|
|
descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
|
|
break;
|
|
case 4: case 6:
|
|
if (!(descr & TREE_DESCR_32_1)) {
|
|
descr = pulldown_to_32(tree, 4, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_32_1);
|
|
tree[6] = tree[4];
|
|
descr &= ~TREE_DESCR_32_1;
|
|
descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}
|
|
|
|
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
|
|
stats__cline_16to8pulldown++;
|
|
switch (toff) {
|
|
case 0: case 1:
|
|
if (!(descr & TREE_DESCR_16_0)) {
|
|
descr = pulldown_to_16(tree, 0, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_0);
|
|
tree[1] = tree[0];
|
|
descr &= ~TREE_DESCR_16_0;
|
|
descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
|
|
break;
|
|
case 2: case 3:
|
|
if (!(descr & TREE_DESCR_16_1)) {
|
|
descr = pulldown_to_16(tree, 2, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_1);
|
|
tree[3] = tree[2];
|
|
descr &= ~TREE_DESCR_16_1;
|
|
descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
|
|
break;
|
|
case 4: case 5:
|
|
if (!(descr & TREE_DESCR_16_2)) {
|
|
descr = pulldown_to_16(tree, 4, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_2);
|
|
tree[5] = tree[4];
|
|
descr &= ~TREE_DESCR_16_2;
|
|
descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
|
|
break;
|
|
case 6: case 7:
|
|
if (!(descr & TREE_DESCR_16_3)) {
|
|
descr = pulldown_to_16(tree, 6, descr);
|
|
}
|
|
tl_assert(descr & TREE_DESCR_16_3);
|
|
tree[7] = tree[6];
|
|
descr &= ~TREE_DESCR_16_3;
|
|
descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
return descr;
|
|
}


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
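
/* Added illustration, not part of the original library: the reverse
   direction.  Starting from a tree whose eight 8-bit leaves are all
   valid, pulling up at offset 0 collapses the low four of them into a
   single 32-bit descriptor bit, leaving the high four leaf bits (and
   all the SVals themselves) untouched. */
#if 0
static void example_pullup ( void )
{
   UShort descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
                  | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
                  | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   descr = pullup_descr_to_32( descr, 0 );
   tl_assert(descr == (TREE_DESCR_32_0 | TREE_DESCR_8_7 | TREE_DESCR_8_6
                       | TREE_DESCR_8_5 | TREE_DESCR_8_4));
}
#endif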

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */

__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry.  Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* The dead thread (ThrID, actually) table.  A thread may only be
   listed here if we have been notified thereof by libhb_async_exit.
   New entries are added at the end.  The order isn't important, but
   the ThrID values must be unique.  This table lists the identity of
   all threads that have ever died -- none are ever removed.  We keep
   this table so as to be able to prune entries from VTSs.  We don't
   actually need to keep the set of threads that have ever died --
   only the threads that have died since the previous round of
   pruning.  But it's useful for sanity check purposes to keep the
   entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table = NULL;

/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   ThrID id1 = *(const ThrID*)v1;
   ThrID id2 = *(const ThrID*)v2;
   if (id1 < id2) return -1;
   if (id1 > id2) return 1;
   return 0;
}

static void verydead_thread_table_init ( void )
{
   tl_assert(!verydead_thread_table);
   verydead_thread_table
      = VG_(newXA)( HG_(zalloc),
                    "libhb.verydead_thread_table_init.1",
                    HG_(free), sizeof(ThrID) );
   tl_assert(verydead_thread_table);
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
}


/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;

/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );

/* Make a clone of 'vts', sizing the new array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );

/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed.
   The new array is sized exactly to hold the number of required
   elements.  'thridsToDel' is an array of ThrIDs to be omitted in the
   clone, and must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   the discovered point for which they are not.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

/* Notify the VTS machinery that a thread has been declared
   comprehensively dead: that is, it has done an async exit AND it has
   been joined with.  This should ensure that its local clocks (.viR
   and .viW) will never again change, and so all mentions of this
   thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}
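
/* Added note, not in the original: concretely, a sane VTS such as
   [ 1024:3, 1027:1 ] keeps its ScalarTS entries sorted by strictly
   increasing .thrid and never stores a zero .tym -- a zero timestamp
   is represented simply by the thread being absent, which is why the
   operations below treat missing entries as implicit zeroes.  The
   ThrID values shown are illustrative only. */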


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS    = vts->usedTS;
   VTS* clone  = VTS__new(who, nTS);
   clone->id     = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
      There are 3 possibilities:
      (a) there is no next entry (we used them all up already):
          add (me_thrid,1) to the output, and quit
      (b) there is a next entry, and its thrid > me_thrid:
          add (me_thrid,1) to the output, then copy the remaining entries
      (c) there is a next entry, and its thrid == me_thrid:
          copy it to the output but increment its timestamp value.
          Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
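
/* Added worked example, not in the original: if vts == [ 1024:5, 1030:2 ]
   and 'me' has ThrID 1030, VTS__tick yields [ 1024:5, 1030:3 ] (case (c));
   if 'me' has ThrID 1027 instead, it yields [ 1024:5, 1027:1, 1030:2 ]
   (case (b)).  The ThrID values are illustrative only. */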


/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
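
/* Added worked example, not in the original: joining
   a == [ 1024:2, 1026:5 ] with b == [ 1025:3, 1026:1 ] gives
   [ 1024:2, 1025:3, 1026:5 ] -- per-thread maxima, with threads missing
   from one side treated as having timestamp zero.  Here ncommon == 1,
   so usedTS == 2 + 2 - 1 == 3, as the final assertion requires.  The
   ThrID values are illustrative only. */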


/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
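
/* Added worked example, not in the original: with a == [ 1024:2 ] and
   b == [ 1024:2, 1025:1 ], every component of a is <= the corresponding
   component of b (a's missing 1025 counts as zero), so VTS__cmpLEQ(a,b)
   returns 0.  Swapping the arguments, b's 1025:1 exceeds a's implicit
   1025:0, so VTS__cmpLEQ(b,a) returns 1025, the ThrID at which LEQ
   fails.  The ThrID values are illustrative only. */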
|
|
|
|
|
|
/* Compute an arbitrary structural (total) ordering on the two args,
|
|
based on their VCs, so they can be looked up in a table, tree, etc.
|
|
Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
|
|
   performance critical so there is some effort expended to make it as
   fast as possible.
|
|
*/
|
|
Word VTS__cmp_structural ( VTS* a, VTS* b )
|
|
{
|
|
/* We just need to generate an arbitrary total ordering based on
|
|
a->ts and b->ts. Preferably do it in a way which comes across likely
|
|
differences relatively quickly. */
|
|
Word i;
|
|
Word useda = 0, usedb = 0;
|
|
ScalarTS *ctsa = NULL, *ctsb = NULL;
|
|
|
|
stats__vts__cmp_structural++;
|
|
|
|
tl_assert(a);
|
|
tl_assert(b);
|
|
|
|
ctsa = &a->ts[0]; useda = a->usedTS;
|
|
ctsb = &b->ts[0]; usedb = b->usedTS;
|
|
|
|
if (LIKELY(useda == usedb)) {
|
|
ScalarTS *tmpa = NULL, *tmpb = NULL;
|
|
stats__vts__cmp_structural_slow++;
|
|
/* Same length vectors. Find the first difference, if any, as
|
|
fast as possible. */
|
|
for (i = 0; i < useda; i++) {
|
|
tmpa = &ctsa[i];
|
|
tmpb = &ctsb[i];
|
|
if (LIKELY(tmpa->tym == tmpb->tym
|
|
&& tmpa->thrid == tmpb->thrid))
|
|
continue;
|
|
else
|
|
break;
|
|
}
|
|
if (UNLIKELY(i == useda)) {
|
|
/* They're identical. */
|
|
return 0;
|
|
} else {
|
|
tl_assert(i >= 0 && i < useda);
|
|
if (tmpa->tym < tmpb->tym) return -1;
|
|
if (tmpa->tym > tmpb->tym) return 1;
|
|
if (tmpa->thrid < tmpb->thrid) return -1;
|
|
if (tmpa->thrid > tmpb->thrid) return 1;
|
|
/* we just established them as non-identical, hence: */
|
|
}
|
|
/*NOTREACHED*/
|
|
tl_assert(0);
|
|
}
|
|
|
|
if (useda < usedb) return -1;
|
|
if (useda > usedb) return 1;
|
|
/*NOTREACHED*/
|
|
tl_assert(0);
|
|
}
|
|
|
|
|
|
/* Debugging only. Display the given VTS in the buffer.
|
|
*/
|
|
void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
|
|
{
|
|
ScalarTS* st;
|
|
HChar unit[64];
|
|
Word i, n;
|
|
Int avail = nBuf;
|
|
tl_assert(vts && vts->ts);
|
|
tl_assert(nBuf > 16);
|
|
buf[0] = '[';
|
|
buf[1] = 0;
|
|
n = vts->usedTS;
|
|
for (i = 0; i < n; i++) {
|
|
tl_assert(avail >= 40);
|
|
st = &vts->ts[i];
|
|
VG_(memset)(unit, 0, sizeof(unit));
|
|
VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
|
|
st->thrid, (ULong)st->tym);
|
|
if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
|
|
VG_(strcat)(buf, " ...]");
|
|
buf[nBuf-1] = 0;
|
|
return;
|
|
}
|
|
VG_(strcat)(buf, unit);
|
|
avail -= VG_(strlen)(unit);
|
|
}
|
|
VG_(strcat)(buf, "]");
|
|
buf[nBuf-1] = 0;
|
|
}
|
|
|
|
|
|
/* Debugging only. Return vts[index], so to speak.
|
|
*/
|
|
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
|
|
{
|
|
UWord i, n;
|
|
ThrID idx_thrid = Thr__to_ThrID(idx);
|
|
stats__vts__indexat_slow++;
|
|
tl_assert(vts && vts->ts);
|
|
n = vts->usedTS;
|
|
for (i = 0; i < n; i++) {
|
|
ScalarTS* st = &vts->ts[i];
|
|
if (st->thrid == idx_thrid)
|
|
return st->tym;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* See comment on prototype above.
|
|
*/
|
|
static void VTS__declare_thread_very_dead ( Thr* thr )
|
|
{
|
|
if (0) VG_(printf)("VTQ: tae %p\n", thr);
|
|
|
|
tl_assert(thr->llexit_done);
|
|
tl_assert(thr->joinedwith_done);
|
|
|
|
ThrID nyu;
|
|
nyu = Thr__to_ThrID(thr);
|
|
VG_(addToXA)( verydead_thread_table, &nyu );
|
|
|
|
/* We can only get here if we're assured that we'll never again
|
|
need to look at this thread's ::viR or ::viW. Set them to
|
|
VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
|
|
mostly so that we don't wind up pruning them (as that would be
|
|
nonsensical: the only interesting ScalarTS entry for a dead
|
|
thread is its own index, and the pruning will remove that.). */
|
|
VtsID__rcdec(thr->viR);
|
|
VtsID__rcdec(thr->viW);
|
|
thr->viR = VtsID_INVALID;
|
|
thr->viW = VtsID_INVALID;
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION END vts primitives //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION BEGIN main library //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// VTS set //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static WordFM* /* WordFM VTS* void */ vts_set = NULL;
|
|
|
|
static void vts_set_init ( void )
|
|
{
|
|
tl_assert(!vts_set);
|
|
vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
|
|
HG_(free),
|
|
(Word(*)(UWord,UWord))VTS__cmp_structural );
|
|
tl_assert(vts_set);
|
|
}
|
|
|
|
/* Given a VTS, look in vts_set to see if we already have a
|
|
structurally identical one. If yes, return the pair (True, pointer
|
|
to the existing one). If no, clone this one, add the clone to the
|
|
set, and return (False, pointer to the clone). */
|
|
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
|
|
{
|
|
UWord keyW, valW;
|
|
stats__vts_set__focaa++;
|
|
tl_assert(cand->id == VtsID_INVALID);
|
|
/* lookup cand (by value) */
|
|
if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
|
|
/* found it */
|
|
tl_assert(valW == 0);
|
|
/* if this fails, cand (by ref) was already present (!) */
|
|
tl_assert(keyW != (UWord)cand);
|
|
*res = (VTS*)keyW;
|
|
return True;
|
|
} else {
|
|
/* not present. Clone, add and return address of clone. */
|
|
stats__vts_set__focaa_a++;
|
|
VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
|
|
tl_assert(clone != cand);
|
|
VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
|
|
*res = clone;
|
|
return False;
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// VTS table //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static void VtsID__invalidate_caches ( void ); /* fwds */
|
|
|
|
/* A type to hold VTS table entries. Invariants:
|
|
If .vts == NULL, then this entry is not in use, so:
|
|
- .rc == 0
|
|
- this entry is on the freelist (unfortunately, does not imply
|
|
any constraints on value for .freelink)
|
|
If .vts != NULL, then this entry is in use:
|
|
- .vts is findable in vts_set
|
|
- .vts->id == this entry number
|
|
- no specific value for .rc (even 0 is OK)
|
|
- this entry is not on freelist, so .freelink == VtsID_INVALID
|
|
*/
|
|
typedef
|
|
struct {
|
|
VTS* vts; /* vts, in vts_set */
|
|
UWord rc; /* reference count - enough for entire aspace */
|
|
VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
|
|
VtsID remap; /* used only during pruning */
|
|
}
|
|
VtsTE;
|
|
|
|
/* The VTS table. */
|
|
static XArray* /* of VtsTE */ vts_tab = NULL;
|
|
|
|
/* An index into the VTS table, indicating the start of the list of
|
|
free (available for use) entries. If the list is empty, this is
|
|
VtsID_INVALID. */
|
|
static VtsID vts_tab_freelist = VtsID_INVALID;
|
|
|
|
/* Do a GC of vts_tab when the freelist becomes empty AND the size of
|
|
vts_tab equals or exceeds this size. After GC, the value here is
|
|
set appropriately so as to check for the next GC point. */
|
|
static Word vts_next_GC_at = 1000;
|
|
|
|
static void vts_tab_init ( void )
|
|
{
|
|
vts_tab
|
|
= VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
|
|
HG_(free), sizeof(VtsTE) );
|
|
vts_tab_freelist
|
|
= VtsID_INVALID;
|
|
tl_assert(vts_tab);
|
|
}
|
|
|
|
/* Add ii to the free list, checking that it looks out-of-use. */
|
|
static void add_to_free_list ( VtsID ii )
|
|
{
|
|
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts == NULL);
|
|
tl_assert(ie->rc == 0);
|
|
tl_assert(ie->freelink == VtsID_INVALID);
|
|
ie->freelink = vts_tab_freelist;
|
|
vts_tab_freelist = ii;
|
|
}
|
|
|
|
/* Get an entry from the free list. This will return VtsID_INVALID if
|
|
the free list is empty. */
|
|
static VtsID get_from_free_list ( void )
|
|
{
|
|
VtsID ii;
|
|
VtsTE* ie;
|
|
if (vts_tab_freelist == VtsID_INVALID)
|
|
return VtsID_INVALID;
|
|
ii = vts_tab_freelist;
|
|
ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts == NULL);
|
|
tl_assert(ie->rc == 0);
|
|
vts_tab_freelist = ie->freelink;
|
|
return ii;
|
|
}
|
|
|
|
/* Produce a new VtsID that can be used, either by getting it from
|
|
the freelist, or, if that is empty, by expanding vts_tab. */
|
|
static VtsID get_new_VtsID ( void )
|
|
{
|
|
VtsID ii;
|
|
VtsTE te;
|
|
ii = get_from_free_list();
|
|
if (ii != VtsID_INVALID)
|
|
return ii;
|
|
te.vts = NULL;
|
|
te.rc = 0;
|
|
te.freelink = VtsID_INVALID;
|
|
te.remap = VtsID_INVALID;
|
|
ii = (VtsID)VG_(addToXA)( vts_tab, &te );
|
|
return ii;
|
|
}
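
/* Added note, not in the original: table entries cycle between vts_tab
   and the freelist.  An entry retired by the GC below is pushed with
   add_to_free_list() and handed out again here before vts_tab is ever
   grown, so the table only expands when no dead entries remain. */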
|
|
|
|
|
|
/* Indirect callback from lib_zsm. */
|
|
static void VtsID__rcinc ( VtsID ii )
|
|
{
|
|
VtsTE* ie;
|
|
/* VG_(indexXA) does a range check for us */
|
|
ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts); /* else it's not in use */
|
|
tl_assert(ie->rc < ~0UL); /* else we can't continue */
|
|
tl_assert(ie->vts->id == ii);
|
|
ie->rc++;
|
|
}
|
|
|
|
/* Indirect callback from lib_zsm. */
|
|
static void VtsID__rcdec ( VtsID ii )
|
|
{
|
|
VtsTE* ie;
|
|
/* VG_(indexXA) does a range check for us */
|
|
ie = VG_(indexXA)( vts_tab, ii );
|
|
tl_assert(ie->vts); /* else it's not in use */
|
|
tl_assert(ie->rc > 0); /* else RC snafu */
|
|
tl_assert(ie->vts->id == ii);
|
|
ie->rc--;
|
|
}
|
|
|
|
|
|
/* Look up 'cand' in our collection of VTSs. If present, return the
|
|
VtsID for the pre-existing version. If not present, clone it, add
|
|
the clone to both vts_tab and vts_set, allocate a fresh VtsID for
|
|
it, and return that. */
|
|
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
|
|
{
|
|
VTS* in_tab = NULL;
|
|
tl_assert(cand->id == VtsID_INVALID);
|
|
Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
|
|
tl_assert(in_tab);
|
|
if (already_have) {
|
|
/* We already have a copy of 'cand'. Use that. */
|
|
VtsTE* ie;
|
|
tl_assert(in_tab->id != VtsID_INVALID);
|
|
ie = VG_(indexXA)( vts_tab, in_tab->id );
|
|
tl_assert(ie->vts == in_tab);
|
|
return in_tab->id;
|
|
} else {
|
|
VtsID ii = get_new_VtsID();
|
|
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
|
|
ie->vts = in_tab;
|
|
ie->rc = 0;
|
|
ie->freelink = VtsID_INVALID;
|
|
in_tab->id = ii;
|
|
return ii;
|
|
}
|
|
}
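
/* Added illustration, not part of the original library: typical use of
   the find-or-clone interface.  'scratch', 'vts1' and 'vts2' are
   hypothetical names; the library's own callers use temp_max_sized_VTS,
   defined further down.  The scratch buffer can be reused immediately
   afterwards because a private clone is what gets stored. */
#if 0
{
   scratch->usedTS = 0;
   VTS__join( scratch, vts1, vts2 );  /* scratch->id stays VtsID_INVALID */
   VtsID vi = vts_tab__find__or__clone_and_add( scratch );
   /* 'vi' now names an interned, reference-counted copy of scratch. */
}
#endif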
|
|
|
|
|
|
static void show_vts_stats ( const HChar* caller )
|
|
{
|
|
UWord nSet, nTab, nLive;
|
|
ULong totrc;
|
|
UWord n, i;
|
|
nSet = VG_(sizeFM)( vts_set );
|
|
nTab = VG_(sizeXA)( vts_tab );
|
|
totrc = 0;
|
|
nLive = 0;
|
|
n = VG_(sizeXA)( vts_tab );
|
|
for (i = 0; i < n; i++) {
|
|
VtsTE* ie = VG_(indexXA)( vts_tab, i );
|
|
if (ie->vts) {
|
|
nLive++;
|
|
totrc += (ULong)ie->rc;
|
|
} else {
|
|
tl_assert(ie->rc == 0);
|
|
}
|
|
}
|
|
VG_(printf)(" show_vts_stats %s\n", caller);
|
|
VG_(printf)(" vts_tab size %4lu\n", nTab);
|
|
VG_(printf)(" vts_tab live %4lu\n", nLive);
|
|
VG_(printf)(" vts_set size %4lu\n", nSet);
|
|
VG_(printf)(" total rc %4llu\n", totrc);
|
|
}
|
|
|
|
|
|
/* --- Helpers for VtsID pruning --- */
|
|
|
|
static
|
|
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
|
|
/*MOD*/XArray* /* of VtsTE */ new_tab,
|
|
VtsID* ii )
|
|
{
|
|
VtsTE *old_te, *new_te;
|
|
VtsID old_id, new_id;
|
|
/* We're relying here on VG_(indexXA)'s range checking to assert on
|
|
any stupid values, in particular *ii == VtsID_INVALID. */
|
|
old_id = *ii;
|
|
old_te = VG_(indexXA)( old_tab, old_id );
|
|
old_te->rc--;
|
|
new_id = old_te->remap;
|
|
new_te = VG_(indexXA)( new_tab, new_id );
|
|
new_te->rc++;
|
|
*ii = new_id;
|
|
}
|
|
|
|
static
|
|
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
|
|
/*MOD*/XArray* /* of VtsTE */ new_tab,
|
|
SVal* s )
|
|
{
|
|
SVal old_sv, new_sv;
|
|
old_sv = *s;
|
|
if (SVal__isC(old_sv)) {
|
|
VtsID rMin, wMin;
|
|
rMin = SVal__unC_Rmin(old_sv);
|
|
wMin = SVal__unC_Wmin(old_sv);
|
|
remap_VtsID( old_tab, new_tab, &rMin );
|
|
remap_VtsID( old_tab, new_tab, &wMin );
|
|
new_sv = SVal__mkC( rMin, wMin );
|
|
*s = new_sv;
|
|
}
|
|
}
|
|
|
|
|
|
/* NOT TO BE CALLED FROM WITHIN libzsm. */
|
|
__attribute__((noinline))
|
|
static void vts_tab__do_GC ( Bool show_stats )
|
|
{
|
|
UWord i, nTab, nLive, nFreed;
|
|
|
|
/* ---------- BEGIN VTS GC ---------- */
|
|
/* check this is actually necessary. */
|
|
tl_assert(vts_tab_freelist == VtsID_INVALID);
|
|
|
|
/* empty the caches for partial order checks and binary joins. We
|
|
could do better and prune out the entries to be deleted, but it
|
|
ain't worth the hassle. */
|
|
VtsID__invalidate_caches();
|
|
|
|
/* First, make the reference counts up to date. */
|
|
zsm_flush_cache();
|
|
|
|
nTab = VG_(sizeXA)( vts_tab );
|
|
|
|
if (show_stats) {
|
|
VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
|
|
show_vts_stats("before GC");
|
|
}
|
|
|
|
/* Now we can inspect the entire vts_tab. Any entries with zero
|
|
.rc fields are now no longer in use and can be put back on the
|
|
free list, removed from vts_set, and deleted. */
|
|
nFreed = 0;
|
|
for (i = 0; i < nTab; i++) {
|
|
Bool present;
|
|
UWord oldK = 0, oldV = 12345;
|
|
VtsTE* te = VG_(indexXA)( vts_tab, i );
|
|
if (te->vts == NULL) {
|
|
tl_assert(te->rc == 0);
|
|
continue; /* already on the free list (presumably) */
|
|
}
|
|
if (te->rc > 0)
|
|
continue; /* in use */
|
|
/* Ok, we got one we can free. */
|
|
tl_assert(te->vts->id == i);
|
|
/* first, remove it from vts_set. */
|
|
present = VG_(delFromFM)( vts_set,
|
|
&oldK, &oldV, (UWord)te->vts );
|
|
tl_assert(present); /* else it isn't in vts_set ?! */
|
|
tl_assert(oldV == 0); /* no info stored in vts_set val fields */
|
|
tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
|
|
/* now free the VTS itself */
|
|
VTS__delete(te->vts);
|
|
te->vts = NULL;
|
|
/* and finally put this entry on the free list */
|
|
tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
|
|
add_to_free_list( i );
|
|
nFreed++;
|
|
}
|
|
|
|
/* Now figure out when the next GC should be. We'll allow the
|
|
number of VTSs to double before GCing again. Except of course
|
|
that since we can't (or, at least, don't) shrink vts_tab, we
|
|
      can't set the threshold value smaller than it. */
|
|
tl_assert(nFreed <= nTab);
|
|
nLive = nTab - nFreed;
|
|
tl_assert(nLive >= 0 && nLive <= nTab);
|
|
vts_next_GC_at = 2 * nLive;
|
|
if (vts_next_GC_at < nTab)
|
|
vts_next_GC_at = nTab;
|
|
|
|
if (show_stats) {
|
|
show_vts_stats("after GC");
|
|
VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
|
|
}
|
|
|
|
if (VG_(clo_stats)) {
|
|
static UInt ctr = 1;
|
|
tl_assert(nTab > 0);
|
|
VG_(message)(Vg_DebugMsg,
|
|
"libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
|
|
ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
|
|
}
|
|
/* ---------- END VTS GC ---------- */
|
|
|
|
/* Decide whether to do VTS pruning. We have one of three
|
|
settings. */
|
|
static UInt pruning_auto_ctr = 0; /* do not make non-static */
|
|
|
|
Bool do_pruning = False;
|
|
switch (HG_(clo_vts_pruning)) {
|
|
case 0: /* never */
|
|
break;
|
|
case 1: /* auto */
|
|
do_pruning = (++pruning_auto_ctr % 5) == 0;
|
|
break;
|
|
case 2: /* always */
|
|
do_pruning = True;
|
|
break;
|
|
default:
|
|
tl_assert(0);
|
|
}
|
|
|
|
/* The rest of this routine only handles pruning, so we can
|
|
quit at this point if it is not to be done. */
|
|
if (!do_pruning)
|
|
return;
|
|
|
|
/* ---------- BEGIN VTS PRUNING ---------- */
|
|
/* We begin by sorting the backing table on its .thr values, so as
|
|
to (1) check they are unique [else something has gone wrong,
|
|
since it means we must have seen some Thr* exiting more than
|
|
once, which can't happen], and (2) so that we can quickly look
|
|
up the dead-thread entries as we work through the VTSs. */
|
|
VG_(sortXA)( verydead_thread_table );
|
|
/* Sanity check: check for unique .sts.thr values. */
|
|
UWord nBT = VG_(sizeXA)( verydead_thread_table );
|
|
if (nBT > 0) {
|
|
ThrID thrid1, thrid2;
|
|
thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
|
|
for (i = 1; i < nBT; i++) {
|
|
thrid1 = thrid2;
|
|
thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
|
|
tl_assert(thrid1 < thrid2);
|
|
}
|
|
}
|
|
/* Ok, so the dead thread table has unique and in-order keys. */
|
|
|
|
/* We will run through the old table, and create a new table and
|
|
set, at the same time setting the .remap entries in the old
|
|
table to point to the new entries. Then, visit every VtsID in
|
|
the system, and replace all of them with new ones, using the
|
|
.remap entries in the old table. Finally, we can delete the old
|
|
table and set. */
|
|
|
|
XArray* /* of VtsTE */ new_tab
|
|
= VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
|
|
HG_(free), sizeof(VtsTE) );
|
|
|
|
/* WordFM VTS* void */
|
|
WordFM* new_set
|
|
= VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
|
|
HG_(free),
|
|
(Word(*)(UWord,UWord))VTS__cmp_structural );
|
|
|
|
/* Visit each old VTS. For each one:
|
|
|
|
* make a pruned version
|
|
|
|
* search new_set for the pruned version, yielding either
|
|
Nothing (not present) or the new VtsID for it.
|
|
|
|
* if not present, allocate a new VtsID for it, insert (pruned
|
|
VTS, new VtsID) in the tree, and set
|
|
remap_table[old VtsID] = new VtsID.
|
|
|
|
* if present, set remap_table[old VtsID] = new VtsID, where
|
|
new VtsID was determined by the tree lookup. Then free up
|
|
the clone.
|
|
*/
|
|
|
|
UWord nBeforePruning = 0, nAfterPruning = 0;
|
|
UWord nSTSsBefore = 0, nSTSsAfter = 0;
|
|
VtsID new_VtsID_ctr = 0;
|
|
|
|
for (i = 0; i < nTab; i++) {
|
|
|
|
/* For each old VTS .. */
|
|
VtsTE* old_te = VG_(indexXA)( vts_tab, i );
|
|
VTS* old_vts = old_te->vts;
|
|
tl_assert(old_te->remap == VtsID_INVALID);
|
|
|
|
/* Skip it if not in use */
|
|
if (old_te->rc == 0) {
|
|
tl_assert(old_vts == NULL);
|
|
continue;
|
|
}
|
|
tl_assert(old_vts != NULL);
|
|
tl_assert(old_vts->id == i);
|
|
tl_assert(old_vts->ts != NULL);
|
|
|
|
/* It is in use. Make a pruned version. */
|
|
nBeforePruning++;
|
|
nSTSsBefore += old_vts->usedTS;
|
|
VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
|
|
old_vts, verydead_thread_table);
|
|
tl_assert(new_vts->sizeTS == new_vts->usedTS);
|
|
tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
|
|
== 0x0ddC0ffeeBadF00dULL);
|
|
|
|
/* Get rid of the old VTS and the tree entry. It's a bit more
|
|
complex to incrementally delete the VTSs now than to nuke
|
|
them all after we're done, but the upside is that we don't
|
|
wind up temporarily storing potentially two complete copies
|
|
of each VTS and hence spiking memory use. */
|
|
UWord oldK = 0, oldV = 12345;
|
|
Bool present = VG_(delFromFM)( vts_set,
|
|
&oldK, &oldV, (UWord)old_vts );
|
|
tl_assert(present); /* else it isn't in vts_set ?! */
|
|
tl_assert(oldV == 0); /* no info stored in vts_set val fields */
|
|
tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
|
|
/* now free the VTS itself */
|
|
VTS__delete(old_vts);
|
|
old_te->vts = NULL;
|
|
old_vts = NULL;
|
|
|
|
/* NO MENTIONS of old_vts allowed beyond this point. */
|
|
|
|
/* Ok, we have the pruned copy in new_vts. See if a
|
|
structurally identical version is already present in new_set.
|
|
If so, delete the one we just made and move on; if not, add
|
|
it. */
|
|
VTS* identical_version = NULL;
|
|
UWord valW = 12345;
|
|
if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
|
|
(UWord)new_vts)) {
|
|
// already have it
|
|
tl_assert(valW == 0);
|
|
tl_assert(identical_version != NULL);
|
|
tl_assert(identical_version != new_vts);
|
|
VTS__delete(new_vts);
|
|
new_vts = identical_version;
|
|
tl_assert(new_vts->id != VtsID_INVALID);
|
|
} else {
|
|
tl_assert(valW == 12345);
|
|
tl_assert(identical_version == NULL);
|
|
new_vts->id = new_VtsID_ctr++;
|
|
Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
|
|
tl_assert(!b);
|
|
VtsTE new_te;
|
|
new_te.vts = new_vts;
|
|
new_te.rc = 0;
|
|
new_te.freelink = VtsID_INVALID;
|
|
new_te.remap = VtsID_INVALID;
|
|
Word j = VG_(addToXA)( new_tab, &new_te );
|
|
tl_assert(j <= i);
|
|
tl_assert(j == new_VtsID_ctr - 1);
|
|
// stats
|
|
nAfterPruning++;
|
|
nSTSsAfter += new_vts->usedTS;
|
|
}
|
|
old_te->remap = new_vts->id;
|
|
|
|
} /* for (i = 0; i < nTab; i++) */
|
|
|
|
/* At this point, we have:
|
|
* the old VTS table, with its .remap entries set,
|
|
and with all .vts == NULL.
|
|
* the old VTS tree should be empty, since it and the old VTSs
|
|
        it contained have been incrementally deleted as we worked
|
|
through the old table.
|
|
* the new VTS table, with all .rc == 0, all .freelink and .remap
|
|
== VtsID_INVALID.
|
|
* the new VTS tree.
|
|
*/
|
|
tl_assert( VG_(sizeFM)(vts_set) == 0 );
|
|
|
|
/* Now actually apply the mapping. */
|
|
/* Visit all the VtsIDs in the entire system. Where do we expect
|
|
to find them?
|
|
(a) in shadow memory -- the LineZs and LineFs
|
|
(b) in our collection of struct _Thrs.
|
|
(c) in our collection of struct _SOs.
|
|
Nowhere else, AFAICS. Not in the zsm cache, because that just
|
|
got invalidated.
|
|
|
|
Using the .remap fields in vts_tab, map each old VtsID to a new
|
|
VtsID. For each old VtsID, dec its rc; and for each new one,
|
|
inc it. This sets up the new refcounts, and it also gives a
|
|
cheap sanity check of the old ones: all old refcounts should be
|
|
zero after this operation.
|
|
*/
|
|
|
|
/* Do the mappings for (a) above: iterate over the Primary shadow
|
|
mem map (WordFM Addr SecMap*). */
|
|
UWord secmapW = 0;
|
|
VG_(initIterFM)( map_shmem );
|
|
while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
|
|
UWord j;
|
|
SecMap* sm = (SecMap*)secmapW;
|
|
tl_assert(sm->magic == SecMap_MAGIC);
|
|
/* Deal with the LineZs */
|
|
for (i = 0; i < N_SECMAP_ZLINES; i++) {
|
|
LineZ* lineZ = &sm->linesZ[i];
|
|
if (lineZ->dict[0] == SVal_INVALID)
|
|
continue; /* not in use -- data is in F rep instead */
|
|
for (j = 0; j < 4; j++)
|
|
remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
|
|
}
|
|
/* Deal with the LineFs */
|
|
for (i = 0; i < sm->linesF_size; i++) {
|
|
LineF* lineF = &sm->linesF[i];
|
|
if (!lineF->inUse)
|
|
continue;
|
|
for (j = 0; j < N_LINE_ARANGE; j++)
|
|
remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
|
|
}
|
|
}
|
|
VG_(doneIterFM)( map_shmem );
|
|
|
|
/* Do the mappings for (b) above: visit our collection of struct
|
|
_Thrs. */
|
|
Thread* hgthread = get_admin_threads();
|
|
tl_assert(hgthread);
|
|
while (hgthread) {
|
|
Thr* hbthr = hgthread->hbthr;
|
|
tl_assert(hbthr);
|
|
/* Threads that are listed in the prunable set have their viR
|
|
and viW set to VtsID_INVALID, so we can't mess with them. */
|
|
if (hbthr->llexit_done && hbthr->joinedwith_done) {
|
|
tl_assert(hbthr->viR == VtsID_INVALID);
|
|
tl_assert(hbthr->viW == VtsID_INVALID);
|
|
hgthread = hgthread->admin;
|
|
continue;
|
|
}
|
|
remap_VtsID( vts_tab, new_tab, &hbthr->viR );
|
|
remap_VtsID( vts_tab, new_tab, &hbthr->viW );
|
|
hgthread = hgthread->admin;
|
|
}
|
|
|
|
/* Do the mappings for (c) above: visit the struct _SOs. */
|
|
SO* so = admin_SO;
|
|
while (so) {
|
|
if (so->viR != VtsID_INVALID)
|
|
remap_VtsID( vts_tab, new_tab, &so->viR );
|
|
if (so->viW != VtsID_INVALID)
|
|
remap_VtsID( vts_tab, new_tab, &so->viW );
|
|
so = so->admin_next;
|
|
}
|
|
|
|
/* So, we're nearly done (with this incredibly complex operation).
|
|
Check the refcounts for the old VtsIDs all fell to zero, as
|
|
expected. Any failure is serious. */
|
|
for (i = 0; i < nTab; i++) {
|
|
VtsTE* te = VG_(indexXA)( vts_tab, i );
|
|
tl_assert(te->vts == NULL);
|
|
/* This is the assert proper. Note we're also asserting
|
|
zeroness for old entries which are unmapped (hence have
|
|
.remap == VtsID_INVALID). That's OK. */
|
|
tl_assert(te->rc == 0);
|
|
}
|
|
|
|
/* Install the new table and set. */
|
|
VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
|
|
vts_set = new_set;
|
|
VG_(deleteXA)( vts_tab );
|
|
vts_tab = new_tab;
|
|
|
|
/* The freelist of vts_tab entries is empty now, because we've
|
|
compacted all of the live entries at the low end of the
|
|
table. */
|
|
vts_tab_freelist = VtsID_INVALID;
|
|
|
|
/* Sanity check vts_set and vts_tab. */
|
|
|
|
/* Because all the live entries got slid down to the bottom of vts_tab: */
|
|
tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
|
|
|
|
/* Assert that the vts_tab and vts_set entries point at each other
|
|
in the required way */
|
|
UWord wordK = 0, wordV = 0;
|
|
VG_(initIterFM)( vts_set );
|
|
while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
|
|
tl_assert(wordK != 0);
|
|
tl_assert(wordV == 0);
|
|
VTS* vts = (VTS*)wordK;
|
|
tl_assert(vts->id != VtsID_INVALID);
|
|
VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
|
|
tl_assert(te->vts == vts);
|
|
}
|
|
VG_(doneIterFM)( vts_set );
|
|
|
|
/* Also iterate over the table, and check each entry is
|
|
plausible. */
|
|
nTab = VG_(sizeXA)( vts_tab );
|
|
for (i = 0; i < nTab; i++) {
|
|
VtsTE* te = VG_(indexXA)( vts_tab, i );
|
|
tl_assert(te->vts);
|
|
tl_assert(te->vts->id == i);
|
|
tl_assert(te->rc > 0); /* 'cos we just GC'd */
|
|
tl_assert(te->freelink == VtsID_INVALID); /* in use */
|
|
tl_assert(te->remap == VtsID_INVALID); /* not relevant */
|
|
}
|
|
|
|
/* And we're done. Bwahahaha. Ha. Ha. Ha. */
|
|
if (VG_(clo_stats)) {
|
|
static UInt ctr = 1;
|
|
tl_assert(nTab > 0);
|
|
VG_(message)(
|
|
Vg_DebugMsg,
|
|
"libhb: VTS PR: #%u before %lu (avg sz %lu) "
|
|
"after %lu (avg sz %lu)\n",
|
|
ctr++,
|
|
nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
|
|
nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
|
|
);
|
|
}
|
|
if (0)
|
|
VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
|
|
"after pruning %lu (avg sz %lu)\n",
|
|
nBeforePruning, nSTSsBefore / nBeforePruning,
|
|
nAfterPruning, nSTSsAfter / nAfterPruning);
|
|
/* ---------- END VTS PRUNING ---------- */
|
|
}
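
/* Added note, not in the original: per the comment on vts_next_GC_at
   above, a caller is expected to invoke this GC only when the freelist
   is empty and vts_tab has grown to at least vts_next_GC_at entries;
   the routine then resets vts_next_GC_at to twice the number of live
   entries (but never below the current table size). */
#if 0
/* hypothetical caller-side check, sketching that policy */
if (vts_tab_freelist == VtsID_INVALID
    && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at)
   vts_tab__do_GC( False/*show_stats*/ );
#endif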
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Vts IDs //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////
|
|
/* A temporary, max-sized VTS which is used as a temporary (the first
|
|
argument) in VTS__singleton, VTS__tick and VTS__join operations. */
|
|
static VTS* temp_max_sized_VTS = NULL;
|
|
|
|
//////////////////////////
|
|
static ULong stats__cmpLEQ_queries = 0;
|
|
static ULong stats__cmpLEQ_misses = 0;
|
|
static ULong stats__join2_queries = 0;
|
|
static ULong stats__join2_misses = 0;
|
|
|
|
static inline UInt ROL32 ( UInt w, Int n ) {
|
|
w = (w << n) | (w >> (32-n));
|
|
return w;
|
|
}
|
|
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
|
|
UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
|
|
return hash % nTab;
|
|
}
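
/* Added note, not in the original: the two different rotation amounts
   presumably make the hash order-sensitive, so (vi1,vi2) and (vi2,vi1)
   usually land in different slots and do not evict each other's
   entries.  Correctness does not depend on this, since the caches
   below compare both stored VtsID fields exactly. */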
|
|
|
|
#define N_CMPLEQ_CACHE 1023
|
|
static
|
|
struct { VtsID vi1; VtsID vi2; Bool leq; }
|
|
cmpLEQ_cache[N_CMPLEQ_CACHE];
|
|
|
|
#define N_JOIN2_CACHE 1023
|
|
static
|
|
struct { VtsID vi1; VtsID vi2; VtsID res; }
|
|
join2_cache[N_JOIN2_CACHE];
|
|
|
|
static void VtsID__invalidate_caches ( void ) {
|
|
Int i;
|
|
for (i = 0; i < N_CMPLEQ_CACHE; i++) {
|
|
cmpLEQ_cache[i].vi1 = VtsID_INVALID;
|
|
cmpLEQ_cache[i].vi2 = VtsID_INVALID;
|
|
cmpLEQ_cache[i].leq = False;
|
|
}
|
|
for (i = 0; i < N_JOIN2_CACHE; i++) {
|
|
join2_cache[i].vi1 = VtsID_INVALID;
|
|
join2_cache[i].vi2 = VtsID_INVALID;
|
|
join2_cache[i].res = VtsID_INVALID;
|
|
}
|
|
}
|
|
//////////////////////////
|
|
|
|
//static Bool VtsID__is_valid ( VtsID vi ) {
|
|
// VtsTE* ve;
|
|
// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
|
|
// return False;
|
|
// ve = VG_(indexXA)( vts_tab, vi );
|
|
// if (!ve->vts)
|
|
// return False;
|
|
// tl_assert(ve->vts->id == vi);
|
|
// return True;
|
|
//}
|
|
|
|
static VTS* VtsID__to_VTS ( VtsID vi ) {
|
|
VtsTE* te = VG_(indexXA)( vts_tab, vi );
|
|
tl_assert(te->vts);
|
|
return te->vts;
|
|
}
|
|
|
|
static void VtsID__pp ( VtsID vi ) {
|
|
HChar buf[100];
|
|
VTS* vts = VtsID__to_VTS(vi);
|
|
VTS__show( buf, sizeof(buf)-1, vts );
|
|
buf[sizeof(buf)-1] = 0;
|
|
VG_(printf)("%s", buf);
|
|
}
|
|
|
|
/* compute partial ordering relation of vi1 and vi2. */
|
|
__attribute__((noinline))
|
|
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
|
|
UInt hash;
|
|
Bool leq;
|
|
VTS *v1, *v2;
|
|
//if (vi1 == vi2) return True;
|
|
tl_assert(vi1 != vi2);
|
|
////++
|
|
stats__cmpLEQ_queries++;
|
|
hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
|
|
if (cmpLEQ_cache[hash].vi1 == vi1
|
|
&& cmpLEQ_cache[hash].vi2 == vi2)
|
|
return cmpLEQ_cache[hash].leq;
|
|
stats__cmpLEQ_misses++;
|
|
////--
|
|
v1 = VtsID__to_VTS(vi1);
|
|
v2 = VtsID__to_VTS(vi2);
|
|
leq = VTS__cmpLEQ( v1, v2 ) == 0;
|
|
////++
|
|
cmpLEQ_cache[hash].vi1 = vi1;
|
|
cmpLEQ_cache[hash].vi2 = vi2;
|
|
cmpLEQ_cache[hash].leq = leq;
|
|
////--
|
|
return leq;
|
|
}
|
|
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
|
|
return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
|
|
}
|
|
|
|
/* compute binary join */
|
|
__attribute__((noinline))
|
|
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
|
|
UInt hash;
|
|
VtsID res;
|
|
VTS *vts1, *vts2;
|
|
//if (vi1 == vi2) return vi1;
|
|
tl_assert(vi1 != vi2);
|
|
////++
|
|
stats__join2_queries++;
|
|
hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
|
|
if (join2_cache[hash].vi1 == vi1
|
|
&& join2_cache[hash].vi2 == vi2)
|
|
return join2_cache[hash].res;
|
|
stats__join2_misses++;
|
|
////--
|
|
vts1 = VtsID__to_VTS(vi1);
|
|
vts2 = VtsID__to_VTS(vi2);
|
|
temp_max_sized_VTS->usedTS = 0;
|
|
VTS__join(temp_max_sized_VTS, vts1,vts2);
|
|
res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
|
|
////++
|
|
join2_cache[hash].vi1 = vi1;
|
|
join2_cache[hash].vi2 = vi2;
|
|
join2_cache[hash].res = res;
|
|
////--
|
|
return res;
|
|
}
|
|
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
|
|
return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
|
|
}
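
/* Added note, not in the original: cached cmpLEQ and join2 results are
   keyed on VtsID values, so they are only meaningful while those IDs
   are stable; vts_tab__do_GC above calls VtsID__invalidate_caches()
   before it frees or remaps any table entries for exactly this
   reason. */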
|
|
|
|
/* create a singleton VTS, namely [thr:1] */
|
|
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
|
|
temp_max_sized_VTS->usedTS = 0;
|
|
VTS__singleton(temp_max_sized_VTS, thr,tym);
|
|
return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
|
|
}
|
|
|
|
/* tick operation, creates value 1 if specified index is absent */
|
|
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
|
|
VTS* vts = VtsID__to_VTS(vi);
|
|
temp_max_sized_VTS->usedTS = 0;
|
|
VTS__tick(temp_max_sized_VTS, idx,vts);
|
|
return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
|
|
}
|
|
|
|
/* index into a VTS (only for assertions) */
|
|
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
|
|
VTS* vts = VtsID__to_VTS(vi);
|
|
return VTS__indexAt_SLOW( vts, idx );
|
|
}
|
|
|
|
/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
|
|
any, really) element in vi1 which is pointwise greater-than the
|
|
corresponding element in vi2. If no such element exists, return
|
|
NULL. This needs to be fairly quick since it is called every time
|
|
a race is detected. */
|
|
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
|
|
{
|
|
VTS *vts1, *vts2;
|
|
Thr* diffthr;
|
|
ThrID diffthrid;
|
|
tl_assert(vi1 != vi2);
|
|
vts1 = VtsID__to_VTS(vi1);
|
|
vts2 = VtsID__to_VTS(vi2);
|
|
tl_assert(vts1 != vts2);
|
|
diffthrid = VTS__cmpLEQ(vts1, vts2);
|
|
diffthr = Thr__from_ThrID(diffthrid);
|
|
tl_assert(diffthr); /* else they are LEQ ! */
|
|
return diffthr;
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Filters //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/* Forget everything we know -- clear the filter and let everything
|
|
through. This needs to be as fast as possible, since it is called
|
|
every time the running thread changes, and every time a thread's
|
|
vector clocks change, which can be quite frequent. The obvious
|
|
fast way to do this is simply to stuff in tags which we know are
|
|
not going to match anything, since they're not aligned to the start
|
|
of a line. */
|
|
static void Filter__clear ( Filter* fi, const HChar* who )
|
|
{
|
|
UWord i;
|
|
if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
|
|
for (i = 0; i < FI_NUM_LINES; i += 8) {
|
|
fi->tags[i+0] = 1; /* impossible value -- cannot match */
|
|
fi->tags[i+1] = 1;
|
|
fi->tags[i+2] = 1;
|
|
fi->tags[i+3] = 1;
|
|
fi->tags[i+4] = 1;
|
|
fi->tags[i+5] = 1;
|
|
fi->tags[i+6] = 1;
|
|
fi->tags[i+7] = 1;
|
|
}
|
|
tl_assert(i == FI_NUM_LINES);
|
|
}
|
|
|
|
/* Clearing an arbitrary range in the filter. Unfortunately
|
|
we have to do this due to core-supplied new/die-mem events. */
|
|
|
|
static void Filter__clear_1byte ( Filter* fi, Addr a )
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0x3 << (2 * (a & 7));
|
|
/* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. clear the bits. */
|
|
UShort u16 = line->u16s[loff];
|
|
line->u16s[loff] = u16 & ~mask; /* clear them */
|
|
} else {
|
|
/* miss. The filter doesn't hold this address, so ignore. */
|
|
}
|
|
}
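
/* Added worked example, not in the original: each byte of an 8-byte
   chunk owns a pair of adjacent bits in the corresponding u16, so for
   an address with (a & 7) == 5 the mask is 0x3 << 10 == 0x0C00,
   matching the enumeration in the comment above. */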
|
|
|
|
static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
line->u16s[loff] = 0;
|
|
} else {
|
|
/* miss. The filter doesn't hold this address, so ignore. */
|
|
}
|
|
}
|
|
|
|
static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
|
|
{
|
|
//VG_(printf)("%lu ", len);
|
|
/* slowly do part preceding 8-alignment */
|
|
while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
|
|
Filter__clear_1byte( fi, a );
|
|
a++;
|
|
len--;
|
|
}
|
|
/* vector loop */
|
|
while (len >= 8) {
|
|
Filter__clear_8bytes_aligned( fi, a );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
/* slowly do tail */
|
|
while (UNLIKELY(len > 0)) {
|
|
Filter__clear_1byte( fi, a );
|
|
a++;
|
|
len--;
|
|
}
|
|
}
|
|
|
|
|
|
/* ------ Read handlers for the filter. ------ */
|
|
|
|
static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
|
|
{
|
|
if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
|
|
return False;
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0xAAAA;
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* all R bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
|
|
{
|
|
if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
|
|
return False;
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
|
|
{
|
|
if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
|
|
return False;
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0xA << (2 * (a & 6));
|
|
/* mask is A000, 0A00, 00A0 or 000A */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
|
|
{
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0x2 << (2 * (a & 7));
|
|
/* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* ------ Write handlers for the filter. ------ */
|
|
|
|
static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
|
|
{
|
|
if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
|
|
return False;
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0xFFFF;
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
|
|
{
|
|
if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
|
|
return False;
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
|
|
{
|
|
if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
|
|
return False;
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0xF << (2 * (a & 6));
|
|
/* mask is F000, 0F00, 00F0 or 000F */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
|
|
{
|
|
{
|
|
Addr atag = FI_GET_TAG(a); /* tag of 'a' */
|
|
UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
|
|
FiLine* line = &fi->lines[lineno];
|
|
UWord loff = (a - atag) / 8;
|
|
UShort mask = 0x3 << (2 * (a & 7));
|
|
/* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
|
|
if (LIKELY( fi->tags[lineno] == atag )) {
|
|
/* hit. check line and update. */
|
|
UShort u16 = line->u16s[loff];
|
|
Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
|
|
line->u16s[loff] = u16 | mask; /* set them */
|
|
return ok;
|
|
} else {
|
|
/* miss. nuke existing line and re-use it. */
|
|
UWord i;
|
|
fi->tags[lineno] = atag;
|
|
for (i = 0; i < FI_LINE_SZB / 8; i++)
|
|
line->u16s[i] = 0;
|
|
line->u16s[loff] = mask;
|
|
return False;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Threads //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/* Maps ThrID values to their Thr*s (which contain ThrID values that
|
|
should point back to the relevant slot in the array. Lowest
|
|
numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
|
|
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
|
|
|
|
/* And a counter to dole out ThrID values. For rationale/background,
|
|
see comments on definition of ScalarTS (far) above. */
|
|
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
|
|
|
|
static ThrID Thr__to_ThrID ( Thr* thr ) {
|
|
return thr->thrid;
|
|
}
|
|
static Thr* Thr__from_ThrID ( UInt thrid ) {
|
|
Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
|
|
tl_assert(thr->thrid == thrid);
|
|
return thr;
|
|
}
|
|
|
|
static Thr* Thr__new ( void )
|
|
{
|
|
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
|
|
thr->viR = VtsID_INVALID;
|
|
thr->viW = VtsID_INVALID;
|
|
thr->llexit_done = False;
|
|
thr->joinedwith_done = False;
|
|
thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
|
|
/* We only really need this at history level 1, but unfortunately
|
|
this routine is called before the command line processing is
|
|
done (sigh), so we can't rely on HG_(clo_history_level) at this
|
|
point. Hence always allocate it. Bah. */
|
|
thr->local_Kws_n_stacks
|
|
= VG_(newXA)( HG_(zalloc),
|
|
"libhb.Thr__new.3 (local_Kws_and_stacks)",
|
|
HG_(free), sizeof(ULong_n_EC) );
|
|
|
|
/* Add this Thr* <-> ThrID binding to the mapping, and
|
|
cross-check */
|
|
if (!thrid_to_thr_map) {
|
|
thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
|
|
HG_(free), sizeof(Thr*) );
|
|
tl_assert(thrid_to_thr_map);
|
|
}
|
|
|
|
if (thrid_counter >= ThrID_MAX_VALID) {
|
|
/* We're hosed. We have to stop. */
|
|
scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
|
|
}
|
|
|
|
thr->thrid = thrid_counter++;
|
|
Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
|
|
tl_assert(ix + 1024 == thr->thrid);
|
|
|
|
return thr;
|
|
}
|
|
|
|
static void note_local_Kw_n_stack_for ( Thr* thr )
|
|
{
|
|
Word nPresent;
|
|
ULong_n_EC pair;
|
|
tl_assert(thr);
|
|
|
|
// We only collect this info at history level 1 (approx)
|
|
if (HG_(clo_history_level) != 1)
|
|
return;
|
|
|
|
/* This is the scalar Kw for thr. */
|
|
pair.ull = VtsID__indexAt( thr->viW, thr );
|
|
pair.ec = main_get_EC( thr );
|
|
tl_assert(pair.ec);
|
|
tl_assert(thr->local_Kws_n_stacks);
|
|
|
|
/* check that we're not adding duplicates */
|
|
nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
|
|
|
|
/* Throw away old stacks, if necessary. We can't accumulate stuff
|
|
indefinitely. */
|
|
if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
|
|
VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
|
|
nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
|
|
if (0)
|
|
VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
|
|
thr, pair.ull, pair.ec );
|
|
}
|
|
|
|
if (nPresent > 0) {
|
|
ULong_n_EC* prevPair
|
|
= (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
|
|
tl_assert( prevPair->ull <= pair.ull );
|
|
}
|
|
|
|
if (nPresent == 0)
|
|
pair.ec = NULL;
|
|
|
|
VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
|
|
|
|
if (0)
|
|
VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
|
|
thr, pair.ull, pair.ec );
|
|
if (0)
|
|
VG_(pp_ExeContext)(pair.ec);
|
|
}
|
|
|
|
static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
|
|
const ULong_n_EC* pair2 )
|
|
{
|
|
if (pair1->ull < pair2->ull) return -1;
|
|
if (pair1->ull > pair2->ull) return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Shadow Values //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
|
|
// hb_zsm.h. We have to do everything else here.
|
|
|
|
/* SVal is 64 bit unsigned int.
|
|
|
|
<---------30---------> <---------30--------->
|
|
00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
|
|
10 X--------------------X XX X--------------------X A: SVal_NOACCESS
|
|
11 0--------------------0 00 0--------------------0 A: SVal_INVALID
|
|
|
|
*/
|
|
#define SVAL_TAGMASK (3ULL << 62)
|
|
|
|
static inline Bool SVal__isC ( SVal s ) {
|
|
return (0ULL << 62) == (s & SVAL_TAGMASK);
|
|
}
|
|
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
|
|
//tl_assert(VtsID__is_valid(rmini));
|
|
//tl_assert(VtsID__is_valid(wmini));
|
|
return (((ULong)rmini) << 32) | ((ULong)wmini);
|
|
}
|
|
static inline VtsID SVal__unC_Rmin ( SVal s ) {
|
|
tl_assert(SVal__isC(s));
|
|
return (VtsID)(s >> 32);
|
|
}
|
|
static inline VtsID SVal__unC_Wmin ( SVal s ) {
|
|
tl_assert(SVal__isC(s));
|
|
return (VtsID)(s & 0xFFFFFFFFULL);
|
|
}
|
|
|
|
static inline Bool SVal__isA ( SVal s ) {
|
|
return (2ULL << 62) == (s & SVAL_TAGMASK);
|
|
}
|
|
static inline SVal SVal__mkA ( void ) {
|
|
return 2ULL << 62;
|
|
}
|
|
|
|
/* Direct callback from lib_zsm. */
|
|
static void SVal__rcinc ( SVal s ) {
|
|
if (SVal__isC(s)) {
|
|
VtsID__rcinc( SVal__unC_Rmin(s) );
|
|
VtsID__rcinc( SVal__unC_Wmin(s) );
|
|
}
|
|
}
|
|
|
|
/* Direct callback from lib_zsm. */
|
|
static void SVal__rcdec ( SVal s ) {
|
|
if (SVal__isC(s)) {
|
|
VtsID__rcdec( SVal__unC_Rmin(s) );
|
|
VtsID__rcdec( SVal__unC_Wmin(s) );
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Change-event map2 //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
|
|
|
|
/* This is in two parts:
|
|
|
|
1. A hash table of RCECs. This is a set of reference-counted stack
|
|
traces. When the reference count of a stack trace becomes zero,
|
|
it is removed from the set and freed up. The intent is to have
|
|
a set of stack traces which can be referred to from (2), but to
|
|
only represent each one once. The set is indexed/searched by
|
|
ordering on the stack trace vectors.
|
|
|
|
2. A SparseWA of OldRefs. These store information about each old
|
|
ref that we need to record. It is indexed by address of the
|
|
location for which the information is recorded. For LRU
|
|
purposes, each OldRef also contains a generation number,
|
|
indicating when it was most recently accessed.
|
|
|
|
The important part of an OldRef is, however, its accs[] array.
|
|
This is an array of N_OLDREF_ACCS which binds (thread, R/W,
|
|
size) triples to RCECs. This allows us to collect the last
|
|
access-traceback by up to N_OLDREF_ACCS different triples for
|
|
this location. The accs[] array is a MTF-array. If a binding
|
|
falls off the end, that's too bad -- we will lose info about
|
|
that triple's access to this location.
|
|
|
|
When the SparseWA becomes too big, we can throw away the OldRefs
|
|
whose generation numbers are below some threshold; hence doing
|
|
approximate LRU discarding. For each discarded OldRef we must
|
|
of course decrement the reference count on the all RCECs it
|
|
refers to, in order that entries from (1) eventually get
|
|
discarded too.
|
|
|
|
A major improvement in reliability of this mechanism would be to
|
|
have a dynamically sized OldRef.accs[] array, so no entries ever
|
|
fall off the end. In investigations (Dec 08) it appears that a
|
|
major cause for the non-availability of conflicting-access traces
|
|
in race reports is caused by the fixed size of this array. I
|
|
suspect for most OldRefs, only a few entries are used, but for a
|
|
minority of cases there is an overflow, leading to info lossage.
|
|
Investigations also suggest this is very workload and scheduling
|
|
sensitive. Therefore a dynamic sizing would be better.
|
|
|
|
However, dynamic sizing would defeat the use of a PoolAllocator
|
|
for OldRef structures. And that's important for performance. So
|
|
it's not straightforward to do.
|
|
*/
|
|
|
|
|
|
static UWord stats__ctxt_rcdec1 = 0;
|
|
static UWord stats__ctxt_rcdec2 = 0;
|
|
static UWord stats__ctxt_rcdec3 = 0;
|
|
static UWord stats__ctxt_rcdec_calls = 0;
|
|
static UWord stats__ctxt_rcdec_discards = 0;
|
|
static UWord stats__ctxt_rcdec1_eq = 0;
|
|
|
|
static UWord stats__ctxt_tab_curr = 0;
|
|
static UWord stats__ctxt_tab_max = 0;
|
|
|
|
static UWord stats__ctxt_tab_qs = 0;
|
|
static UWord stats__ctxt_tab_cmps = 0;
|
|
|
|
|
|
///////////////////////////////////////////////////////
|
|
//// Part (1): A hash table of RCECs
|
|
///
|
|
|
|
#define N_FRAMES 8
|
|
|
|
// (UInt) `echo "Reference Counted Execution Context" | md5sum`
|
|
#define RCEC_MAGIC 0xab88abb2UL
|
|
|
|
//#define N_RCEC_TAB 98317 /* prime */
|
|
#define N_RCEC_TAB 196613 /* prime */
|
|
|
|
typedef
|
|
struct _RCEC {
|
|
UWord magic; /* sanity check only */
|
|
struct _RCEC* next;
|
|
UWord rc;
|
|
UWord rcX; /* used for crosschecking */
|
|
UWord frames_hash; /* hash of all the frames */
|
|
UWord frames[N_FRAMES];
|
|
}
|
|
RCEC;
|
|
|
|
static RCEC** contextTab = NULL; /* hash table of RCEC*s */
|
|
|
|
|
|
/* Gives an arbitrary total order on RCEC .frames fields */
|
|
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
|
|
Word i;
|
|
tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
|
|
tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
|
|
if (ec1->frames_hash < ec2->frames_hash) return -1;
|
|
if (ec1->frames_hash > ec2->frames_hash) return 1;
|
|
for (i = 0; i < N_FRAMES; i++) {
|
|
if (ec1->frames[i] < ec2->frames[i]) return -1;
|
|
if (ec1->frames[i] > ec2->frames[i]) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Dec the ref of this RCEC. */
|
|
static void ctxt__rcdec ( RCEC* ec )
|
|
{
|
|
stats__ctxt_rcdec_calls++;
|
|
tl_assert(ec && ec->magic == RCEC_MAGIC);
|
|
tl_assert(ec->rc > 0);
|
|
ec->rc--;
|
|
}
|
|
|
|
static void ctxt__rcinc ( RCEC* ec )
|
|
{
|
|
tl_assert(ec && ec->magic == RCEC_MAGIC);
|
|
ec->rc++;
|
|
}
|
|
|
|
|
|
//////////// BEGIN RCEC pool allocator
|
|
static PoolAlloc* rcec_pool_allocator;
|
|
|
|
static RCEC* alloc_RCEC ( void ) {
|
|
return VG_(allocEltPA) ( rcec_pool_allocator );
|
|
}
|
|
|
|
static void free_RCEC ( RCEC* rcec ) {
|
|
tl_assert(rcec->magic == RCEC_MAGIC);
|
|
VG_(freeEltPA)( rcec_pool_allocator, rcec );
|
|
}
|
|
//////////// END RCEC pool allocator
|
|
|
|
|
|
/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
|
|
move it one step closer the the front of the list, so as to make
|
|
subsequent searches for it cheaper. */
|
|
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
|
|
{
|
|
RCEC *ec0, *ec1, *ec2;
|
|
if (ec == *headp)
|
|
tl_assert(0); /* already at head of list */
|
|
tl_assert(ec != NULL);
|
|
ec0 = *headp;
|
|
ec1 = NULL;
|
|
ec2 = NULL;
|
|
while (True) {
|
|
if (ec0 == NULL || ec0 == ec) break;
|
|
ec2 = ec1;
|
|
ec1 = ec0;
|
|
ec0 = ec0->next;
|
|
}
|
|
tl_assert(ec0 == ec);
|
|
if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
|
|
RCEC* tmp;
|
|
/* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
|
|
predecessor. Swap ec0 and ec1, that is, move ec0 one step
|
|
closer to the start of the list. */
|
|
tl_assert(ec2->next == ec1);
|
|
tl_assert(ec1->next == ec0);
|
|
tmp = ec0->next;
|
|
ec2->next = ec0;
|
|
ec0->next = ec1;
|
|
ec1->next = tmp;
|
|
}
|
|
else
|
|
if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
|
|
/* it's second in the list. */
|
|
tl_assert(*headp == ec1);
|
|
tl_assert(ec1->next == ec0);
|
|
ec1->next = ec0->next;
|
|
ec0->next = ec1;
|
|
*headp = ec0;
|
|
}
|
|
}
|
|
|
|
|
|
/* Find the given RCEC in the tree, and return a pointer to it. Or,
|
|
if not present, add the given one to the tree (by making a copy of
|
|
it, so the caller can immediately deallocate the original) and
|
|
return a pointer to the copy. The caller can safely have 'example'
|
|
on its stack, since we will always return a pointer to a copy of
|
|
it, not to the original. Note that the inserted node will have .rc
|
|
of zero and so the caller must immediatly increment it. */
|
|
__attribute__((noinline))
|
|
static RCEC* ctxt__find_or_add ( RCEC* example )
|
|
{
|
|
UWord hent;
|
|
RCEC* copy;
|
|
tl_assert(example && example->magic == RCEC_MAGIC);
|
|
tl_assert(example->rc == 0);
|
|
|
|
/* Search the hash table to see if we already have it. */
|
|
stats__ctxt_tab_qs++;
|
|
hent = example->frames_hash % N_RCEC_TAB;
|
|
copy = contextTab[hent];
|
|
while (1) {
|
|
if (!copy) break;
|
|
tl_assert(copy->magic == RCEC_MAGIC);
|
|
stats__ctxt_tab_cmps++;
|
|
if (0 == RCEC__cmp_by_frames(copy, example)) break;
|
|
copy = copy->next;
|
|
}
|
|
|
|
if (copy) {
|
|
tl_assert(copy != example);
|
|
/* optimisation: if it's not at the head of its list, move 1
|
|
step fwds, to make future searches cheaper */
|
|
if (copy != contextTab[hent]) {
|
|
move_RCEC_one_step_forward( &contextTab[hent], copy );
|
|
}
|
|
} else {
|
|
copy = alloc_RCEC();
|
|
tl_assert(copy != example);
|
|
*copy = *example;
|
|
copy->next = contextTab[hent];
|
|
contextTab[hent] = copy;
|
|
stats__ctxt_tab_curr++;
|
|
if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
|
|
stats__ctxt_tab_max = stats__ctxt_tab_curr;
|
|
}
|
|
return copy;
|
|
}
|
|
|
|
static inline UWord ROLW ( UWord w, Int n )
|
|
{
|
|
Int bpw = 8 * sizeof(UWord);
|
|
w = (w << n) | (w >> (bpw-n));
|
|
return w;
|
|
}
|
|
|
|
__attribute__((noinline))
|
|
static RCEC* get_RCEC ( Thr* thr )
|
|
{
|
|
UWord hash, i;
|
|
RCEC example;
|
|
example.magic = RCEC_MAGIC;
|
|
example.rc = 0;
|
|
example.rcX = 0;
|
|
example.next = NULL;
|
|
main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
|
|
hash = 0;
|
|
for (i = 0; i < N_FRAMES; i++) {
|
|
hash ^= example.frames[i];
|
|
hash = ROLW(hash, 19);
|
|
}
|
|
example.frames_hash = hash;
|
|
return ctxt__find_or_add( &example );
|
|
}
|
|
|
|
///////////////////////////////////////////////////////
|
|
//// Part (2):
|
|
/// A SparseWA guest-addr -> OldRef, that refers to (1)
|
|
///
|
|
|
|
// (UInt) `echo "Old Reference Information" | md5sum`
|
|
#define OldRef_MAGIC 0x30b1f075UL
|
|
|
|
/* Records an access: a thread, a context (size & writeness) and the
|
|
number of held locks. The size (1,2,4,8) is encoded as 00 = 1, 01 =
|
|
2, 10 = 4, 11 = 8.
|
|
*/
|
|
typedef
|
|
struct {
|
|
RCEC* rcec;
|
|
WordSetID locksHeldW;
|
|
UInt thrid : SCALARTS_N_THRBITS;
|
|
UInt szLg2B : 2;
|
|
UInt isW : 1;
|
|
}
|
|
Thr_n_RCEC;
|
|
|
|
#define N_OLDREF_ACCS 5
|
|
|
|
typedef
|
|
struct {
|
|
UWord magic; /* sanity check only */
|
|
UWord gen; /* when most recently accessed */
|
|
/* or free list when not in use */
|
|
/* unused slots in this array have .thrid == 0, which is invalid */
|
|
Thr_n_RCEC accs[N_OLDREF_ACCS];
|
|
}
|
|
OldRef;
|
|
|
|
|
|
//////////// BEGIN OldRef pool allocator
|
|
static PoolAlloc* oldref_pool_allocator;
|
|
|
|
static OldRef* alloc_OldRef ( void ) {
|
|
return VG_(allocEltPA) ( oldref_pool_allocator );
|
|
}
|
|
|
|
static void free_OldRef ( OldRef* r ) {
|
|
tl_assert(r->magic == OldRef_MAGIC);
|
|
VG_(freeEltPA)( oldref_pool_allocator, r );
|
|
}
|
|
//////////// END OldRef pool allocator
|
|
|
|
|
|
static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
|
|
static UWord oldrefGen = 0; /* current LRU generation # */
|
|
static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
|
|
static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
|
|
|
|
inline static UInt min_UInt ( UInt a, UInt b ) {
|
|
return a < b ? a : b;
|
|
}
|
|
|
|
/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
|
|
first interval is lower, 1 if the first interval is higher, and 0
|
|
if there is any overlap. Redundant paranoia with casting is there
|
|
following what looked distinctly like a bug in gcc-4.1.2, in which
|
|
some of the comparisons were done signedly instead of
|
|
unsignedly. */
|
|
/* Copied from exp-ptrcheck/sg_main.c */
|
|
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
|
|
Addr a2, SizeT n2 ) {
|
|
UWord a1w = (UWord)a1;
|
|
UWord n1w = (UWord)n1;
|
|
UWord a2w = (UWord)a2;
|
|
UWord n2w = (UWord)n2;
|
|
tl_assert(n1w > 0 && n2w > 0);
|
|
if (a1w + n1w <= a2w) return -1L;
|
|
if (a2w + n2w <= a1w) return 1L;
|
|
return 0;
|
|
}
|
|
|
|
static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
|
|
{
|
|
OldRef* ref;
|
|
RCEC* rcec;
|
|
Word i, j;
|
|
UWord keyW, valW;
|
|
Bool b;
|
|
|
|
tl_assert(thr);
|
|
ThrID thrid = thr->thrid;
|
|
tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
|
|
|
|
WordSetID locksHeldW = thr->hgthread->locksetW;
|
|
|
|
rcec = get_RCEC( thr );
|
|
ctxt__rcinc(rcec);
|
|
|
|
UInt szLg2B = 0;
|
|
switch (szB) {
|
|
/* This doesn't look particularly branch-predictor friendly. */
|
|
case 1: szLg2B = 0; break;
|
|
case 2: szLg2B = 1; break;
|
|
case 4: szLg2B = 2; break;
|
|
case 8: szLg2B = 3; break;
|
|
default: tl_assert(0);
|
|
}
|
|
|
|
/* Look in the map to see if we already have a record for this
|
|
address. */
|
|
b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
|
|
|
|
if (b) {
|
|
|
|
/* We already have a record for this address. We now need to
|
|
see if we have a stack trace pertaining to this (thrid, R/W,
|
|
size) triple. */
|
|
tl_assert(keyW == a);
|
|
ref = (OldRef*)valW;
|
|
tl_assert(ref->magic == OldRef_MAGIC);
|
|
|
|
for (i = 0; i < N_OLDREF_ACCS; i++) {
|
|
if (ref->accs[i].thrid != thrid)
|
|
continue;
|
|
if (ref->accs[i].szLg2B != szLg2B)
|
|
continue;
|
|
if (ref->accs[i].isW != (UInt)(isW & 1))
|
|
continue;
|
|
/* else we have a match, so stop looking. */
|
|
break;
|
|
}
|
|
|
|
if (i < N_OLDREF_ACCS) {
|
|
/* thread 'thr' has an entry at index 'i'. Update its RCEC. */
|
|
if (i > 0) {
|
|
Thr_n_RCEC tmp = ref->accs[i-1];
|
|
ref->accs[i-1] = ref->accs[i];
|
|
ref->accs[i] = tmp;
|
|
i--;
|
|
}
|
|
if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
|
|
stats__ctxt_rcdec1++;
|
|
ctxt__rcdec( ref->accs[i].rcec );
|
|
tl_assert(ref->accs[i].thrid == thrid);
|
|
/* Update the RCEC and the W-held lockset. */
|
|
ref->accs[i].rcec = rcec;
|
|
ref->accs[i].locksHeldW = locksHeldW;
|
|
} else {
|
|
/* No entry for this (thread, R/W, size, nWHeld) quad.
|
|
Shuffle all of them down one slot, and put the new entry
|
|
at the start of the array. */
|
|
if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
|
|
/* the last slot is in use. We must dec the rc on the
|
|
associated rcec. */
|
|
tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
|
|
stats__ctxt_rcdec2++;
|
|
if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
|
|
VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
|
|
ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
|
|
} else {
|
|
tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
|
|
}
|
|
for (j = N_OLDREF_ACCS-1; j >= 1; j--)
|
|
ref->accs[j] = ref->accs[j-1];
|
|
ref->accs[0].thrid = thrid;
|
|
ref->accs[0].szLg2B = szLg2B;
|
|
ref->accs[0].isW = (UInt)(isW & 1);
|
|
ref->accs[0].locksHeldW = locksHeldW;
|
|
ref->accs[0].rcec = rcec;
|
|
/* thrid==0 is used to signify an empty slot, so we can't
|
|
add zero thrid (such a ThrID is invalid anyway). */
|
|
/* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
|
|
}
|
|
|
|
ref->gen = oldrefGen;
|
|
|
|
} else {
|
|
|
|
/* We don't have a record for this address. Create a new one. */
|
|
if (oldrefTreeN >= oldrefGenIncAt) {
|
|
oldrefGen++;
|
|
oldrefGenIncAt = oldrefTreeN + 50000;
|
|
if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
|
|
oldrefGen, oldrefTreeN );
|
|
}
|
|
|
|
ref = alloc_OldRef();
|
|
ref->magic = OldRef_MAGIC;
|
|
ref->gen = oldrefGen;
|
|
ref->accs[0].thrid = thrid;
|
|
ref->accs[0].szLg2B = szLg2B;
|
|
ref->accs[0].isW = (UInt)(isW & 1);
|
|
ref->accs[0].locksHeldW = locksHeldW;
|
|
ref->accs[0].rcec = rcec;
|
|
|
|
/* thrid==0 is used to signify an empty slot, so we can't
|
|
add zero thrid (such a ThrID is invalid anyway). */
|
|
/* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
|
|
|
|
/* Clear out the rest of the entries */
|
|
for (j = 1; j < N_OLDREF_ACCS; j++) {
|
|
ref->accs[j].rcec = NULL;
|
|
ref->accs[j].thrid = 0;
|
|
ref->accs[j].szLg2B = 0;
|
|
ref->accs[j].isW = 0;
|
|
ref->accs[j].locksHeldW = 0;
|
|
}
|
|
VG_(addToSWA)( oldrefTree, a, (UWord)ref );
|
|
oldrefTreeN++;
|
|
|
|
}
|
|
}
|
|
|
|
|
|
/* Extract info from the conflicting-access machinery. */
|
|
Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
|
|
/*OUT*/Thr** resThr,
|
|
/*OUT*/SizeT* resSzB,
|
|
/*OUT*/Bool* resIsW,
|
|
/*OUT*/WordSetID* locksHeldW,
|
|
Thr* thr, Addr a, SizeT szB, Bool isW )
|
|
{
|
|
Word i, j;
|
|
OldRef* ref;
|
|
UWord keyW, valW;
|
|
Bool b;
|
|
|
|
ThrID cand_thrid;
|
|
RCEC* cand_rcec;
|
|
Bool cand_isW;
|
|
SizeT cand_szB;
|
|
WordSetID cand_locksHeldW;
|
|
Addr cand_a;
|
|
|
|
Addr toCheck[15];
|
|
Int nToCheck = 0;
|
|
|
|
tl_assert(thr);
|
|
tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
|
|
ThrID thrid = thr->thrid;
|
|
|
|
toCheck[nToCheck++] = a;
|
|
for (i = -7; i < (Word)szB; i++) {
|
|
if (i != 0)
|
|
toCheck[nToCheck++] = a + i;
|
|
}
|
|
tl_assert(nToCheck <= 15);
|
|
|
|
/* Now see if we can find a suitable matching event for
|
|
any of the addresses in toCheck[0 .. nToCheck-1]. */
|
|
for (j = 0; j < nToCheck; j++) {
|
|
|
|
cand_a = toCheck[j];
|
|
// VG_(printf)("test %ld %p\n", j, cand_a);
|
|
|
|
b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
|
|
if (!b)
|
|
continue;
|
|
|
|
ref = (OldRef*)valW;
|
|
tl_assert(keyW == cand_a);
|
|
tl_assert(ref->magic == OldRef_MAGIC);
|
|
tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
|
|
|
|
cand_thrid = 0; /* invalid; see comments in event_map_bind */
|
|
cand_rcec = NULL;
|
|
cand_isW = False;
|
|
cand_szB = 0;
|
|
cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
|
|
|
|
for (i = 0; i < N_OLDREF_ACCS; i++) {
|
|
Thr_n_RCEC* cand = &ref->accs[i];
|
|
cand_rcec = cand->rcec;
|
|
cand_thrid = cand->thrid;
|
|
cand_isW = (Bool)cand->isW;
|
|
cand_szB = 1 << cand->szLg2B;
|
|
cand_locksHeldW = cand->locksHeldW;
|
|
|
|
if (cand_thrid == 0)
|
|
/* This slot isn't in use. Ignore it. */
|
|
continue;
|
|
|
|
if (cand_thrid == thrid)
|
|
/* This is an access by the same thread, but we're only
|
|
interested in accesses from other threads. Ignore. */
|
|
continue;
|
|
|
|
if ((!cand_isW) && (!isW))
|
|
/* We don't want to report a read racing against another
|
|
read; that's stupid. So in this case move on. */
|
|
continue;
|
|
|
|
if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
|
|
/* No overlap with the access we're asking about. Ignore. */
|
|
continue;
|
|
|
|
/* We have a match. Stop searching. */
|
|
break;
|
|
}
|
|
|
|
tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
|
|
|
|
if (i < N_OLDREF_ACCS) {
|
|
Int n, maxNFrames;
|
|
/* return with success */
|
|
tl_assert(cand_thrid);
|
|
tl_assert(cand_rcec);
|
|
tl_assert(cand_rcec->magic == RCEC_MAGIC);
|
|
tl_assert(cand_szB >= 1);
|
|
/* Count how many non-zero frames we have. */
|
|
maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
|
|
for (n = 0; n < maxNFrames; n++) {
|
|
if (0 == cand_rcec->frames[n]) break;
|
|
}
|
|
*resEC = VG_(make_ExeContext_from_StackTrace)
|
|
(cand_rcec->frames, n);
|
|
*resThr = Thr__from_ThrID(cand_thrid);
|
|
*resSzB = cand_szB;
|
|
*resIsW = cand_isW;
|
|
*locksHeldW = cand_locksHeldW;
|
|
return True;
|
|
}
|
|
|
|
/* consider next address in toCheck[] */
|
|
} /* for (j = 0; j < nToCheck; j++) */
|
|
|
|
/* really didn't find anything. */
|
|
return False;
|
|
}
|
|
|
|
static void event_map_init ( void )
|
|
{
|
|
Word i;
|
|
|
|
/* Context (RCEC) pool allocator */
|
|
rcec_pool_allocator = VG_(newPA) (
|
|
sizeof(RCEC),
|
|
1000 /* RCECs per pool */,
|
|
HG_(zalloc),
|
|
"libhb.event_map_init.1 (RCEC pools)",
|
|
HG_(free)
|
|
);
|
|
|
|
/* Context table */
|
|
tl_assert(!contextTab);
|
|
contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
|
|
N_RCEC_TAB * sizeof(RCEC*) );
|
|
tl_assert(contextTab);
|
|
for (i = 0; i < N_RCEC_TAB; i++)
|
|
contextTab[i] = NULL;
|
|
|
|
/* Oldref pool allocator */
|
|
oldref_pool_allocator = VG_(newPA)(
|
|
sizeof(OldRef),
|
|
1000 /* OldRefs per pool */,
|
|
HG_(zalloc),
|
|
"libhb.event_map_init.3 (OldRef pools)",
|
|
HG_(free)
|
|
);
|
|
|
|
/* Oldref tree */
|
|
tl_assert(!oldrefTree);
|
|
oldrefTree = VG_(newSWA)(
|
|
HG_(zalloc),
|
|
"libhb.event_map_init.4 (oldref tree)",
|
|
HG_(free)
|
|
);
|
|
tl_assert(oldrefTree);
|
|
|
|
oldrefGen = 0;
|
|
oldrefGenIncAt = 0;
|
|
oldrefTreeN = 0;
|
|
}
|
|
|
|
static void event_map__check_reference_counts ( Bool before )
|
|
{
|
|
RCEC* rcec;
|
|
OldRef* oldref;
|
|
Word i;
|
|
UWord nEnts = 0;
|
|
UWord keyW, valW;
|
|
|
|
/* Set the 'check' reference counts to zero. Also, optionally
|
|
check that the real reference counts are non-zero. We allow
|
|
these to fall to zero before a GC, but the GC must get rid of
|
|
all those that are zero, hence none should be zero after a
|
|
GC. */
|
|
for (i = 0; i < N_RCEC_TAB; i++) {
|
|
for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
|
|
nEnts++;
|
|
tl_assert(rcec);
|
|
tl_assert(rcec->magic == RCEC_MAGIC);
|
|
if (!before)
|
|
tl_assert(rcec->rc > 0);
|
|
rcec->rcX = 0;
|
|
}
|
|
}
|
|
|
|
/* check that the stats are sane */
|
|
tl_assert(nEnts == stats__ctxt_tab_curr);
|
|
tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
|
|
|
|
/* visit all the referencing points, inc check ref counts */
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
oldref = (OldRef*)valW;
|
|
tl_assert(oldref->magic == OldRef_MAGIC);
|
|
for (i = 0; i < N_OLDREF_ACCS; i++) {
|
|
ThrID aThrID = oldref->accs[i].thrid;
|
|
RCEC* aRef = oldref->accs[i].rcec;
|
|
if (aThrID != 0) {
|
|
tl_assert(aRef);
|
|
tl_assert(aRef->magic == RCEC_MAGIC);
|
|
aRef->rcX++;
|
|
} else {
|
|
tl_assert(!aRef);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* compare check ref counts with actual */
|
|
for (i = 0; i < N_RCEC_TAB; i++) {
|
|
for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
|
|
tl_assert(rcec->rc == rcec->rcX);
|
|
}
|
|
}
|
|
}
|
|
|
|
__attribute__((noinline))
|
|
static void event_map_maybe_GC ( void )
|
|
{
|
|
OldRef* oldref;
|
|
UWord keyW, valW, retained, maxGen;
|
|
XArray* refs2del;
|
|
Word i, j, n2del;
|
|
|
|
UWord* genMap = NULL;
|
|
UWord genMap_min = 0;
|
|
UWord genMap_size = 0;
|
|
|
|
if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
|
|
return;
|
|
|
|
if (0)
|
|
VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
|
|
|
|
/* Check for sane command line params. Limit values must match
|
|
those in hg_process_cmd_line_option. */
|
|
tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
|
|
tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
|
|
|
|
/* Check our counting is sane (expensive) */
|
|
if (CHECK_CEM)
|
|
tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
|
|
|
|
/* Check the reference counts (expensive) */
|
|
if (CHECK_CEM)
|
|
event_map__check_reference_counts( True/*before*/ );
|
|
|
|
/* Compute the distribution of generation values in the ref tree.
|
|
There are likely only to be a few different generation numbers
|
|
in the whole tree, but we don't know what they are. Hence use a
|
|
dynamically resized array of counters. The array is genMap[0
|
|
.. genMap_size-1], where genMap[0] is the count for the
|
|
generation number genMap_min, genMap[1] is the count for
|
|
genMap_min+1, etc. If a new number is seen outside the range
|
|
[genMap_min .. genMap_min + genMap_size - 1] then the array is
|
|
copied into a larger array, and genMap_min and genMap_size are
|
|
adjusted accordingly. */
|
|
|
|
/* genMap :: generation-number -> count-of-nodes-with-that-number */
|
|
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
|
|
UWord ea, key;
|
|
oldref = (OldRef*)valW;
|
|
key = oldref->gen;
|
|
|
|
/* BEGIN find 'ea', which is the index in genMap holding the
|
|
count for generation number 'key'. */
|
|
if (UNLIKELY(genMap == NULL)) {
|
|
/* deal with the first key to be seen, so that the following
|
|
cases don't need to handle the complexity of a NULL count
|
|
array. */
|
|
genMap_min = key;
|
|
genMap_size = 1;
|
|
genMap = HG_(zalloc)( "libhb.emmG.1a",
|
|
genMap_size * sizeof(UWord) );
|
|
ea = 0;
|
|
if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
|
|
key, genMap_min, genMap_min+genMap_size- 1 );
|
|
}
|
|
else
|
|
if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
|
|
/* this is the expected (almost-always-happens) case: 'key'
|
|
is already mapped in the array. */
|
|
ea = key - genMap_min;
|
|
}
|
|
else
|
|
if (key < genMap_min) {
|
|
/* 'key' appears before the start of the current array.
|
|
Extend the current array by allocating a larger one and
|
|
copying the current one to the upper end of it. */
|
|
Word more;
|
|
UWord* map2;
|
|
more = genMap_min - key;
|
|
tl_assert(more > 0);
|
|
map2 = HG_(zalloc)( "libhb.emmG.1b",
|
|
(genMap_size + more) * sizeof(UWord) );
|
|
VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
|
|
HG_(free)( genMap );
|
|
genMap = map2;
|
|
genMap_size += more;
|
|
genMap_min -= more;
|
|
ea = 0;
|
|
tl_assert(genMap_min == key);
|
|
if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
|
|
key, genMap_min, genMap_min+genMap_size- 1 );
|
|
}
|
|
else {
|
|
/* 'key' appears after the end of the current array. Extend
|
|
the current array by allocating a larger one and copying
|
|
the current one to the lower end of it. */
|
|
Word more;
|
|
UWord* map2;
|
|
tl_assert(key >= genMap_min + genMap_size);
|
|
more = key - (genMap_min + genMap_size) + 1;
|
|
tl_assert(more > 0);
|
|
map2 = HG_(zalloc)( "libhb.emmG.1c",
|
|
(genMap_size + more) * sizeof(UWord) );
|
|
VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
|
|
HG_(free)( genMap );
|
|
genMap = map2;
|
|
genMap_size += more;
|
|
ea = genMap_size - 1;;
|
|
tl_assert(genMap_min + genMap_size - 1 == key);
|
|
if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
|
|
key, genMap_min, genMap_min+genMap_size- 1 );
|
|
}
|
|
/* END find 'ea' from 'key' */
|
|
|
|
tl_assert(ea >= 0 && ea < genMap_size);
|
|
/* and the whole point of this elaborate computation of 'ea' is .. */
|
|
genMap[ea]++;
|
|
}
|
|
|
|
tl_assert(genMap);
|
|
tl_assert(genMap_size > 0);
|
|
|
|
/* Sanity check what we just computed */
|
|
{ UWord sum = 0;
|
|
for (i = 0; i < genMap_size; i++) {
|
|
if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
|
|
i + genMap_min, genMap[i] );
|
|
sum += genMap[i];
|
|
}
|
|
tl_assert(sum == oldrefTreeN);
|
|
}
|
|
|
|
/* Figure out how many generations to throw away */
|
|
retained = oldrefTreeN;
|
|
maxGen = 0;
|
|
|
|
for (i = 0; i < genMap_size; i++) {
|
|
keyW = i + genMap_min;
|
|
valW = genMap[i];
|
|
tl_assert(keyW > 0); /* can't allow a generation # 0 */
|
|
if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
|
|
tl_assert(keyW >= maxGen);
|
|
tl_assert(retained >= valW);
|
|
if (retained - valW
|
|
> (UWord)(HG_(clo_conflict_cache_size)
|
|
* EVENT_MAP_GC_DISCARD_FRACTION)) {
|
|
retained -= valW;
|
|
maxGen = keyW;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
HG_(free)(genMap);
|
|
|
|
tl_assert(retained >= 0 && retained <= oldrefTreeN);
|
|
|
|
/* Now make up a big list of the oldrefTree entries we want to
|
|
delete. We can't simultaneously traverse the tree and delete
|
|
stuff from it, so first we need to copy them off somewhere
|
|
else. (sigh) */
|
|
refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
|
|
HG_(free), sizeof(Addr) );
|
|
|
|
if (retained < oldrefTreeN) {
|
|
|
|
/* This is the normal (expected) case. We discard any ref whose
|
|
generation number <= maxGen. */
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
oldref = (OldRef*)valW;
|
|
tl_assert(oldref->magic == OldRef_MAGIC);
|
|
if (oldref->gen <= maxGen) {
|
|
VG_(addToXA)( refs2del, &keyW );
|
|
}
|
|
}
|
|
if (VG_(clo_stats)) {
|
|
VG_(message)(Vg_DebugMsg,
|
|
"libhb: EvM GC: delete generations %lu and below, "
|
|
"retaining %lu entries\n",
|
|
maxGen, retained );
|
|
}
|
|
|
|
} else {
|
|
|
|
static UInt rand_seed = 0; /* leave as static */
|
|
|
|
/* Degenerate case: there's only one generation in the entire
|
|
tree, so we need to have some other way of deciding which
|
|
refs to throw away. Just throw out half of them randomly. */
|
|
tl_assert(retained == oldrefTreeN);
|
|
VG_(initIterSWA)( oldrefTree );
|
|
while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
|
|
UInt n;
|
|
oldref = (OldRef*)valW;
|
|
tl_assert(oldref->magic == OldRef_MAGIC);
|
|
n = VG_(random)( &rand_seed );
|
|
if ((n & 0xFFF) < 0x800) {
|
|
VG_(addToXA)( refs2del, &keyW );
|
|
retained--;
|
|
}
|
|
}
|
|
if (VG_(clo_stats)) {
|
|
VG_(message)(Vg_DebugMsg,
|
|
"libhb: EvM GC: randomly delete half the entries, "
|
|
"retaining %lu entries\n",
|
|
retained );
|
|
}
|
|
|
|
}
|
|
|
|
n2del = VG_(sizeXA)( refs2del );
|
|
tl_assert(n2del == (Word)(oldrefTreeN - retained));
|
|
|
|
if (0) VG_(printf)("%s","deleting entries\n");
|
|
for (i = 0; i < n2del; i++) {
|
|
Bool b;
|
|
Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
|
|
b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
|
|
tl_assert(b);
|
|
tl_assert(keyW == ga2del);
|
|
oldref = (OldRef*)valW;
|
|
for (j = 0; j < N_OLDREF_ACCS; j++) {
|
|
ThrID aThrID = oldref->accs[j].thrid;
|
|
RCEC* aRef = oldref->accs[j].rcec;
|
|
if (aRef) {
|
|
tl_assert(aThrID != 0);
|
|
stats__ctxt_rcdec3++;
|
|
ctxt__rcdec( aRef );
|
|
} else {
|
|
tl_assert(aThrID == 0);
|
|
}
|
|
}
|
|
|
|
free_OldRef( oldref );
|
|
}
|
|
|
|
VG_(deleteXA)( refs2del );
|
|
|
|
tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
|
|
|
|
oldrefTreeN = retained;
|
|
oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
|
|
|
|
/* Throw away all RCECs with zero reference counts */
|
|
for (i = 0; i < N_RCEC_TAB; i++) {
|
|
RCEC** pp = &contextTab[i];
|
|
RCEC* p = *pp;
|
|
while (p) {
|
|
if (p->rc == 0) {
|
|
*pp = p->next;
|
|
free_RCEC(p);
|
|
p = *pp;
|
|
tl_assert(stats__ctxt_tab_curr > 0);
|
|
stats__ctxt_tab_curr--;
|
|
} else {
|
|
pp = &p->next;
|
|
p = p->next;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Check the reference counts (expensive) */
|
|
if (CHECK_CEM)
|
|
event_map__check_reference_counts( False/*after*/ );
|
|
|
|
//if (0)
|
|
//VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
|
|
// VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
|
|
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Core MSM //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
|
|
Nov 08, and again after [...],
|
|
June 09. */
|
|
|
|
static ULong stats__msmcread = 0;
|
|
static ULong stats__msmcread_change = 0;
|
|
static ULong stats__msmcwrite = 0;
|
|
static ULong stats__msmcwrite_change = 0;
|
|
|
|
/* Some notes on the H1 history mechanism:
|
|
|
|
Transition rules are:
|
|
|
|
read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
|
|
write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
|
|
|
|
After any access by a thread T to a location L, L's constraint pair
|
|
(Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
|
|
|
|
After a race by thread T conflicting with some previous access by
|
|
some other thread U, for a location with constraint (before
|
|
processing the later access) (Cr,Cw), then Cw[U] is the segment in
|
|
which the previously access lies.
|
|
|
|
Hence in record_race_info, we pass in Cfailed and Kfailed, which
|
|
are compared so as to find out which thread(s) this access
|
|
conflicts with. Once that is established, we also require the
|
|
pre-update Cw for the location, so we can index into it for those
|
|
threads, to get the scalar clock values for the point at which the
|
|
former accesses were made. (In fact we only bother to do any of
|
|
this for an arbitrarily chosen one of the conflicting threads, as
|
|
that's simpler, it avoids flooding the user with vast amounts of
|
|
mostly useless information, and because the program is wrong if it
|
|
contains any races at all -- so we don't really need to show all
|
|
conflicting access pairs initially, so long as we only show none if
|
|
none exist).
|
|
|
|
---
|
|
|
|
That requires the auxiliary proof that
|
|
|
|
(Cr `join` Kw)[T] == Kw[T]
|
|
|
|
Why should that be true? Because for any thread T, Kw[T] >= the
|
|
scalar clock value for T known by any other thread. In other
|
|
words, because T's value for its own scalar clock is at least as up
|
|
to date as the value for it known by any other thread (that is true
|
|
for both the R- and W- scalar clocks). Hence no other thread will
|
|
be able to feed in a value for that element (indirectly via a
|
|
constraint) which will exceed Kw[T], and hence the join cannot
|
|
cause that particular element to advance.
|
|
*/
|
|
|
|
__attribute__((noinline))
|
|
static void record_race_info ( Thr* acc_thr,
|
|
Addr acc_addr, SizeT szB, Bool isWrite,
|
|
VtsID Cfailed,
|
|
VtsID Kfailed,
|
|
VtsID Cw )
|
|
{
|
|
/* Call here to report a race. We just hand it onwards to
|
|
HG_(record_error_Race). If that in turn discovers that the
|
|
error is going to be collected, then, at history_level 2, that
|
|
queries the conflicting-event map. The alternative would be to
|
|
query it right here. But that causes a lot of pointless queries
|
|
for errors which will shortly be discarded as duplicates, and
|
|
can become a performance overhead; so we defer the query until
|
|
we know the error is not a duplicate. */
|
|
|
|
/* Stacks for the bounds of the (or one of the) conflicting
|
|
segment(s). These are only set at history_level 1. */
|
|
ExeContext* hist1_seg_start = NULL;
|
|
ExeContext* hist1_seg_end = NULL;
|
|
Thread* hist1_conf_thr = NULL;
|
|
|
|
tl_assert(acc_thr);
|
|
tl_assert(acc_thr->hgthread);
|
|
tl_assert(acc_thr->hgthread->hbthr == acc_thr);
|
|
tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
|
|
|
|
if (HG_(clo_history_level) == 1) {
|
|
Bool found;
|
|
Word firstIx, lastIx;
|
|
ULong_n_EC key;
|
|
|
|
/* At history_level 1, we must round up the relevant stack-pair
|
|
for the conflicting segment right now. This is because
|
|
deferring it is complex; we can't (easily) put Kfailed and
|
|
Cfailed into the XError and wait for later without
|
|
getting tied up in difficulties with VtsID reference
|
|
counting. So just do it now. */
|
|
Thr* confThr;
|
|
ULong confTym = 0;
|
|
/* Which thread are we in conflict with? There may be more than
|
|
one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
|
|
(in fact it's the one with the lowest Thr* value). */
|
|
confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
|
|
/* This must exist! since if it was NULL then there's no
|
|
conflict (semantics of return value of
|
|
VtsID__findFirst_notLEQ), and msmc{read,write}, which has
|
|
called us, just checked exactly this -- that there was in
|
|
fact a race. */
|
|
tl_assert(confThr);
|
|
|
|
/* Get the scalar clock value that the conflicting thread
|
|
introduced into the constraint. A careful examination of the
|
|
base machine rules shows that this must be the same as the
|
|
conflicting thread's scalar clock when it created this
|
|
constraint. Hence we know the scalar clock of the
|
|
conflicting thread when the conflicting access was made. */
|
|
confTym = VtsID__indexAt( Cfailed, confThr );
|
|
|
|
/* Using this scalar clock, index into the conflicting thread's
|
|
collection of stack traces made each time its vector clock
|
|
(hence its scalar clock) changed. This gives the stack
|
|
traces at the start and end of the conflicting segment (well,
|
|
as per comment just above, of one of the conflicting
|
|
segments, if there are more than one). */
|
|
key.ull = confTym;
|
|
key.ec = NULL;
|
|
/* tl_assert(confThr); -- asserted just above */
|
|
tl_assert(confThr->local_Kws_n_stacks);
|
|
firstIx = lastIx = 0;
|
|
found = VG_(lookupXA_UNSAFE)(
|
|
confThr->local_Kws_n_stacks,
|
|
&key, &firstIx, &lastIx,
|
|
(XACmpFn_t)cmp__ULong_n_EC__by_ULong
|
|
);
|
|
if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
|
|
"confTym %llu found %d (%lu,%lu)\n",
|
|
Cfailed, Kfailed, Cw,
|
|
confThr, confTym, found, firstIx, lastIx);
|
|
/* We can't indefinitely collect stack traces at VTS
|
|
transitions, since we'd eventually run out of memory. Hence
|
|
note_local_Kw_n_stack_for will eventually throw away old
|
|
ones, which in turn means we might fail to find index value
|
|
confTym in the array. */
|
|
if (found) {
|
|
ULong_n_EC *pair_start, *pair_end;
|
|
pair_start
|
|
= (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
|
|
hist1_seg_start = pair_start->ec;
|
|
if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
|
|
pair_end
|
|
= (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
|
|
lastIx+1 );
|
|
/* from properties of VG_(lookupXA) and the comparison fn used: */
|
|
tl_assert(pair_start->ull < pair_end->ull);
|
|
hist1_seg_end = pair_end->ec;
|
|
/* Could do a bit better here. It may be that pair_end
|
|
doesn't have a stack, but the following entries in the
|
|
array have the same scalar Kw and to have a stack. So
|
|
we should search a bit further along the array than
|
|
lastIx+1 if hist1_seg_end is NULL. */
|
|
} else {
|
|
if (!confThr->llexit_done)
|
|
hist1_seg_end = main_get_EC( confThr );
|
|
}
|
|
// seg_start could be NULL iff this is the first stack in the thread
|
|
//if (seg_start) VG_(pp_ExeContext)(seg_start);
|
|
//if (seg_end) VG_(pp_ExeContext)(seg_end);
|
|
hist1_conf_thr = confThr->hgthread;
|
|
}
|
|
}
|
|
|
|
HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
|
|
szB, isWrite,
|
|
hist1_conf_thr, hist1_seg_start, hist1_seg_end );
|
|
}
|
|
|
|
static Bool is_sane_SVal_C ( SVal sv ) {
|
|
Bool leq;
|
|
if (!SVal__isC(sv)) return True;
|
|
leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
|
|
return leq;
|
|
}
|
|
|
|
|
|
/* Compute new state following a read */
|
|
static inline SVal msmcread ( SVal svOld,
|
|
/* The following are only needed for
|
|
creating error reports. */
|
|
Thr* acc_thr,
|
|
Addr acc_addr, SizeT szB )
|
|
{
|
|
SVal svNew = SVal_INVALID;
|
|
stats__msmcread++;
|
|
|
|
/* Redundant sanity check on the constraints */
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svOld));
|
|
}
|
|
|
|
if (LIKELY(SVal__isC(svOld))) {
|
|
VtsID tviR = acc_thr->viR;
|
|
VtsID tviW = acc_thr->viW;
|
|
VtsID rmini = SVal__unC_Rmin(svOld);
|
|
VtsID wmini = SVal__unC_Wmin(svOld);
|
|
Bool leq = VtsID__cmpLEQ(rmini,tviR);
|
|
if (LIKELY(leq)) {
|
|
/* no race */
|
|
/* Note: RWLOCK subtlety: use tviW, not tviR */
|
|
svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
|
|
goto out;
|
|
} else {
|
|
/* assert on sanity of constraints. */
|
|
Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
|
|
tl_assert(leqxx);
|
|
// same as in non-race case
|
|
svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
|
|
record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
|
|
rmini, /* Cfailed */
|
|
tviR, /* Kfailed */
|
|
wmini /* Cw */ );
|
|
goto out;
|
|
}
|
|
}
|
|
if (SVal__isA(svOld)) {
|
|
/* reading no-access memory (sigh); leave unchanged */
|
|
/* check for no pollution */
|
|
tl_assert(svOld == SVal_NOACCESS);
|
|
svNew = SVal_NOACCESS;
|
|
goto out;
|
|
}
|
|
if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
|
|
tl_assert(0);
|
|
|
|
out:
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svNew));
|
|
}
|
|
if (UNLIKELY(svNew != svOld)) {
|
|
tl_assert(svNew != SVal_INVALID);
|
|
if (HG_(clo_history_level) >= 2
|
|
&& SVal__isC(svOld) && SVal__isC(svNew)) {
|
|
event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
|
|
stats__msmcread_change++;
|
|
}
|
|
}
|
|
return svNew;
|
|
}
|
|
|
|
|
|
/* Compute new state following a write */
|
|
static inline SVal msmcwrite ( SVal svOld,
|
|
/* The following are only needed for
|
|
creating error reports. */
|
|
Thr* acc_thr,
|
|
Addr acc_addr, SizeT szB )
|
|
{
|
|
SVal svNew = SVal_INVALID;
|
|
stats__msmcwrite++;
|
|
|
|
/* Redundant sanity check on the constraints */
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svOld));
|
|
}
|
|
|
|
if (LIKELY(SVal__isC(svOld))) {
|
|
VtsID tviW = acc_thr->viW;
|
|
VtsID wmini = SVal__unC_Wmin(svOld);
|
|
Bool leq = VtsID__cmpLEQ(wmini,tviW);
|
|
if (LIKELY(leq)) {
|
|
/* no race */
|
|
svNew = SVal__mkC( tviW, tviW );
|
|
goto out;
|
|
} else {
|
|
VtsID rmini = SVal__unC_Rmin(svOld);
|
|
/* assert on sanity of constraints. */
|
|
Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
|
|
tl_assert(leqxx);
|
|
// same as in non-race case
|
|
// proof: in the non-race case, we have
|
|
// rmini <= wmini (invar on constraints)
|
|
// tviW <= tviR (invar on thread clocks)
|
|
// wmini <= tviW (from run-time check)
|
|
// hence from transitivity of <= we have
|
|
// rmini <= wmini <= tviW
|
|
// and so join(rmini,tviW) == tviW
|
|
// and join(wmini,tviW) == tviW
|
|
// qed.
|
|
svNew = SVal__mkC( VtsID__join2(rmini, tviW),
|
|
VtsID__join2(wmini, tviW) );
|
|
record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
|
|
wmini, /* Cfailed */
|
|
tviW, /* Kfailed */
|
|
wmini /* Cw */ );
|
|
goto out;
|
|
}
|
|
}
|
|
if (SVal__isA(svOld)) {
|
|
/* writing no-access memory (sigh); leave unchanged */
|
|
/* check for no pollution */
|
|
tl_assert(svOld == SVal_NOACCESS);
|
|
svNew = SVal_NOACCESS;
|
|
goto out;
|
|
}
|
|
if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
|
|
tl_assert(0);
|
|
|
|
out:
|
|
if (CHECK_MSM) {
|
|
tl_assert(is_sane_SVal_C(svNew));
|
|
}
|
|
if (UNLIKELY(svNew != svOld)) {
|
|
tl_assert(svNew != SVal_INVALID);
|
|
if (HG_(clo_history_level) >= 2
|
|
&& SVal__isC(svOld) && SVal__isC(svNew)) {
|
|
event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
|
|
stats__msmcwrite_change++;
|
|
}
|
|
}
|
|
return svNew;
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Apply core MSM to specific memory locations //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/*------------- ZSM accesses: 8 bit sapply ------------- */
|
|
|
|
static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cread08s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcread( svOld, thr,a,1 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
}
|
|
|
|
static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cwrite08s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcwrite( svOld, thr,a,1 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
}
|
|
|
|
/*------------- ZSM accesses: 16 bit sapply ------------- */
|
|
|
|
static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cread16s++;
|
|
if (UNLIKELY(!aligned16(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
|
|
if (valid_value_is_below_me_16(descr, toff)) {
|
|
goto slowcase;
|
|
} else {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcread( svOld, thr,a,2 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_16to8splits++;
|
|
zsm_sapply08__msmcread( thr, a + 0 );
|
|
zsm_sapply08__msmcread( thr, a + 1 );
|
|
}
|
|
|
|
static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cwrite16s++;
|
|
if (UNLIKELY(!aligned16(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
|
|
if (valid_value_is_below_me_16(descr, toff)) {
|
|
goto slowcase;
|
|
} else {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcwrite( svOld, thr,a,2 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_16to8splits++;
|
|
zsm_sapply08__msmcwrite( thr, a + 0 );
|
|
zsm_sapply08__msmcwrite( thr, a + 1 );
|
|
}
|
|
|
|
/*------------- ZSM accesses: 32 bit sapply ------------- */
|
|
|
|
static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cread32s++;
|
|
if (UNLIKELY(!aligned32(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 or 4 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
|
|
if (valid_value_is_above_me_32(descr, toff)) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
|
|
} else {
|
|
goto slowcase;
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcread( svOld, thr,a,4 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_32to16splits++;
|
|
zsm_sapply16__msmcread( thr, a + 0 );
|
|
zsm_sapply16__msmcread( thr, a + 2 );
|
|
}
|
|
|
|
static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cwrite32s++;
|
|
if (UNLIKELY(!aligned32(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 or 4 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
|
|
if (valid_value_is_above_me_32(descr, toff)) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
|
|
} else {
|
|
goto slowcase;
|
|
}
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcwrite( svOld, thr,a,4 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_32to16splits++;
|
|
zsm_sapply16__msmcwrite( thr, a + 0 );
|
|
zsm_sapply16__msmcwrite( thr, a + 2 );
|
|
}
|
|
|
|
/*------------- ZSM accesses: 64 bit sapply ------------- */
|
|
|
|
static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno;
|
|
//UWord toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cread64s++;
|
|
if (UNLIKELY(!aligned64(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
//toff = get_tree_offset(a); /* == 0, unused */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
|
|
goto slowcase;
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcread( svOld, thr,a,8 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_64to32splits++;
|
|
zsm_sapply32__msmcread( thr, a + 0 );
|
|
zsm_sapply32__msmcread( thr, a + 4 );
|
|
}
|
|
|
|
static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno;
|
|
//UWord toff;
|
|
SVal svOld, svNew;
|
|
UShort descr;
|
|
stats__cline_cwrite64s++;
|
|
if (UNLIKELY(!aligned64(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
//toff = get_tree_offset(a); /* == 0, unused */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
|
|
goto slowcase;
|
|
}
|
|
svOld = cl->svals[cloff];
|
|
svNew = msmcwrite( svOld, thr,a,8 );
|
|
if (CHECK_ZSM)
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
return;
|
|
slowcase: /* misaligned, or must go further down the tree */
|
|
stats__cline_64to32splits++;
|
|
zsm_sapply32__msmcwrite( thr, a + 0 );
|
|
zsm_sapply32__msmcwrite( thr, a + 4 );
|
|
}
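/* A note on the slowcase paths in the sapply routines above: a
misaligned (or insufficiently described) access is simply split
into two half-sized accesses and retried.  Illustrative example
(not a separate mechanism): an 8-byte read at an address that is
4-aligned but not 8-aligned becomes two 4-byte reads at +0 and
+4; if either of those still cannot be satisfied at its level of
the tree it splits again, down to 16- and finally 8-bit accesses,
bumping the corresponding stats__cline_*splits counters at each
step. */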
|
|
|
|
/*--------------- ZSM accesses: 8 bit swrite --------------- */
|
|
|
|
static
|
|
void zsm_swrite08 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_swrite08s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff] = svNew;
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 16 bit swrite --------------- */
|
|
|
|
static
|
|
void zsm_swrite16 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_swrite16s++;
|
|
if (UNLIKELY(!aligned16(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
|
|
if (valid_value_is_below_me_16(descr, toff)) {
|
|
/* Writing at this level. Need to fix up 'descr'. */
|
|
cl->descrs[tno] = pullup_descr_to_16(descr, toff);
|
|
/* At this point, the tree does not match cl->descr[tno] any
more. The assignments below will fix it up. */
|
|
} else {
|
|
/* We can't indiscriminately write on the w16 node as in the
w64 case, as that might make the node inconsistent with
its parent. So first, pull down to this level. */
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
}
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff + 0] = svNew;
|
|
cl->svals[cloff + 1] = SVal_INVALID;
|
|
return;
|
|
slowcase: /* misaligned */
|
|
stats__cline_16to8splits++;
|
|
zsm_swrite08( a + 0, svNew );
|
|
zsm_swrite08( a + 1, svNew );
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 32 bit swrite --------------- */
|
|
|
|
static
|
|
void zsm_swrite32 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_swrite32s++;
|
|
if (UNLIKELY(!aligned32(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 or 4 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
|
|
if (valid_value_is_above_me_32(descr, toff)) {
|
|
/* We can't indiscriminately write on the w32 node as in the
w64 case, as that might make the node inconsistent with
its parent. So first, pull down to this level. */
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
|
|
if (CHECK_ZSM)
|
|
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
|
|
} else {
|
|
/* Writing at this level. Need to fix up 'descr'. */
|
|
cl->descrs[tno] = pullup_descr_to_32(descr, toff);
|
|
/* At this point, the tree does not match cl->descr[tno] any
more. The assignments below will fix it up. */
|
|
}
|
|
}
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff + 0] = svNew;
|
|
cl->svals[cloff + 1] = SVal_INVALID;
|
|
cl->svals[cloff + 2] = SVal_INVALID;
|
|
cl->svals[cloff + 3] = SVal_INVALID;
|
|
return;
|
|
slowcase: /* misaligned */
|
|
stats__cline_32to16splits++;
|
|
zsm_swrite16( a + 0, svNew );
|
|
zsm_swrite16( a + 2, svNew );
|
|
}
|
|
|
|
/*--------------- ZSM accesses: 64 bit swrite --------------- */
|
|
|
|
static
|
|
void zsm_swrite64 ( Addr a, SVal svNew ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno;
|
|
//UWord toff;
|
|
stats__cline_swrite64s++;
|
|
if (UNLIKELY(!aligned64(a))) goto slowcase;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
//toff = get_tree_offset(a); /* == 0, unused */
|
|
cl->descrs[tno] = TREE_DESCR_64;
|
|
tl_assert(svNew != SVal_INVALID);
|
|
cl->svals[cloff + 0] = svNew;
|
|
cl->svals[cloff + 1] = SVal_INVALID;
|
|
cl->svals[cloff + 2] = SVal_INVALID;
|
|
cl->svals[cloff + 3] = SVal_INVALID;
|
|
cl->svals[cloff + 4] = SVal_INVALID;
|
|
cl->svals[cloff + 5] = SVal_INVALID;
|
|
cl->svals[cloff + 6] = SVal_INVALID;
|
|
cl->svals[cloff + 7] = SVal_INVALID;
|
|
return;
|
|
slowcase: /* misaligned */
|
|
stats__cline_64to32splits++;
|
|
zsm_swrite32( a + 0, svNew );
|
|
zsm_swrite32( a + 4, svNew );
|
|
}
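/* Unlike the 16- and 32-bit swrite cases above, the aligned 64-bit
case just handled needs no pulldown or pullup: the write covers
the whole 8-byte tree, so the descriptor is simply reset to
TREE_DESCR_64 and the seven non-root slots marked SVal_INVALID. */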
|
|
|
|
/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
|
|
|
|
static
|
|
SVal zsm_sread08 ( Addr a ) {
|
|
CacheLine* cl;
|
|
UWord cloff, tno, toff;
|
|
UShort descr;
|
|
stats__cline_sread08s++;
|
|
cl = get_cacheline(a);
|
|
cloff = get_cacheline_offset(a);
|
|
tno = get_treeno(a);
|
|
toff = get_tree_offset(a); /* == 0 .. 7 */
|
|
descr = cl->descrs[tno];
|
|
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
|
|
SVal* tree = &cl->svals[tno << 3];
|
|
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
|
|
}
|
|
return cl->svals[cloff];
|
|
}
|
|
|
|
static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
|
|
SVal sv;
|
|
stats__cline_scopy08s++;
|
|
sv = zsm_sread08( src );
|
|
zsm_swrite08( dst, sv );
|
|
}
|
|
|
|
|
|
/* Block-copy states (needed for implementing realloc()). Note this
doesn't change the filtering arrangements. The caller of
zsm_scopy_range needs to attend to that. */
|
|
|
|
static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
|
|
{
|
|
SizeT i;
|
|
if (len == 0)
|
|
return;
|
|
|
|
/* assert for non-overlappingness */
|
|
tl_assert(src+len <= dst || dst+len <= src);
|
|
|
|
/* To be simple, just copy byte by byte. But so as not to wreck
performance for later accesses to dst[0 .. len-1], normalise
destination lines as we finish with them, and also normalise the
lines containing the first and the last address. */
|
|
for (i = 0; i < len; i++) {
|
|
Bool normalise
|
|
= get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
|
|
|| i == 0 /* first in range */
|
|
|| i == len-1; /* last in range */
|
|
zsm_scopy08( src+i, dst+i, normalise );
|
|
}
|
|
}
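/* Illustrative caller pattern (a sketch, not a prescription): since
zsm_scopy_range leaves the per-thread filters alone, a caller
copying shadow state on behalf of thread 'thr' is expected to
also clear the filter over the destination range, along the
lines of

zsm_scopy_range( src, dst, len );
Filter__clear_range( thr->filter, dst, len );

which is exactly what libhb_copy_shadow_state does further down
in this file. */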
|
|
|
|
|
|
/* For setting address ranges to a given value. Has considerable
sophistication so as to avoid generating large numbers of pointless
cache loads/writebacks for large ranges. */
|
|
|
|
/* Do small ranges in-cache, in the obvious way. */
|
|
static
|
|
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
|
|
{
|
|
/* fast track a couple of common cases */
|
|
if (len == 4 && aligned32(a)) {
|
|
zsm_swrite32( a, svNew );
|
|
return;
|
|
}
|
|
if (len == 8 && aligned64(a)) {
|
|
zsm_swrite64( a, svNew );
|
|
return;
|
|
}
|
|
|
|
/* be completely general (but as efficient as possible) */
|
|
if (len == 0) return;
|
|
|
|
if (!aligned16(a) && len >= 1) {
|
|
zsm_swrite08( a, svNew );
|
|
a += 1;
|
|
len -= 1;
|
|
tl_assert(aligned16(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned32(a) && len >= 2) {
|
|
zsm_swrite16( a, svNew );
|
|
a += 2;
|
|
len -= 2;
|
|
tl_assert(aligned32(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned64(a) && len >= 4) {
|
|
zsm_swrite32( a, svNew );
|
|
a += 4;
|
|
len -= 4;
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 8) {
|
|
tl_assert(aligned64(a));
|
|
while (len >= 8) {
|
|
zsm_swrite64( a, svNew );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 4)
|
|
tl_assert(aligned32(a));
|
|
if (len >= 4) {
|
|
zsm_swrite32( a, svNew );
|
|
a += 4;
|
|
len -= 4;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 2)
|
|
tl_assert(aligned16(a));
|
|
if (len >= 2) {
|
|
zsm_swrite16( a, svNew );
|
|
a += 2;
|
|
len -= 2;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 1) {
|
|
zsm_swrite08( a, svNew );
|
|
//a += 1;
|
|
len -= 1;
|
|
}
|
|
tl_assert(len == 0);
|
|
}
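/* Worked example of the peeling order above (illustrative values):
for a == 0x1003 and len == 13 the calls issued are
zsm_swrite08( 0x1003 )  - brings 'a' up to 16-bit alignment
zsm_swrite32( 0x1004 )  - 0x1004 is 4- but not 8-aligned
zsm_swrite64( 0x1008 )  - one aligned 64-bit chunk, len now 0
Any misaligned leftover at the end of a range is peeled off in
the reverse order (32, then 16, then 8 bits). */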
|
|
|
|
|
|
/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
for larger ranges, try to operate directly on the out-of-cache
representation, rather than dragging lines into the cache,
overwriting them, and forcing them out. This turns out to be an
important performance optimisation.

Note that this doesn't change the filtering arrangements. The
caller of zsm_sset_range needs to attend to that. */
|
|
|
|
static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
|
|
{
|
|
tl_assert(svNew != SVal_INVALID);
|
|
stats__cache_make_New_arange += (ULong)len;
|
|
|
|
if (0 && len > 500)
|
|
VG_(printf)("make New ( %#lx, %ld )\n", a, len );
|
|
|
|
if (0) {
|
|
static UWord n_New_in_cache = 0;
|
|
static UWord n_New_not_in_cache = 0;
|
|
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[4] 0000 */
|
|
Addr tag = a & ~(N_LINE_ARANGE - 1);
|
|
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
if (LIKELY(tag == cache_shmem.tags0[wix])) {
|
|
n_New_in_cache++;
|
|
} else {
|
|
n_New_not_in_cache++;
|
|
}
|
|
if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
|
|
VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
|
|
n_New_in_cache, n_New_not_in_cache );
|
|
}
|
|
|
|
if (LIKELY(len < 2 * N_LINE_ARANGE)) {
|
|
zsm_sset_range_SMALL( a, len, svNew );
|
|
} else {
|
|
Addr before_start = a;
|
|
Addr aligned_start = cacheline_ROUNDUP(a);
|
|
Addr after_start = cacheline_ROUNDDN(a + len);
|
|
UWord before_len = aligned_start - before_start;
|
|
UWord aligned_len = after_start - aligned_start;
|
|
UWord after_len = a + len - after_start;
|
|
tl_assert(before_start <= aligned_start);
|
|
tl_assert(aligned_start <= after_start);
|
|
tl_assert(before_len < N_LINE_ARANGE);
|
|
tl_assert(after_len < N_LINE_ARANGE);
|
|
tl_assert(get_cacheline_offset(aligned_start) == 0);
|
|
if (get_cacheline_offset(a) == 0) {
|
|
tl_assert(before_len == 0);
|
|
tl_assert(a == aligned_start);
|
|
}
|
|
if (get_cacheline_offset(a+len) == 0) {
|
|
tl_assert(after_len == 0);
|
|
tl_assert(after_start == a+len);
|
|
}
|
|
if (before_len > 0) {
|
|
zsm_sset_range_SMALL( before_start, before_len, svNew );
|
|
}
|
|
if (after_len > 0) {
|
|
zsm_sset_range_SMALL( after_start, after_len, svNew );
|
|
}
|
|
stats__cache_make_New_inZrep += (ULong)aligned_len;
|
|
|
|
while (1) {
|
|
Addr tag;
|
|
UWord wix;
|
|
if (aligned_start >= after_start)
|
|
break;
|
|
tl_assert(get_cacheline_offset(aligned_start) == 0);
|
|
tag = aligned_start & ~(N_LINE_ARANGE - 1);
|
|
wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
|
|
if (tag == cache_shmem.tags0[wix]) {
|
|
UWord i;
|
|
for (i = 0; i < N_LINE_ARANGE / 8; i++)
|
|
zsm_swrite64( aligned_start + i * 8, svNew );
|
|
} else {
|
|
UWord i;
|
|
Word zix;
|
|
SecMap* sm;
|
|
LineZ* lineZ;
|
|
/* This line is not in the cache. Do not force it in; instead
modify it in-place. */
/* find the Z line to write in and rcdec it or the
associated F line. */
|
|
find_Z_for_writing( &sm, &zix, tag );
|
|
tl_assert(sm);
|
|
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
|
|
lineZ = &sm->linesZ[zix];
|
|
lineZ->dict[0] = svNew;
|
|
lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
|
|
for (i = 0; i < N_LINE_ARANGE/4; i++)
|
|
lineZ->ix2s[i] = 0; /* all refer to dict[0] */
|
|
rcinc_LineZ(lineZ);
|
|
}
|
|
aligned_start += N_LINE_ARANGE;
|
|
aligned_len -= N_LINE_ARANGE;
|
|
}
|
|
tl_assert(aligned_start == after_start);
|
|
tl_assert(aligned_len == 0);
|
|
}
|
|
}
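/* Illustrative decomposition for the large-range case above (line
size is N_LINE_ARANGE bytes): a request [a, a+len) with
len >= 2 * N_LINE_ARANGE is handled as
[before_start, aligned_start)  - partial leading line, via
zsm_sset_range_SMALL
[aligned_start, after_start)   - whole lines, written either
in-cache (if resident) or directly into the Z representation
[after_start, a+len)           - partial trailing line, via
zsm_sset_range_SMALL
so at most the two partial lines are ever pulled into the cache
by this call. */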
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Front-filtering accesses //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static UWord stats__f_ac = 0;
|
|
static UWord stats__f_sk = 0;
|
|
|
|
#if 0
# define STATS__F_SHOW \
do { \
if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
VG_(printf)("filters: ac %lu sk %lu\n", \
stats__f_ac, stats__f_sk); \
} while (0)
#else
# define STATS__F_SHOW /* */
#endif
|
|
|
|
void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply08__msmcwrite(thr, a);
|
|
}
|
|
|
|
void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply16__msmcwrite(thr, a);
|
|
}
|
|
|
|
void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply32__msmcwrite(thr, a);
|
|
}
|
|
|
|
void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply64__msmcwrite(thr, a);
|
|
}
|
|
|
|
void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
|
|
{
|
|
/* fast track a couple of common cases */
|
|
if (len == 4 && aligned32(a)) {
|
|
zsm_sapply32_f__msmcwrite( thr, a );
|
|
return;
|
|
}
|
|
if (len == 8 && aligned64(a)) {
|
|
zsm_sapply64_f__msmcwrite( thr, a );
|
|
return;
|
|
}
|
|
|
|
/* be completely general (but as efficient as possible) */
|
|
if (len == 0) return;
|
|
|
|
if (!aligned16(a) && len >= 1) {
|
|
zsm_sapply08_f__msmcwrite( thr, a );
|
|
a += 1;
|
|
len -= 1;
|
|
tl_assert(aligned16(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned32(a) && len >= 2) {
|
|
zsm_sapply16_f__msmcwrite( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
tl_assert(aligned32(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned64(a) && len >= 4) {
|
|
zsm_sapply32_f__msmcwrite( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 8) {
|
|
tl_assert(aligned64(a));
|
|
while (len >= 8) {
|
|
zsm_sapply64_f__msmcwrite( thr, a );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 4)
|
|
tl_assert(aligned32(a));
|
|
if (len >= 4) {
|
|
zsm_sapply32_f__msmcwrite( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 2)
|
|
tl_assert(aligned16(a));
|
|
if (len >= 2) {
|
|
zsm_sapply16_f__msmcwrite( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 1) {
|
|
zsm_sapply08_f__msmcwrite( thr, a );
|
|
//a += 1;
|
|
len -= 1;
|
|
}
|
|
tl_assert(len == 0);
|
|
}
|
|
|
|
void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply08__msmcread(thr, a);
|
|
}
|
|
|
|
void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply16__msmcread(thr, a);
|
|
}
|
|
|
|
void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply32__msmcread(thr, a);
|
|
}
|
|
|
|
void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
|
|
stats__f_ac++;
|
|
STATS__F_SHOW;
|
|
if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
|
|
stats__f_sk++;
|
|
return;
|
|
}
|
|
zsm_sapply64__msmcread(thr, a);
|
|
}
|
|
|
|
void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
|
|
{
|
|
/* fast track a couple of common cases */
|
|
if (len == 4 && aligned32(a)) {
|
|
zsm_sapply32_f__msmcread( thr, a );
|
|
return;
|
|
}
|
|
if (len == 8 && aligned64(a)) {
|
|
zsm_sapply64_f__msmcread( thr, a );
|
|
return;
|
|
}
|
|
|
|
/* be completely general (but as efficient as possible) */
|
|
if (len == 0) return;
|
|
|
|
if (!aligned16(a) && len >= 1) {
|
|
zsm_sapply08_f__msmcread( thr, a );
|
|
a += 1;
|
|
len -= 1;
|
|
tl_assert(aligned16(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned32(a) && len >= 2) {
|
|
zsm_sapply16_f__msmcread( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
tl_assert(aligned32(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (!aligned64(a) && len >= 4) {
|
|
zsm_sapply32_f__msmcread( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 8) {
|
|
tl_assert(aligned64(a));
|
|
while (len >= 8) {
|
|
zsm_sapply64_f__msmcread( thr, a );
|
|
a += 8;
|
|
len -= 8;
|
|
}
|
|
tl_assert(aligned64(a));
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 4)
|
|
tl_assert(aligned32(a));
|
|
if (len >= 4) {
|
|
zsm_sapply32_f__msmcread( thr, a );
|
|
a += 4;
|
|
len -= 4;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 2)
|
|
tl_assert(aligned16(a));
|
|
if (len >= 2) {
|
|
zsm_sapply16_f__msmcread( thr, a );
|
|
a += 2;
|
|
len -= 2;
|
|
}
|
|
if (len == 0) return;
|
|
|
|
if (len >= 1) {
|
|
zsm_sapply08_f__msmcread( thr, a );
|
|
//a += 1;
|
|
len -= 1;
|
|
}
|
|
tl_assert(len == 0);
|
|
}
|
|
|
|
void libhb_Thr_resumes ( Thr* thr )
|
|
{
|
|
if (0) VG_(printf)("resume %p\n", thr);
|
|
tl_assert(thr);
|
|
tl_assert(!thr->llexit_done);
|
|
Filter__clear(thr->filter, "libhb_Thr_resumes");
|
|
/* A kludge, but .. if this thread doesn't have any marker stacks
at all, get one right now. This is easier than figuring out
exactly when at thread startup we can and can't take a stack
snapshot. */
|
|
if (HG_(clo_history_level) == 1) {
|
|
tl_assert(thr->local_Kws_n_stacks);
|
|
if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
|
|
note_local_Kw_n_stack_for(thr);
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Synchronisation objects //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
/* A double linked list of all the SO's. */
|
|
SO* admin_SO = NULL;
|
|
|
|
static SO* SO__Alloc ( void )
|
|
{
|
|
SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
|
|
so->viR = VtsID_INVALID;
|
|
so->viW = VtsID_INVALID;
|
|
so->magic = SO_MAGIC;
|
|
/* Add to double linked list */
|
|
if (admin_SO) {
|
|
tl_assert(admin_SO->admin_prev == NULL);
|
|
admin_SO->admin_prev = so;
|
|
so->admin_next = admin_SO;
|
|
} else {
|
|
so->admin_next = NULL;
|
|
}
|
|
so->admin_prev = NULL;
|
|
admin_SO = so;
|
|
/* */
|
|
return so;
|
|
}
|
|
|
|
static void SO__Dealloc ( SO* so )
|
|
{
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
if (so->viR == VtsID_INVALID) {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
} else {
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
VtsID__rcdec(so->viR);
|
|
VtsID__rcdec(so->viW);
|
|
}
|
|
so->magic = 0;
|
|
/* Del from double linked list */
|
|
if (so->admin_prev)
|
|
so->admin_prev->admin_next = so->admin_next;
|
|
if (so->admin_next)
|
|
so->admin_next->admin_prev = so->admin_prev;
|
|
if (so == admin_SO)
|
|
admin_SO = so->admin_next;
|
|
/* */
|
|
HG_(free)( so );
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////
|
|
// //
|
|
// Top Level API //
|
|
// //
|
|
/////////////////////////////////////////////////////////
|
|
|
|
static void show_thread_state ( const HChar* str, Thr* t )
|
|
{
|
|
if (1) return;
|
|
if (t->viR == t->viW) {
|
|
VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
|
|
VtsID__pp( t->viR );
|
|
VG_(printf)("%s","\n");
|
|
} else {
|
|
VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
|
|
VtsID__pp( t->viR );
|
|
VG_(printf)(" viW %u==", t->viW);
|
|
VtsID__pp( t->viW );
|
|
VG_(printf)("%s","\n");
|
|
}
|
|
}
|
|
|
|
|
|
Thr* libhb_init (
|
|
void (*get_stacktrace)( Thr*, Addr*, UWord ),
|
|
ExeContext* (*get_EC)( Thr* )
|
|
)
|
|
{
|
|
Thr* thr;
|
|
VtsID vi;
|
|
|
|
// We will have to store a large number of these,
// so make sure they're the size we expect them to be.
|
|
tl_assert(sizeof(ScalarTS) == 8);
|
|
|
|
/* because first 1024 unusable */
|
|
tl_assert(SCALARTS_N_THRBITS >= 11);
|
|
/* so as to fit in a UInt w/ 3 bits to spare (see defn of
Thr_n_RCEC). */
|
|
tl_assert(SCALARTS_N_THRBITS <= 29);
|
|
|
|
/* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
(32-bit). It's not correctness-critical, but there are a lot of
them, so it's important from a space viewpoint. Unfortunately
we simply can't pack it into 2 words on a 32-bit target. */
|
|
if (sizeof(UWord) == 8) {
|
|
tl_assert(sizeof(Thr_n_RCEC) == 16);
|
|
} else {
|
|
tl_assert(sizeof(Thr_n_RCEC) == 12);
|
|
}
|
|
|
|
/* Word sets really are 32 bits. Even on a 64 bit target. */
|
|
tl_assert(sizeof(WordSetID) == 4);
|
|
tl_assert(sizeof(WordSet) == sizeof(WordSetID));
|
|
|
|
tl_assert(get_stacktrace);
|
|
tl_assert(get_EC);
|
|
main_get_stacktrace = get_stacktrace;
|
|
main_get_EC = get_EC;
|
|
|
|
// No need to initialise hg_wordfm.
|
|
// No need to initialise hg_wordset.
|
|
|
|
/* Allocated once and never deallocated. Used as a temporary in
VTS singleton, tick and join operations. */
|
|
temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
|
|
temp_max_sized_VTS->id = VtsID_INVALID;
|
|
verydead_thread_table_init();
|
|
vts_set_init();
|
|
vts_tab_init();
|
|
event_map_init();
|
|
VtsID__invalidate_caches();
|
|
|
|
// initialise shadow memory
|
|
zsm_init( SVal__rcinc, SVal__rcdec );
|
|
|
|
thr = Thr__new();
|
|
vi = VtsID__mk_Singleton( thr, 1 );
|
|
thr->viR = vi;
|
|
thr->viW = vi;
|
|
VtsID__rcinc(thr->viR);
|
|
VtsID__rcinc(thr->viW);
|
|
|
|
show_thread_state(" root", thr);
|
|
return thr;
|
|
}
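/* Minimal usage sketch (hypothetical client code; the callback
names are placeholders for whatever the tool supplies):

Thr* root = libhb_init( my_get_stacktrace, my_get_EC );

The returned Thr* stands for the root thread; all further threads
are introduced via libhb_create below. */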
|
|
|
|
|
|
Thr* libhb_create ( Thr* parent )
|
|
{
|
|
/* The child's VTSs are copies of the parent's VTSs, but ticked at
the child's index. Since the child's index is guaranteed
unique, it has never been seen before, so the implicit value
before the tick is zero and after that is one. */
|
|
Thr* child = Thr__new();
|
|
|
|
child->viR = VtsID__tick( parent->viR, child );
|
|
child->viW = VtsID__tick( parent->viW, child );
|
|
Filter__clear(child->filter, "libhb_create(child)");
|
|
VtsID__rcinc(child->viR);
|
|
VtsID__rcinc(child->viW);
|
|
/* We need to do note_local_Kw_n_stack_for( child ), but it's too
early for that - it may not have a valid TId yet. So, let
libhb_Thr_resumes pick it up the first time the thread runs. */
|
|
|
|
tl_assert(VtsID__indexAt( child->viR, child ) == 1);
|
|
tl_assert(VtsID__indexAt( child->viW, child ) == 1);
|
|
|
|
/* and the parent has to move along too */
|
|
VtsID__rcdec(parent->viR);
|
|
VtsID__rcdec(parent->viW);
|
|
parent->viR = VtsID__tick( parent->viR, parent );
|
|
parent->viW = VtsID__tick( parent->viW, parent );
|
|
Filter__clear(parent->filter, "libhb_create(parent)");
|
|
VtsID__rcinc(parent->viR);
|
|
VtsID__rcinc(parent->viW);
|
|
note_local_Kw_n_stack_for( parent );
|
|
|
|
show_thread_state(" child", child);
|
|
show_thread_state("parent", parent);
|
|
|
|
return child;
|
|
}
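/* Worked example of the clock movements above (values are made up,
and viR/viW are shown as a single clock for brevity): if the
parent's clock is [P:5], then after libhb_create
child->viR == child->viW == [P:5, C:1]  (parent's clock, ticked
at the child's index)
parent->viR == parent->viW == [P:6]     (parent ticked at its
own index)
which is what the two assertions on the child's own component
being exactly 1 check. */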
|
|
|
|
/* Shut down the library, and print stats (in fact that's _all_
this is for). */
|
|
void libhb_shutdown ( Bool show_stats )
|
|
{
|
|
if (show_stats) {
|
|
VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
|
|
VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
|
|
stats__secmaps_allocd,
|
|
stats__secmap_ga_space_covered);
|
|
VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
|
|
stats__secmap_linesZ_allocd,
|
|
stats__secmap_linesZ_bytes);
|
|
VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
|
|
stats__secmap_linesF_allocd,
|
|
stats__secmap_linesF_bytes);
|
|
VG_(printf)(" secmaps: %'10lu iterator steppings\n",
|
|
stats__secmap_iterator_steppings);
|
|
VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
|
|
stats__secmaps_search, stats__secmaps_search_slow);
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
|
|
stats__cache_totrefs, stats__cache_totmisses );
|
|
VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
|
|
stats__cache_Z_fetches, stats__cache_F_fetches );
|
|
VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
|
|
stats__cache_Z_wbacks, stats__cache_F_wbacks );
|
|
VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
|
|
stats__cache_invals, stats__cache_flushes );
|
|
VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
|
|
stats__cache_make_New_arange,
|
|
stats__cache_make_New_inZrep);
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)(" cline: %'10lu normalises\n",
|
|
stats__cline_normalises );
|
|
VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
|
|
stats__cline_cread64s,
|
|
stats__cline_cread32s,
|
|
stats__cline_cread16s,
|
|
stats__cline_cread08s );
|
|
VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
|
|
stats__cline_cwrite64s,
|
|
stats__cline_cwrite32s,
|
|
stats__cline_cwrite16s,
|
|
stats__cline_cwrite08s );
|
|
VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
|
|
stats__cline_swrite64s,
|
|
stats__cline_swrite32s,
|
|
stats__cline_swrite16s,
|
|
stats__cline_swrite08s );
|
|
VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
|
|
stats__cline_sread08s, stats__cline_scopy08s );
|
|
VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
|
|
stats__cline_64to32splits,
|
|
stats__cline_32to16splits,
|
|
stats__cline_16to8splits );
|
|
VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
|
|
stats__cline_64to32pulldown,
|
|
stats__cline_32to16pulldown,
|
|
stats__cline_16to8pulldown );
|
|
if (0)
|
|
VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
|
|
(Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
|
|
|
|
VG_(printf)("%s","\n");
|
|
|
|
VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
|
|
stats__msmcread, stats__msmcread_change);
|
|
VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
|
|
stats__msmcwrite, stats__msmcwrite_change);
|
|
VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
|
|
stats__cmpLEQ_queries, stats__cmpLEQ_misses);
|
|
VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
|
|
stats__join2_queries, stats__join2_misses);
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
|
|
stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
|
|
VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
|
|
stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
|
|
VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
|
|
stats__vts_set__focaa, stats__vts_set__focaa_a );
|
|
VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
|
|
stats__vts__indexat_slow );
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)(
|
|
" libhb: %ld entries in vts_table (approximately %lu bytes)\n",
|
|
VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
|
|
);
|
|
VG_(printf)( " libhb: %lu entries in vts_set\n",
|
|
VG_(sizeFM)( vts_set ) );
|
|
|
|
VG_(printf)("%s","\n");
|
|
VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
|
|
stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
|
|
stats__ctxt_rcdec2,
|
|
stats__ctxt_rcdec3 );
|
|
VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
|
|
stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
|
|
VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
|
|
(UWord)N_RCEC_TAB,
|
|
stats__ctxt_tab_curr );
|
|
VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
|
|
stats__ctxt_tab_qs,
|
|
stats__ctxt_tab_cmps );
|
|
#if 0
|
|
VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
|
|
VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
|
|
VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
|
|
VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
|
|
VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
|
|
VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
|
|
VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
|
|
VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
|
|
VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
|
|
VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
|
|
VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
|
|
VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
|
|
VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
|
|
VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
|
|
|
|
VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
|
|
VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
|
|
VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
|
|
VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
|
|
#endif
|
|
|
|
VG_(printf)("%s","<<< END libhb stats >>>\n");
|
|
VG_(printf)("%s","\n");
|
|
|
|
}
|
|
}
|
|
|
|
/* Receive notification that a thread has low level exited. The
significance here is that we do not expect to see any more memory
references from it. */
|
|
void libhb_async_exit ( Thr* thr )
|
|
{
|
|
tl_assert(thr);
|
|
tl_assert(!thr->llexit_done);
|
|
thr->llexit_done = True;
|
|
|
|
/* free up Filter and local_Kws_n_stacks (well, actually not the
latter ..) */
|
|
tl_assert(thr->filter);
|
|
HG_(free)(thr->filter);
|
|
thr->filter = NULL;
|
|
|
|
/* Tell the VTS mechanism this thread has exited, so it can
participate in VTS pruning. Note this can only happen if the
thread has both ll_exited and been joined with. */
|
|
if (thr->joinedwith_done)
|
|
VTS__declare_thread_very_dead(thr);
|
|
|
|
/* Another space-accuracy tradeoff. Do we want to be able to show
H1 history for conflicts in threads which have since exited? If
yes, then we better not free up thr->local_Kws_n_stacks. The
downside is a potential per-thread leak of up to
N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
XArray average overcommit factor is (1.5 I'd guess). */
|
|
// hence:
|
|
// VG_(deleteXA)(thr->local_Kws_n_stacks);
|
|
// thr->local_Kws_n_stacks = NULL;
|
|
}
|
|
|
|
/* Receive notification that a thread has been joined with. The
significance here is that we do not expect to see any further
references to its vector clocks (Thr::viR and Thr::viW). */
|
|
void libhb_joinedwith_done ( Thr* thr )
|
|
{
|
|
tl_assert(thr);
|
|
/* Caller must ensure that this is only ever called once per Thr. */
|
|
tl_assert(!thr->joinedwith_done);
|
|
thr->joinedwith_done = True;
|
|
if (thr->llexit_done)
|
|
VTS__declare_thread_very_dead(thr);
|
|
}
|
|
|
|
|
|
/* Both Segs and SOs point to VTSs. However, there is no sharing, so
a Seg that points at a VTS is its one-and-only owner, and ditto for
a SO that points at a VTS. */
|
|
|
|
SO* libhb_so_alloc ( void )
|
|
{
|
|
return SO__Alloc();
|
|
}
|
|
|
|
void libhb_so_dealloc ( SO* so )
|
|
{
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
SO__Dealloc(so);
|
|
}
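/* Typical SO lifecycle, as a sketch (hypothetical caller code; the
real callers are on the Helgrind side): to model a post/wait
style happens-before edge from thr1 to thr2 one would do

SO* so = libhb_so_alloc();
libhb_so_send( thr1, so, True );   -- strong send, at the post
libhb_so_recv( thr2, so, True );   -- strong recv, at the wait
libhb_so_dealloc( so );

with the send/receive strengths chosen according to the rules
described in libhb.h. */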
|
|
|
|
/* See comments in libhb.h for details on the meaning of
strong vs weak sends and strong vs weak receives. */
|
|
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
|
|
{
|
|
/* Copy the VTSs from 'thr' into the sync object, and then move
the thread along one step. */
|
|
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
|
|
/* stay sane .. a thread's read-clock must always lead or be the
same as its write-clock */
|
|
{ Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
|
|
tl_assert(leq);
|
|
}
|
|
|
|
/* since we're overwriting the VtsIDs in the SO, we need to drop
any references made by the previous contents thereof */
|
|
if (so->viR == VtsID_INVALID) {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
so->viR = thr->viR;
|
|
so->viW = thr->viW;
|
|
VtsID__rcinc(so->viR);
|
|
VtsID__rcinc(so->viW);
|
|
} else {
|
|
/* In a strong send, we dump any previous VC in the SO and
install the sending thread's VC instead. For a weak send we
must join2 with what's already there. */
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
VtsID__rcdec(so->viR);
|
|
VtsID__rcdec(so->viW);
|
|
so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
|
|
so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
|
|
VtsID__rcinc(so->viR);
|
|
VtsID__rcinc(so->viW);
|
|
}
|
|
|
|
/* move both parent clocks along */
|
|
VtsID__rcdec(thr->viR);
|
|
VtsID__rcdec(thr->viW);
|
|
thr->viR = VtsID__tick( thr->viR, thr );
|
|
thr->viW = VtsID__tick( thr->viW, thr );
|
|
if (!thr->llexit_done) {
|
|
Filter__clear(thr->filter, "libhb_so_send");
|
|
note_local_Kw_n_stack_for(thr);
|
|
}
|
|
VtsID__rcinc(thr->viR);
|
|
VtsID__rcinc(thr->viW);
|
|
|
|
if (strong_send)
|
|
show_thread_state("s-send", thr);
|
|
else
|
|
show_thread_state("w-send", thr);
|
|
}
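/* Illustrative numbers for the strong/weak distinction above (the
clock values are made up): suppose the SO currently holds
[T1:3, T2:5] and the sending thread's clock is [T1:4, T2:2].
A strong send overwrites: the SO ends up with [T1:4, T2:2].
A weak send joins (pointwise maximum): the SO ends up with
[T1:4, T2:5].
In both cases the sender's own clocks are then ticked and, if it
is still running, its filter cleared, as above. */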
|
|
|
|
void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
|
|
{
|
|
tl_assert(so);
|
|
tl_assert(so->magic == SO_MAGIC);
|
|
|
|
if (so->viR != VtsID_INVALID) {
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
|
|
/* Weak receive (basically, an R-acquisition of a R-W lock).
This advances the read-clock of the receiver, but not the
write-clock. */
|
|
VtsID__rcdec(thr->viR);
|
|
thr->viR = VtsID__join2( thr->viR, so->viR );
|
|
VtsID__rcinc(thr->viR);
|
|
|
|
/* At one point (r10589) it seemed safest to tick the clocks for
the receiving thread after the join. But on reflection, I
wonder if that might cause it to 'overtake' constraints,
which could lead to missing races. So, back out that part of
r10589. */
|
|
//VtsID__rcdec(thr->viR);
|
|
//thr->viR = VtsID__tick( thr->viR, thr );
|
|
//VtsID__rcinc(thr->viR);
|
|
|
|
/* For a strong receive, we also advance the receiver's write
clock, which means the receive as a whole is essentially
equivalent to a W-acquisition of a R-W lock. */
|
|
if (strong_recv) {
|
|
VtsID__rcdec(thr->viW);
|
|
thr->viW = VtsID__join2( thr->viW, so->viW );
|
|
VtsID__rcinc(thr->viW);
|
|
|
|
/* See comment just above, re r10589. */
|
|
//VtsID__rcdec(thr->viW);
|
|
//thr->viW = VtsID__tick( thr->viW, thr );
|
|
//VtsID__rcinc(thr->viW);
|
|
}
|
|
|
|
if (thr->filter)
|
|
Filter__clear(thr->filter, "libhb_so_recv");
|
|
note_local_Kw_n_stack_for(thr);
|
|
|
|
if (strong_recv)
|
|
show_thread_state("s-recv", thr);
|
|
else
|
|
show_thread_state("w-recv", thr);
|
|
|
|
} else {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
/* Deal with degenerate case: 'so' has no vts, so there has been
no message posted to it. Just ignore this case. */
|
|
show_thread_state("d-recv", thr);
|
|
}
|
|
}
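/* Summarising the receive cases above with the same illustrative
numbers: if the SO holds [T1:4, T2:5], a weak receive by T2
joins only T2's read-clock viR with the SO's viR, whereas a
strong receive additionally joins T2's write-clock viW with the
SO's viW; hence the analogy drawn in the comments with R- versus
W-acquisition of a reader-writer lock. */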
|
|
|
|
Bool libhb_so_everSent ( SO* so )
|
|
{
|
|
if (so->viR == VtsID_INVALID) {
|
|
tl_assert(so->viW == VtsID_INVALID);
|
|
return False;
|
|
} else {
|
|
tl_assert(so->viW != VtsID_INVALID);
|
|
return True;
|
|
}
|
|
}
|
|
|
|
#define XXX1 0 // 0x67a106c
|
|
#define XXX2 0
|
|
|
|
static inline Bool TRACEME(Addr a, SizeT szB) {
|
|
if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
|
|
if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
|
|
return False;
|
|
}
|
|
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
|
|
{
|
|
SVal sv = zsm_sread08(a);
|
|
VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
|
|
show_thread_state("", thr);
|
|
VG_(printf)("%s","\n");
|
|
}
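/* Debug aid: to trace the shadow state of a particular address, set
XXX1 (and/or XXX2) above to that address and flip the
'0 && TRACEME(..)' guards in libhb_srange_new and
libhb_srange_untrack below to '1 && TRACEME(..)'; each hit then
prints the 8-bit shadow value at the address (and, if
show_thread_state is also re-enabled, the thread's clocks). */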
|
|
|
|
void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
|
|
{
|
|
SVal sv = SVal__mkC(thr->viW, thr->viW);
|
|
tl_assert(is_sane_SVal_C(sv));
|
|
if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
|
|
zsm_sset_range( a, szB, sv );
|
|
Filter__clear_range( thr->filter, a, szB );
|
|
if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
|
|
}
|
|
|
|
void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
|
|
{
|
|
/* do nothing */
|
|
}
|
|
|
|
void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
|
|
{
|
|
/* This really does put the requested range in NoAccess. It's
expensive though. */
|
|
SVal sv = SVal_NOACCESS;
|
|
tl_assert(is_sane_SVal_C(sv));
|
|
zsm_sset_range( a, szB, sv );
|
|
Filter__clear_range( thr->filter, a, szB );
|
|
}
|
|
|
|
void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
|
|
{
|
|
SVal sv = SVal_NOACCESS;
|
|
tl_assert(is_sane_SVal_C(sv));
|
|
if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
|
|
zsm_sset_range( a, szB, sv );
|
|
Filter__clear_range( thr->filter, a, szB );
|
|
if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
|
|
}
|
|
|
|
Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
|
|
tl_assert(thr);
|
|
return thr->hgthread;
|
|
}
|
|
|
|
void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
|
|
tl_assert(thr);
|
|
thr->hgthread = hgthread;
|
|
}
|
|
|
|
void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
|
|
{
|
|
zsm_scopy_range(src, dst, len);
|
|
Filter__clear_range( thr->filter, dst, len );
|
|
}
|
|
|
|
void libhb_maybe_GC ( void )
|
|
{
|
|
event_map_maybe_GC();
|
|
/* If there are still freelist entries available, no need for a
GC. */
|
|
if (vts_tab_freelist != VtsID_INVALID)
|
|
return;
|
|
/* So all the table entries are full, and we're having to expand
the table. But did we hit the threshold point yet? */
|
|
if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
|
|
return;
|
|
vts_tab__do_GC( False/*don't show stats*/ );
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
// //
|
|
// SECTION END main library //
|
|
// //
|
|
/////////////////////////////////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end libhb_main.c ---*/
|
|
/*--------------------------------------------------------------------*/
|