diff --git a/cachegrind/cg_arch.c b/cachegrind/cg_arch.c
index 170e4cc28..2afda87c4 100644
--- a/cachegrind/cg_arch.c
+++ b/cachegrind/cg_arch.c
@@ -180,11 +180,14 @@ static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_rede
 
    That sometimes gives a problem.  For example, some Core iX based
    Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
-   sets.  The "fix" in this case is to increase the associativity
-   by 50% to 24, which reduces the number of sets to 8192, making
-   it a power of 2.  That's what the following code does (handing
-   the "3/2 rescaling case".)  We might need to deal with other
-   ratios later (5/4 ?).
+   sets.  Some AMD cpus have T = 5MB, A = 48, L = 64, which gives
+   1706.667 sets (!).
+
+   The "fix" is to force S down to the nearest power of two below its
+   original value, and increase A proportionately, so as to keep the
+   total cache size the same.  In fact to be safe we recalculate the
+   cache size afterwards anyway, to guarantee that it divides exactly
+   between the new number of sets.
 
    The "fix" is "justified" (cough, cough) by alleging that
    increases of associativity above about 4 have very little effect
@@ -193,29 +196,78 @@ static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_rede
 
    changing the associativity is a much better option.
 */
 
+/* (Helper function)  Returns the largest power of 2 that is <= |x|.
+   Even works when |x| == 0. */
+static UInt floor_power_of_2 ( UInt x )
+{
+   x = x | (x >> 1);
+   x = x | (x >> 2);
+   x = x | (x >> 4);
+   x = x | (x >> 8);
+   x = x | (x >> 16);
+   return x - (x >> 1);
+}
+
 static void maybe_tweak_LLc(cache_t *LLc)
 {
-  if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
-     Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
-     if (/* stay sane */
-         nSets >= 4
-         /* nSets is not a power of 2 */
-         && VG_(log2_64)( (ULong)nSets ) == -1
-         /* nSets is 50% above a power of 2 */
-         && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
-         /* associativity can be increased by exactly 50% */
-         && (LLc->assoc % 2) == 0
-        ) {
-        /* # sets is 1.5 * a power of two, but the associativity is
-           even, so we can increase that up by 50% and implicitly
-           scale the # sets down accordingly. */
-        Int new_assoc = LLc->assoc + (LLc->assoc / 2);
-        VG_(dmsg)("warning: pretending that LL cache has associativity"
-                  " %d instead of actual %d\n", new_assoc, LLc->assoc);
-        LLc->assoc = new_assoc;
-     }
-  }
+   if (LLc->size == 0 || LLc->assoc == 0 || LLc->line_size == 0)
+      return;
+
+   tl_assert(LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0);
+
+   UInt old_size      = (UInt)LLc->size;
+   UInt old_assoc     = (UInt)LLc->assoc;
+   UInt old_line_size = (UInt)LLc->line_size;
+
+   UInt new_size      = old_size;
+   UInt new_assoc     = old_assoc;
+   UInt new_line_size = old_line_size;
+
+   UInt old_nSets = old_size / (old_assoc * old_line_size);
+   if (old_nSets == 0) {
+      /* This surely can't happen; but would cause chaos with the maths
+       * below if it did.  Just give up if it does. */
+      return;
+   }
+
+   if (-1 != VG_(log2_64)(old_nSets)) {
+      /* The number of sets is already a power of 2.  Make sure that
+         the size divides exactly between the sets.  Almost all of the
+         time this will have no effect. */
+      new_size = old_line_size * old_assoc * old_nSets;
+   } else {
+      /* The number of sets isn't a power of two.  Calculate some
+         scale-down factor which causes the number of sets to become a
+         power of two.  Then, increase the associativity by that
+         factor.  Finally, re-calculate the total size so as to make
+         sure it divides exactly between the sets. */
+      tl_assert(old_nSets > 0);
+      UInt new_nSets = floor_power_of_2 ( old_nSets );
+      tl_assert(new_nSets > 0 && new_nSets < old_nSets);
+      Double factor = (Double)old_nSets / (Double)new_nSets;
+      tl_assert(factor >= 1.0);
+
+      new_assoc = (UInt)(0.5 + factor * (Double)old_assoc);
+      tl_assert(new_assoc >= old_assoc);
+
+      new_size = old_line_size * new_assoc * new_nSets;
+   }
+
+   tl_assert(new_line_size == old_line_size); /* we never change this */
+   if (new_size == old_size && new_assoc == old_assoc)
+      return;
+
+   VG_(dmsg)("warning: "
+             "specified LL cache: line_size %u assoc %u total_size %'u\n",
+             old_line_size, old_assoc, old_size);
+   VG_(dmsg)("warning: "
+             "simulated LL cache: line_size %u assoc %u total_size %'u\n",
+             new_line_size, new_assoc, new_size);
+
+   LLc->size      = new_size;
+   LLc->assoc     = new_assoc;
+   LLc->line_size = new_line_size;
 }
 
 void VG_(post_clo_init_configure_caches)(cache_t* I1c,