mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-07 12:44:45 +00:00
235 lines
9.2 KiB
Bash
Executable File
235 lines
9.2 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
########################
|
|
# Function definitions #
|
|
########################
|
|
|
|
# Load the measurement helpers that live next to this script.  "$0" is quoted
# so that a script path containing whitespace still resolves, and a failed
# load aborts right away instead of continuing without the helpers.
source "$(dirname "$0")/measurement-functions" || {
  echo "Error: could not load $(dirname "$0")/measurement-functions." >&2
  exit 1
}
|
|
|
|
# Helper for run_test: run the benchmark natively with $1 threads and print
# whatever avgstddev produces for it.  The remaining arguments are the
# benchmark command line; the first of them also serves as the prefix of the
# output file name handed to measure_runtime through test_output.
function _run_test_native_stats {
  local p="$1"
  shift
  # ${psep} is intentionally unquoted: psep=' ' must split "-p 4" into two
  # words, while an unset or empty psep yields the single word "-p4".
  # shellcheck disable=SC2086
  test_output="${1}-p${p}.out" \
    measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev
}

# Helper for run_test: print one native result line ($1 = average time,
# $2 = its deviation, $3 = VSZ, $4 = its deviation).  A non-empty $5 means
# that more than four fields were read, which is reported as an internal
# error on stderr and terminates the current (sub)shell with status 1.
function _run_test_report_native {
  echo "Average time: ${1} +/- ${2} seconds." \
       " VSZ: ${3} +/- ${4} KB"
  if [ "${5}" != "" ]; then
    echo "Internal error (${5})" >&2
    exit 1
  fi
}

# Run one benchmark natively with 1, 2 and 4 threads, print the measured
# statistics, and then print the runtime ratios for --tool=none (1 and 4
# threads) and for DRD (with and without stack variable checking) and
# Helgrind (4 threads each).
#
# Arguments: the benchmark command line; $1 is the executable and is also
#            used as the prefix of the per-run output file names.
# Variables read from the caller's environment:
#   VG        - Valgrind launcher handed to print_runtime_ratio.
#   psep      - separator between "-p" and the thread count (may be unset).
#   test_args - trailing benchmark argument; it is always passed, even when
#               it is empty or unset.
# Uses measure_runtime, avgstddev and print_runtime_ratio, which are not
# defined in this file (presumably they come from the sourced
# measurement-functions file).
#
# The statistics are read straight from the measurement pipeline, so no
# temporary file with a predictable name is created in /tmp, and all
# intermediate variables are local to this function.
function run_test {
  local avg1 stddev1 vsz1 vszdev1
  local avg2 stddev2 vsz2 vszdev2
  local avg4 stddev4 vsz4 vszdev4
  local rest p

  # Native runs with 1, 2 and 4 threads.
  read -r avg1 stddev1 vsz1 vszdev1 rest < <(_run_test_native_stats 1 "$@")
  _run_test_report_native "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" "${rest}"

  read -r avg2 stddev2 vsz2 vszdev2 rest < <(_run_test_native_stats 2 "$@")
  _run_test_report_native "${avg2}" "${stddev2}" "${vsz2}" "${vszdev2}" "${rest}"

  read -r avg4 stddev4 vsz4 vszdev4 rest < <(_run_test_native_stats 4 "$@")
  _run_test_report_native "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" "${rest}"

  # Valgrind runs.  ${psep} stays unquoted on purpose (see the helper above).
  # shellcheck disable=SC2086
  {
    p=1
    test_output="/dev/null" \
      print_runtime_ratio "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" \
        "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

    p=4
    test_output="/dev/null" \
      print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
        "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

    p=4
    test_output="${1}-drd-with-stack-var-4.out" \
      print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
        "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
        "$@" -p${psep}${p} "${test_args}"

    p=4
    test_output="${1}-drd-without-stack-var-4.out" \
      print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
        "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
        "$@" -p${psep}${p} "${test_args}"

    p=4
    test_output="${1}-helgrind-4.out" \
      print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
        "$VG" --tool=helgrind "$@" -p${psep}${p} "${test_args}"
  }

  echo ''
}
|
|
|
|
|
|
########################
|
|
# Script body #
|
|
########################
|
|
|
|
# Directory containing this script, turned into an absolute path when the
# script was invoked through a relative one.  "$0" is quoted so that a path
# containing whitespace is handed to dirname as a single argument.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# Location of the SPLASH-2 tree, relative to the script directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})." >&2
  exit 1
fi

# Valgrind launcher: honour a VG value supplied by the caller, otherwise fall
# back to vg-in-place two directories above the script directory.
if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG." >&2
  exit 1
fi
|
|
|
|
######################################################################################################################
|
|
# Meaning of the different columns:
|
|
# 1. SPLASH2 test name.
|
|
# 2. Execution time in seconds for native run with argument -p1.
|
|
# 3. Virtual memory size in KB for the native run with argument -p1.
|
|
# 4. Execution time in seconds for native run with argument -p2.
|
|
# 5. Virtual memory size in KB for the native run with argument -p2.
|
|
# 6. Execution time in seconds for native run with argument -p4.
|
|
# 7. Virtual memory size in KB for the native run with argument -p4.
|
|
# 8. Execution time ratio for --tool=none -p1 versus -p1.
|
|
# 9. Virtual memory size ratio for --tool=none -p1 versus -p1.
|
|
# 10. Execution time ratio for --tool=none -p4 versus -p4.
|
|
# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
|
|
# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
|
|
# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
|
|
# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
|
|
# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
|
|
# 16. Execution time ratio for --tool=helgrind -p4 versus -p4.
|
|
# 17. Virtual memory size ratio for --tool=helgrind -p4 versus -p4.
|
|
# 18. Execution time ratio for Intel Thread Checker -p4 versus -p4.
|
|
# 19. Execution time ratio for Intel Thread Checker -p4+f versus -p4 (presumably ITC with the filtering described in the paper cited below).
|
|
#
|
|
# Notes:
|
|
# - Both Helgrind and DRD use a granularity of one byte for data race detection.
|
|
# - Helgrind does detect data races on stack variables. DRD only detects
|
|
# data races on stack variables with --check-stack-var=yes.
|
|
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS? Which
#   granularity does ITC use? And which m4 macros have been used by ITC as
#   implementation of the synchronization primitives?
|
|
#
|
|
# 1                     2      3    4      5    6      7    8     9   10   11   12   13   14   15  16   17  18    19
############################################################################################################################
# Results:                native      native      native       none      none       DRD       DRD       HG ITC   ITC
#                            -p1         -p2         -p4        -p1       -p4       -p4     -p4+f      -p4 -p4 -p4+f
# ..........................................................................................................................
# Cholesky           0.13  12016 0.06  22016 0.59  41669 11.3  4.46  2.4 2.11   24 2.37   16 2.57  28 6.10 239    82
# FFT                0.03   6692 0.02  15571 0.02  31962 12.0  7.92 20.0 2.43  121 2.84   54 3.09 117 5.48  90    41
# LU, contiguous     0.08   4100 0.05  12304 0.05  28712 13.4 12.29 20.0 2.62  117 2.73   72 3.16 156 5.53 428   128
# Ocean, contiguous  0.25  16848 0.22  25384 0.14  42528  7.6  3.75 12.9 2.09   99 2.45   68 2.68 327 5.95  90    28
# Radix              0.27  15136 0.10  23336 0.11  39728 12.8  4.06 24.5 2.17   65 2.61   46 2.82 121 6.15 222    56
# Raytrace           0.82 207104 0.59 215979 0.59 232363  9.0  1.22 11.4 1.20  251 1.48   82 1.52 205 3.78 172    53
# Water-n2           0.18  10696 0.10  27072 0.10  59832 16.4  5.40 23.8 1.79 4972 2.61 1538 2.68 110 3.54 189    39
# Water-sp           0.22   4444 0.11  13536 0.10  29928 11.6 11.41 22.7 2.52  301 2.89   55 3.17  97 4.76 183    34
# ..........................................................................................................................
# geometric mean     0.17  13023 0.10  25823 0.12  47718 11.5  5.15 14.4 2.06  174 2.45   78 2.65 120 5.06 180    51
# ..........................................................................................................................
# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.
# Software: openSUSE 11.0 (64-bit edition), runlevel 3, gcc 4.3.1, 32 bit executables, valgrind trunk r9796.
############################################################################################################################
|
|
|
|
####
|
|
# Notes:
|
|
# - The ITC performance numbers in the above table originate from table 1 in
|
|
# the following paper:
|
|
# Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
|
|
# Accurate and efficient filtering for the Intel thread checker race
|
|
# detector, Proceedings of the 1st workshop on Architectural and system
|
|
# support for improving software dependability, San Jose, California,
|
|
# 2006. Pages: 34 - 41.
|
|
# - The input parameters for benchmarks below originate from table 1 in the
|
|
# following paper:
|
|
# The SPLASH-2 programs: characterization and methodological considerations
|
|
# Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
|
|
# 1995. Proceedings of the 22nd Annual International Symposium on Computer
|
|
# Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
|
|
# ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
|
|
####
|
|
|
|
# Cache size as reported by get_cache_size, and the result of feeding it to
# log2; both values are used below to build benchmark command lines.
cache_size="$(get_cache_size)"
log2_cache_size="$(log2 ${cache_size})"
|
|
|
|
# Cholesky
# Runs in a subshell so that the directory change does not affect the rest of
# the script.
(
  # Stop this subshell if the input directory is missing, rather than
  # decompressing and running the benchmark from the wrong directory.
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Decompress every *.Z input next to its archive.  Only names that really
  # end in ".Z" are matched, so the output name always differs from the input
  # name and the redirection can never truncate its own source.
  for f in *.Z; do
    [ -e "$f" ] || continue   # the glob matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY "-C$((cache_size))"
)
|
|
|
|
# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t "-l$((log2_cache_size/2))" -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The uname output is quoted because it may be empty or consist of several
# words, which would turn the unquoted comparison into a test syntax error.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" "-n$((2**20))" -r1024
|
|
|
|
# Raytrace
# Runs in a subshell so that the directory changes stay local.
(
  # The cleanup below deletes files by glob, so it must never run in the
  # wrong directory: stop this subshell if the inputs directory is missing.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  rm -f -- *.env *.geo *.rl
  # Re-create the inputs from their *.Z archives.  Only names that really end
  # in ".Z" are matched, so the output name always differs from the input.
  for f in *.Z; do
    [ -e "$f" ] || continue   # the glob matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)
|
|
|
|
# Water-n2
# Each benchmark runs in its own subshell so the directory change stays
# local; a failed cd stops that subshell instead of running the benchmark
# from the wrong directory.  test_input is set for the duration of run_test
# (presumably consumed by the measurement helpers -- confirm there).
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)
|
|
|
|
|
|
|
|
# Local variables:
|
|
# compile-command: "./run-splash2"
|
|
# End:
|