#!/bin/bash

# Benchmark driver for the SPLASH-2 programs.
#
# Each benchmark is first run natively with 1, 2 and 4 threads through
# measure_runtime, and then under "$VG" with --tool=none, --tool=drd (with and
# without --check-stack-var) and --tool=helgrind through print_runtime_ratio.
#
# Environment:
#   VG  Valgrind launcher to use. Defaults to ../../vg-in-place relative to
#       the directory that contains this script.
#
# The helpers measure_runtime, avgstddev, print_runtime_ratio, get_cache_size
# and log2 come from the "measurement-functions" file that lives next to this
# script; their exact behavior is not documented here.

########################
# Function definitions #
########################

measurement_functions="$(dirname "$0")/measurement-functions"
if [ ! -r "${measurement_functions}" ]; then
  # Fail early: without the helpers every later step would only produce
  # "command not found" errors.
  echo "Error: cannot read ${measurement_functions}." >&2
  exit 1
fi
source "${measurement_functions}"

#######################################
# Decompress every *.Z file in the current directory into a file with the same
# name minus the .Z suffix, overwriting an existing copy.
# Arguments: none
#######################################
function decompress_inputs {
  local f

  for f in *.Z; do
    # An unmatched glob stays literal; skip it instead of handing it to gzip.
    [ -e "${f}" ] || continue
    gzip -cd < "${f}" > "${f%.Z}"
  done
}

#######################################
# Measure one benchmark natively and under several Valgrind tools.
# Globals (read):
#   psep       Separator between "-p" and the thread count. ' ' yields two
#              words ("-p 4"), empty/unset yields one word ("-p4").
#   test_args  Extra trailing argument for the benchmark. It is always passed,
#              so an empty/unset value results in an empty argument.
#   VG         Valgrind launcher.
# Arguments:
#   $1         Benchmark executable; also used as prefix for the .out files.
#   $2...      Benchmark options.
# Outputs:
#   One "Average time ... VSZ ..." line per native run (1, 2 and 4 threads),
#   followed by whatever print_runtime_ratio prints, followed by an empty line.
# Exits:
#   With status 1 if no temporary file can be created or if avgstddev
#   produced more than four fields.
#######################################
function run_test {
  local tmp p rest
  local rt_avg rt_stddev rt_vsz rt_vszdev
  local avg1 stddev1 vsz1 vszdev1
  local avg2 stddev2 vsz2 vszdev2
  local avg4 stddev4 vsz4 vszdev4

  # Unpredictable name instead of /tmp/test-timing.$$.
  tmp="$(mktemp "${TMPDIR:-/tmp}/test-timing.XXXXXX")" || {
    echo "Error: could not create a temporary file." >&2
    exit 1
  }

  # Native runs. The results are kept in avgN / stddevN / vszN / vszdevN,
  # where N is the thread count.
  for p in 1 2 4; do
    # -p${psep}${p} is intentionally unquoted: with psep=' ' it must be split
    # into the two words "-p" and "${p}".
    test_output="${1}-p${p}.out" \
      measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "${tmp}"
    read -r rt_avg rt_stddev rt_vsz rt_vszdev rest < "${tmp}"
    echo "Average time: ${rt_avg} +/- ${rt_stddev} seconds." \
         " VSZ: ${rt_vsz} +/- ${rt_vszdev} KB"
    if [ "${rest}" != "" ]; then
      echo "Internal error ($rest)"
      rm -f "${tmp}"
      exit 1
    fi
    printf -v "avg${p}" '%s' "${rt_avg}"
    printf -v "stddev${p}" '%s' "${rt_stddev}"
    printf -v "vsz${p}" '%s' "${rt_vsz}"
    printf -v "vszdev${p}" '%s' "${rt_vszdev}"
  done
  rm -f "${tmp}"

  # Valgrind runs. p is assigned in a statement of its own each time: bash
  # expands the words of a command before it applies prefix assignments, so a
  # "p=1 command ... ${p}" form would still expand the previous value of p.
  p=1
  test_output="/dev/null" \
    print_runtime_ratio "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  p=4
  test_output="/dev/null" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=helgrind "$@" -p${psep}${p} "${test_args}"

  echo ''
}


########################
# Script body          #
########################

# Absolute path of the directory that contains this script; needed because
# several benchmarks below change the working directory.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})."
  exit 1
fi

if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG."
  exit 1
fi

##########################################################################################################################
# Meaning of the different columns:
#  1. SPLASH2 test name.
#  2. Execution time in seconds for native run with argument -p1.
#  3. Virtual memory size in KB for the native run with argument -p1.
#  4. Execution time in seconds for native run with argument -p2.
#  5. Virtual memory size in KB for the native run with argument -p2.
#  6. Execution time in seconds for native run with argument -p4.
#  7. Virtual memory size in KB for the native run with argument -p4.
#  8. Execution time ratio for --tool=none -p1 versus -p1.
#  9. Virtual memory size ratio for --tool=none -p1 versus -p1.
# 10. Execution time ratio for --tool=none -p4 versus -p4.
# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 16. Execution time ratio for --tool=helgrind -p4 versus -p4.
# 17. Virtual memory size ratio for --tool=helgrind -p4 versus -p4.
# 18. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 19. Execution time ratio for Intel Thread Checker -p4+f versus -p4.
#     NOTE(review): this entry used to be a verbatim copy of item 18; the
#     table header labels the column "-p4+f", presumably ITC with the
#     filtering described in the paper cited below -- TODO confirm.
#
# Notes:
# - Both Helgrind and DRD use a granularity of one byte for data race detection.
# - Helgrind does detect data races on stack variables. DRD only detects
#   data races on stack variables with --check-stack-var=yes.
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS? Which
#   granularity does ITC use? And which m4 macros have been used by ITC as
#   implementation of the synchronization primitives?
#
# 1                     2      3    4      5    6      7     8     9    10   11    12   13   14   15   16   17   18    19
##########################################################################################################################
# Results:                native      native      native        none       none        DRD       DRD        HG  ITC   ITC
#                            -p1         -p2         -p4         -p1        -p4        -p4     -p4+f       -p4  -p4 -p4+f
# ........................................................................................................................
# Cholesky           0.09  12016 0.06  22016 0.55  42352  13.4  4.86   2.4 2.08    22 2.34   11 2.53   25 5.94  239    82
# FFT                0.02   6692 0.02  15912 0.02  31963  16.5  7.92  18.0 2.43   151 2.97   53 3.22  103 5.48   90    41
# LU, contiguous     0.07   4100 0.04  12304 0.04  28712  12.9 12.29  22.5 2.62   180 2.87   87 3.16  178 5.53  428   128
# Ocean, contiguous  0.22  16848 0.14  25384 0.15  42528   6.6  3.75  10.5 2.09   112 2.65   77 2.90  271 5.95   90    28
# Radix              0.20  15136 0.10  23336 0.13  40069  12.9  4.06  20.2 2.15    58 2.66   38 2.90   95 6.03  222    56
# Raytrace           0.69 207104 0.48 215296 0.48 232362   8.1  1.22  11.6 1.20   422 1.35   76 1.38  212 3.78  172    53
# Water-n2           0.16  10696 0.09  27072 0.11  59832  14.1  5.40  20.4 1.79  3220 2.98  256 3.04   94 3.54  189    39
# Water-sp           0.20   4444 0.10  13536 0.10  29928  10.5 11.42  21.3 2.52   399 3.03   53 3.44   93 4.76  183    34
# ........................................................................................................................
# geometric mean     0.14  13024 0.09  25883 0.12  47866  11.4  5.21  13.4 2.06   195 2.54   59 2.73  110 5.03  180    51
# ........................................................................................................................
# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.
# Software: openSUSE 11.0 (64-bit edition), runlevel 3, gcc 4.3.1, 32 bit SPLASH-2 executables, valgrind trunk r10380.
##########################################################################################################################

####
# Notes:
# - The ITC performance numbers in the above table originate from table 1 in
#   the following paper:
#   Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
#   Accurate and efficient filtering for the Intel thread checker race
#   detector, Proceedings of the 1st workshop on Architectural and system
#   support for improving software dependability, San Jose, California,
#   2006. Pages: 34 - 41.
# - The input parameters for benchmarks below originate from table 1 in the
#   following paper:
#   The SPLASH-2 programs: characterization and methodological considerations
#   Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
#   1995. Proceedings of the 22nd Annual International Symposium on Computer
#   Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
#   ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
####

cache_size=$(get_cache_size)
log2_cache_size=$(log2 "${cache_size}")

# Every benchmark that needs a specific working directory runs in a subshell,
# so the directory change and the exit on a failed cd stay local to it.

# Cholesky
(
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  decompress_inputs
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)

# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test "${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU" -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test "${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN" -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024

# Raytrace
(
  # The cd must succeed before anything gets deleted: otherwise the rm below
  # would remove files from whatever directory the script was started in.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  rm -f *.env *.geo *.rl
  decompress_inputs
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)

# Water-n2
# NOTE(review): test_input is presumably picked up by measure_runtime as the
# input of the benchmark; it is not referenced in this file -- confirm in
# measurement-functions.
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input=${DRD_SCRIPTS_DIR}/run-splash2-water-input psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input=${DRD_SCRIPTS_DIR}/run-splash2-water-input psep=' ' run_test ./WATER-SPATIAL
)



# Local variables:
# compile-command: "./run-splash2"
# End: