From 16448414985fef069e9ca7b1d08296cce1267f7c Mon Sep 17 00:00:00 2001
From: Petar Jovanovic
Date: Wed, 3 Apr 2013 22:51:23 +0000
Subject: [PATCH] mips: add mips64le support for atomic_incs and
 annotate_hbefore tests

Add mips64le implementations of:
- atomic_add_8bit
- atomic_add_16bit
- atomic_add_32bit
- atomic_add_64bit
- do_acasW

Minor fixes to the mips32 implementations are included as well.
These functions are needed to run the atomic_incs and annotate_hbefore
tests on mips64le.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13357
---
 helgrind/tests/annotate_hbefore.c |  40 +++++-
 memcheck/tests/atomic_incs.c      | 197 ++++++++++++++++++++++--------
 2 files changed, 184 insertions(+), 53 deletions(-)

diff --git a/helgrind/tests/annotate_hbefore.c b/helgrind/tests/annotate_hbefore.c
index cd58cef0f..3ddf3dc3e 100644
--- a/helgrind/tests/annotate_hbefore.c
+++ b/helgrind/tests/annotate_hbefore.c
@@ -188,7 +188,7 @@ UWord do_acasW(UWord* addr, UWord expected, UWord nyu )
 
 #elif defined(VGA_mips32)
 
-// mips
+// mips32
 /* return 1 if success, 0 if failure */
 UWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
 {
@@ -202,13 +202,47 @@ UWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
      "lw $t3, 4(%1)" "\n\t"
      "ll $t1, 0($t0)" "\n\t"
      "bne $t1, $t2, exit_0" "\n\t"
+     "nop" "\n\t"
      "sc $t3, 0($t0)" "\n\t"
      "move %0, $t3" "\n\t"
      "b exit" "\n\t"
      "nop" "\n\t"
      "exit_0:" "\n\t"
-     "move %0, $0" "\n\t"
-     "exit:" "\n\t"
+     "move %0, $zero" "\n\t"
+     "exit:" "\n\t"
+     : /*out*/ "=r"(success)
+     : /*in*/ "r"(&block[0])
+     : /*trash*/ "t0", "t1", "t2", "t3", "memory"
+  );
+
+  assert(success == 0 || success == 1);
+  return success;
+}
+
+#elif defined(VGA_mips64)
+
+// mips64
+/* return 1 if success, 0 if failure */
+UWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
+{
+  UWord old, success;
+  UWord block[3] = { (UWord)addr, nyu, expected};
+
+  __asm__ __volatile__(
+     ".set noreorder" "\n\t"
+     "ld $t0, 0(%1)" "\n\t"
+     "ld $t2, 16(%1)" "\n\t"
+     "ld $t3, 8(%1)" "\n\t"
+     "ll $t1, 0($t0)" "\n\t"
+     "bne $t1, $t2, exit_0" "\n\t"
+     "nop" "\n\t"
+     "sc $t3, 0($t0)" "\n\t"
+     "move %0, $t3" "\n\t"
+     "b exit" "\n\t"
+     "nop" "\n\t"
+     "exit_0:" "\n\t"
+     "move %0, $zero" "\n\t"
+     "exit:" "\n\t"
      : /*out*/ "=r"(success)
      : /*in*/ "r"(&block[0])
      : /*trash*/ "t0", "t1", "t2", "t3", "memory"
diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c
index ac1e775ca..ae1368cbc 100644
--- a/memcheck/tests/atomic_incs.c
+++ b/memcheck/tests/atomic_incs.c
@@ -111,44 +111,73 @@ __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
       : "d" (n)
       : "cc", "memory", "0", "1");
 #elif defined(VGA_mips32)
+   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
+      exception that can cause this function to fail. */
 #if defined (_MIPSEL)
    unsigned int block[3]
-      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+      = { (unsigned int)p, (unsigned int)n, 0x0 };
    do {
       __asm__ __volatile__(
-         "move $t0, %0" "\n\t"
-         "lw $t1, 0($t0)" "\n\t"       // p
-         "lw $t2, 4($t0)" "\n\t"       // n
-         "ll $t3, 0($t1)" "\n\t"
-         "addu $t3, $t3, $t2" "\n\t"
-         "andi $t3, $t3, 0xFF" "\n\t"
+         "move $t0, %0" "\n\t"
+         "lw $t1, 0($t0)" "\n\t"       // p
+         "lw $t2, 4($t0)" "\n\t"       // n
+         "andi $t2, $t2, 0xFF" "\n\t"  // n = n and 0xFF
+         "li $t4, 0xFF" "\n\t"
+         "nor $t4, $t4, $zero" "\n\t"  // $t4 = 0xFFFFFF00
+         "ll $t3, 0($t1)" "\n\t"       // $t3 = old value
+         "and $t4, $t4, $t3" "\n\t"    // $t4 = $t3 and 0xFFFFFF00
+         "addu $t3, $t3, $t2" "\n\t"   // $t3 = $t3 + n
+         "andi $t3, $t3, 0xFF" "\n\t"  // $t3 = $t3 and 0xFF
+         "or $t3, $t3, $t4" "\n\t"     // $t3 = $t3 or $t4
          "sc $t3, 0($t1)" "\n\t"
-         "sw $t3, 8($t0)" "\n\t"
+         "sw $t3, 8($t0)" "\n\t"       // save result
          : /*out*/
         : /*in*/ "r"(&block[0])
-         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
       );
    } while (block[2] != 1);
 #elif defined (_MIPSEB)
    unsigned int block[3]
-      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
    do {
       __asm__ __volatile__(
-         "move $t0, %0" "\n\t"
-         "lw $t1, 0($t0)" "\n\t"       // p
-         "lw $t2, 4($t0)" "\n\t"       // n
-         "li $t4, 0x000000FF" "\n\t"
-         "ll $t3, 0($t1)" "\n\t"
-         "addu $t3, $t3, $t2" "\n\t"
-         "and $t3, $t3, $t4" "\n\t"
-         "wsbh $t4, $t3" "\n\t"
-         "rotr $t4, $t4, 16" "\n\t"
-         "or $t3, $t4, $t3" "\n\t"
-         "sc $t3, 0($t1)" "\n\t"
-         "sw $t3, 8($t0)" "\n\t"
+         "move $t0, %0" "\n\t"
+         "lw $t1, 0($t0)" "\n\t"       // p
+         "lw $t2, 4($t0)" "\n\t"       // n
+         "ll $t3, 0($t1)" "\n\t"
+         "addu $t3, $t3, $t2" "\n\t"
+         "sc $t3, 0($t1)" "\n\t"
+         "sw $t3, 8($t0)" "\n\t"
          : /*out*/
         : /*in*/ "r"(&block[0])
-         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3", "t4"
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
       );
    } while (block[2] != 1);
+#endif
+#elif defined(VGA_mips64)
+   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
+      exception that can cause this function to fail. */
+#if defined (_MIPSEL)
+   unsigned long block[3]
+      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
+   do {
+      __asm__ __volatile__(
+         "move $t0, %0" "\n\t"
+         "ld $t1, 0($t0)" "\n\t"       // p
+         "ld $t2, 8($t0)" "\n\t"       // n
+         "andi $t2, $t2, 0xFF" "\n\t"  // n = n and 0xFF
+         "li $s0, 0xFF" "\n\t"
+         "nor $s0, $s0, $zero" "\n\t"  // $s0 = 0xFFFFFF00
+         "ll $t3, 0($t1)" "\n\t"       // $t3 = old value
+         "and $s0, $s0, $t3" "\n\t"    // $s0 = $t3 and 0xFFFFFF00
+         "addu $t3, $t3, $t2" "\n\t"   // $t3 = $t3 + n
+         "andi $t3, $t3, 0xFF" "\n\t"  // $t3 = $t3 and 0xFF
+         "or $t3, $t3, $s0" "\n\t"     // $t3 = $t3 or $s0
+         "sc $t3, 0($t1)" "\n\t"
+         "sw $t3, 16($t0)" "\n\t"      // save result
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
+      );
+   } while (block[2] != 1);
 #endif
@@ -246,39 +275,73 @@ __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
       : "d" (n)
       : "cc", "memory", "0", "1");
 #elif defined(VGA_mips32)
+   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
+      exception that can cause this function to fail. */
 #if defined (_MIPSEL)
    unsigned int block[3]
-      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+      = { (unsigned int)p, (unsigned int)n, 0x0 };
    do {
       __asm__ __volatile__(
-         "move $t0, %0" "\n\t"
-         "lw $t1, 0($t0)" "\n\t"         // p
-         "lw $t2, 4($t0)" "\n\t"         // n
-         "ll $t3, 0($t1)" "\n\t"
-         "addu $t3, $t3, $t2" "\n\t"
-         "andi $t3, $t3, 0xFFFF" "\n\t"
+         "move $t0, %0" "\n\t"
+         "lw $t1, 0($t0)" "\n\t"         // p
+         "lw $t2, 4($t0)" "\n\t"         // n
+         "andi $t2, $t2, 0xFFFF" "\n\t"  // n = n and 0xFFFF
+         "li $t4, 0xFFFF" "\n\t"
+         "nor $t4, $t4, $zero" "\n\t"    // $t4 = 0xFFFF0000
+         "ll $t3, 0($t1)" "\n\t"         // $t3 = old value
+         "and $t4, $t4, $t3" "\n\t"      // $t4 = $t3 and 0xFFFF0000
+         "addu $t3, $t3, $t2" "\n\t"     // $t3 = $t3 + n
+         "andi $t3, $t3, 0xFFFF" "\n\t"  // $t3 = $t3 and 0xFFFF
+         "or $t3, $t3, $t4" "\n\t"       // $t3 = $t3 or $t4
         "sc $t3, 0($t1)" "\n\t"
-         "sw $t3, 8($t0)" "\n\t"
+         "sw $t3, 8($t0)" "\n\t"         // save result
          : /*out*/
         : /*in*/ "r"(&block[0])
-         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
       );
    } while (block[2] != 1);
 #elif defined (_MIPSEB)
    unsigned int block[3]
-      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
    do {
       __asm__ __volatile__(
-         "move $t0, %0" "\n\t"
-         "lw $t1, 0($t0)" "\n\t"  // p
-         "li $t2, 32694" "\n\t"   // n
-         "li $t3, 0x1" "\n\t"
-         "sll $t2, $t2, 16" "\n\t"
-         "sw $t2, 0($t1)" "\n\t"
-         "sw $t3, 8($t0)" "\n\t"
+         "move $t0, %0" "\n\t"
+         "lw $t1, 0($t0)" "\n\t"  // p
+         "lw $t2, 4($t0)" "\n\t"  // n
+         "ll $t3, 0($t1)" "\n\t"
+         "addu $t3, $t3, $t2" "\n\t"
+         "sc $t3, 0($t1)" "\n\t"
+         "sw $t3, 8($t0)" "\n\t"
          : /*out*/
         : /*in*/ "r"(&block[0])
-         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
       );
    } while (block[2] != 1);
+#endif
+#elif defined(VGA_mips64)
+   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
+      exception that can cause this function to fail. */
+#if defined (_MIPSEL)
+   unsigned long block[3]
+      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
+   do {
+      __asm__ __volatile__(
+         "move $t0, %0" "\n\t"
+         "ld $t1, 0($t0)" "\n\t"         // p
+         "ld $t2, 8($t0)" "\n\t"         // n
+         "andi $t2, $t2, 0xFFFF" "\n\t"  // n = n and 0xFFFF
+         "li $s0, 0xFFFF" "\n\t"
+         "nor $s0, $s0, $zero" "\n\t"    // $s0 = 0xFFFF0000
+         "ll $t3, 0($t1)" "\n\t"         // $t3 = old value
+         "and $s0, $s0, $t3" "\n\t"      // $s0 = $t3 and 0xFFFF0000
+         "addu $t3, $t3, $t2" "\n\t"     // $t3 = $t3 + n
+         "andi $t3, $t3, 0xFFFF" "\n\t"  // $t3 = $t3 and 0xFFFF
+         "or $t3, $t3, $s0" "\n\t"       // $t3 = $t3 or $s0
+         "sc $t3, 0($t1)" "\n\t"
+         "sw $t3, 16($t0)" "\n\t"        // save result
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
+      );
+   } while (block[2] != 1);
 #endif
@@ -370,19 +433,36 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
       : "cc", "memory", "0", "1");
 #elif defined(VGA_mips32)
    unsigned int block[3]
-      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+      = { (unsigned int)p, (unsigned int)n, 0x0 };
    do {
       __asm__ __volatile__(
-         "move $t0, %0" "\n\t"
-         "lw $t1, 0($t0)" "\n\t"  // p
-         "lw $t2, 4($t0)" "\n\t"  // n
-         "ll $t3, 0($t1)" "\n\t"
-         "addu $t3, $t3, $t2" "\n\t"
-         "sc $t3, 0($t1)" "\n\t"
-         "sw $t3, 8($t0)" "\n\t"
+         "move $t0, %0" "\n\t"
+         "lw $t1, 0($t0)" "\n\t"  // p
+         "lw $t2, 4($t0)" "\n\t"  // n
+         "ll $t3, 0($t1)" "\n\t"
+         "addu $t3, $t3, $t2" "\n\t"
+         "sc $t3, 0($t1)" "\n\t"
+         "sw $t3, 8($t0)" "\n\t"
          : /*out*/
         : /*in*/ "r"(&block[0])
-         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
       );
    } while (block[2] != 1);
+#elif defined(VGA_mips64)
+   unsigned long block[3]
+      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
+   do {
+      __asm__ __volatile__(
+         "move $t0, %0" "\n\t"
+         "ld $t1, 0($t0)" "\n\t"  // p
+         "ld $t2, 8($t0)" "\n\t"  // n
+         "ll $t3, 0($t1)" "\n\t"
+         "addu $t3, $t3, $t2" "\n\t"
+         "sc $t3, 0($t1)" "\n\t"
+         "sd $t3, 16($t0)" "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
+      );
+   } while (block[2] != 1);
 #else
@@ -451,6 +531,23 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
       : "+m" (*p)
       : "d" (n)
       : "cc", "memory", "0", "1");
+#elif defined(VGA_mips64)
+   unsigned long block[3]
+      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
+   do {
+      __asm__ __volatile__(
+         "move $t0, %0" "\n\t"
+         "ld $t1, 0($t0)" "\n\t"  // p
+         "ld $t2, 8($t0)" "\n\t"  // n
+         "lld $t3, 0($t1)" "\n\t"
+         "daddu $t3, $t3, $t2" "\n\t"
+         "scd $t3, 0($t1)" "\n\t"
+         "sd $t3, 16($t0)" "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
+      );
+   } while (block[2] != 1);
 #else
 # error "Unsupported arch"
 #endif
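
A note on the pattern used throughout the patch: ll/sc (and the doubleword
forms lld/scd) only operate on naturally aligned 32-bit or 64-bit words, so
the 8-bit and 16-bit variants load the whole containing word, splice the
updated low byte or halfword back in (the big-endian mips32 variants instead
shift n into the high byte or halfword, hence the "n << 24" / "n << 16"
initializers), and then store with sc, whose 0/1 success flag lands in
block[2] via the final sw; the do { ... } while (block[2] != 1) loop retries
until the store-conditional succeeds. The standalone C program below is a
minimal sketch of the semantics these helpers are meant to have, not part of
the patch: it cross-checks the 8-bit wrap-around behaviour using the
GCC/Clang __atomic builtins, and the helper name atomic_add_8bit_ref is
invented here purely for illustration.

#include <assert.h>
#include <stdio.h>

/* Reference semantics for atomic_add_8bit: an atomic read-modify-write of
   one byte that wraps at 8 bits. Hypothetical helper, not from the patch. */
static void atomic_add_8bit_ref ( char* p, int n )
{
   (void) __atomic_fetch_add( (unsigned char*)p, (unsigned char)n,
                              __ATOMIC_SEQ_CST );
}

int main ( void )
{
   char x = 0;
   for (int i = 0; i < 300; i++)   /* 300 increments force an 8-bit wrap */
      atomic_add_8bit_ref(&x, 1);
   assert( (unsigned char)x == 300 % 256 );   /* 300 mod 256 == 44 */
   printf("8-bit atomic add wrapped to %d\n", (int)(unsigned char)x);
   return 0;
}

On MIPS a compiler typically expands such a builtin into an ll/sc masking
loop much like the hand-written ones above (or into a libatomic call); the
tests spell the sequence out in inline asm so that the exact instructions
Valgrind has to instrument stay fixed across compiler versions.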