/* Mirror of https://github.com/Zenithsiz/ftmemsim-valgrind.git
   (synced 2026-02-12 06:11:37 +00:00).  660 lines, 22 KiB, C. */
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
#include "tests/malloc.h"
|
|
|
|
/* Short aliases for the scalar types used throughout this file. */
typedef unsigned char      UChar;
typedef unsigned int       UInt;
typedef unsigned long      UWord;
typedef unsigned long long ULong;
typedef double             Double;
typedef float              Float;

/* Non-zero iff the low five bits of the address are clear, i.e. the
   pointer is a multiple of 32. */
#define IS_32_ALIGNED(PTR) ((((UWord)(PTR)) & 0x1F) == 0)

/* One 16-byte vector, viewable as bytes, 32-bit words, floats or doubles. */
typedef union {
   UChar  u8[16];
   UInt   u32[4];
   Float  f32[4];
   Double f64[2];
} XMM;

/* One 32-byte vector, viewable as bytes, 32-bit words or two XMM halves. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
   XMM   xmm[2];
} YMM;

/* Working set for one test: four register images followed by one
   memory operand image. */
typedef struct {
   YMM r1;
   YMM r2;
   YMM r3;
   YMM r4;
   YMM m;
} Block;
|
|
|
|
/* Print 32-bit lane idx of *vec.  A normal value is dumped as the raw
   hex of its four bytes, highest-addressed byte first; every other
   class is printed as a bracketed tag, with the sign where one is
   shown.
   NOTE(review): the tags look meant to line up with the 8 hex digits;
   check the exact spacing against the expected-output file. */
void showFloat ( XMM* vec, int idx )
{
   const Float value = vec->f32[idx];
   const char  sign  = signbit(value) ? '-' : ' ';
   const int   cls   = fpclassify(value);

   if (cls == FP_NORMAL) {
      const UChar* lane = &vec->u8[idx * 4];
      for (int k = 3; k >= 0; k--)
         printf("%02x", (UInt)lane[k]);
   } else if (cls == FP_INFINITE) {
      printf ("[ %cINF ]", sign);
   } else if (cls == FP_ZERO) {
      printf ("[%cZERO ]", sign);
   } else if (cls == FP_NAN) {
      /* The sign of a NaN is deliberately not shown. */
      printf ("[ NAN ]");
   } else {
      /* Anything left over is reported as subnormal. */
      printf ("[%cSUBNR]", sign);
   }
}
|
|
|
|
/* Print 64-bit lane idx of *vec.  A normal value is dumped as the raw
   hex of its eight bytes, highest-addressed byte first; every other
   class is printed as a bracketed tag, with the sign where one is
   shown.
   NOTE(review): the tags look meant to line up with the 16 hex digits;
   check the exact spacing against the expected-output file. */
void showDouble ( XMM* vec, int idx )
{
   const Double value = vec->f64[idx];
   const char   sign  = signbit(value) ? '-' : ' ';
   const int    cls   = fpclassify(value);

   if (cls == FP_NORMAL) {
      const UChar* lane = &vec->u8[idx * 8];
      for (int k = 7; k >= 0; k--)
         printf("%02x", (UInt)lane[k]);
   } else if (cls == FP_INFINITE) {
      printf ("[ %cINF ]", sign);
   } else if (cls == FP_ZERO) {
      printf ("[ %cZERO ]", sign);
   } else if (cls == FP_NAN) {
      /* The sign of a NaN is deliberately not shown. */
      printf ("[ NAN ]");
   } else {
      /* Anything left over is reported as subnormal. */
      printf ("[ %cSUBNORMAL ]", sign);
   }
}
|
|
|
|
/* Print every lane of *vec, highest lane index first, separated by
   '.'.  isDouble selects two 64-bit lanes; otherwise four 32-bit
   lanes are printed. */
void showXMM ( XMM* vec, int isDouble )
{
   const int nLanes = isDouble ? 2 : 4;

   for (int lane = nLanes - 1; lane >= 0; lane--) {
      if (isDouble)
         showDouble ( vec, lane );
      else
         showFloat ( vec, lane );
      if (lane > 0)
         printf (".");
   }
}
|
|
|
|
/* Print a 32-byte vector as its upper XMM half, a '.', then its lower
   half.  The vector must be 32-byte aligned (checked with assert). */
void showYMM ( YMM* vec, int isDouble )
{
   assert(IS_32_ALIGNED(vec));

   XMM* upper = &vec->xmm[1];
   XMM* lower = &vec->xmm[0];

   showXMM ( upper, isDouble );
   printf(".");
   showXMM ( lower, isDouble );
}
|
|
|
|
/* Dump a whole Block: a heading line containing msg, then one labelled
   line for each of r1, r2, r3, r4 and m, in that order. */
void showBlock ( char* msg, Block* block, int isDouble )
{
   const char* const labels[5] = { "r1: ", "r2: ", "r3: ", "r4: ", " m: " };
   YMM* const fields[5] = {
      &block->r1, &block->r2, &block->r3, &block->r4, &block->m
   };

   printf(" %s\n", msg);
   for (int k = 0; k < 5; k++) {
      printf("%s", labels[k]);
      showYMM(fields[k], isDouble);
      printf("\n");
   }
}
|
|
|
|
/* Table of special doubles, filled by init_special_values() in the
   order +0, -0, +1, -1, +Inf, -Inf, +NaN, -NaN, +denormal, -denormal,
   and cycled through by specialFBlock()/specialDBlock(). */
static Double special_values[10];
|
|
|
|
static __attribute__((noinline))
|
|
Double negate ( Double d ) { return -d; }
|
|
static __attribute__((noinline))
|
|
Double divf64 ( Double x, Double y ) { return x/y; }
|
|
|
|
static __attribute__((noinline))
|
|
Double plusZero ( void ) { return 0.0; }
|
|
static __attribute__((noinline))
|
|
Double minusZero ( void ) { return negate(plusZero()); }
|
|
|
|
static __attribute__((noinline))
|
|
Double plusOne ( void ) { return 1.0; }
|
|
static __attribute__((noinline))
|
|
Double minusOne ( void ) { return negate(plusOne()); }
|
|
|
|
static __attribute__((noinline))
|
|
Double plusInf ( void ) { return 1.0 / 0.0; }
|
|
static __attribute__((noinline))
|
|
Double minusInf ( void ) { return negate(plusInf()); }
|
|
|
|
static __attribute__((noinline))
|
|
Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); }
|
|
static __attribute__((noinline))
|
|
Double minusNaN ( void ) { return negate(plusNaN()); }
|
|
|
|
static __attribute__((noinline))
|
|
Double plusDenorm ( void ) { return 1.23e-315 / 1e3; }
|
|
static __attribute__((noinline))
|
|
Double minusDenorm ( void ) { return negate(plusDenorm()); }
|
|
|
|
static void init_special_values ( void )
|
|
{
|
|
special_values[0] = plusZero();
|
|
special_values[1] = minusZero();
|
|
special_values[2] = plusOne();
|
|
special_values[3] = minusOne();
|
|
special_values[4] = plusInf();
|
|
special_values[5] = minusInf();
|
|
special_values[6] = plusNaN();
|
|
special_values[7] = minusNaN();
|
|
special_values[8] = plusDenorm();
|
|
special_values[9] = minusDenorm();
|
|
}
|
|
|
|
/* Fill every Float-sized slot of *b with the special values, cycling
   through special_values[] and narrowing each entry to Float.
   init_special_values() must have run first.
   The index is a size_t so it is compared against the sizeof-derived
   bound without a signed/unsigned mismatch, and the cycle length is
   taken from the table itself rather than repeated as a literal.
   NOTE(review): the table's denormal entries (about 1.23e-318) are
   below the smallest binary32 subnormal, so with IEEE-754 narrowing
   they become signed zeros here; this block then holds no Float
   denormals.  Left as is because changing it would alter the printed
   output. */
void specialFBlock ( Block* b )
{
   const size_t nSpecials = sizeof(special_values) / sizeof(special_values[0]);
   const size_t nSlots    = sizeof(Block) / sizeof(Float);
   Float* p = (Float*)b;

   for (size_t i = 0; i < nSlots; i++)
      p[i] = (Float) special_values[i % nSpecials];
}
|
|
|
|
/* Fill every Double-sized slot of *b with the special values, cycling
   through special_values[].  init_special_values() must have run
   first.
   The index is a size_t so it is compared against the sizeof-derived
   bound without a signed/unsigned mismatch, and the cycle length is
   taken from the table itself rather than repeated as a literal. */
void specialDBlock ( Block* b )
{
   const size_t nSpecials = sizeof(special_values) / sizeof(special_values[0]);
   const size_t nSlots    = sizeof(Block) / sizeof(Double);
   Double* p = (Double*)b;

   for (size_t i = 0; i < nSlots; i++)
      p[i] = special_values[i % nSpecials];
}
|
|
|
|
UChar randUChar ( void )
|
|
{
|
|
static UInt seed = 80021;
|
|
seed = 1103515245 * seed + 12345;
|
|
return (seed >> 17) & 0xFF;
|
|
}
|
|
|
|
/* Overwrite every byte of *b with successive randUChar() values.
   The index is a size_t so the comparison against sizeof(Block) does
   not mix signed and unsigned operands. */
void randBlock ( Block* b )
{
   UChar* p = (UChar*)b;

   for (size_t i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}
|
|
|
|
/* Set every byte of *b to 1.
   The index is a size_t so the comparison against sizeof(Block) does
   not mix signed and unsigned operands. */
void oneBlock ( Block* b )
{
   UChar* p = (UChar*)b;

   for (size_t i = 0; i < sizeof(Block); i++)
      p[i] = 1;
}
|
|
|
|
/* GEN_test(TNAME, INSN, ISDBL) defines
      void test_TNAME ( const char *n, Block* b )
   which prints "TNAME n", dumps *b, executes INSN once and dumps *b
   again, followed by a blank line.

   State visible to INSN (byte offsets from b, following Block's field
   order r1, r2, r3, r4, m):
      ymm7 <- 0(b)    ymm8 <- 32(b)    ymm6 <- 64(b)    ymm9 <- 96(b)
      r14  =  b + 128 (address of the memory operand image)
   After INSN the four registers are stored back to the same offsets,
   so the "after" dump shows whatever INSN changed.

   b must be 32-byte aligned: vmovdqa is the aligned move, and
   showYMM() asserts the alignment as well.  ISDBL is only forwarded to
   showBlock() to choose the dump format.
   NOTE(review): the clobber list names xmm6..xmm9 although the code
   writes the full ymm registers; confirm this is sufficient for every
   compiler the test is built with. */
#define GEN_test(TNAME, INSN, ISDBL) \
   __attribute__ ((noinline)) void \
   test_##TNAME ( const char *n, Block* b) \
   { \
      printf("%s %s\n", #TNAME, n); \
      showBlock("before", b, ISDBL); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7\n\t" \
         "vmovdqa 32(%0),%%ymm8\n\t" \
         "vmovdqa 64(%0),%%ymm6\n\t" \
         "vmovdqa 96(%0),%%ymm9\n\t" \
         "leaq 128(%0),%%r14\n\t" \
         INSN "\n\t" \
         "vmovdqa %%ymm7, 0(%0)\n\t" \
         "vmovdqa %%ymm8, 32(%0)\n\t" \
         "vmovdqa %%ymm6, 64(%0)\n\t" \
         "vmovdqa %%ymm9, 96(%0)\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b, ISDBL); \
      printf("\n"); \
   }
|
|
|
|
/* All these defines do the same thing (and someone with stronger
   preprocessor-fu could probably express them much more compactly).
   Each one generates 4 different functions to test 4 variants of an
   fma4 instruction: one whose 4 operands are all registers, one
   where the output register is also one of the input registers, and
   two versions where a different input is a memory location.
   The xmm variants exercise the 128-bit forms, the ymm variants the
   256-bit forms. */
|
|
|
|
#define GEN_test_VFMADDPD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFMADDPD_xmm(VFMADDPD)
|
|
|
|
#define GEN_test_VFMADDPD_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
|
|
GEN_test_VFMADDPD_ymm(VFMADDPD)
|
|
|
|
#define GEN_test_VFMADDPS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFMADDPS_xmm(VFMADDPS)
|
|
|
|
#define GEN_test_VFMADDPS_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
|
|
GEN_test_VFMADDPS_ymm(VFMADDPS)
|
|
|
|
#define GEN_test_VFMADDSD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFMADDSD_xmm(VFMADDSD)
|
|
|
|
#define GEN_test_VFMADDSS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFMADDSS_xmm(VFMADDSS)
|
|
|
|
#define GEN_test_VFMADDSUBPD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD)
|
|
|
|
#define GEN_test_VFMADDSUBPD_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
|
|
GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD)
|
|
|
|
#define GEN_test_VFMADDSUBPS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS)
|
|
|
|
#define GEN_test_VFMADDSUBPS_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
|
|
GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS)
|
|
|
|
#define GEN_test_VFMSUBADDPD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD)
|
|
|
|
#define GEN_test_VFMSUBADDPD_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
|
|
GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD)
|
|
|
|
#define GEN_test_VFMSUBADDPS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS)
|
|
|
|
#define GEN_test_VFMSUBADDPS_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
|
|
GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS)
|
|
|
|
#define GEN_test_VFMSUBPD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFMSUBPD_xmm(VFMSUBPD)
|
|
|
|
#define GEN_test_VFMSUBPD_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
|
|
GEN_test_VFMSUBPD_ymm(VFMSUBPD)
|
|
|
|
#define GEN_test_VFMSUBPS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFMSUBPS_xmm(VFMSUBPS)
|
|
|
|
#define GEN_test_VFMSUBPS_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
|
|
GEN_test_VFMSUBPS_ymm(VFMSUBPS)
|
|
|
|
#define GEN_test_VFMSUBSD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFMSUBSD_xmm(VFMSUBSD)
|
|
|
|
#define GEN_test_VFMSUBSS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFMSUBSS_xmm(VFMSUBSS)
|
|
|
|
#define GEN_test_VFNMADDPD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFNMADDPD_xmm(VFNMADDPD)
|
|
|
|
#define GEN_test_VFNMADDPD_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
|
|
GEN_test_VFNMADDPD_ymm(VFNMADDPD)
|
|
|
|
#define GEN_test_VFNMADDPS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFNMADDPS_xmm(VFNMADDPS)
|
|
|
|
#define GEN_test_VFNMADDPS_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
|
|
GEN_test_VFNMADDPS_ymm(VFNMADDPS)
|
|
|
|
#define GEN_test_VFNMADDSD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFNMADDSD_xmm(VFNMADDSD)
|
|
|
|
#define GEN_test_VFNMADDSS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFNMADDSS_xmm(VFNMADDSS)
|
|
|
|
#define GEN_test_VFNMSUBPD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFNMSUBPD_xmm(VFNMSUBPD)
|
|
|
|
#define GEN_test_VFNMSUBPD_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
|
|
GEN_test_VFNMSUBPD_ymm(VFNMSUBPD)
|
|
|
|
#define GEN_test_VFNMSUBPS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFNMSUBPS_xmm(VFNMSUBPS)
|
|
|
|
#define GEN_test_VFNMSUBPS_ymm(_name) \
|
|
GEN_test(_name##_ymm, \
|
|
"vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_src_dst, \
|
|
"vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem1, \
|
|
"vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
|
|
GEN_test(_name##_ymm_mem2, \
|
|
"vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
|
|
GEN_test_VFNMSUBPS_ymm(VFNMSUBPS)
|
|
|
|
#define GEN_test_VFNMSUBSD_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
|
|
GEN_test_VFNMSUBSD_xmm(VFNMSUBSD)
|
|
|
|
#define GEN_test_VFNMSUBSS_xmm(_name) \
|
|
GEN_test(_name##_xmm, \
|
|
"vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_src_dst, \
|
|
"vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem1, \
|
|
"vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
|
|
GEN_test(_name##_xmm_mem2, \
|
|
"vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
|
|
GEN_test_VFNMSUBSS_xmm(VFNMSUBSS)
|
|
|
|
/* Call the generated function test_INSN_VARIANT, passing LABEL (the
   text naming the input data set) and BLK (the Block to operate on).
   The expansion already ends in ';'. */
#define DO_test_block(INSN, VARIANT, LABEL, BLK) \
   test_##INSN##_##VARIANT(LABEL, BLK);
|
|
|
|
/* Run one generated test function against three data sets, in this
   order: a block whose bytes are all 1 ("ones"), a block of special
   floating-point values ("specialD" when IS_DOUBLE is non-zero,
   "specialF" otherwise), and a block of pseudo-random bytes ("rand").
   The block is allocated with memalign32() and released with free().
   NOTE(review): the allocation result is used unchecked; presumably
   memalign32 (tests/malloc.h) does not return on failure -- confirm. */
#define DO_test(INSN, VARIANT, IS_DOUBLE) { \
   Block* blk = memalign32(sizeof *blk); \
   oneBlock(blk); \
   DO_test_block(INSN, VARIANT, "ones", blk); \
   if (!(IS_DOUBLE)) { \
      specialFBlock(blk); \
      DO_test_block(INSN, VARIANT, "specialF", blk); \
   } else { \
      specialDBlock(blk); \
      DO_test_block(INSN, VARIANT, "specialD", blk); \
   } \
   randBlock(blk); \
   DO_test_block(INSN, VARIANT, "rand", blk); \
   free(blk); \
}
|
|
|
|
/* Run all four xmm variants of one instruction, in the order
   all-register, source-is-destination, memory operand 1, memory
   operand 2. */
#define DO_tests_xmm(INSN, IS_DOUBLE) \
   DO_test(INSN, xmm,         IS_DOUBLE); \
   DO_test(INSN, xmm_src_dst, IS_DOUBLE); \
   DO_test(INSN, xmm_mem1,    IS_DOUBLE); \
   DO_test(INSN, xmm_mem2,    IS_DOUBLE);
|
|
|
|
/* Run all four ymm variants of one instruction, in the order
   all-register, source-is-destination, memory operand 1, memory
   operand 2. */
#define DO_tests_ymm(INSN, IS_DOUBLE) \
   DO_test(INSN, ymm,         IS_DOUBLE); \
   DO_test(INSN, ymm_src_dst, IS_DOUBLE); \
   DO_test(INSN, ymm_mem1,    IS_DOUBLE); \
   DO_test(INSN, ymm_mem2,    IS_DOUBLE);
|
|
|
|
int main ( void )
|
|
{
|
|
init_special_values();
|
|
|
|
// 128
|
|
DO_tests_xmm(VFMADDPD, 1);
|
|
DO_tests_xmm(VFMADDPS, 0);
|
|
DO_tests_xmm(VFMADDSD, 1);
|
|
DO_tests_xmm(VFMADDSS, 0);
|
|
DO_tests_xmm(VFMADDSUBPD, 1);
|
|
DO_tests_xmm(VFMADDSUBPS, 0);
|
|
DO_tests_xmm(VFMSUBADDPD, 1);
|
|
DO_tests_xmm(VFMSUBADDPS, 0);
|
|
DO_tests_xmm(VFMSUBPD, 1);
|
|
DO_tests_xmm(VFMSUBPS, 0);
|
|
DO_tests_xmm(VFMSUBSD, 1);
|
|
DO_tests_xmm(VFMSUBSS, 0);
|
|
DO_tests_xmm(VFNMADDPD, 1);
|
|
DO_tests_xmm(VFNMADDPS, 0);
|
|
DO_tests_xmm(VFNMADDSD, 1);
|
|
DO_tests_xmm(VFNMADDSS, 0);
|
|
DO_tests_xmm(VFNMSUBPD, 1);
|
|
DO_tests_xmm(VFNMSUBPS, 0);
|
|
DO_tests_xmm(VFNMSUBSD, 1);
|
|
DO_tests_xmm(VFNMSUBSS, 0);
|
|
|
|
// 256
|
|
/*
|
|
DO_tests_ymm(VFMADDPD, 1);
|
|
DO_tests_ymm(VFMADDPS, 0);
|
|
DO_tests_ymm(VFMADDSUBPD, 1);
|
|
DO_tests_ymm(VFMADDSUBPS, 0);
|
|
DO_tests_ymm(VFMSUBADDPD, 1);
|
|
DO_tests_ymm(VFMSUBADDPS, 0);
|
|
DO_tests_ymm(VFMSUBPD, 1);
|
|
DO_tests_ymm(VFMSUBPS, 0);
|
|
DO_tests_ymm(VFNMADDPD, 1);
|
|
DO_tests_ymm(VFNMADDPS, 0);
|
|
DO_tests_ymm(VFNMSUBPD, 1);
|
|
DO_tests_ymm(VFNMSUBPS, 0);
|
|
*/
|
|
|
|
return 0;
|
|
}
|