mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 02:18:37 +00:00
Bug 414268 - Enable AArch64 feature detection and decoding for v8.x instructions (where x>0).
Patch from Assad Hashmi <assad.hashmi@linaro.org>.
This commit is contained in:
parent
383e463e1a
commit
cb52fee5dd
@ -1765,11 +1765,36 @@ static const HChar* show_hwcaps_arm ( UInt hwcaps )
|
||||
|
||||
/* Build a printable description of an ARM64 hwcaps word.

   The result is the baseline prefix "v8" followed by "-<name>" for every
   VEX_HWCAPS_ARM64_* bit that is set in HWCAPS, in the order of the table
   below.  A HWCAPS of zero therefore yields plain "v8".

   The returned pointer refers to a function-local static buffer, so the
   text is overwritten by the next call to this function. */
static const HChar* show_hwcaps_arm64 ( UInt hwcaps )
{
   static const HChar prefix[] = "v8";
   static const struct {
      UInt  hwcaps_bit;
      HChar name[16];
   } hwcaps_list[] = {
      { VEX_HWCAPS_ARM64_FHM,       "fhm"      },
      { VEX_HWCAPS_ARM64_DPBCVAP,   "dpbcvap"  },
      { VEX_HWCAPS_ARM64_DPBCVADP,  "dpbcvadp" },
      { VEX_HWCAPS_ARM64_SM3,       "sm3"      },
      { VEX_HWCAPS_ARM64_SM4,       "sm4"      },
      { VEX_HWCAPS_ARM64_SHA3,      "sha3"     },
      { VEX_HWCAPS_ARM64_RDM,       "rdm"      },
      { VEX_HWCAPS_ARM64_I8MM,      "i8mm"     },
      { VEX_HWCAPS_ARM64_ATOMICS,   "atomics"  },
      { VEX_HWCAPS_ARM64_BF16,      "bf16"     },
      { VEX_HWCAPS_ARM64_FP16,      "fp16"     },
      { VEX_HWCAPS_ARM64_VFP16,     "vfp16"    },
   };

   /* Sized for the worst case: the prefix including its '\0', plus one
      '-' separator and one full-width name per table entry.
      NOTE(review): NUM_HWCAPS is defined outside this block; presumably
      it expands to the element count of hwcaps_list -- confirm. */
   static HChar buf[sizeof prefix +                       // '\0'
                    NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) + 1];

   /* p is advanced by the value vex_sprintf returns after each piece. */
   HChar *p = buf + vex_sprintf(buf, "%s", prefix);
   UInt i;
   for (i = 0 ; i < NUM_HWCAPS; ++i) {
      if (hwcaps & hwcaps_list[i].hwcaps_bit)
         p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name);
   }

   return buf;
}
|
||||
|
||||
static const HChar* show_hwcaps_s390x ( UInt hwcaps )
|
||||
@ -2130,11 +2155,20 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps )
|
||||
}
|
||||
}
|
||||
|
||||
case VexArchARM64:
|
||||
if (hwcaps != 0)
|
||||
case VexArchARM64: {
|
||||
/* Mandatory dependencies. */
|
||||
Bool have_fp16 = ((hwcaps & VEX_HWCAPS_ARM64_FP16) != 0);
|
||||
Bool have_vfp16 = ((hwcaps & VEX_HWCAPS_ARM64_VFP16) != 0);
|
||||
if (have_fp16 != have_vfp16)
|
||||
invalid_hwcaps(arch, hwcaps,
|
||||
"Unsupported hardware capabilities.\n");
|
||||
"Mismatch detected between scalar and vector FP16 features.\n");
|
||||
Bool have_rdm = ((hwcaps & VEX_HWCAPS_ARM64_RDM) != 0);
|
||||
Bool have_atomics = ((hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0);
|
||||
if (have_rdm != have_atomics)
|
||||
invalid_hwcaps(arch, hwcaps,
|
||||
"Mismatch detected between RDMA and atomics features.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
case VexArchS390X:
|
||||
if (! s390_host_has_ldisp)
|
||||
|
||||
@ -207,7 +207,18 @@ typedef
|
||||
#define VEX_ARM_ARCHLEVEL(x) ((x) & 0x3f)
|
||||
|
||||
/* ARM64: baseline capability is AArch64 v8. */
|
||||
/* (no definitions since no variants so far) */
|
||||
#define VEX_HWCAPS_ARM64_FHM (1 << 4)
|
||||
#define VEX_HWCAPS_ARM64_DPBCVAP (1 << 5)
|
||||
#define VEX_HWCAPS_ARM64_DPBCVADP (1 << 6)
|
||||
#define VEX_HWCAPS_ARM64_SM3 (1 << 7)
|
||||
#define VEX_HWCAPS_ARM64_SM4 (1 << 8)
|
||||
#define VEX_HWCAPS_ARM64_SHA3 (1 << 9)
|
||||
#define VEX_HWCAPS_ARM64_RDM (1 << 10)
|
||||
#define VEX_HWCAPS_ARM64_ATOMICS (1 << 11)
|
||||
#define VEX_HWCAPS_ARM64_I8MM (1 << 12)
|
||||
#define VEX_HWCAPS_ARM64_BF16 (1 << 13)
|
||||
#define VEX_HWCAPS_ARM64_FP16 (1 << 14)
|
||||
#define VEX_HWCAPS_ARM64_VFP16 (1 << 15)
|
||||
|
||||
/* MIPS baseline capability */
|
||||
/* Assigned Company values for bits 23:16 of the PRId Register
|
||||
|
||||
@ -478,7 +478,7 @@ Int VG_(machine_arm_archlevel) = 4;
|
||||
testing, so we need a VG_MINIMAL_JMP_BUF. */
|
||||
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
|
||||
|| defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
|
||||
|| defined(VGA_mips64)
|
||||
|| defined(VGA_mips64) || defined(VGA_arm64)
|
||||
#include "pub_core_libcsetjmp.h"
|
||||
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
|
||||
static void handler_unsup_insn ( Int x ) {
|
||||
@ -1719,10 +1719,84 @@ Bool VG_(machine_get_hwcaps)( void )
|
||||
|
||||
#elif defined(VGA_arm64)
|
||||
{
|
||||
/* Use the attribute and feature registers to determine host hardware
|
||||
* capabilities. Only user-space features are read. Naming conventions
|
||||
* follow the Arm Architecture Reference Manual.
|
||||
*
|
||||
* ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
|
||||
* ----------------
|
||||
* ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
|
||||
* ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
|
||||
* FHM DP SM4 SM3 SHA3 RDM ATOMICS
|
||||
*
|
||||
* ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
|
||||
* ----------------
|
||||
* ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
|
||||
* ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
|
||||
* ...I8MM BF16 DPB
|
||||
*
|
||||
* ID_AA64PFR0_EL1 Processor Feature Register 0
|
||||
* ---------------
|
||||
* 6666...2222 2222 1111 1111 11
|
||||
* 3210...7654 3210 9876 5432 1098 7654 3210
|
||||
* ASIMD FP16
|
||||
*/
|
||||
|
||||
Bool is_base_v8 = False;
|
||||
|
||||
Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
|
||||
Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
|
||||
Bool have_vfp16, have_fp16;
|
||||
|
||||
have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
|
||||
= have_atomics = have_i8mm = have_bf16 = have_dpbcvap
|
||||
= have_dpbcvadp = have_vfp16 = have_fp16 = False;
|
||||
|
||||
/* Some baseline v8.0 kernels do not allow reads of these registers. Use
|
||||
* the same SIGILL handling algorithm as other architectures for such
|
||||
* kernels.
|
||||
*/
|
||||
vki_sigset_t saved_set, tmp_set;
|
||||
vki_sigaction_fromK_t saved_sigill_act;
|
||||
vki_sigaction_toK_t tmp_sigill_act;
|
||||
|
||||
vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
|
||||
|
||||
VG_(sigemptyset)(&tmp_set);
|
||||
VG_(sigaddset)(&tmp_set, VKI_SIGILL);
|
||||
|
||||
Int r;
|
||||
|
||||
r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
|
||||
vg_assert(r == 0);
|
||||
|
||||
r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
|
||||
vg_assert(r == 0);
|
||||
tmp_sigill_act = saved_sigill_act;
|
||||
|
||||
/* NODEFER: signal handler does not return (from the kernel's point of
|
||||
view), hence if it is to successfully catch a signal more than once,
|
||||
we need the NODEFER flag. */
|
||||
tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
|
||||
tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
|
||||
tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
|
||||
tmp_sigill_act.ksa_handler = handler_unsup_insn;
|
||||
VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
|
||||
|
||||
/* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
|
||||
if (VG_MINIMAL_SETJMP(env_unsup_insn))
|
||||
is_base_v8 = True;
|
||||
else
|
||||
__asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1");
|
||||
|
||||
VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
|
||||
VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
|
||||
VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
|
||||
|
||||
va = VexArchARM64;
|
||||
vai.endness = VexEndnessLE;
|
||||
|
||||
/* So far there are no variants. */
|
||||
/* Baseline features are v8.0. */
|
||||
vai.hwcaps = 0;
|
||||
|
||||
VG_(machine_get_cache_info)(&vai);
|
||||
@ -1747,6 +1821,162 @@ Bool VG_(machine_get_hwcaps)( void )
|
||||
VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
|
||||
vai.arm64_requires_fallback_LLSC ? "yes" : "no");
|
||||
|
||||
if (is_base_v8)
|
||||
return True;
|
||||
|
||||
/* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
|
||||
#define ID_AA64ISAR0_FHM_SHIFT 48
|
||||
#define ID_AA64ISAR0_DP_SHIFT 44
|
||||
#define ID_AA64ISAR0_SM4_SHIFT 40
|
||||
#define ID_AA64ISAR0_SM3_SHIFT 36
|
||||
#define ID_AA64ISAR0_SHA3_SHIFT 32
|
||||
#define ID_AA64ISAR0_RDM_SHIFT 28
|
||||
#define ID_AA64ISAR0_ATOMICS_SHIFT 20
|
||||
/* Field values */
|
||||
#define ID_AA64ISAR0_FHM_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR0_DP_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR0_SM4_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR0_SM3_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR0_SHA3_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR0_RDM_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2
|
||||
|
||||
/* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
|
||||
#define ID_AA64ISAR1_I8MM_SHIFT 52
|
||||
#define ID_AA64ISAR1_BF16_SHIFT 44
|
||||
#define ID_AA64ISAR1_DPB_SHIFT 0
|
||||
/* Field values */
|
||||
#define ID_AA64ISAR1_I8MM_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR1_BF16_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR1_DPBCVAP_SUPPORTED 0x1
|
||||
#define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
|
||||
|
||||
/* ID_AA64PFR0_EL1 Processor feature register 0 fields */
|
||||
#define ID_AA64PFR0_VFP16_SHIFT 20
|
||||
#define ID_AA64PFR0_FP16_SHIFT 16
|
||||
/* Field values */
|
||||
#define ID_AA64PFR0_VFP16_SUPPORTED 0x1
|
||||
#define ID_AA64PFR0_FP16_SUPPORTED 0x1
|
||||
|
||||
/* Read the system register named ID with an MRS instruction and report
   its raw value through VG_(debugLog).  ID is pasted textually into both
   the instruction and the log line, so it must be a bare register name. */
#define get_cpu_ftr(id) ({                                                 \
         unsigned long reg_value;                                          \
         __asm__("mrs %0, " #id : "=r" (reg_value));                       \
         VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n",           \
                       #id, reg_value);                                    \
      })
|
||||
get_cpu_ftr(ID_AA64ISAR0_EL1);
|
||||
get_cpu_ftr(ID_AA64ISAR1_EL1);
|
||||
get_cpu_ftr(ID_AA64PFR0_EL1);
|
||||
|
||||
/* True iff the 4-bit field of RVAL that starts at bit SHIFT holds a value
   of at least FVAL.  Kept separate from the register read so the
   comparison itself is pure.

   The fields are compared as unsigned numbers rather than bit-masked:
   e.g. a DPB field of 2 must still satisfy a request for level 1, which
   an "(fval & field) >= fval" test would reject.
   NOTE(review): this treats every field as unsigned.  The FP and AdvSIMD
   fields of ID_AA64PFR0_EL1 are understood to use 0xf for "not
   implemented"; confirm whether that case needs excluding here. */
#define ftr_field_at_least(rval, shift, fval)                              \
   ((((rval) >> (shift)) & 0xfUL) >= (unsigned long)(fval))

/* Read the ID register named ID with MRS and set HAVE_FTR to True when
   the 4-bit field at bit position FTR is at least FVAL, else to False.
     id       - bare system register name, pasted into the instruction
     ftr      - bit offset of the field within the register
     fval     - minimum field value that indicates the feature
     have_ftr - Bool lvalue that receives the result */
#define get_ftr(id, ftr, fval, have_ftr) ({                                \
         unsigned long rval;                                               \
         asm("mrs %0, "#id : "=r" (rval));                                 \
         have_ftr = ftr_field_at_least(rval, ftr, fval) ? True : False;    \
      })
|
||||
|
||||
/* Read ID_AA64ISAR0_EL1 attributes */
|
||||
|
||||
/* FHM indicates support for FMLAL and FMLSL instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
|
||||
ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);
|
||||
|
||||
/* DP indicates support for UDOT and SDOT instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
|
||||
ID_AA64ISAR0_DP_SUPPORTED, have_dp);
|
||||
|
||||
/* SM4 indicates support for SM4E and SM4EKEY instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
|
||||
ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);
|
||||
|
||||
/* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, SM3TT2B,
|
||||
* SM3PARTW1, and SM3PARTW2 instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
|
||||
ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);
|
||||
|
||||
/* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
|
||||
ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);
|
||||
|
||||
/* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
|
||||
* Mandatory from v8.1 onwards.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
|
||||
ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);
|
||||
|
||||
/* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
|
||||
* LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
|
||||
* Mandatory from v8.1 onwards.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
|
||||
ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);
|
||||
|
||||
/* Read ID_AA64ISAR1_EL1 attributes */
|
||||
|
||||
/* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
|
||||
* instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
|
||||
ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);
|
||||
|
||||
/* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
|
||||
* BFCVT2 instructions.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
|
||||
ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);
|
||||
|
||||
/* DPB indicates support for DC CVAP instruction.
|
||||
* Mandatory for v8.2 onwards.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
|
||||
ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);
|
||||
|
||||
/* DPB indicates support for DC CVADP instruction.
|
||||
* Optional for v8.2.
|
||||
*/
|
||||
get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
|
||||
ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);
|
||||
|
||||
/* Read ID_AA64PFR0_EL1 attributes */
|
||||
|
||||
/* VFP16 indicates support for half-precision vector arithmetic.
|
||||
* Optional for v8.2. Must be the same value as FP16.
|
||||
*/
|
||||
get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
|
||||
ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);
|
||||
|
||||
/* FP16 indicates support for half-precision scalar arithmetic.
|
||||
* Optional for v8.2. Must be the same value as VFP16.
|
||||
*/
|
||||
get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
|
||||
ID_AA64PFR0_FP16_SUPPORTED, have_fp16);
|
||||
|
||||
if (have_fhm) vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
|
||||
if (have_dpbcvap) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
|
||||
if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
|
||||
if (have_sm3) vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
|
||||
if (have_sm4) vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
|
||||
if (have_sha3) vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
|
||||
if (have_rdm) vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
|
||||
if (have_i8mm) vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
|
||||
if (have_atomics) vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
|
||||
if (have_bf16) vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
|
||||
if (have_fp16) vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
|
||||
if (have_vfp16) vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;
|
||||
|
||||
#undef get_cpu_ftr
|
||||
#undef get_ftr
|
||||
|
||||
return True;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user