Bug 414268 - Enable AArch64 feature detection and decoding for v8.x instructions (where x>0).

Patch from Assad Hashmi <assad.hashmi@linaro.org>.
Author: Julian Seward
Date:   2020-12-09 12:54:45 +01:00
Commit: cb52fee5dd (parent 383e463e1a)
3 changed files with 286 additions and 11 deletions

VEX/priv/main_main.c

@@ -1765,11 +1765,36 @@ static const HChar* show_hwcaps_arm ( UInt hwcaps )
 static const HChar* show_hwcaps_arm64 ( UInt hwcaps )
 {
-   /* Since there are no variants, just insist that hwcaps is zero,
-      and declare it invalid otherwise. */
-   if (hwcaps == 0)
-      return "baseline";
-   return "Unsupported";
+   static const HChar prefix[] = "v8";
+   static const struct {
+      UInt  hwcaps_bit;
+      HChar name[16];
+   } hwcaps_list[] = {
+      { VEX_HWCAPS_ARM64_FHM,      "fhm" },
+      { VEX_HWCAPS_ARM64_DPBCVAP,  "dpbcvap" },
+      { VEX_HWCAPS_ARM64_DPBCVADP, "dpbcvadp" },
+      { VEX_HWCAPS_ARM64_SM3,      "sm3" },
+      { VEX_HWCAPS_ARM64_SM4,      "sm4" },
+      { VEX_HWCAPS_ARM64_SHA3,     "sha3" },
+      { VEX_HWCAPS_ARM64_RDM,      "rdm" },
+      { VEX_HWCAPS_ARM64_I8MM,     "i8mm" },
+      { VEX_HWCAPS_ARM64_ATOMICS,  "atomics" },
+      { VEX_HWCAPS_ARM64_BF16,     "bf16" },
+      { VEX_HWCAPS_ARM64_FP16,     "fp16" },
+      { VEX_HWCAPS_ARM64_VFP16,    "vfp16" },
+   };
+#define NUM_HWCAPS (sizeof hwcaps_list / sizeof hwcaps_list[0])
+
+   static HChar buf[sizeof prefix +
+                    NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) + 1]; // '\0'
+
+   HChar *p = buf + vex_sprintf(buf, "%s", prefix);
+   UInt i;
+   for (i = 0; i < NUM_HWCAPS; ++i) {
+      if (hwcaps & hwcaps_list[i].hwcaps_bit)
+         p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name);
+   }
+   return buf;
 }
 
 static const HChar* show_hwcaps_s390x ( UInt hwcaps )
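
Editor's note: an illustration of what the new string builder yields (a
sketch, not part of the patch). The function walks hwcaps_list in order and
appends one "-name" chunk per set bit, so from within the same translation
unit:

   UInt hwcaps = VEX_HWCAPS_ARM64_RDM | VEX_HWCAPS_ARM64_ATOMICS
               | VEX_HWCAPS_ARM64_FP16 | VEX_HWCAPS_ARM64_VFP16;
   vex_printf("%s\n", show_hwcaps_arm64(hwcaps));
   /* prints "v8-rdm-atomics-fp16-vfp16" */

The name order follows hwcaps_list, not bit order, and the result points at
the function's static buffer, so it must be consumed before the next call.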
@@ -2130,11 +2155,20 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps )
          }
       }
 
-      case VexArchARM64:
-         if (hwcaps != 0)
+      case VexArchARM64: {
+         /* Mandatory dependencies. */
+         Bool have_fp16  = ((hwcaps & VEX_HWCAPS_ARM64_FP16) != 0);
+         Bool have_vfp16 = ((hwcaps & VEX_HWCAPS_ARM64_VFP16) != 0);
+         if (have_fp16 != have_vfp16)
             invalid_hwcaps(arch, hwcaps,
-                          "Unsupported hardware capabilities.\n");
+                    "Mismatch detected between scalar and vector FP16 features.\n");
+         Bool have_rdm     = ((hwcaps & VEX_HWCAPS_ARM64_RDM) != 0);
+         Bool have_atomics = ((hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0);
+         if (have_rdm != have_atomics)
+            invalid_hwcaps(arch, hwcaps,
+                    "Mismatch detected between RDMA and atomics features.\n");
          return;
+      }
 
       case VexArchS390X:
          if (! s390_host_has_ldisp)
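
Editor's note: both checks encode the same invariant, namely that features
the architecture ties together must be reported together. A minimal
standalone sketch of the rule (hypothetical helper, not in the patch):

   /* FP16 and VFP16 must match (scalar and vector half-precision are tied),
      and RDM and ATOMICS must match (both are mandatory from v8.1). */
   static Bool arm64_hwcaps_consistent ( UInt hwcaps )
   {
      Bool fp16    = (hwcaps & VEX_HWCAPS_ARM64_FP16)    != 0;
      Bool vfp16   = (hwcaps & VEX_HWCAPS_ARM64_VFP16)   != 0;
      Bool rdm     = (hwcaps & VEX_HWCAPS_ARM64_RDM)     != 0;
      Bool atomics = (hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0;
      return (fp16 == vfp16) && (rdm == atomics);
   }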

VEX/pub/libvex.h

@@ -207,7 +207,18 @@ typedef
 #define VEX_ARM_ARCHLEVEL(x) ((x) & 0x3f)
 
 /* ARM64: baseline capability is AArch64 v8. */
-/* (no definitions since no variants so far) */
+#define VEX_HWCAPS_ARM64_FHM      (1 << 4)
+#define VEX_HWCAPS_ARM64_DPBCVAP  (1 << 5)
+#define VEX_HWCAPS_ARM64_DPBCVADP (1 << 6)
+#define VEX_HWCAPS_ARM64_SM3      (1 << 7)
+#define VEX_HWCAPS_ARM64_SM4      (1 << 8)
+#define VEX_HWCAPS_ARM64_SHA3     (1 << 9)
+#define VEX_HWCAPS_ARM64_RDM      (1 << 10)
+#define VEX_HWCAPS_ARM64_ATOMICS  (1 << 11)
+#define VEX_HWCAPS_ARM64_I8MM     (1 << 12)
+#define VEX_HWCAPS_ARM64_BF16     (1 << 13)
+#define VEX_HWCAPS_ARM64_FP16     (1 << 14)
+#define VEX_HWCAPS_ARM64_VFP16    (1 << 15)
 
 /* MIPS baseline capability */
 /* Assigned Company values for bits 23:16 of the PRId Register
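
Editor's note: each capability is an independent flag in a single UInt, so a
host's feature set composes with bitwise-or and tests with bitwise-and
(sketch, not in the patch):

   UInt hwcaps = VEX_HWCAPS_ARM64_RDM | VEX_HWCAPS_ARM64_ATOMICS; /* v8.1 host */
   if (hwcaps & VEX_HWCAPS_ARM64_ATOMICS) {
      /* safe to decode the LSE instructions: CAS, CASP, SWP, LD<op>, ... */
   }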

coregrind/m_machine.c

@@ -478,7 +478,7 @@ Int VG_(machine_arm_archlevel) = 4;
    testing, so we need a VG_MINIMAL_JMP_BUF. */
 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
     || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
-    || defined(VGA_mips64)
+    || defined(VGA_mips64) || defined(VGA_arm64)
 #include "pub_core_libcsetjmp.h"
 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
 static void handler_unsup_insn ( Int x ) {
@@ -1719,10 +1719,84 @@ Bool VG_(machine_get_hwcaps)( void )
 #elif defined(VGA_arm64)
    {
+      /* Use the attribute and feature registers to determine host hardware
+       * capabilities. Only user-space features are read. Naming conventions
+       * follow the Arm Architecture Reference Manual.
+       *
+       * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
+       * ----------------
+       * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
+       * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
+       *    FHM   DP  SM4  SM3 SHA3      RDM       ATOMICS
+       *
+       * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
+       * ----------------
+       * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
+       * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
+       *   I8MM           BF16                                            DPB
+       *
+       * ID_AA64PFR0_EL1 Processor Feature Register 0
+       * ---------------
+       * 6666...2222 2222 1111 1111 11
+       * 3210...7654 3210 9876 5432 1098 7654 3210
+       *            ASIMD FP16
+       */
+      Bool is_base_v8 = False;
+      Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
+      Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
+      Bool have_vfp16, have_fp16;
+      have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
+         = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
+         = have_dpbcvadp = have_vfp16 = have_fp16 = False;
+
+      /* Some baseline v8.0 kernels do not allow reads of these registers. Use
+       * the same SIGILL handling algorithm as other architectures for such
+       * kernels.
+       */
+      vki_sigset_t          saved_set, tmp_set;
+      vki_sigaction_fromK_t saved_sigill_act;
+      vki_sigaction_toK_t   tmp_sigill_act;
+
+      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
+
+      VG_(sigemptyset)(&tmp_set);
+      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
+
+      Int r;
+      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
+      vg_assert(r == 0);
+
+      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
+      vg_assert(r == 0);
+      tmp_sigill_act = saved_sigill_act;
+
+      /* NODEFER: signal handler does not return (from the kernel's point of
+         view), hence if it is to successfully catch a signal more than once,
+         we need the NODEFER flag. */
+      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
+      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
+      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
+      tmp_sigill_act.ksa_handler = handler_unsup_insn;
+      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+
+      /* Does reading the ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
+      if (VG_MINIMAL_SETJMP(env_unsup_insn))
+         is_base_v8 = True;
+      else
+         __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1");
+
+      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
+      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
+
       va = VexArchARM64;
       vai.endness = VexEndnessLE;
 
-      /* So far there are no variants. */
+      /* Baseline features are v8.0. */
       vai.hwcaps = 0;
 
       VG_(machine_get_cache_info)(&vai);
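
Editor's note: the probe works because Linux kernels with ID-register
emulation trap and emulate EL0 reads of the ID_AA64* registers, while kernels
without it deliver SIGILL. The same pattern in plain POSIX C (a sketch under
those assumptions, using the libc API rather than Valgrind's VKI wrappers):

   #include <setjmp.h>
   #include <signal.h>

   static sigjmp_buf env;
   static void on_sigill(int sig) { siglongjmp(env, 1); }

   /* Returns 1 if EL0 reads of ID_AA64ISAR0_EL1 are allowed, else 0. */
   static int can_read_id_regs(void)
   {
      struct sigaction sa, old;
      sa.sa_handler = on_sigill;
      sa.sa_flags   = SA_NODEFER;      /* handler may fire more than once */
      sigemptyset(&sa.sa_mask);
      sigaction(SIGILL, &sa, &old);
      int ok = 1;
      if (sigsetjmp(env, 1))           /* longjmp target; restores the mask */
         ok = 0;                       /* trapped: base v8.0 kernel */
      else
         __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1" ::: "x0");
      sigaction(SIGILL, &old, NULL);
      return ok;
   }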
@@ -1747,6 +1821,162 @@ Bool VG_(machine_get_hwcaps)( void )
       VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                     vai.arm64_requires_fallback_LLSC ? "yes" : "no");
 
+      if (is_base_v8)
+         return True;
+
+      /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
+#define ID_AA64ISAR0_FHM_SHIFT          48
+#define ID_AA64ISAR0_DP_SHIFT           44
+#define ID_AA64ISAR0_SM4_SHIFT          40
+#define ID_AA64ISAR0_SM3_SHIFT          36
+#define ID_AA64ISAR0_SHA3_SHIFT         32
+#define ID_AA64ISAR0_RDM_SHIFT          28
+#define ID_AA64ISAR0_ATOMICS_SHIFT      20
+      /* Field values */
+#define ID_AA64ISAR0_FHM_SUPPORTED      0x1
+#define ID_AA64ISAR0_DP_SUPPORTED       0x1
+#define ID_AA64ISAR0_SM4_SUPPORTED      0x1
+#define ID_AA64ISAR0_SM3_SUPPORTED      0x1
+#define ID_AA64ISAR0_SHA3_SUPPORTED     0x1
+#define ID_AA64ISAR0_RDM_SUPPORTED      0x1
+#define ID_AA64ISAR0_ATOMICS_SUPPORTED  0x2
+
+      /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
+#define ID_AA64ISAR1_I8MM_SHIFT         52
+#define ID_AA64ISAR1_BF16_SHIFT         44
+#define ID_AA64ISAR1_DPB_SHIFT          0
+      /* Field values */
+#define ID_AA64ISAR1_I8MM_SUPPORTED     0x1
+#define ID_AA64ISAR1_BF16_SUPPORTED     0x1
+#define ID_AA64ISAR1_DPBCVAP_SUPPORTED  0x1
+#define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
+
+      /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
+#define ID_AA64PFR0_VFP16_SHIFT         20
+#define ID_AA64PFR0_FP16_SHIFT          16
+      /* Field values */
+#define ID_AA64PFR0_VFP16_SUPPORTED     0x1
+#define ID_AA64PFR0_FP16_SUPPORTED      0x1
+
+#define get_cpu_ftr(id) ({ \
+         unsigned long val; \
+         asm("mrs %0, "#id : "=r" (val)); \
+         VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
+      })
+      get_cpu_ftr(ID_AA64ISAR0_EL1);
+      get_cpu_ftr(ID_AA64ISAR1_EL1);
+      get_cpu_ftr(ID_AA64PFR0_EL1);
+
+#define get_ftr(id, ftr, fval, have_ftr) ({ \
+         unsigned long rval; \
+         asm("mrs %0, "#id : "=r" (rval)); \
+         have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
+      })
+
+      /* Read ID_AA64ISAR0_EL1 attributes */
+
+      /* FHM indicates support for FMLAL and FMLSL instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
+              ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);
+
+      /* DP indicates support for UDOT and SDOT instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
+              ID_AA64ISAR0_DP_SUPPORTED, have_dp);
+
+      /* SM4 indicates support for SM4E and SM4EKEY instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
+              ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);
+
+      /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A,
+       * SM3TT2B, SM3PARTW1, and SM3PARTW2 instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
+              ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);
+
+      /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
+              ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);
+
+      /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
+       * Mandatory from v8.1 onwards.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
+              ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);
+
+      /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET,
+       * LDSMAX, LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
+       * Mandatory from v8.1 onwards.
+       */
+      get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
+              ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);
+
+      /* Read ID_AA64ISAR1_EL1 attributes */
+
+      /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
+       * instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
+              ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);
+
+      /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
+       * BFCVT2 instructions.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
+              ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);
+
+      /* DPB indicates support for the DC CVAP instruction.
+       * Mandatory from v8.2 onwards.
+       */
+      get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
+              ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);
+
+      /* DPB indicates support for the DC CVADP instruction.
+       * Optional for v8.2.
+       */
+      get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
+              ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);
+
+      /* Read ID_AA64PFR0_EL1 attributes */
+
+      /* VFP16 indicates support for half-precision vector arithmetic.
+       * Optional for v8.2. Must be the same value as FP16.
+       */
+      get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
+              ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);
+
+      /* FP16 indicates support for half-precision scalar arithmetic.
+       * Optional for v8.2. Must be the same value as VFP16.
+       */
+      get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
+              ID_AA64PFR0_FP16_SUPPORTED, have_fp16);
+
+      if (have_fhm)      vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
+      if (have_dpbcvap)  vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
+      if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
+      if (have_sm3)      vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
+      if (have_sm4)      vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
+      if (have_sha3)     vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
+      if (have_rdm)      vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
+      if (have_i8mm)     vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
+      if (have_atomics)  vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
+      if (have_bf16)     vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
+      if (have_fp16)     vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
+      if (have_vfp16)    vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;
+
+#undef get_cpu_ftr
+#undef get_ftr
+
       return True;
    }
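
Editor's note on the two macros: both are GNU C statement expressions, and
"mrs %0, "#id stringizes the register name into the asm template, so
get_ftr(ID_AA64ISAR0_EL1, ...) expands to
asm("mrs %0, ID_AA64ISAR0_EL1" : "=r" (rval)). The test
(fval & ((rval >> ftr) & 0xf)) >= fval isolates the 4-bit ID field at bit
position ftr and compares it against the minimum "supported" value. A worked
example with a hypothetical register value (a sketch, not from a real core):

   /* Suppose ID_AA64ISAR0_EL1 reads back as 0x0000000110211120. */
   unsigned long rval = 0x0000000110211120UL;

   /* ATOMICS, bits [23:20] = 0x2: (0x2 & 0x2) >= 0x2 -> have_atomics = True  */
   /* RDM,     bits [31:28] = 0x1: (0x1 & 0x1) >= 0x1 -> have_rdm     = True  */
   /* SHA3,    bits [35:32] = 0x1: (0x1 & 0x1) >= 0x1 -> have_sha3    = True  */
   /* FHM,     bits [51:48] = 0x0: (0x1 & 0x0) >= 0x1 -> have_fhm     = False */

With that value the if-chain above would set VEX_HWCAPS_ARM64_ATOMICS,
VEX_HWCAPS_ARM64_RDM, and VEX_HWCAPS_ARM64_SHA3, and show_hwcaps_arm64 would
report "v8-sha3-rdm-atomics".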