[gcc r14-10338] AArch64: Fix cpu features initialization [PR115342]

Wilco Dijkstra wilco@gcc.gnu.org
Fri Jun 21 16:33:35 GMT 2024


https://gcc.gnu.org/g:9421f02916676d27e24fcda918f85e359329ac69

commit r14-10338-g9421f02916676d27e24fcda918f85e359329ac69
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date:   Wed Jun 5 14:04:33 2024 +0100

    AArch64: Fix cpu features initialization [PR115342]
    
    The CPU features initialization code uses CPUID registers (rather than
    HWCAP).  The equality comparisons it uses are incorrect: for example,
    FEAT_SVE is not set if SVE2 is available.  Using HWCAPs for these is both
    simpler and correct.  The initialization must also be done atomically, so
    that multiple threads cannot corrupt the global feature mask through
    non-atomic read-modify-write (RMW) accesses.
    
    libgcc:
            PR target/115342
            * config/aarch64/cpuinfo.c (__init_cpu_features_constructor):
            Use HWCAP where possible.  Use atomic write for initialization.
            Fix FEAT_PREDRES comparison.
            (__init_cpu_features_resolver): Use atomic load for correct
            initialization.
            (__init_cpu_features): Likewise.
    (cherry picked from commit d7cbcfe7c33645eaf95f175f19884d443817857b)

Diff:
---
 libgcc/config/aarch64/cpuinfo.c | 181 +++++++++++++++++-----------------------
 1 file changed, 75 insertions(+), 106 deletions(-)

diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c
index eb0ac97255d..ec36d105738 100644
--- a/libgcc/config/aarch64/cpuinfo.c
+++ b/libgcc/config/aarch64/cpuinfo.c
@@ -230,14 +230,22 @@ struct {
 #ifndef HWCAP2_SVE_EBF16
 #define HWCAP2_SVE_EBF16 (1UL << 33)
 #endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3	(1UL << 46)
+#endif
 
 static void
-__init_cpu_features_constructor(unsigned long hwcap,
-				const __ifunc_arg_t *arg) {
-#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+__init_cpu_features_constructor (unsigned long hwcap,
+				 const __ifunc_arg_t *arg)
+{
+  unsigned long feat = 0;
+#define setCPUFeature(F) feat |= 1UL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number) \
-  (val & ((1ULL << number) - 1ULL) << start) >> start
+  (val & ((1UL << number) - 1UL) << start) >> start
   unsigned long hwcap2 = 0;
   if (hwcap & _IFUNC_ARG_HWCAP)
     hwcap2 = arg->_hwcap2;
@@ -247,26 +255,20 @@ __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_PMULL);
   if (hwcap & HWCAP_FLAGM)
     setCPUFeature(FEAT_FLAGM);
-  if (hwcap2 & HWCAP2_FLAGM2) {
-    setCPUFeature(FEAT_FLAGM);
+  if (hwcap2 & HWCAP2_FLAGM2)
     setCPUFeature(FEAT_FLAGM2);
-  }
-  if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+  if (hwcap & HWCAP_SM4)
     setCPUFeature(FEAT_SM4);
   if (hwcap & HWCAP_ASIMDDP)
     setCPUFeature(FEAT_DOTPROD);
   if (hwcap & HWCAP_ASIMDFHM)
     setCPUFeature(FEAT_FP16FML);
-  if (hwcap & HWCAP_FPHP) {
+  if (hwcap & HWCAP_FPHP)
     setCPUFeature(FEAT_FP16);
-    setCPUFeature(FEAT_FP);
-  }
   if (hwcap & HWCAP_DIT)
     setCPUFeature(FEAT_DIT);
   if (hwcap & HWCAP_ASIMDRDM)
     setCPUFeature(FEAT_RDM);
-  if (hwcap & HWCAP_ILRCPC)
-    setCPUFeature(FEAT_RCPC2);
   if (hwcap & HWCAP_AES)
     setCPUFeature(FEAT_AES);
   if (hwcap & HWCAP_SHA1)
@@ -280,22 +282,21 @@ __init_cpu_features_constructor(unsigned long hwcap,
   if (hwcap & HWCAP_SB)
     setCPUFeature(FEAT_SB);
   if (hwcap & HWCAP_SSBS)
-    setCPUFeature(FEAT_SSBS2);
-  if (hwcap2 & HWCAP2_MTE) {
-    setCPUFeature(FEAT_MEMTAG);
-    setCPUFeature(FEAT_MEMTAG2);
-  }
-  if (hwcap2 & HWCAP2_MTE3) {
-    setCPUFeature(FEAT_MEMTAG);
-    setCPUFeature(FEAT_MEMTAG2);
+    {
+      setCPUFeature(FEAT_SSBS);
+      setCPUFeature(FEAT_SSBS2);
+    }
+  if (hwcap2 & HWCAP2_MTE)
+    {
+      setCPUFeature(FEAT_MEMTAG);
+      setCPUFeature(FEAT_MEMTAG2);
+    }
+  if (hwcap2 & HWCAP2_MTE3)
     setCPUFeature(FEAT_MEMTAG3);
-  }
   if (hwcap2 & HWCAP2_SVEAES)
     setCPUFeature(FEAT_SVE_AES);
-  if (hwcap2 & HWCAP2_SVEPMULL) {
-    setCPUFeature(FEAT_SVE_AES);
+  if (hwcap2 & HWCAP2_SVEPMULL)
     setCPUFeature(FEAT_SVE_PMULL128);
-  }
   if (hwcap2 & HWCAP2_SVEBITPERM)
     setCPUFeature(FEAT_SVE_BITPERM);
   if (hwcap2 & HWCAP2_SVESHA3)
@@ -332,108 +333,76 @@ __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_WFXT);
   if (hwcap2 & HWCAP2_SME)
     setCPUFeature(FEAT_SME);
+  if (hwcap2 & HWCAP2_SME2)
+    setCPUFeature(FEAT_SME2);
   if (hwcap2 & HWCAP2_SME_I16I64)
     setCPUFeature(FEAT_SME_I64);
   if (hwcap2 & HWCAP2_SME_F64F64)
     setCPUFeature(FEAT_SME_F64);
-  if (hwcap & HWCAP_CPUID) {
-    unsigned long ftr;
-    getCPUFeature(ID_AA64PFR1_EL1, ftr);
-    /* ID_AA64PFR1_EL1.MTE >= 0b0001  */
-    if (extractBits(ftr, 8, 4) >= 0x1)
-      setCPUFeature(FEAT_MEMTAG);
-    /* ID_AA64PFR1_EL1.SSBS == 0b0001  */
-    if (extractBits(ftr, 4, 4) == 0x1)
-      setCPUFeature(FEAT_SSBS);
-    /* ID_AA64PFR1_EL1.SME == 0b0010  */
-    if (extractBits(ftr, 24, 4) == 0x2)
-      setCPUFeature(FEAT_SME2);
-    getCPUFeature(ID_AA64PFR0_EL1, ftr);
-    /* ID_AA64PFR0_EL1.FP != 0b1111  */
-    if (extractBits(ftr, 16, 4) != 0xF) {
-      setCPUFeature(FEAT_FP);
-      /* ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP  */
-      setCPUFeature(FEAT_SIMD);
-    }
-    /* ID_AA64PFR0_EL1.SVE != 0b0000  */
-    if (extractBits(ftr, 32, 4) != 0x0) {
-      /* get ID_AA64ZFR0_EL1, that name supported if sve enabled only  */
-      getCPUFeature(S3_0_C0_C4_4, ftr);
-      /* ID_AA64ZFR0_EL1.SVEver == 0b0000  */
-      if (extractBits(ftr, 0, 4) == 0x0)
-	setCPUFeature(FEAT_SVE);
-      /* ID_AA64ZFR0_EL1.SVEver == 0b0001  */
-      if (extractBits(ftr, 0, 4) == 0x1)
-	setCPUFeature(FEAT_SVE2);
-      /* ID_AA64ZFR0_EL1.BF16 != 0b0000  */
-      if (extractBits(ftr, 20, 4) != 0x0)
-	setCPUFeature(FEAT_SVE_BF16);
+  if (hwcap & HWCAP_CPUID)
+    {
+      unsigned long ftr;
+
+      getCPUFeature(ID_AA64ISAR1_EL1, ftr);
+      /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001  */
+      if (extractBits(ftr, 40, 4) >= 0x1)
+	setCPUFeature(FEAT_PREDRES);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0001  */
+      if (extractBits(ftr, 60, 4) >= 0x1)
+	setCPUFeature(FEAT_LS64);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0010  */
+      if (extractBits(ftr, 60, 4) >= 0x2)
+	setCPUFeature(FEAT_LS64_V);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0011  */
+      if (extractBits(ftr, 60, 4) >= 0x3)
+	setCPUFeature(FEAT_LS64_ACCDATA);
     }
-    getCPUFeature(ID_AA64ISAR0_EL1, ftr);
-    /* ID_AA64ISAR0_EL1.SHA3 != 0b0000  */
-    if (extractBits(ftr, 32, 4) != 0x0)
-      setCPUFeature(FEAT_SHA3);
-    getCPUFeature(ID_AA64ISAR1_EL1, ftr);
-    /* ID_AA64ISAR1_EL1.DPB >= 0b0001  */
-    if (extractBits(ftr, 0, 4) >= 0x1)
-      setCPUFeature(FEAT_DPB);
-    /* ID_AA64ISAR1_EL1.LRCPC != 0b0000  */
-    if (extractBits(ftr, 20, 4) != 0x0)
-      setCPUFeature(FEAT_RCPC);
-    /* ID_AA64ISAR1_EL1.LRCPC == 0b0011  */
-    if (extractBits(ftr, 20, 4) == 0x3)
-      setCPUFeature(FEAT_RCPC3);
-    /* ID_AA64ISAR1_EL1.SPECRES == 0b0001  */
-    if (extractBits(ftr, 40, 4) == 0x2)
-      setCPUFeature(FEAT_PREDRES);
-    /* ID_AA64ISAR1_EL1.BF16 != 0b0000  */
-    if (extractBits(ftr, 44, 4) != 0x0)
-      setCPUFeature(FEAT_BF16);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0001  */
-    if (extractBits(ftr, 60, 4) >= 0x1)
-      setCPUFeature(FEAT_LS64);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0010  */
-    if (extractBits(ftr, 60, 4) >= 0x2)
-      setCPUFeature(FEAT_LS64_V);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0011  */
-    if (extractBits(ftr, 60, 4) >= 0x3)
-      setCPUFeature(FEAT_LS64_ACCDATA);
-  } else {
-    /* Set some features in case of no CPUID support.  */
-    if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
+
+  if (hwcap & HWCAP_FP)
+    {
       setCPUFeature(FEAT_FP);
       /* FP and AdvSIMD fields have the same value.  */
       setCPUFeature(FEAT_SIMD);
     }
-    if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
-      setCPUFeature(FEAT_DPB);
-    if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
-      setCPUFeature(FEAT_RCPC);
-    if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
-      setCPUFeature(FEAT_BF16);
-    if (hwcap2 & HWCAP2_SVEBF16)
-      setCPUFeature(FEAT_SVE_BF16);
-    if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
-      setCPUFeature(FEAT_SVE2);
-    if (hwcap & HWCAP_SHA3)
-      setCPUFeature(FEAT_SHA3);
-  }
+  if (hwcap & HWCAP_DCPOP)
+    setCPUFeature(FEAT_DPB);
+  if (hwcap & HWCAP_LRCPC)
+    setCPUFeature(FEAT_RCPC);
+  if (hwcap & HWCAP_ILRCPC)
+    setCPUFeature(FEAT_RCPC2);
+  if (hwcap2 & HWCAP2_LRCPC3)
+    setCPUFeature(FEAT_RCPC3);
+  if (hwcap2 & HWCAP2_BF16)
+    setCPUFeature(FEAT_BF16);
+  if (hwcap2 & HWCAP2_SVEBF16)
+    setCPUFeature(FEAT_SVE_BF16);
+  if (hwcap & HWCAP_SVE)
+    setCPUFeature(FEAT_SVE);
+  if (hwcap2 & HWCAP2_SVE2)
+    setCPUFeature(FEAT_SVE2);
+  if (hwcap & HWCAP_SHA3)
+    setCPUFeature(FEAT_SHA3);
   setCPUFeature(FEAT_INIT);
+
+  __atomic_store_n (&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
 }
 
 void
-__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {
-  if (__aarch64_cpu_features.features)
+__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg)
+{
+  if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
   __init_cpu_features_constructor(hwcap, arg);
 }
 
 void __attribute__ ((constructor))
-__init_cpu_features(void) {
+__init_cpu_features(void)
+{
   unsigned long hwcap;
   unsigned long hwcap2;
+
   /* CPU features already initialized.  */
-  if (__aarch64_cpu_features.features)
+  if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
   hwcap = getauxval(AT_HWCAP);
   hwcap2 = getauxval(AT_HWCAP2);


More information about the Gcc-cvs mailing list