[PATCH 5/5] [AARCH64] Add variant support to -m*=native and add thunderxt88pass1.

Tue Nov 17 22:11:00 GMT 2015

Since ThunderX T88 pass 1 (variant 0) is a ARMv8 part while pass 2 (variant 1)
is an ARMv8.1 part, I needed to add detecting of the variant also for this
difference. Also I simplify a little bit and combined the single core and
arch detecting cases so it would be easier to add variant.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
Tested -mcpu=native on both T88 pass 1 and T88 pass 2 to make sure it is
deecting the two seperately.

Thanks,
Andrew Pinski


* config/aarch64/aarch64-cores.def: Add -1 as the variant to all of the cores.
(thunderxt88pass1): New core.
* config/aarch64/driver-aarch64.c (struct aarch64_core_data): Add variant field.
(ALL_VARIANTS): New define.
(AARCH64_CORE): Support VARIANT operand.
(cpu_data): Likewise.
(host_detect_local_cpu): Parse variant field of /proc/cpuinfo.  Combine the arch
and single core case and support variant searching.
* common/config/aarch64/aarch64-common.c (AARCH64_CORE): Add VARIANT operand.
* config/aarch64/aarch64-opts.h (AARCH64_CORE): Likewise.
* config/aarch64/aarch64.c (AARCH64_CORE): Likewise.
* config/aarch64/aarch64.h (AARCH64_CORE): Likewise.
* config/aarch64/aarch64-tune.md: Regernate.
---
 gcc/common/config/aarch64/aarch64-common.c |  2 +-
 gcc/config/aarch64/aarch64-cores.def       | 27 ++++++-----
 gcc/config/aarch64/aarch64-opts.h          |  2 +-
 gcc/config/aarch64/aarch64-tune.md         |  2 +-
 gcc/config/aarch64/aarch64.c               |  2 +-
 gcc/config/aarch64/aarch64.h               |  2 +-
 gcc/config/aarch64/driver-aarch64.c        | 78 ++++++++++++++++--------------
 7 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
index e312bbc..f6fd7e7 100644
--- a/gcc/common/config/aarch64/aarch64-common.c
+++ b/gcc/common/config/aarch64/aarch64-common.c
@@ -141,7 +141,7 @@ struct arch_to_arch_name
    the default set of architectural feature flags they support.  */
 static const struct processor_name_to_arch all_cores[] =
 {
-#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
 #include "config/aarch64/aarch64-cores.def"
   {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 05ee525..52a9906 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -21,7 +21,7 @@
 
    Before using #include to read this file, define a macro:
 
-      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART)
+      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT)
 
    The CORE_NAME is the name of the core, represented as a string constant.
    The CORE_IDENT is the name of the core, represented as an identifier.
@@ -37,31 +37,36 @@
    PART is the part number of the CPU.  On a GNU/Linux system it can be found
    in /proc/cpuinfo.  For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE
    where the big part number comes as the first arugment to the macro and little is the
-   second.  */
+   second.
+   VARIANT is the variant of the CPU.  In a GNU/Linux system it can found
+   in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
 
 /* V8 Architecture Processors.  */
 
 /* ARM cores. */
-AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
-AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
-AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
+AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
+AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
+AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
 
 /* Samsung cores */
-AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
+AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001, -1)
 
 /* Qualcomm cores */
-AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
+AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800, -1)
 
 /* Cavium cores */
-AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
+/* ThunderX T88 pass 1 is 8.0-a arch. */
+AARCH64_CORE("thunderxt88pass1", thunderxt88pass1,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1, 0)
+/* ThunderX T88 pass 2 and on is 8.1-a arch. */
+AARCH64_CORE("thunderx",    thunderx,  thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a1, -1)
 
 /* APM cores */
-AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
+AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
 
 /* V8 big.LITTLE implementations.  */
 
-AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03))
-AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03))
+AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03), -1)
+AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03), -1)
 
 
 #undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
index 5534867..c66cd72 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -25,7 +25,7 @@
 /* The various cores that implement AArch64.  */
 enum aarch64_processor
 {
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
   INTERNAL_IDENT,
 #include "aarch64-cores.def"
   /* Used to indicate that no processor has been specified.  */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index c65a124..28b573e 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
+	"cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderxt88pass1,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a5971a1..d244846 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -526,7 +526,7 @@ static const struct processor all_architectures[] =
 /* Processor cores implementing AArch64.  */
 static const struct processor all_cores[] =
 {
-#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
   {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version,	\
   FLAGS, &COSTS##_tunings},
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index c6582a4..6f06369 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -464,7 +464,7 @@ enum reg_class
 
 enum target_cpus
 {
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
   TARGET_CPU_##INTERNAL_IDENT,
 #include "aarch64-cores.def"
   TARGET_CPU_generic
diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
index ea1e856..b11d914 100644
--- a/gcc/config/aarch64/driver-aarch64.c
+++ b/gcc/config/aarch64/driver-aarch64.c
@@ -40,20 +40,22 @@ struct aarch64_core_data
   const char *arch;
   unsigned char implementer_id; /* Exactly 8 bits */
   unsigned int part_no; /* 12 bits + 12 bits */
+  unsigned variant;
 };
 
 #define AARCH64_BIG_LITTLE(BIG, LITTLE) \
   (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
 #define INVALID_IMP ((unsigned char) -1)
 #define INVALID_CORE ((unsigned)-1)
+#define ALL_VARIANTS ((unsigned)-1)
 
-#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
-  { CORE_NAME, #ARCH, IMP, PART },
+#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
+  { CORE_NAME, #ARCH, IMP, PART, VARIANT },
 
 static struct aarch64_core_data cpu_data [] =
 {
 #include "aarch64-cores.def"
-  { NULL, NULL, INVALID_IMP, INVALID_CORE }
+  { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS }
 };
 
 
@@ -149,7 +151,6 @@ contains_core_p (unsigned *arr, unsigned core)
 const char *
 host_detect_local_cpu (int argc, const char **argv)
 {
-  const char *arch_id = NULL;
   const char *res = NULL;
   static const int num_exts = ARRAY_SIZE (ext_to_feat_string);
   char buf[128];
@@ -158,10 +159,11 @@ host_detect_local_cpu (int argc, const char **argv)
   bool tune = false;
   bool cpu = false;
   unsigned int i = 0;
-  int core_idx = -1;
   unsigned char imp = INVALID_IMP;
   unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
   unsigned int n_cores = 0;
+  unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
+  unsigned int n_variants = 0;
   bool processed_exts = false;
   const char *ext_string = "";
 
@@ -203,6 +205,19 @@ host_detect_local_cpu (int argc, const char **argv)
 	    goto not_found;
 	}
 
+      if (strstr (buf, "variant") != NULL)
+	{
+	  unsigned cvariant = parse_field (buf);
+	  if (!contains_core_p (variants, cvariant))
+	    {
+              if (n_variants == 2)
+                goto not_found;
+
+              variants[n_variants++] = cvariant;
+	    }
+          continue;
+        }
+
       if (strstr (buf, "part") != NULL)
 	{
 	  unsigned ccore = parse_field (buf);
@@ -245,32 +260,41 @@ host_detect_local_cpu (int argc, const char **argv)
   f = NULL;
 
   /* Weird cpuinfo format that we don't know how to handle.  */
-  if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP)
+  if (n_cores == 0 || n_cores > 2
+      || (n_cores == 1 && n_variants != 1)
+      || imp == INVALID_IMP)
     goto not_found;
 
-  if (arch)
+
+  /* Simple case, one core type or just looking for the arch. */
+  if (n_cores == 1 || arch)
     {
       /* Search for one of the cores in the list. */
       for (i = 0; cpu_data[i].name != NULL; i++)
 	if (cpu_data[i].implementer_id == imp
-	    && contains_core_p (cores, cpu_data[i].part_no))
-	  {
-	    arch_id = cpu_data[i].arch;
-	    break;
-	  }
-      if (!arch_id)
+	    && cores[0] == cpu_data[i].part_no
+	    && (cpu_data[i].variant == ALL_VARIANTS
+	        || variants[0] == cpu_data[i].variant))
+	  break;
+      if (cpu_data[i].name == NULL)
 	goto not_found;
 
-      const char* arch_name = get_arch_name_from_id (arch_id);
+      if (arch)
+	{
+	  const char* arch_name = get_arch_name_from_id (cpu_data[i].arch);
 
-      /* We got some arch indentifier that's not in aarch64-arches.def?  */
-      if (!arch_name)
-        goto not_found;
+	  /* We got some arch indentifier that's not in aarch64-arches.def?  */
+	  if (!arch_name)
+	    goto not_found;
 
-      res = concat ("-march=", arch_name, NULL);
+	  res = concat ("-march=", arch_name, NULL);
+	}
+      else
+        res = concat ("-m", cpu ? "cpu" : "tune", "=",
+                      cpu_data[i].name, NULL);
     }
   /* We have big.LITTLE.  */
-  else if (n_cores == 2)
+  else
     {
       for (i = 0; cpu_data[i].name != NULL; i++)
         {
@@ -284,22 +308,6 @@ host_detect_local_cpu (int argc, const char **argv)
       if (!res)
         goto not_found;
     }
-  /* The simple, non-big.LITTLE case.  */
-  else
-    {
-      for (i = 0; cpu_data[i].name != NULL; i++)
-	if (cores[0] == cpu_data[i].part_no
-	    && cpu_data[i].implementer_id == imp)
-	  {
-	    core_idx = i;
-	    break;
-	  }
-      if (core_idx == -1)
-	goto not_found;
-
-      res = concat ("-m", cpu ? "cpu" : "tune", "=",
-                      cpu_data[core_idx].name, NULL);
-    }
 
   if (tune)
     return res;
-- 
1.9.1