This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 4/4] Wire X-Gene 1 up in the ARM (32bit) backend as an AArch32-capable core.




On 12/01/15 20:15, Philipp Tomsich wrote:
---
  gcc/ChangeLog-2014            | 10 ++++++++++
  gcc/config/arm/arm-cores.def  |  1 +
  gcc/config/arm/arm-tables.opt |  3 +++
  gcc/config/arm/arm-tune.md    |  3 ++-
  gcc/config/arm/arm.c          | 22 ++++++++++++++++++++++
  gcc/config/arm/arm.md         | 11 +++++++++--
  gcc/config/arm/bpabi.h        |  2 ++
  gcc/config/arm/t-arm          |  1 +
  gcc/doc/invoke.texi           |  3 ++-
  9 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog-2014 b/gcc/ChangeLog-2014
index dd49d7f..c3c62db 100644
--- a/gcc/ChangeLog-2014
+++ b/gcc/ChangeLog-2014
@@ -3497,6 +3497,16 @@
  	63965.
  	* config/rs6000/rs6000.c: Likewise.

+2014-12-23  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list.
+	Include xgene1.md.
+	* config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1.
+	* config/arm/arm-cores.def (xgene1): New entry.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1.
+
  2014-11-22  Jan Hubicka  <hubicka@ucw.cz>

  	PR ipa/63671
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index be125ac..fa13eb9 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -167,6 +167,7 @@ ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7,	7A,  FL_LDSCHED |
  /* V8 Architecture Processors */
  ARM_CORE("cortex-a53",	cortexa53, cortexa53,	8A, FL_LDSCHED | FL_CRC32, cortex_a53)
  ARM_CORE("cortex-a57",	cortexa57, cortexa15,	8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("xgene1",      xgene1,    xgene1,      8A, FL_LDSCHED,            xgene1)

  /* V8 big.LITTLE implementations */
  ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A,  FL_LDSCHED | FL_CRC32, cortex_a57)
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index ece9d5e..1392429 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -310,6 +310,9 @@ EnumValue
  Enum(processor_type) String(cortex-a57) Value(cortexa57)

  EnumValue
+Enum(processor_type) String(xgene1) Value(xgene1)
+
+EnumValue
  Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53)

  Enum
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index 452820ab..dcd5054 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -32,5 +32,6 @@
  	cortexr4f,cortexr5,cortexr7,
  	cortexm7,cortexm4,cortexm3,
  	marvell_pj4,cortexa15cortexa7,cortexa17cortexa7,
-	cortexa53,cortexa57,cortexa57cortexa53"
+	cortexa53,cortexa57,xgene1,
+	cortexa57cortexa53"
  	(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8ca2dd8..14c8a87 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1903,6 +1903,25 @@ const struct tune_params arm_cortex_a57_tune =
    ARM_FUSE_MOVW_MOVT				/* Fuseable pairs of instructions.  */
  };

+const struct tune_params arm_xgene1_tune =
+{
+  arm_9e_rtx_costs,
+  &xgene1_extra_costs,
+  NULL,                                        /* Scheduler cost adjustment.  */
+  1,                                           /* Constant limit.  */
+  2,                                           /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,                                       /* Prefer constant pool.  */
+  arm_default_branch_cost,
+  true,                                        /* Prefer LDRD/STRD.  */
+  {true, true},                                /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                       /* Vectorizer costs.  */
+  false,                                       /* Prefer Neon for 64-bits bitops.  */
+  true, true,                                  /* Prefer 32-bit encodings.  */
+  false,				       /* Prefer Neon for stringops.  */
+  32					       /* Maximum insns to inline memset.  */
+};
+
  /* Branches can be dual-issued on Cortex-A5, so conditional execution is
     less appealing.  Set max_insns_skipped to a low value.  */

@@ -27066,6 +27085,9 @@ arm_issue_rate (void)
  {
    switch (arm_tune)
      {
+    case xgene1:
+      return 4;
+
      case cortexa15:
      case cortexa57:
        return 3;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index c61057f..a3cbf3b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -109,6 +109,11 @@
  ;; given instruction does not shift one of its input operands.
  (define_attr "shift" "" (const_int 0))

+;; [For compatibility with AArch64 in pipeline models]
+;; Attribute that specifies whether or not the instruction touches fp
+;; registers.
+(define_attr "fp" "no,yes" (const_string "no"))
+
  ; Floating Point Unit.  If we only have floating point emulation, then there
  ; is no point in scheduling the floating point insns.  (Well, for best
  ; performance we should try and group them together).
@@ -386,7 +391,8 @@
                                  arm926ejs,arm1020e,arm1026ejs,arm1136js,\
                                  arm1136jfs,cortexa5,cortexa7,cortexa8,\
                                  cortexa9,cortexa12,cortexa15,cortexa17,\
-                                cortexa53,cortexm4,cortexm7,marvell_pj4")
+                                cortexa53,cortexm4,cortexm7,marvell_pj4,\
+				xgene1")
  	       (eq_attr "tune_cortexr4" "yes"))
            (const_string "no")
            (const_string "yes"))))
@@ -396,7 +402,7 @@
  	  (and (eq_attr "fpu" "vfp")
  	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,\
                                  cortexa8,cortexa9,cortexa53,cortexm4,\
-                                cortexm7,marvell_pj4")
+                                cortexm7,marvell_pj4,xgene1")
  	       (eq_attr "tune_cortexr4" "no"))
  	  (const_string "yes")
  	  (const_string "no"))))
@@ -426,6 +432,7 @@
  (include "cortex-m4-fpu.md")
  (include "vfp11.md")
  (include "marvell-pj4.md")
+(include "xgene1.md")

  
  ;;---------------------------------------------------------------------------
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index 6e69b81..8e16434 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -71,6 +71,7 @@
     |mcpu=cortex-a53					\
     |mcpu=cortex-a57					\
     |mcpu=cortex-a57.cortex-a53				\
+   |mcpu=xgene1                                         \
     |mcpu=cortex-m1.small-multiply                       \
     |mcpu=cortex-m0.small-multiply                       \
     |mcpu=cortex-m0plus.small-multiply			\
@@ -92,6 +93,7 @@
     |mcpu=cortex-a53					\
     |mcpu=cortex-a57					\
     |mcpu=cortex-a57.cortex-a53				\
+   |mcpu=xgene1                                         \
     |mcpu=cortex-m1.small-multiply                       \
     |mcpu=cortex-m0.small-multiply                       \
     |mcpu=cortex-m0plus.small-multiply                   \
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index df97a13..4ef38a8 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -40,6 +40,7 @@ MD_INCLUDES=	$(srcdir)/config/arm/arm1020e.md \
  		$(srcdir)/config/arm/cortex-a9.md \
  		$(srcdir)/config/arm/cortex-a9-neon.md \
  		$(srcdir)/config/arm/cortex-a53.md \
+		$(srcdir)/config/arm/xgene1.md \
  		$(srcdir)/config/arm/cortex-m4-fpu.md \
  		$(srcdir)/config/arm/cortex-m4.md \
  		$(srcdir)/config/arm/cortex-r4f.md \
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 433b1de..51ed1da 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12957,7 +12957,8 @@ Permissible names are: @samp{arm2}, @samp{arm250},
  @samp{marvell-pj4},
  @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312},
  @samp{fa526}, @samp{fa626},
-@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}.
+@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te},
+@samp{xgene1}.

  Additionally, this option can specify that GCC should tune the performance
  of the code for a big.LITTLE system.  Permissible names are:



This patch is OK. Thanks for doing this. I assume you'll follow up at some point during the next stage1 by adjusting the prefetch pattern for AArch32 as well, giving it a different "type" and taking care to treat it the same as load1 in all the pipeline descriptions in gcc/config/arm?



Ramana


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]