This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[SPARC] Small fixes and cleanup


This patch rewrites the implementation of BRANCH_COST to use a table and take 
into account the couple of parameters.  It also fixes out-of-range issues for 
the Compare-and-Branch-on-Condition instructions on Niagara-4 and later, and 
cleans up the handling of MASK_FSMULD.

Bootstrapped/regtested on SPARC/Solaris, applied on the mainline.


2018-09-28  Eric Botcazou  <ebotcazou@adacore.com>

	* config/sparc/sparc-protos.h (sparc_branch_cost): Declare.
	* config/sparc/sparc.h (BRANCH_COST): Call sparc_branch_cost.
	* config/sparc/sparc.c (struct processor_costs): Add branch_cost field.
	(cypress_costs): Set it.
	(supersparc_costs): Likewise.
	(hypersparc_costs): Likewise.
	(leon_cost): Likewise.
	(leon3_costs): Likewise.
	(sparclet_costs): Likewise.
	(ultrasparc_costs): Likewise.
	(ultrasparc_costs): Likewise.
	(niagara_costs): Likewise.
	(niagara2_costs): Likewise.
	(niagara3_costs): Likewise.
	(niagara4_costs): Likewise.
	(niagara7_costs): Likewise.
	(m8_costs): Likewise.
	(TARGET_CAN_FOLLOW_JUMP): Define.
	(pass_work_around_errata::gate): Minor tweak.
	(sparc_option_override): Remove MASK_FSMULD mask for V7 processors.
	Do not set both MASK_VIS4 and MASK_VIS4B for M8 processor.
	Automaitcally clear MASK_FSMULD mask for V7 processors.
	(sparc_can_follow_jump): New static function.
	(output_ubranch): Deal with CROSSING_JUMP_P.
	(sparc_use_sched_lookahead): Rewrite using switch statement.
	(sparc_issue_rate): Reorder.
	(sparc_branch_cost): New function.
testsuite/
	* gcc.target/sparc/20160229-1.c: Minor tweak.
	* gcc.target/sparc/cbcond-2.c: Likewise.
	* gcc.target/sparc/movcc-1.c: Add -mcpu=v9 option.
	* gcc.target/sparc/movcc-2.c: Minor tweak.
	* gcc.target/sparc/overflow-3.c: Likewise.
	* gcc.target/sparc/overflow-4.c: Add -mno-vis4 option.
	* gcc.target/sparc/overflow-5.c: Minor tweak.
	* gcc.target/sparc/setcc-4.c: Add -mno-vis4 option.

-- 
Eric Botcazou
Index: config/sparc/sparc-protos.h
===================================================================
--- config/sparc/sparc-protos.h	(revision 264525)
+++ config/sparc/sparc-protos.h	(working copy)
@@ -43,6 +43,7 @@ extern void sparc_override_options (void
 extern void sparc_output_scratch_registers (FILE *);
 extern void sparc_target_macros (void);
 extern void sparc_emit_membar_for_model (enum memmodel, int, int);
+extern int sparc_branch_cost (bool, bool);
 
 #ifdef RTX_CODE
 extern machine_mode select_cc_mode (enum rtx_code, rtx, rtx);
Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c	(revision 264525)
+++ config/sparc/sparc.c	(working copy)
@@ -139,6 +139,9 @@ struct processor_costs {
 
   /* penalty for shifts, due to scheduling rules etc. */
   const int shift_penalty;
+
+  /* cost of a (predictable) branch.  */
+  const int branch_cost;
 };
 
 static const
@@ -163,6 +166,7 @@ struct processor_costs cypress_costs = {
   COSTS_N_INSNS (1), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  3 /* branch cost */
 };
 
 static const
@@ -187,6 +191,7 @@ struct processor_costs supersparc_costs
   COSTS_N_INSNS (4), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   1, /* shift penalty */
+  3 /* branch cost */
 };
 
 static const
@@ -211,6 +216,7 @@ struct processor_costs hypersparc_costs
   COSTS_N_INSNS (17), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  3 /* branch cost */
 };
 
 static const
@@ -235,6 +241,7 @@ struct processor_costs leon_costs = {
   COSTS_N_INSNS (5), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  3 /* branch cost */
 };
 
 static const
@@ -259,6 +266,7 @@ struct processor_costs leon3_costs = {
   COSTS_N_INSNS (35), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  3 /* branch cost */
 };
 
 static const
@@ -283,6 +291,7 @@ struct processor_costs sparclet_costs =
   COSTS_N_INSNS (5), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  3 /* branch cost */
 };
 
 static const
@@ -307,6 +316,7 @@ struct processor_costs ultrasparc_costs
   COSTS_N_INSNS (68), /* idivX */
   COSTS_N_INSNS (2), /* movcc/movr */
   2, /* shift penalty */
+  2 /* branch cost */
 };
 
 static const
@@ -331,6 +341,7 @@ struct processor_costs ultrasparc3_costs
   COSTS_N_INSNS (71), /* idivX */
   COSTS_N_INSNS (2), /* movcc/movr */
   0, /* shift penalty */
+  2 /* branch cost */
 };
 
 static const
@@ -355,6 +366,7 @@ struct processor_costs niagara_costs = {
   COSTS_N_INSNS (72), /* idivX */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  4 /* branch cost */
 };
 
 static const
@@ -379,6 +391,7 @@ struct processor_costs niagara2_costs =
   COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  5 /* branch cost */
 };
 
 static const
@@ -403,6 +416,7 @@ struct processor_costs niagara3_costs =
   COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  5 /* branch cost */
 };
 
 static const
@@ -427,6 +441,7 @@ struct processor_costs niagara4_costs =
   COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  2 /* branch cost */
 };
 
 static const
@@ -451,6 +466,7 @@ struct processor_costs niagara7_costs =
   COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  1 /* branch cost */
 };
 
 static const
@@ -475,6 +491,7 @@ struct processor_costs m8_costs = {
   COSTS_N_INSNS (30), /* udivx/sdivx */
   COSTS_N_INSNS (1), /* movcc/movr */
   0, /* shift penalty */
+  1 /* branch cost */
 };
 
 static const struct processor_costs *sparc_costs = &cypress_costs;
@@ -693,6 +710,7 @@ static bool sparc_can_change_mode_class
 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
 					    const vec_perm_indices &);
+static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
 
 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 /* Table of valid machine attributes.  */
@@ -946,6 +964,9 @@ char sparc_hard_reg_printed[8];
 #undef TARGET_VECTORIZE_VEC_PERM_CONST
 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
 
+#undef TARGET_CAN_FOLLOW_JUMP
+#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Return the memory reference contained in X if any, zero otherwise.  */
@@ -1523,8 +1544,9 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
-	  || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
+      return sparc_fix_at697f
+	     || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
+	     || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
     }
 
   virtual unsigned int execute (function *)
@@ -1657,24 +1679,24 @@ sparc_option_override (void)
     const int disable;
     const int enable;
   } const cpu_table[] = {
-    { "v7",		MASK_ISA|MASK_FSMULD, 0 },
-    { "cypress",	MASK_ISA|MASK_FSMULD, 0 },
+    { "v7",		MASK_ISA, 0 },
+    { "cypress",	MASK_ISA, 0 },
     { "v8",		MASK_ISA, MASK_V8 },
     /* TI TMS390Z55 supersparc */
     { "supersparc",	MASK_ISA, MASK_V8 },
     { "hypersparc",	MASK_ISA, MASK_V8 },
     { "leon",		MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
     { "leon3",		MASK_ISA, MASK_V8|MASK_LEON3 },
-    { "leon3v7",	MASK_ISA|MASK_FSMULD, MASK_LEON3 },
-    { "sparclite",	MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
+    { "leon3v7",	MASK_ISA, MASK_LEON3 },
+    { "sparclite",	MASK_ISA, MASK_SPARCLITE },
     /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
     { "f930",		MASK_ISA|MASK_FPU, MASK_SPARCLITE },
     /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
-    { "f934",		MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
+    { "f934",		MASK_ISA, MASK_SPARCLITE },
     { "sparclite86x",	MASK_ISA|MASK_FPU, MASK_SPARCLITE },
-    { "sparclet",	MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
+    { "sparclet",	MASK_ISA, MASK_SPARCLET },
     /* TEMIC sparclet */
-    { "tsc701",		MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
+    { "tsc701",		MASK_ISA, MASK_SPARCLET },
     { "v9",		MASK_ISA, MASK_V9 },
     /* UltraSPARC I, II, IIi */
     { "ultrasparc",	MASK_ISA,
@@ -1701,7 +1723,7 @@ sparc_option_override (void)
       MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
     /* UltraSPARC M8 */
     { "m8",		MASK_ISA,
-      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
+      MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
   };
   const struct cpu_table *cpu;
   unsigned int i;
@@ -1844,6 +1866,10 @@ sparc_option_override (void)
 		   & ~(target_flags_explicit & MASK_FEATURES)
 		   );
 
+  /* FsMULd is a V8 instruction.  */
+  if (!TARGET_V8 && !TARGET_V9)
+    target_flags &= ~MASK_FSMULD;
+
   /* -mvis2 implies -mvis.  */
   if (TARGET_VIS2)
     target_flags |= MASK_VIS;
@@ -1897,8 +1923,8 @@ sparc_option_override (void)
   /* Enable applicable errata workarounds for LEON3FT.  */
   if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
     {
-    sparc_fix_b2bst = 1;
-    sparc_fix_lost_divsqrt = 1;
+      sparc_fix_b2bst = 1;
+      sparc_fix_lost_divsqrt = 1;
     }
 
   /* Disable FsMULd for the UT699 since it doesn't work correctly.  */
@@ -8004,6 +8030,19 @@ sparc_preferred_simd_mode (scalar_mode m
   return word_mode;
 }
 
+/* Implement TARGET_CAN_FOLLOW_JUMP.  */
+
+static bool
+sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
+{
+  /* Do not fold unconditional jumps that have been created for crossing
+     partition boundaries.  */
+  if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
+    return false;
+
+  return true;
+}
+
 /* Return the string to output an unconditional branch to LABEL, which is
    the operand number of the label.
 
@@ -8019,9 +8058,8 @@ output_ubranch (rtx dest, rtx_insn *insn
 
   /* Even if we are trying to use cbcond for this, evaluate
      whether we can use V9 branches as our backup plan.  */
-
   delta = 5000000;
-  if (INSN_ADDRESSES_SET_P ())
+  if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
     delta = (INSN_ADDRESSES (INSN_UID (dest))
 	     - INSN_ADDRESSES (INSN_UID (insn)));
 
@@ -10159,22 +10197,25 @@ sparc_sched_init (FILE *dump ATTRIBUTE_U
 static int
 sparc_use_sched_lookahead (void)
 {
-  if (sparc_cpu == PROCESSOR_NIAGARA
-      || sparc_cpu == PROCESSOR_NIAGARA2
-      || sparc_cpu == PROCESSOR_NIAGARA3)
-    return 0;
-  if (sparc_cpu == PROCESSOR_NIAGARA4
-      || sparc_cpu == PROCESSOR_NIAGARA7
-      || sparc_cpu == PROCESSOR_M8)
-    return 2;
-  if (sparc_cpu == PROCESSOR_ULTRASPARC
-      || sparc_cpu == PROCESSOR_ULTRASPARC3)
-    return 4;
-  if ((1 << sparc_cpu) &
-      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
-       (1 << PROCESSOR_SPARCLITE86X)))
-    return 3;
-  return 0;
+  switch (sparc_cpu)
+    {
+    case PROCESSOR_ULTRASPARC:
+    case PROCESSOR_ULTRASPARC3:
+      return 4;
+    case PROCESSOR_SUPERSPARC:
+    case PROCESSOR_HYPERSPARC:
+    case PROCESSOR_SPARCLITE86X:
+      return 3;
+    case PROCESSOR_NIAGARA4:
+    case PROCESSOR_NIAGARA7:
+    case PROCESSOR_M8:
+      return 2;
+    case PROCESSOR_NIAGARA:
+    case PROCESSOR_NIAGARA2:
+    case PROCESSOR_NIAGARA3:
+    default:
+      return 0;
+    }
 }
 
 static int
@@ -10182,28 +10223,60 @@ sparc_issue_rate (void)
 {
   switch (sparc_cpu)
     {
+    case PROCESSOR_ULTRASPARC:
+    case PROCESSOR_ULTRASPARC3:
+    case PROCESSOR_M8:
+      return 4;
+    case PROCESSOR_SUPERSPARC:
+      return 3;
+    case PROCESSOR_HYPERSPARC:
+    case PROCESSOR_SPARCLITE86X:
+    case PROCESSOR_V9:
+      /* Assume V9 processors are capable of at least dual-issue.  */
+    case PROCESSOR_NIAGARA4:
+    case PROCESSOR_NIAGARA7:
+      return 2;
     case PROCESSOR_NIAGARA:
     case PROCESSOR_NIAGARA2:
     case PROCESSOR_NIAGARA3:
     default:
       return 1;
-    case PROCESSOR_NIAGARA4:
-    case PROCESSOR_NIAGARA7:
+    }
+}
+
+int
+sparc_branch_cost (bool speed_p, bool predictable_p)
+{
+  if (!speed_p)
+    return 2;
+
+  /* For pre-V9 processors we use a single value (usually 3) to take into
+     account the potential annulling of the delay slot (which ends up being
+     a bubble in the pipeline slot) plus a cycle to take into consideration
+     the instruction cache effects.
+
+     On V9 and later processors, which have branch prediction facilities,
+     we take into account whether the branch is (easily) predictable.  */
+  const int cost = sparc_costs->branch_cost;
+
+  switch (sparc_cpu)
+    {
     case PROCESSOR_V9:
-      /* Assume V9 processors are capable of at least dual-issue.  */
-      return 2;
-    case PROCESSOR_SUPERSPARC:
-      return 3;
-    case PROCESSOR_HYPERSPARC:
-    case PROCESSOR_SPARCLITE86X:
-      return 2;
     case PROCESSOR_ULTRASPARC:
     case PROCESSOR_ULTRASPARC3:
+    case PROCESSOR_NIAGARA:
+    case PROCESSOR_NIAGARA2:
+    case PROCESSOR_NIAGARA3:
+    case PROCESSOR_NIAGARA4:
+    case PROCESSOR_NIAGARA7:
     case PROCESSOR_M8:
-      return 4;
+      return cost + (predictable_p ? 0 : 2);
+
+    default:
+      return cost;
     }
 }
-
+      
 static int
 set_extends (rtx_insn *insn)
 {
Index: config/sparc/sparc.h
===================================================================
--- config/sparc/sparc.h	(revision 264525)
+++ config/sparc/sparc.h	(working copy)
@@ -1496,41 +1496,10 @@ do {									   \
 #define DITF_CONVERSION_LIBFUNCS	0
 #define SUN_INTEGER_MULTIPLY_64 	0
 
-/* Provide the cost of a branch.  For pre-v9 processors we use
-   a value of 3 to take into account the potential annulling of
-   the delay slot (which ends up being a bubble in the pipeline slot)
-   plus a cycle to take into consideration the instruction cache
-   effects.
-
-   On v9 and later, which have branch prediction facilities, we set
-   it to the depth of the pipeline as that is the cost of a
-   mispredicted branch.
-
-   On Niagara, normal branches insert 3 bubbles into the pipe
-   and annulled branches insert 4 bubbles.
-
-   On Niagara-2 and Niagara-3, a not-taken branch costs 1 cycle whereas
-   a taken branch costs 6 cycles.
-
-   The T4 Supplement specifies the branch latency at 2 cycles.
-   The M7 Supplement specifies the branch latency at 1 cycle. */
-
-#define BRANCH_COST(speed_p, predictable_p) \
-	((sparc_cpu == PROCESSOR_V9 \
-	  || sparc_cpu == PROCESSOR_ULTRASPARC) \
-	 ? 7 \
-         : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
-            ? 9 \
-	 : (sparc_cpu == PROCESSOR_NIAGARA \
-	    ? 4 \
-	 : ((sparc_cpu == PROCESSOR_NIAGARA2 \
-	     || sparc_cpu == PROCESSOR_NIAGARA3) \
-	    ? 5 \
-	 : (sparc_cpu == PROCESSOR_NIAGARA4 \
-	    ? 2 \
-	 : (sparc_cpu == PROCESSOR_NIAGARA7 \
-	    ? 1 \
-	 : 3))))))
+/* A C expression for the cost of a branch instruction.  A value of 1
+   is the default; other values are interpreted relative to that.  */
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
+  (sparc_branch_cost (SPEED_P, PREDICTABLE_P))
 
 /* Control the assembler format that we output.  */
 
Index: gcc.target/sparc/20160229-1.c
===================================================================
--- gcc.target/sparc/20160229-1.c	(revision 264525)
+++ gcc.target/sparc/20160229-1.c	(working copy)
@@ -2,8 +2,8 @@
 /* Reported by John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> */
 
 /* { dg-do run } */
-/* { dg-options "-std=gnu99" }
 /* { dg-require-effective-target lp64 } */
+/* { dg-options "-std=gnu99" } */
 
 extern void abort (void);
 
Index: gcc.target/sparc/cbcond-2.c
===================================================================
--- gcc.target/sparc/cbcond-2.c	(revision 264525)
+++ gcc.target/sparc/cbcond-2.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O -mcbcond" } */
 /* { dg-require-effective-target lp64 } */
+/* { dg-options "-O -mcbcond" } */
 
 extern void foo (void);
 extern void bar (void);
Index: gcc.target/sparc/movcc-1.c
===================================================================
--- gcc.target/sparc/movcc-1.c	(revision 264525)
+++ gcc.target/sparc/movcc-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mcpu=v9" } */
 
 int foo1 (int a)
 {
Index: gcc.target/sparc/movcc-2.c
===================================================================
--- gcc.target/sparc/movcc-2.c	(revision 264525)
+++ gcc.target/sparc/movcc-2.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
 /* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
 
 long foo1 (long a)
 {
Index: gcc.target/sparc/overflow-3.c
===================================================================
--- gcc.target/sparc/overflow-3.c	(revision 264525)
+++ gcc.target/sparc/overflow-3.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O" } */
 /* { dg-require-effective-target lp64 } */
+/* { dg-options "-O" } */
 
 #include <stdbool.h>
 #include <stdint.h>
Index: gcc.target/sparc/overflow-4.c
===================================================================
--- gcc.target/sparc/overflow-4.c	(revision 264525)
+++ gcc.target/sparc/overflow-4.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O -mno-vis3" } */
 /* { dg-require-effective-target lp64 } */
+/* { dg-options "-O -mno-vis3 -mno-vis4" } */
 
 #include <stdbool.h>
 #include <stdint.h>
Index: gcc.target/sparc/overflow-5.c
===================================================================
--- gcc.target/sparc/overflow-5.c	(revision 264525)
+++ gcc.target/sparc/overflow-5.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O -mvis3" } */
 /* { dg-require-effective-target lp64 } */
+/* { dg-options "-O -mvis3" } */
 
 #include <stdbool.h>
 #include <stdint.h>
Index: gcc.target/sparc/setcc-4.c
===================================================================
--- gcc.target/sparc/setcc-4.c	(revision 264525)
+++ gcc.target/sparc/setcc-4.c	(working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target lp64 } */
-/* { dg-options "-O1 -mno-vis3" } */
+/* { dg-options "-O1 -mno-vis3 -mno-vis4" } */
 
 long neq (long a, long b)
 {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]