This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[SPARC] Small fixes and cleanup
- From: Eric Botcazou <ebotcazou at adacore dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 28 Sep 2018 10:47:53 +0200
- Subject: [SPARC] Small fixes and cleanup
This patch rewrites the implementation of BRANCH_COST to use a per-processor
cost table and to take into account both of its parameters (speed_p and
predictable_p). It also fixes out-of-range issues for
the Compare-and-Branch-on-Condition instructions on Niagara-4 and later, and
cleans up the handling of MASK_FSMULD.
Bootstrapped/regtested on SPARC/Solaris, applied on the mainline.
2018-09-28 Eric Botcazou <ebotcazou@adacore.com>
* config/sparc/sparc-protos.h (sparc_branch_cost): Declare.
* config/sparc/sparc.h (BRANCH_COST): Call sparc_branch_cost.
* config/sparc/sparc.c (struct processor_costs): Add branch_cost field.
(cypress_costs): Set it.
(supersparc_costs): Likewise.
(hypersparc_costs): Likewise.
(leon_costs): Likewise.
(leon3_costs): Likewise.
(sparclet_costs): Likewise.
(ultrasparc_costs): Likewise.
(ultrasparc3_costs): Likewise.
(niagara_costs): Likewise.
(niagara2_costs): Likewise.
(niagara3_costs): Likewise.
(niagara4_costs): Likewise.
(niagara7_costs): Likewise.
(m8_costs): Likewise.
(TARGET_CAN_FOLLOW_JUMP): Define.
(pass_work_around_errata::gate): Minor tweak.
(sparc_option_override): Remove MASK_FSMULD mask for V7 processors.
Do not set both MASK_VIS4 and MASK_VIS4B for M8 processor.
Automatically clear MASK_FSMULD mask for V7 processors.
(sparc_can_follow_jump): New static function.
(output_ubranch): Deal with CROSSING_JUMP_P.
(sparc_use_sched_lookahead): Rewrite using switch statement.
(sparc_issue_rate): Reorder.
(sparc_branch_cost): New function.
testsuite/
* gcc.target/sparc/20160229-1.c: Minor tweak.
* gcc.target/sparc/cbcond-2.c: Likewise.
* gcc.target/sparc/movcc-1.c: Add -mcpu=v9 option.
* gcc.target/sparc/movcc-2.c: Minor tweak.
* gcc.target/sparc/overflow-3.c: Likewise.
* gcc.target/sparc/overflow-4.c: Add -mno-vis4 option.
* gcc.target/sparc/overflow-5.c: Minor tweak.
* gcc.target/sparc/setcc-4.c: Add -mno-vis4 option.
--
Eric Botcazou
Index: config/sparc/sparc-protos.h
===================================================================
--- config/sparc/sparc-protos.h (revision 264525)
+++ config/sparc/sparc-protos.h (working copy)
@@ -43,6 +43,7 @@ extern void sparc_override_options (void
extern void sparc_output_scratch_registers (FILE *);
extern void sparc_target_macros (void);
extern void sparc_emit_membar_for_model (enum memmodel, int, int);
+extern int sparc_branch_cost (bool, bool);
#ifdef RTX_CODE
extern machine_mode select_cc_mode (enum rtx_code, rtx, rtx);
Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c (revision 264525)
+++ config/sparc/sparc.c (working copy)
@@ -139,6 +139,9 @@ struct processor_costs {
/* penalty for shifts, due to scheduling rules etc. */
const int shift_penalty;
+
+ /* cost of a (predictable) branch. */
+ const int branch_cost;
};
static const
@@ -163,6 +166,7 @@ struct processor_costs cypress_costs = {
COSTS_N_INSNS (1), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 3 /* branch cost */
};
static const
@@ -187,6 +191,7 @@ struct processor_costs supersparc_costs
COSTS_N_INSNS (4), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
1, /* shift penalty */
+ 3 /* branch cost */
};
static const
@@ -211,6 +216,7 @@ struct processor_costs hypersparc_costs
COSTS_N_INSNS (17), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 3 /* branch cost */
};
static const
@@ -235,6 +241,7 @@ struct processor_costs leon_costs = {
COSTS_N_INSNS (5), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 3 /* branch cost */
};
static const
@@ -259,6 +266,7 @@ struct processor_costs leon3_costs = {
COSTS_N_INSNS (35), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 3 /* branch cost */
};
static const
@@ -283,6 +291,7 @@ struct processor_costs sparclet_costs =
COSTS_N_INSNS (5), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 3 /* branch cost */
};
static const
@@ -307,6 +316,7 @@ struct processor_costs ultrasparc_costs
COSTS_N_INSNS (68), /* idivX */
COSTS_N_INSNS (2), /* movcc/movr */
2, /* shift penalty */
+ 2 /* branch cost */
};
static const
@@ -331,6 +341,7 @@ struct processor_costs ultrasparc3_costs
COSTS_N_INSNS (71), /* idivX */
COSTS_N_INSNS (2), /* movcc/movr */
0, /* shift penalty */
+ 2 /* branch cost */
};
static const
@@ -355,6 +366,7 @@ struct processor_costs niagara_costs = {
COSTS_N_INSNS (72), /* idivX */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 4 /* branch cost */
};
static const
@@ -379,6 +391,7 @@ struct processor_costs niagara2_costs =
COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 5 /* branch cost */
};
static const
@@ -403,6 +416,7 @@ struct processor_costs niagara3_costs =
COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 5 /* branch cost */
};
static const
@@ -427,6 +441,7 @@ struct processor_costs niagara4_costs =
COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 2 /* branch cost */
};
static const
@@ -451,6 +466,7 @@ struct processor_costs niagara7_costs =
COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 1 /* branch cost */
};
static const
@@ -475,6 +491,7 @@ struct processor_costs m8_costs = {
COSTS_N_INSNS (30), /* udivx/sdivx */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
+ 1 /* branch cost */
};
static const struct processor_costs *sparc_costs = &cypress_costs;
@@ -693,6 +710,7 @@ static bool sparc_can_change_mode_class
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
const vec_perm_indices &);
+static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes. */
@@ -946,6 +964,9 @@ char sparc_hard_reg_printed[8];
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
+#undef TARGET_CAN_FOLLOW_JUMP
+#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Return the memory reference contained in X if any, zero otherwise. */
@@ -1523,8 +1544,9 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
- || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
+ return sparc_fix_at697f
+ || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
+ || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
}
virtual unsigned int execute (function *)
@@ -1657,24 +1679,24 @@ sparc_option_override (void)
const int disable;
const int enable;
} const cpu_table[] = {
- { "v7", MASK_ISA|MASK_FSMULD, 0 },
- { "cypress", MASK_ISA|MASK_FSMULD, 0 },
+ { "v7", MASK_ISA, 0 },
+ { "cypress", MASK_ISA, 0 },
{ "v8", MASK_ISA, MASK_V8 },
/* TI TMS390Z55 supersparc */
{ "supersparc", MASK_ISA, MASK_V8 },
{ "hypersparc", MASK_ISA, MASK_V8 },
{ "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
{ "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
- { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
- { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
+ { "leon3v7", MASK_ISA, MASK_LEON3 },
+ { "sparclite", MASK_ISA, MASK_SPARCLITE },
/* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
{ "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
/* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
- { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
+ { "f934", MASK_ISA, MASK_SPARCLITE },
{ "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
- { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
+ { "sparclet", MASK_ISA, MASK_SPARCLET },
/* TEMIC sparclet */
- { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
+ { "tsc701", MASK_ISA, MASK_SPARCLET },
{ "v9", MASK_ISA, MASK_V9 },
/* UltraSPARC I, II, IIi */
{ "ultrasparc", MASK_ISA,
@@ -1701,7 +1723,7 @@ sparc_option_override (void)
MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
/* UltraSPARC M8 */
{ "m8", MASK_ISA,
- MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
+ MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
};
const struct cpu_table *cpu;
unsigned int i;
@@ -1844,6 +1866,10 @@ sparc_option_override (void)
& ~(target_flags_explicit & MASK_FEATURES)
);
+ /* FsMULd is a V8 instruction. */
+ if (!TARGET_V8 && !TARGET_V9)
+ target_flags &= ~MASK_FSMULD;
+
/* -mvis2 implies -mvis. */
if (TARGET_VIS2)
target_flags |= MASK_VIS;
@@ -1897,8 +1923,8 @@ sparc_option_override (void)
/* Enable applicable errata workarounds for LEON3FT. */
if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
{
- sparc_fix_b2bst = 1;
- sparc_fix_lost_divsqrt = 1;
+ sparc_fix_b2bst = 1;
+ sparc_fix_lost_divsqrt = 1;
}
/* Disable FsMULd for the UT699 since it doesn't work correctly. */
@@ -8004,6 +8030,19 @@ sparc_preferred_simd_mode (scalar_mode m
return word_mode;
}
+/* Implement TARGET_CAN_FOLLOW_JUMP. */
+
+static bool
+sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
+{
+ /* Do not fold unconditional jumps that have been created for crossing
+ partition boundaries. */
+ if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
+ return false;
+
+ return true;
+}
+
/* Return the string to output an unconditional branch to LABEL, which is
the operand number of the label.
@@ -8019,9 +8058,8 @@ output_ubranch (rtx dest, rtx_insn *insn
/* Even if we are trying to use cbcond for this, evaluate
whether we can use V9 branches as our backup plan. */
-
delta = 5000000;
- if (INSN_ADDRESSES_SET_P ())
+ if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
delta = (INSN_ADDRESSES (INSN_UID (dest))
- INSN_ADDRESSES (INSN_UID (insn)));
@@ -10159,22 +10197,25 @@ sparc_sched_init (FILE *dump ATTRIBUTE_U
static int
sparc_use_sched_lookahead (void)
{
- if (sparc_cpu == PROCESSOR_NIAGARA
- || sparc_cpu == PROCESSOR_NIAGARA2
- || sparc_cpu == PROCESSOR_NIAGARA3)
- return 0;
- if (sparc_cpu == PROCESSOR_NIAGARA4
- || sparc_cpu == PROCESSOR_NIAGARA7
- || sparc_cpu == PROCESSOR_M8)
- return 2;
- if (sparc_cpu == PROCESSOR_ULTRASPARC
- || sparc_cpu == PROCESSOR_ULTRASPARC3)
- return 4;
- if ((1 << sparc_cpu) &
- ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
- (1 << PROCESSOR_SPARCLITE86X)))
- return 3;
- return 0;
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return 4;
+ case PROCESSOR_SUPERSPARC:
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ return 3;
+ case PROCESSOR_NIAGARA4:
+ case PROCESSOR_NIAGARA7:
+ case PROCESSOR_M8:
+ return 2;
+ case PROCESSOR_NIAGARA:
+ case PROCESSOR_NIAGARA2:
+ case PROCESSOR_NIAGARA3:
+ default:
+ return 0;
+ }
}
static int
@@ -10182,28 +10223,60 @@ sparc_issue_rate (void)
{
switch (sparc_cpu)
{
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ case PROCESSOR_M8:
+ return 4;
+ case PROCESSOR_SUPERSPARC:
+ return 3;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ case PROCESSOR_V9:
+ /* Assume V9 processors are capable of at least dual-issue. */
+ case PROCESSOR_NIAGARA4:
+ case PROCESSOR_NIAGARA7:
+ return 2;
case PROCESSOR_NIAGARA:
case PROCESSOR_NIAGARA2:
case PROCESSOR_NIAGARA3:
default:
return 1;
- case PROCESSOR_NIAGARA4:
- case PROCESSOR_NIAGARA7:
+ }
+}
+
+int
+sparc_branch_cost (bool speed_p, bool predictable_p)
+{
+ if (!speed_p)
+ return 2;
+
+ /* For pre-V9 processors we use a single value (usually 3) to take into
+ account the potential annulling of the delay slot (which ends up being
+ a bubble in the pipeline slot) plus a cycle to take into consideration
+ the instruction cache effects.
+
+ On V9 and later processors, which have branch prediction facilities,
+ we take into account whether the branch is (easily) predictable. */
+ const int cost = sparc_costs->branch_cost;
+
+ switch (sparc_cpu)
+ {
case PROCESSOR_V9:
- /* Assume V9 processors are capable of at least dual-issue. */
- return 2;
- case PROCESSOR_SUPERSPARC:
- return 3;
- case PROCESSOR_HYPERSPARC:
- case PROCESSOR_SPARCLITE86X:
- return 2;
case PROCESSOR_ULTRASPARC:
case PROCESSOR_ULTRASPARC3:
+ case PROCESSOR_NIAGARA:
+ case PROCESSOR_NIAGARA2:
+ case PROCESSOR_NIAGARA3:
+ case PROCESSOR_NIAGARA4:
+ case PROCESSOR_NIAGARA7:
case PROCESSOR_M8:
- return 4;
+ return cost + (predictable_p ? 0 : 2);
+
+ default:
+ return cost;
}
}
-
+
static int
set_extends (rtx_insn *insn)
{
Index: config/sparc/sparc.h
===================================================================
--- config/sparc/sparc.h (revision 264525)
+++ config/sparc/sparc.h (working copy)
@@ -1496,41 +1496,10 @@ do { \
#define DITF_CONVERSION_LIBFUNCS 0
#define SUN_INTEGER_MULTIPLY_64 0
-/* Provide the cost of a branch. For pre-v9 processors we use
- a value of 3 to take into account the potential annulling of
- the delay slot (which ends up being a bubble in the pipeline slot)
- plus a cycle to take into consideration the instruction cache
- effects.
-
- On v9 and later, which have branch prediction facilities, we set
- it to the depth of the pipeline as that is the cost of a
- mispredicted branch.
-
- On Niagara, normal branches insert 3 bubbles into the pipe
- and annulled branches insert 4 bubbles.
-
- On Niagara-2 and Niagara-3, a not-taken branch costs 1 cycle whereas
- a taken branch costs 6 cycles.
-
- The T4 Supplement specifies the branch latency at 2 cycles.
- The M7 Supplement specifies the branch latency at 1 cycle. */
-
-#define BRANCH_COST(speed_p, predictable_p) \
- ((sparc_cpu == PROCESSOR_V9 \
- || sparc_cpu == PROCESSOR_ULTRASPARC) \
- ? 7 \
- : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
- ? 9 \
- : (sparc_cpu == PROCESSOR_NIAGARA \
- ? 4 \
- : ((sparc_cpu == PROCESSOR_NIAGARA2 \
- || sparc_cpu == PROCESSOR_NIAGARA3) \
- ? 5 \
- : (sparc_cpu == PROCESSOR_NIAGARA4 \
- ? 2 \
- : (sparc_cpu == PROCESSOR_NIAGARA7 \
- ? 1 \
- : 3))))))
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that. */
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
+ (sparc_branch_cost (SPEED_P, PREDICTABLE_P))
/* Control the assembler format that we output. */
Index: gcc.target/sparc/20160229-1.c
===================================================================
--- gcc.target/sparc/20160229-1.c (revision 264525)
+++ gcc.target/sparc/20160229-1.c (working copy)
@@ -2,8 +2,8 @@
/* Reported by John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> */
/* { dg-do run } */
-/* { dg-options "-std=gnu99" }
/* { dg-require-effective-target lp64 } */
+/* { dg-options "-std=gnu99" } */
extern void abort (void);
Index: gcc.target/sparc/cbcond-2.c
===================================================================
--- gcc.target/sparc/cbcond-2.c (revision 264525)
+++ gcc.target/sparc/cbcond-2.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-/* { dg-options "-O -mcbcond" } */
/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O -mcbcond" } */
extern void foo (void);
extern void bar (void);
Index: gcc.target/sparc/movcc-1.c
===================================================================
--- gcc.target/sparc/movcc-1.c (revision 264525)
+++ gcc.target/sparc/movcc-1.c (working copy)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mcpu=v9" } */
int foo1 (int a)
{
Index: gcc.target/sparc/movcc-2.c
===================================================================
--- gcc.target/sparc/movcc-2.c (revision 264525)
+++ gcc.target/sparc/movcc-2.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-/* { dg-options "-O2" } */
/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
long foo1 (long a)
{
Index: gcc.target/sparc/overflow-3.c
===================================================================
--- gcc.target/sparc/overflow-3.c (revision 264525)
+++ gcc.target/sparc/overflow-3.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-/* { dg-options "-O" } */
/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O" } */
#include <stdbool.h>
#include <stdint.h>
Index: gcc.target/sparc/overflow-4.c
===================================================================
--- gcc.target/sparc/overflow-4.c (revision 264525)
+++ gcc.target/sparc/overflow-4.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-/* { dg-options "-O -mno-vis3" } */
/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O -mno-vis3 -mno-vis4" } */
#include <stdbool.h>
#include <stdint.h>
Index: gcc.target/sparc/overflow-5.c
===================================================================
--- gcc.target/sparc/overflow-5.c (revision 264525)
+++ gcc.target/sparc/overflow-5.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-/* { dg-options "-O -mvis3" } */
/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O -mvis3" } */
#include <stdbool.h>
#include <stdint.h>
Index: gcc.target/sparc/setcc-4.c
===================================================================
--- gcc.target/sparc/setcc-4.c (revision 264525)
+++ gcc.target/sparc/setcc-4.c (working copy)
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O1 -mno-vis3" } */
+/* { dg-options "-O1 -mno-vis3 -mno-vis4" } */
long neq (long a, long b)
{