This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH 1/3] rs6000: Split rs6000_cpu
- From: Segher Boessenkool <segher at kernel dot crashing dot org>
- To: gcc-patches at gcc dot gnu dot org
- Cc: dje dot gcc at gmail dot com, Segher Boessenkool <segher at kernel dot crashing dot org>
- Date: Wed, 6 Dec 2017 11:42:12 +0000
- Subject: [PATCH 1/3] rs6000: Split rs6000_cpu
- Authentication-results: sourceware.org; auth=none
This splits rs6000_cpu into rs6000_cpu and rs6000_tune. Both are still
initialised identically, so there is no behaviour change.
Now rs6000_cpu should be set by -mcpu=, and rs6000_tune by -mtune= (but for
now both are still set by -mtune=; this is fixed in a later patch in the series).
Also change rs6000_cpu to rs6000_tune in the appropriate cases (i.e. most of
them, but not those that enable the use of new insns).
All three patches were bootstrapped and tested on powerpc64-linux {-m32,-m64},
and on powerpc64le-linux.
David, do those look okay?
Segher
2017-12-06 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.opt (rs6000_tune): New variable.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Also set
rs6000_tune. Use rs6000_tune instead of rs6000_cpu where appropriate.
(rs6000_loop_align): Use rs6000_tune instead of rs6000_cpu where
appropriate.
(rs6000_reassociation_width): Ditto.
(rs6000_emit_epilogue): Ditto.
(rs6000_adjust_cost): Ditto.
(is_microcoded_insn): Ditto.
(is_cracked_insn): Ditto.
(rs6000_adjust_priority): Ditto.
(rs6000_sched_reorder): Ditto.
(rs6000_sched_reorder2): Ditto.
(insn_must_be_first_in_group): Ditto.
(insn_must_be_last_in_group): Ditto.
(rs6000_register_move_cost): Ditto.
* config/rs6000/rs6000.h (rs6000_cpu_attr): Use rs6000_tune instead of
rs6000_cpu.
---
gcc/config/rs6000/rs6000.c | 118 ++++++++++++++++++++++---------------------
gcc/config/rs6000/rs6000.h | 2 +-
gcc/config/rs6000/rs6000.opt | 4 ++
3 files changed, 65 insertions(+), 59 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0034089..1ad13cd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4185,6 +4185,8 @@ rs6000_option_override_internal (bool global_init_p)
gcc_assert (tune_index >= 0);
rs6000_cpu = processor_target_table[tune_index].processor;
+ gcc_assert (tune_index >= 0);
+ rs6000_tune = processor_target_table[tune_index].processor;
if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
|| rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
@@ -4673,10 +4675,10 @@ rs6000_option_override_internal (bool global_init_p)
/* E500mc does "better" if we inline more aggressively. Respect the
user's opinion, though. */
if (rs6000_block_move_inline_limit == 0
- && (rs6000_cpu == PROCESSOR_PPCE500MC
- || rs6000_cpu == PROCESSOR_PPCE500MC64
- || rs6000_cpu == PROCESSOR_PPCE5500
- || rs6000_cpu == PROCESSOR_PPCE6500))
+ && (rs6000_tune == PROCESSOR_PPCE500MC
+ || rs6000_tune == PROCESSOR_PPCE500MC64
+ || rs6000_tune == PROCESSOR_PPCE5500
+ || rs6000_tune == PROCESSOR_PPCE6500))
rs6000_block_move_inline_limit = 128;
/* store_one_arg depends on expand_block_move to handle at least the
@@ -4834,29 +4836,29 @@ rs6000_option_override_internal (bool global_init_p)
"point");
}
- rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
- && rs6000_cpu != PROCESSOR_POWER5
- && rs6000_cpu != PROCESSOR_POWER6
- && rs6000_cpu != PROCESSOR_POWER7
- && rs6000_cpu != PROCESSOR_POWER8
- && rs6000_cpu != PROCESSOR_POWER9
- && rs6000_cpu != PROCESSOR_PPCA2
- && rs6000_cpu != PROCESSOR_CELL
- && rs6000_cpu != PROCESSOR_PPC476);
- rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
- || rs6000_cpu == PROCESSOR_POWER5
- || rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8);
- rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
- || rs6000_cpu == PROCESSOR_POWER5
- || rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8
- || rs6000_cpu == PROCESSOR_POWER9
- || rs6000_cpu == PROCESSOR_PPCE500MC
- || rs6000_cpu == PROCESSOR_PPCE500MC64
- || rs6000_cpu == PROCESSOR_PPCE5500
- || rs6000_cpu == PROCESSOR_PPCE6500);
+ rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
+ && rs6000_tune != PROCESSOR_POWER5
+ && rs6000_tune != PROCESSOR_POWER6
+ && rs6000_tune != PROCESSOR_POWER7
+ && rs6000_tune != PROCESSOR_POWER8
+ && rs6000_tune != PROCESSOR_POWER9
+ && rs6000_tune != PROCESSOR_PPCA2
+ && rs6000_tune != PROCESSOR_CELL
+ && rs6000_tune != PROCESSOR_PPC476);
+ rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
+ || rs6000_tune == PROCESSOR_POWER5
+ || rs6000_tune == PROCESSOR_POWER7
+ || rs6000_tune == PROCESSOR_POWER8);
+ rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
+ || rs6000_tune == PROCESSOR_POWER5
+ || rs6000_tune == PROCESSOR_POWER6
+ || rs6000_tune == PROCESSOR_POWER7
+ || rs6000_tune == PROCESSOR_POWER8
+ || rs6000_tune == PROCESSOR_POWER9
+ || rs6000_tune == PROCESSOR_PPCE500MC
+ || rs6000_tune == PROCESSOR_PPCE500MC64
+ || rs6000_tune == PROCESSOR_PPCE5500
+ || rs6000_tune == PROCESSOR_PPCE6500);
/* Allow debug switches to override the above settings. These are set to -1
in rs6000.opt to indicate the user hasn't directly set the switch. */
@@ -4996,8 +4998,8 @@ rs6000_option_override_internal (bool global_init_p)
{
/* Cell wants to be aligned 8byte for dual issue. Titan wants to be
aligned 8byte to avoid misprediction by the branch predictor. */
- if (rs6000_cpu == PROCESSOR_TITAN
- || rs6000_cpu == PROCESSOR_CELL)
+ if (rs6000_tune == PROCESSOR_TITAN
+ || rs6000_tune == PROCESSOR_CELL)
{
if (align_functions <= 0)
align_functions = 8;
@@ -5041,7 +5043,7 @@ rs6000_option_override_internal (bool global_init_p)
if (optimize_size)
rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
else
- switch (rs6000_cpu)
+ switch (rs6000_tune)
{
case PROCESSOR_RS64A:
rs6000_cost = &rs64a_cost;
@@ -5216,7 +5218,7 @@ rs6000_option_override_internal (bool global_init_p)
DERAT mispredict penalty. However the LVE and STVE altivec instructions
need indexed accesses and the type used is the scalar type of the element
being loaded or stored. */
- TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
+ TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
&& !TARGET_ALTIVEC);
/* Set the -mrecip options. */
@@ -5285,7 +5287,7 @@ rs6000_option_override_internal (bool global_init_p)
/* If not explicitly specified via option, decide whether to generate the
extra blr's required to preserve the link stack on some cpus (eg, 476). */
if (TARGET_LINK_STACK == -1)
- SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
+ SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
return ret;
}
@@ -5328,12 +5330,12 @@ rs6000_loop_align (rtx label)
/* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
if (ninsns > 4 && ninsns <= 8
- && (rs6000_cpu == PROCESSOR_POWER4
- || rs6000_cpu == PROCESSOR_POWER5
- || rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8
- || rs6000_cpu == PROCESSOR_POWER9))
+ && (rs6000_tune == PROCESSOR_POWER4
+ || rs6000_tune == PROCESSOR_POWER5
+ || rs6000_tune == PROCESSOR_POWER6
+ || rs6000_tune == PROCESSOR_POWER7
+ || rs6000_tune == PROCESSOR_POWER8
+ || rs6000_tune == PROCESSOR_POWER9))
return 5;
else
return align_loops_log;
@@ -9986,7 +9988,7 @@ static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
machine_mode mode)
{
- switch (rs6000_cpu)
+ switch (rs6000_tune)
{
case PROCESSOR_POWER8:
case PROCESSOR_POWER9:
@@ -28304,9 +28306,9 @@ rs6000_emit_epilogue (int sibcall)
using_load_multiple = strategy & REST_MULTIPLE;
restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
- using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
- || rs6000_cpu == PROCESSOR_PPC603
- || rs6000_cpu == PROCESSOR_PPC750
+ using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
+ || rs6000_tune == PROCESSOR_PPC603
+ || rs6000_tune == PROCESSOR_PPC750
|| optimize_size);
/* Restore via the backchain when we have a large frame, since this
is more efficient than an addis, addi pair. The second condition
@@ -30703,7 +30705,7 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case TYPE_STORE:
case TYPE_FPSTORE:
- if ((rs6000_cpu == PROCESSOR_POWER6)
+ if ((rs6000_tune == PROCESSOR_POWER6)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
{
@@ -30771,7 +30773,7 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
break;
case TYPE_LOAD:
- if ((rs6000_cpu == PROCESSOR_POWER6)
+ if ((rs6000_tune == PROCESSOR_POWER6)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
{
@@ -30834,7 +30836,7 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
break;
case TYPE_FPLOAD:
- if ((rs6000_cpu == PROCESSOR_POWER6)
+ if ((rs6000_tune == PROCESSOR_POWER6)
&& get_attr_update (insn) == UPDATE_NO
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0)
@@ -30852,7 +30854,7 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case REG_DEP_OUTPUT:
/* Output dependency; DEP_INSN writes a register that INSN writes some
cycles later. */
- if ((rs6000_cpu == PROCESSOR_POWER6)
+ if ((rs6000_tune == PROCESSOR_POWER6)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
{
@@ -30935,7 +30937,7 @@ is_microcoded_insn (rtx_insn *insn)
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
if (rs6000_sched_groups
- && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
+ && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if ((type == TYPE_LOAD
@@ -30963,7 +30965,7 @@ is_cracked_insn (rtx_insn *insn)
return false;
if (rs6000_sched_groups
- && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
+ && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if ((type == TYPE_LOAD
@@ -31169,7 +31171,7 @@ rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
return (priority + 1);
}
- if (rs6000_cpu == PROCESSOR_POWER6
+ if (rs6000_tune == PROCESSOR_POWER6
&& ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
|| (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
/* Attach highest priority to insn if the scheduler has just issued two
@@ -31685,7 +31687,7 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
std::swap (ready[n_ready - 1], ready[n_ready - 2]);
}
- if (rs6000_cpu == PROCESSOR_POWER6)
+ if (rs6000_tune == PROCESSOR_POWER6)
load_store_pendulum = 0;
return rs6000_issue_rate ();
@@ -31740,7 +31742,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
of the machine. Those instructions are currently unaccounted
for to help minimize compile time overhead of this code.
*/
- if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
+ if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
{
int pos;
int i;
@@ -31894,7 +31896,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
}
/* Do Power9 dependent reordering if necessary. */
- if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
+ if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
&& recog_memoized (last_scheduled_insn) >= 0)
return power9_sched_reorder2 (ready, *pn_ready - 1);
@@ -31949,7 +31951,7 @@ insn_must_be_first_in_group (rtx_insn *insn)
|| GET_CODE (PATTERN (insn)) == CLOBBER)
return false;
- switch (rs6000_cpu)
+ switch (rs6000_tune)
{
case PROCESSOR_POWER5:
if (is_cracked_insn (insn))
@@ -32126,7 +32128,7 @@ insn_must_be_last_in_group (rtx_insn *insn)
|| GET_CODE (PATTERN (insn)) == CLOBBER)
return false;
- switch (rs6000_cpu) {
+ switch (rs6000_tune) {
case PROCESSOR_POWER4:
case PROCESSOR_POWER5:
if (is_microcoded_insn (insn))
@@ -35132,10 +35134,10 @@ rs6000_register_move_cost (machine_mode mode,
/* For those processors that have slow LR/CTR moves, make them more
expensive than memory in order to bias spills to memory .*/
- else if ((rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8
- || rs6000_cpu == PROCESSOR_POWER9)
+ else if ((rs6000_tune == PROCESSOR_POWER6
+ || rs6000_tune == PROCESSOR_POWER7
+ || rs6000_tune == PROCESSOR_POWER8
+ || rs6000_tune == PROCESSOR_POWER9)
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
ret = 6 * hard_regno_nregs (0, mode);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 1981104..ed56e69 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -392,7 +392,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define TARGET_XILINX_FPU 0
/* Recast the processor type to the cpu attribute. */
-#define rs6000_cpu_attr ((enum attr_cpu)rs6000_cpu)
+#define rs6000_cpu_attr ((enum attr_cpu)rs6000_tune)
/* Define generic processor types based upon current deployment. */
#define PROCESSOR_COMMON PROCESSOR_PPC601
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index e7d0829..fc38f40 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -40,6 +40,10 @@ HOST_WIDE_INT x_rs6000_isa_flags_explicit
TargetVariable
enum processor_type rs6000_cpu = PROCESSOR_PPC603
+;; Current tuning
+TargetVariable
+enum processor_type rs6000_tune = PROCESSOR_PPC603
+
;; Always emit branch hint bits.
TargetVariable
unsigned char rs6000_always_hint
--
1.8.3.1