This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][ARM] Make issue rate part of per-core tuning structs
- From: Kyrill Tkachov <kyrylo dot tkachov at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Ramana Radhakrishnan <ramana dot radhakrishnan at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>
- Date: Thu, 20 Nov 2014 16:43:34 +0000
- Subject: [PATCH][ARM] Make issue rate part of per-core tuning structs
- Authentication-results: sourceware.org; auth=none
Hi all,
This patch makes the arm_issue_rate function look up the issue rate of
the processor from the tuning structs.
This makes it look more like the aarch64 mechanism and centralises a
processor-specific construct to the
tuning structs, thus not forcing us to remember to update the
arm_issue_rate function every time a new core
is added.
A new tuning struct is added for the marvell-pj4 in order to decouple it
from the 9e tuning struct and
enable us to set its correct issue rate to 2.
Bootstrapped and tested on arm-none-gnueabihf.
Ok for trunk?
Thanks,
Kyrill
2014-11-19 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/arm-protos.h (struct tune_params): Add issue_rate field.
* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune,
arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune,
arm_cortex_tune, arm_cortex_a8_tune, arm_cortex_a7_tune,
arm_cortex_a15_tune, arm_cortex_a53_tune, arm_cortex_a57_tune,
arm_cortex_a9_tune, arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune,
arm_fa726te_tune, arm_cortex_a5_tune, arm_cortex_m7_tune): Specify
issue_rate value.
(arm_issue_rate): Look up issue rate from tuning structs. Remove
large switch statement.
(arm_marvell_pj4_tune): New struct.
* config/arm/arm-cores.def (marvell-pj4): Use arm_marvell_pj4_tune
struct.
commit a2466d31869cd7edd0a9de14d96427d361d97dd7
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed Nov 19 16:24:03 2014 +0000
[ARM] refactor issue_rate
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 637be15..12625c7 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex
ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED, cortex_m7)
ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m)
ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m)
-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e)
+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4)
/* V7 big.LITTLE implementations */
ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 71ce362..7d5bfd3 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -291,6 +291,8 @@ struct tune_params
int max_insns_inline_memset;
/* Bitfield encoding the fuseable pairs of instructions. */
unsigned int fuseable_ops : 1;
+ /* Issue rate of the processor. */
+ unsigned int issue_rate;
};
extern const struct tune_params *current_tune;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9aa402f..94db2b2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1671,7 +1671,8 @@ const struct tune_params arm_slowmul_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
const struct tune_params arm_fastmul_tune =
@@ -1691,7 +1692,8 @@ const struct tune_params arm_fastmul_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -1714,7 +1716,8 @@ const struct tune_params arm_strongarm_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
const struct tune_params arm_xscale_tune =
@@ -1734,7 +1737,8 @@ const struct tune_params arm_xscale_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
const struct tune_params arm_9e_tune =
@@ -1754,7 +1758,29 @@ const struct tune_params arm_9e_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
+};
+
+const struct tune_params arm_marvell_pj4_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ NULL, /* Sched adj cost. */
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost,
+ false, /* Prefer LDRD/STRD. */
+ {true, true}, /* Prefer non short circuit. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
+ false, /* Prefer Neon for 64-bits bitops. */
+ false, false, /* Prefer 32-bit encodings. */
+ false, /* Prefer Neon for stringops. */
+ 8, /* Maximum insns to inline memset. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_v6t2_tune =
@@ -1774,7 +1800,8 @@ const struct tune_params arm_v6t2_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -1795,7 +1822,8 @@ const struct tune_params arm_cortex_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_cortex_a8_tune =
@@ -1815,7 +1843,8 @@ const struct tune_params arm_cortex_a8_tune =
false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_cortex_a7_tune =
@@ -1835,7 +1864,8 @@ const struct tune_params arm_cortex_a7_tune =
false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_cortex_a15_tune =
@@ -1855,7 +1885,8 @@ const struct tune_params arm_cortex_a15_tune =
true, true, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 3 /* Issue rate. */
};
const struct tune_params arm_cortex_a53_tune =
@@ -1875,7 +1906,8 @@ const struct tune_params arm_cortex_a53_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
+ ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_cortex_a57_tune =
@@ -1895,7 +1927,8 @@ const struct tune_params arm_cortex_a57_tune =
true, true, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
+ ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
+ 3 /* Issue rate. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1918,7 +1951,8 @@ const struct tune_params arm_cortex_a5_tune =
false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_cortex_a9_tune =
@@ -1938,7 +1972,8 @@ const struct tune_params arm_cortex_a9_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
const struct tune_params arm_cortex_a12_tune =
@@ -1958,7 +1993,8 @@ const struct tune_params arm_cortex_a12_tune =
true, true, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
+ ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
@@ -1985,7 +2021,8 @@ const struct tune_params arm_v7m_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
/* Cortex-M7 tuning. */
@@ -2007,7 +2044,8 @@ const struct tune_params arm_cortex_m7_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -2029,7 +2067,8 @@ const struct tune_params arm_v6m_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 1 /* Issue rate. */
};
const struct tune_params arm_fa726te_tune =
@@ -2049,7 +2088,8 @@ const struct tune_params arm_fa726te_tune =
false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */
8, /* Maximum insns to inline memset. */
- ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
+ ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
+ 2 /* Issue rate. */
};
@@ -27046,36 +27086,12 @@ thumb2_output_casesi (rtx *operands)
}
}
-/* Most ARM cores are single issue, but some newer ones can dual issue.
- The scheduler descriptions rely on this being correct. */
+/* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
+ per-core tuning structs. */
static int
arm_issue_rate (void)
{
- switch (arm_tune)
- {
- case cortexa15:
- case cortexa57:
- return 3;
-
- case cortexm7:
- case cortexr4:
- case cortexr4f:
- case cortexr5:
- case genericv7a:
- case cortexa5:
- case cortexa7:
- case cortexa8:
- case cortexa9:
- case cortexa12:
- case cortexa17:
- case cortexa53:
- case fa726te:
- case marvell_pj4:
- return 2;
-
- default:
- return 1;
- }
+ return current_tune->issue_rate;
}
const char *