This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
[PATCH][AArch64] Handle function literal pools according to function size
- From: Evandro Menezes <e dot menezes at samsung dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: James Greenhalgh <james dot greenhalgh at arm dot com>, ramana dot radhakrishnan at arm dot com, Marcus Shawcroft <Marcus dot Shawcroft at arm dot com>
- Date: Fri, 13 Nov 2015 16:36:15 -0600
- Subject: [PATCH][AArch64] Handle function literal pools according to function size
[AArch64] Handle function literal pools according to function size
gcc/
PR target/63304
* config/aarch64/aarch64-protos.h (aarch64_nopcrelative_literal_loads):
Move to module scope in "aarch64.c".
(aarch64_may_load_literal_pcrel): New function.
* config/aarch64/aarch64.c (aarch64_nopcrelative_literal_loads): Change
scope to module.
(aarch64_may_load_literal_pcrel): New function that replaces the global
variable "aarch64_nopcrelative_literal_loads" in most cases.
(aarch64_current_func_size): New function.
* config/aarch64/aarch64.h (machine_function): Add new member "size".
* config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
Use "aarch64_may_load_literal_pcrel".
(aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
Since defaulting to always using the global literal pool may degrade
performance on targets that do not fuse the resulting insn pair, this
tentative patch reverts to per-function literal pools when the function
size allows it, falling back to the global literal pool otherwise.

Though the global literal pool promotes the reuse of constants, with a
positive impact on text size, it comes at the cost of increased I-cache
pressure, since it then takes a pair of insns to access a literal.
Conversely, per-function literal pools limit the reuse of constants, but
reduce I-cache pressure, since a single PC-relative load then suffices to
access a literal.  I hope to have data quantifying this trade-off soon.
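For illustration, the two access patterns look roughly as follows for a
floating-point literal (labels and register choices below are made up,
not taken from actual compiler output):

    Per-function pool, a single PC-relative literal load (+/-1MB range):
        ldr     d0, .LC0

    Global pool, the address is materialized first and then dereferenced:
        adrp    x0, .LC0
        ldr     d0, [x0, #:lo12:.LC0]

It is the adrp+ldr pair in the second sequence that some cores fuse;
without such fusion, every literal access costs an extra insn.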
Bootstrapped on aarch64 and arm.
Feedback is welcome.
--
Evandro Menezes
From d0fa78c4c29a15964467276493280efa091fbd64 Mon Sep 17 00:00:00 2001
From: Evandro Menezes <e.menezes@samsung.com>
Date: Fri, 13 Nov 2015 15:55:45 -0600
Subject: [PATCH] [AArch64] Handle function literal pools according to function
size
gcc/
PR target/63304
* config/aarch64/aarch64-protos.h (aarch64_nopcrelative_literal_loads):
Move to module scope in "aarch64.c".
(aarch64_may_load_literal_pcrel): New function.
* config/aarch64/aarch64.c (aarch64_nopcrelative_literal_loads): Change
scope to module.
(aarch64_may_load_literal_pcrel): New function that replaces the global
variable "aarch64_nopcrelative_literal_loads" in most cases.
(aarch64_current_func_size): New function.
* config/aarch64/aarch64.h (machine_function): Add new member "size".
* config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
Use "aarch64_may_load_literal_pcrel".
(aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
---
gcc/config/aarch64/aarch64-protos.h | 4 ++-
gcc/config/aarch64/aarch64.c | 49 ++++++++++++++++++++++++++++++++-----
gcc/config/aarch64/aarch64.h | 7 +++++-
gcc/config/aarch64/aarch64.md | 4 +--
4 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 9000d67..57868b7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -426,5 +426,7 @@ int aarch64_ccmp_mode_to_code (enum machine_mode mode);
bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
-extern bool aarch64_nopcrelative_literal_loads;
+
+extern bool aarch64_may_load_literal_pcrel (void);
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5ec7f08..71f8331 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -139,6 +139,7 @@ static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
+static unsigned long aarch64_current_func_size (void);
/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;
@@ -150,7 +151,7 @@ enum aarch64_processor aarch64_tune = cortexa53;
unsigned long aarch64_tune_flags = 0;
/* Global flag for PC relative loads. */
-bool aarch64_nopcrelative_literal_loads;
+static bool aarch64_nopcrelative_literal_loads;
/* Support for command line parsing of boolean flags in the tuning
structures. */
@@ -1558,7 +1559,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
we need to expand the literal pool access carefully.
This is something that needs to be done in a number
of places, so could well live as a separate function. */
- if (aarch64_nopcrelative_literal_loads)
+ if (!aarch64_may_load_literal_pcrel ())
{
gcc_assert (can_create_pseudo_p ());
base = gen_reg_rtx (ptr_mode);
@@ -3698,7 +3699,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
return ((GET_CODE (sym) == LABEL_REF
|| (GET_CODE (sym) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (sym)
- && !aarch64_nopcrelative_literal_loads)));
+ && aarch64_may_load_literal_pcrel ())));
}
return false;
@@ -4929,7 +4930,7 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
&& (SCALAR_FLOAT_MODE_P (GET_MODE (x))
|| targetm.vector_mode_supported_p (GET_MODE (x)))
- && aarch64_nopcrelative_literal_loads)
+ && !aarch64_may_load_literal_pcrel ())
{
sri->icode = aarch64_constant_pool_reload_icode (mode);
return NO_REGS;
@@ -5256,6 +5257,22 @@ aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
return 0;
}
+/* Return true if PC-relative loads may be used to access the per-function
+ literal pool. */
+
+bool
+aarch64_may_load_literal_pcrel (void)
+{
+ /* The range for PC-relative loads is +/-1MB. To stay on the safe side,
+ the function size limit is set lower to half of the range. */
+ const unsigned long max_func_size = 1048576UL / 2;
+
+ if (aarch64_nopcrelative_literal_loads)
+ return false;
+
+ return (aarch64_current_func_size () < max_func_size);
+}
+
/* Constant pools are per function only when PC relative
literal loads are true or we are in the large memory
model. */
@@ -5263,7 +5280,7 @@ aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
static inline bool
aarch64_can_use_per_function_literal_pools_p (void)
{
- return (!aarch64_nopcrelative_literal_loads
+ return (aarch64_may_load_literal_pcrel ()
|| aarch64_cmodel == AARCH64_CMODEL_LARGE);
}
@@ -7181,6 +7198,26 @@ aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
+/* Return the size of the current function in bytes. */
+
+unsigned long
+aarch64_current_func_size (void)
+{
+ unsigned long func_size;
+ rtx_insn *insn;
+
+ if (cfun->machine->size)
+ return cfun->machine->size;
+
+ if (reload_in_progress)
+ return 0;
+
+ for (func_size = 0, insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ func_size += get_attr_length (insn);
+
+ cfun->machine->size = func_size;
+ return func_size;
+}
/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
@@ -8966,7 +9003,7 @@ aarch64_classify_symbol (rtx x, rtx offset)
/* This is alright even in PIC code as the constant
pool reference is always PC relative and within
the same translation unit. */
- if (nopcrelative_literal_loads
+ if (!aarch64_may_load_literal_pcrel ()
&& CONSTANT_POOL_ADDRESS_P (x))
return SYMBOL_SMALL_ABSOLUTE;
else
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 8834c9b..85afdb8 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -546,12 +546,17 @@ struct GTY (()) aarch64_frame
bool laid_out;
};
+#endif
typedef struct GTY (()) machine_function
{
+ /* Approximate size of the current function in bytes.
+ Only valid after reload and if non-zero. */
+ unsigned long size;
+#ifdef HOST_WIDE_INT
struct aarch64_frame frame;
-} machine_function;
#endif
+} machine_function;
/* Which ABI to use. */
enum aarch64_abi_type
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1586256..d992273 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4535,7 +4535,7 @@
[(set (match_operand:GPF_TF 0 "register_operand" "=w")
(mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S")))
(clobber (match_operand:P 2 "register_operand" "=&r"))]
- "TARGET_FLOAT && aarch64_nopcrelative_literal_loads"
+ "TARGET_FLOAT && !aarch64_may_load_literal_pcrel ()"
{
aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
emit_move_insn (operands[0], gen_rtx_MEM (<GPF_TF:MODE>mode, operands[2]));
@@ -4548,7 +4548,7 @@
[(set (match_operand:VALL 0 "register_operand" "=w")
(mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S")))
(clobber (match_operand:P 2 "register_operand" "=&r"))]
- "TARGET_FLOAT && aarch64_nopcrelative_literal_loads"
+ "TARGET_FLOAT && !aarch64_may_load_literal_pcrel ()"
{
aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
emit_move_insn (operands[0], gen_rtx_MEM (<VALL:MODE>mode, operands[2]));
--
2.1.0.243.g30d45f7