[RFC AArch64][PR 63304] Handle literal pools for functions > 1 MiB in size.

Ramana Radhakrishnan ramana.radhakrishnan@foss.arm.com
Mon Sep 14 13:20:00 GMT 2015



On 27/08/15 15:07, Marcus Shawcroft wrote:
> On 27 July 2015 at 15:33, Ramana Radhakrishnan
> <ramana.radhakrishnan@foss.arm.com> wrote:
> 
>> <DATE>  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>>
>>         PR target/63304
>>         * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Handle
>>         nopcrelative_literal_loads.
>>         (aarch64_classify_address): Likewise.
>>         (aarch64_constant_pool_reload_icode): Define.
>>         (aarch64_secondary_reload): Handle secondary reloads for
>>         literal pools.
>>         (aarch64_override_options): Handle nopcrelative_literal_loads.
>>         (aarch64_classify_symbol): Handle nopcrelative_literal_loads.
>>         * config/aarch64/aarch64.md (aarch64_reload_movcp<ALLTF:mode><P:mode>):
>>         Define.
>>         (aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
>>         * config/aarch64/aarch64.opt: New option mnopc-relative-literal-loads
>>         * config/aarch64/predicates.md (aarch64_constant_pool_symref): New
>>         predicate.
>>         * doc/invoke.texi (mnopc-relative-literal-loads): Document.
> 
> This looks OK to me. It needs rebasing, but OK if the rebase is
> trivial.  Default on is fine.  Hold off on the back ports for a couple
> of weeks.
> Cheers
> /Marcus
> 

This is what I applied. I'll give it a week or so on trunk before backporting to the release branches.
Since handling of literal pools more than 1 MiB away is on by default, this final rebased version switches the option name
to the positive form (mpc-relative-literal-loads) and handles it accordingly.

Tested on aarch64-none-elf, no regressions. Applied to trunk.

Thanks,
Ramana 


2015-09-14  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>

        PR target/63304
        * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Handle
        nopcrelative_literal_loads.
        (aarch64_classify_address): Likewise.
        (aarch64_constant_pool_reload_icode): Define.
        (aarch64_secondary_reload): Handle secondary reloads for
        literal pools.
        (aarch64_override_options): Handle nopcrelative_literal_loads.
        (aarch64_classify_symbol): Handle nopcrelative_literal_loads.
        * config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
        Define.
        (aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
        * config/aarch64/aarch64.opt (mpc-relative-literal-loads): New option.
        * config/aarch64/predicates.md (aarch64_constant_pool_symref): New
        predicate.
        * doc/invoke.texi (mpc-relative-literal-loads): Document.
-------------- next part --------------
Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 227737)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,22 @@
+2015-09-14  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+    	PR target/63304
+    	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Handle
+    	nopcrelative_literal_loads.
+    	(aarch64_classify_address): Likewise.
+    	(aarch64_constant_pool_reload_icode): Define.
+    	(aarch64_secondary_reload): Handle secondary reloads for
+    	literal pools.
+    	(aarch64_override_options): Handle nopcrelative_literal_loads.
+    	(aarch64_classify_symbol): Handle nopcrelative_literal_loads.
+    	* config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
+    	Define.
+    	(aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
+    	* config/aarch64/aarch64.opt (mpc-relative-literal-loads): New option.
+    	* config/aarch64/predicates.md (aarch64_constant_pool_symref): New
+    	predicate.
+    	* doc/invoke.texi (mpc-relative-literal-loads): Document.
+
 2015-09-13  Olivier Hainque  <hainque@adacore.com>
 	    Eric Botcazou  <ebotcazou@adacore.com>
 
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	(revision 227737)
+++ gcc/config/aarch64/aarch64.c	(working copy)
@@ -1734,11 +1734,27 @@
 	      aarch64_emit_move (dest, base);
 	      return;
 	    }
+
 	  mem = force_const_mem (ptr_mode, imm);
 	  gcc_assert (mem);
+
+	  /* If we aren't generating PC relative literals, then
+	     we need to expand the literal pool access carefully.
+	     This is something that needs to be done in a number
+	     of places, so could well live as a separate function.  */
+	  if (nopcrelative_literal_loads)
+	    {
+	      gcc_assert (can_create_pseudo_p ());
+	      base = gen_reg_rtx (ptr_mode);
+	      aarch64_expand_mov_immediate (base, XEXP (mem, 0));
+	      mem = gen_rtx_MEM (ptr_mode, base);
+	    }
+
 	  if (mode != ptr_mode)
 	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
+
 	  emit_insn (gen_rtx_SET (dest, mem));
+
 	  return;
 
         case SYMBOL_SMALL_TLSGD:
@@ -3854,9 +3870,10 @@
 	  rtx sym, addend;
 
 	  split_const (x, &sym, &addend);
-	  return (GET_CODE (sym) == LABEL_REF
-		  || (GET_CODE (sym) == SYMBOL_REF
-		      && CONSTANT_POOL_ADDRESS_P (sym)));
+	  return ((GET_CODE (sym) == LABEL_REF
+		   || (GET_CODE (sym) == SYMBOL_REF
+		       && CONSTANT_POOL_ADDRESS_P (sym)
+		       && !nopcrelative_literal_loads)));
 	}
       return false;
 
@@ -5039,6 +5056,51 @@
 }
 
 
+/* Return the reload icode required for a constant pool in mode.  */
+static enum insn_code
+aarch64_constant_pool_reload_icode (machine_mode mode)
+{
+  switch (mode)
+    {
+    case SFmode:
+      return CODE_FOR_aarch64_reload_movcpsfdi;
+
+    case DFmode:
+      return CODE_FOR_aarch64_reload_movcpdfdi;
+
+    case TFmode:
+      return CODE_FOR_aarch64_reload_movcptfdi;
+
+    case V8QImode:
+      return CODE_FOR_aarch64_reload_movcpv8qidi;
+
+    case V16QImode:
+      return CODE_FOR_aarch64_reload_movcpv16qidi;
+
+    case V4HImode:
+      return CODE_FOR_aarch64_reload_movcpv4hidi;
+
+    case V8HImode:
+      return CODE_FOR_aarch64_reload_movcpv8hidi;
+
+    case V2SImode:
+      return CODE_FOR_aarch64_reload_movcpv2sidi;
+
+    case V4SImode:
+      return CODE_FOR_aarch64_reload_movcpv4sidi;
+
+    case V2DImode:
+      return CODE_FOR_aarch64_reload_movcpv2didi;
+
+    case V2DFmode:
+      return CODE_FOR_aarch64_reload_movcpv2dfdi;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  gcc_unreachable ();
+}
 static reg_class_t
 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
 			  reg_class_t rclass,
@@ -5045,6 +5107,18 @@
 			  machine_mode mode,
 			  secondary_reload_info *sri)
 {
+
+  /* If we have to disable direct literal pool loads and stores because the
+     function is too big, then we need a scratch register.  */
+  if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
+      && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
+	  || targetm.vector_mode_supported_p (GET_MODE (x)))
+      && nopcrelative_literal_loads)
+    {
+      sri->icode = aarch64_constant_pool_reload_icode (mode);
+      return NO_REGS;
+    }
+
   /* Without the TARGET_SIMD instructions we cannot move a Q register
      to a Q register directly.  We need a scratch.  */
   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
@@ -7693,6 +7767,24 @@
       if (opts->x_align_functions <= 0)
 	opts->x_align_functions = aarch64_tune_params.function_align;
     }
+
+  /* If nopcrelative_literal_loads is set on the command line, this
+     implies that the user asked for PC relative literal loads.  */
+  if (nopcrelative_literal_loads == 1)
+    nopcrelative_literal_loads = 0;
+
+  /* If it is not set on the command line, we default to no
+     pc relative literal loads.  */
+  if (nopcrelative_literal_loads == 2)
+    nopcrelative_literal_loads = 1;
+
+  /* In the tiny memory model it makes no sense
+     to disallow PC relative literal pool loads
+     as many other things will break anyway.  */
+  if (nopcrelative_literal_loads
+      && (aarch64_cmodel == AARCH64_CMODEL_TINY
+	  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC))
+    nopcrelative_literal_loads = 0;
 }
 
 /* 'Unpack' up the internal tuning structs and update the options
@@ -8884,7 +8976,16 @@
   if (GET_CODE (x) == SYMBOL_REF)
     {
       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
-	  return SYMBOL_FORCE_TO_MEM;
+	{
+	  /* This is alright even in PIC code as the constant
+	     pool reference is always PC relative and within
+	     the same translation unit.  */
+	  if (nopcrelative_literal_loads
+	      && CONSTANT_POOL_ADDRESS_P (x))
+	    return SYMBOL_SMALL_ABSOLUTE;
+	  else
+	    return SYMBOL_FORCE_TO_MEM;
+	}
 
       if (aarch64_tls_symbol_p (x))
 	return aarch64_classify_tls_symbol (x);
Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	(revision 227737)
+++ gcc/config/aarch64/aarch64.md	(working copy)
@@ -4415,7 +4415,33 @@
 ;; -------------------------------------------------------------------
 ;; Reload support
 ;; -------------------------------------------------------------------
+;; Reload Scalar Floating point modes from constant pool.
+;; The AArch64 port doesn't have __int128 constant move support.
+(define_expand "aarch64_reload_movcp<GPF_TF:mode><P:mode>"
+ [(set (match_operand:GPF_TF 0 "register_operand" "=w")
+       (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S")))
+  (clobber (match_operand:P 2 "register_operand" "=&r"))]
+ "TARGET_FLOAT && nopcrelative_literal_loads"
+ {
+   aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
+   emit_move_insn (operands[0], gen_rtx_MEM (<GPF_TF:MODE>mode, operands[2]));
+   DONE;
+ }
+)
 
+;; Reload Vector modes from constant pool.
+(define_expand "aarch64_reload_movcp<VALL:mode><P:mode>"
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+       (mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S")))
+  (clobber (match_operand:P 2 "register_operand" "=&r"))]
+ "TARGET_FLOAT && nopcrelative_literal_loads"
+ {
+   aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
+   emit_move_insn (operands[0], gen_rtx_MEM (<VALL:MODE>mode, operands[2]));
+   DONE;
+ }
+)
+
 (define_expand "aarch64_reload_mov<mode>"
   [(set (match_operand:TX 0 "register_operand" "=w")
         (match_operand:TX 1 "register_operand" "w"))
Index: gcc/config/aarch64/aarch64.opt
===================================================================
--- gcc/config/aarch64/aarch64.opt	(revision 227737)
+++ gcc/config/aarch64/aarch64.opt	(working copy)
@@ -144,3 +144,7 @@
 
 EnumValue
 Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64)
+
+mpc-relative-literal-loads
+Target Report Save Var(nopcrelative_literal_loads) Init(2) Save
+PC relative literal loads.
Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	(revision 227737)
+++ gcc/config/aarch64/iterators.md	(working copy)
@@ -44,6 +44,9 @@
 ;; Double vector modes.
 (define_mode_iterator VDF [V2SF V4HF])
 
+;; Iterator for all scalar floating point modes (SF, DF and TF)
+(define_mode_iterator GPF_TF [SF DF TF])
+
 ;; Integer vector modes.
 (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
 
Index: gcc/config/aarch64/predicates.md
===================================================================
--- gcc/config/aarch64/predicates.md	(revision 227737)
+++ gcc/config/aarch64/predicates.md	(working copy)
@@ -362,3 +362,7 @@
 (define_predicate "aarch64_simd_shift_imm_bitsize_di"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 64)")))
+
+(define_predicate "aarch64_constant_pool_symref"
+   (and (match_code "symbol_ref")
+	(match_test "CONSTANT_POOL_ADDRESS_P (op)")))
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 227737)
+++ gcc/doc/invoke.texi	(working copy)
@@ -12437,6 +12437,14 @@
 across releases.
 
 This option is only intended to be useful when developing GCC.
+
+@item -mpc-relative-literal-loads
+@opindex mpcrelativeliteralloads
+Enable PC relative literal loads. If this option is used, literal
+pools are assumed to have a range of up to 1MiB and an appropriate
+instruction sequence is used. This option has no impact when used
+with @option{-mcmodel=tiny}.
+
 @end table
 
 @subsubsection @option{-march} and @option{-mcpu} Feature Modifiers


More information about the Gcc-patches mailing list