This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

patch to enable LRA for ppc


The following patch enables today's trunk to use LRA for ppc by default.
LRA can be switched off with -mno-lra.

The patch was bootstrapped on ppc64.  The GCC testsuite shows no
regressions either (in comparison with reload).  The change in rs6000.md
fixes an LRA failure on a recently added ppc test.


2013-10-03  Vladimir Makarov  <vmakarov@redhat.com>

	* config/rs6000/rs6000-protos.h
	(rs6000_secondary_memory_needed_mode): New prototype.
	* config/rs6000/rs6000.c: Include ira.h.
	(TARGET_LRA_P): Redefine.
	(rs6000_legitimate_offset_address_p): Call
	legitimate_constant_pool_address_p in strict mode for LRA.
	(rs6000_legitimate_address_p): Ditto.
	(legitimate_lo_sum_address_p): Add code for LRA.
	Use lra_in_progress.
	(rs6000_emit_move): Add LRA version of code to generate load/store
	of SDmode values.
	(rs6000_secondary_memory_needed_mode): New.
	(rs6000_alloc_sdmode_stack_slot): Do nothing for LRA.
	(rs6000_secondary_reload_class): Return NO_REGS for LRA for
	constants, memory, and FP registers.
	(rs6000_lra_p): New.
	* config/rs6000/rs6000.h (SECONDARY_MEMORY_NEEDED_MODE): New
	macro.
	* config/rs6000/rs6000.md (*bswapdi2_64bit): Remove ?? from 3rd
	alternative.
	* config/rs6000/rs6000.opt (mlra): New option.

Index: config/rs6000/rs6000-protos.h
===================================================================
--- config/rs6000/rs6000-protos.h	(revision 203164)
+++ config/rs6000/rs6000-protos.h	(working copy)
@@ -124,6 +124,8 @@ extern rtx create_TOC_reference (rtx, rt
 extern void rs6000_split_multireg_move (rtx, rtx);
 extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
 extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
+extern enum machine_mode rs6000_secondary_memory_needed_mode (enum
+							      machine_mode);
 extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
 						    int, int, int, int *);
 extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx,
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c	(revision 203164)
+++ config/rs6000/rs6000.c	(working copy)
@@ -56,6 +56,7 @@
 #include "intl.h"
 #include "params.h"
 #include "tm-constrs.h"
+#include "ira.h"
 #include "opts.h"
 #include "tree-vectorizer.h"
 #include "dumpfile.h"
@@ -1493,6 +1494,9 @@ static const struct attribute_spec rs600
 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
 
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rs6000_lra_p
+
 #undef TARGET_CAN_ELIMINATE
 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
 
@@ -6030,7 +6034,7 @@ rs6000_legitimate_offset_address_p (enum
     return false;
   if (!reg_offset_addressing_ok_p (mode))
     return virtual_stack_registers_memory_p (x);
-  if (legitimate_constant_pool_address_p (x, mode, strict))
+  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
     return true;
   if (GET_CODE (XEXP (x, 1)) != CONST_INT)
     return false;
@@ -6170,19 +6174,31 @@ legitimate_lo_sum_address_p (enum machin
 
   if (TARGET_ELF || TARGET_MACHO)
     {
+      bool large_toc_ok;
+
       if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
 	return false;
-      if (TARGET_TOC)
+      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
+	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
+	 recognizes some LO_SUM addresses as valid although this
+	 function says the opposite.  In most cases, LRA can generate
+	 correct code for address reloads through different
+	 transformations; it fails only on some LO_SUM cases.  So we add
+	 code analogous to that in rs6000_legitimize_reload_address for
+	 LO_SUM here, saying that some addresses are still valid.  */
+      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
+		      && small_toc_ref (x, VOIDmode));
+      if (TARGET_TOC && ! large_toc_ok)
 	return false;
       if (GET_MODE_NUNITS (mode) != 1)
 	return false;
-      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+      if (! lra_in_progress && GET_MODE_SIZE (mode) > UNITS_PER_WORD
 	  && !(/* ??? Assume floating point reg based on mode?  */
 	       TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
 	       && (mode == DFmode || mode == DDmode)))
 	return false;
 
-      return CONSTANT_P (x);
+      return CONSTANT_P (x) || large_toc_ok;
     }
 
   return false;
@@ -7180,7 +7196,8 @@ rs6000_legitimate_address_p (enum machin
   if (reg_offset_p && legitimate_small_data_p (mode, x))
     return 1;
   if (reg_offset_p
-      && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+      && legitimate_constant_pool_address_p (x, mode,
+					     reg_ok_strict || lra_in_progress))
     return 1;
   /* For TImode, if we have load/store quad, only allow register indirect
      addresses.  This will allow the values to go in either GPRs or VSX
@@ -7479,6 +7496,7 @@ rs6000_conditional_register_usage (void)
 	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
     }
 }
+
 
 /* Try to output insns to set TARGET equal to the constant C if it can
    be done in less than N insns.  Do all computations in MODE.
@@ -7783,6 +7801,68 @@ rs6000_emit_move (rtx dest, rtx source,
     cfun->machine->sdmode_stack_slot =
       eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
 
+
+  if (lra_in_progress
+      && mode == SDmode
+      && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+      && (REG_P (operands[1])
+	  || (GET_CODE (operands[1]) == SUBREG
+	      && REG_P (SUBREG_REG (operands[1])))))
+    {
+      int regno = REGNO (GET_CODE (operands[1]) == SUBREG
+			 ? SUBREG_REG (operands[1]) : operands[1]);
+      enum reg_class cl;
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  gcc_assert (cl != NO_REGS);
+	  regno = ira_class_hard_regs[cl][0];
+	}
+      if (FP_REGNO_P (regno))
+	{
+	  if (GET_MODE (operands[0]) != DDmode)
+	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
+	  emit_insn (gen_movsd_store (operands[0], operands[1]));
+	}
+      else if (INT_REGNO_P (regno))
+	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+      else
+	gcc_unreachable();
+      return;
+    }
+  if (lra_in_progress
+      && mode == SDmode
+      && (REG_P (operands[0])
+	  || (GET_CODE (operands[0]) == SUBREG
+	      && REG_P (SUBREG_REG (operands[0]))))
+      && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+    {
+      int regno = REGNO (GET_CODE (operands[0]) == SUBREG
+			 ? SUBREG_REG (operands[0]) : operands[0]);
+      enum reg_class cl;
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  gcc_assert (cl != NO_REGS);
+	  regno = ira_class_hard_regs[cl][0];
+	}
+      if (FP_REGNO_P (regno))
+	{
+	  if (GET_MODE (operands[1]) != DDmode)
+	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
+	  emit_insn (gen_movsd_load (operands[0], operands[1]));
+	}
+      else if (INT_REGNO_P (regno))
+	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+      else
+	gcc_unreachable();
+      return;
+    }
+
   if (reload_in_progress
       && mode == SDmode
       && cfun->machine->sdmode_stack_slot != NULL_RTX
@@ -14630,6 +14710,17 @@ rs6000_secondary_memory_needed_rtx (enum
   return ret;
 }
 
+/* Return the mode to be used for memory when a secondary memory
+   location is needed.  For SDmode values we need to use DDmode, in
+   all other cases we can use the same mode.  */
+enum machine_mode
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+{
+  if (mode == SDmode)
+    return DDmode;
+  return mode;
+}
+
 static tree
 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
 {
@@ -15523,6 +15614,10 @@ rs6000_alloc_sdmode_stack_slot (void)
   gimple_stmt_iterator gsi;
 
   gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+  /* We use a different approach for dealing with the secondary
+     memory in LRA.  */
+  if (ira_use_lra_p)
+    return;
 
   if (TARGET_NO_SDMODE_STACK)
     return;
@@ -15744,7 +15839,7 @@ rs6000_secondary_reload_class (enum reg_
   /* Constants, memory, and FP registers can go into FP registers.  */
   if ((regno == -1 || FP_REGNO_P (regno))
       && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
-    return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
 
   /* Memory, and FP/altivec registers can go into fp/altivec registers under
      VSX.  However, for scalar variables, use the traditional floating point
@@ -28936,6 +29031,13 @@ rs6000_libcall_value (enum machine_mode
 }
 
 
+/* Return true if we use LRA instead of reload pass.  */
+static bool
+rs6000_lra_p (void)
+{
+  return rs6000_lra_flag;
+}
+
 /* Given FROM and TO register numbers, say whether this elimination is allowed.
    Frame pointer elimination is automatically handled.
 
Index: config/rs6000/rs6000.h
===================================================================
--- config/rs6000/rs6000.h	(revision 203164)
+++ config/rs6000/rs6000.h	(working copy)
@@ -1491,6 +1491,13 @@ extern enum reg_class rs6000_constraints
 #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
   rs6000_secondary_memory_needed_rtx (MODE)
 
+/* Specify the mode to be used for memory when a secondary memory
+   location is needed.  For cpus that cannot load/store SDmode values
+   from the 64-bit FP registers without using a full 64-bit
+   load/store, we need a wider mode.  */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE)		\
+  rs6000_secondary_memory_needed_mode (MODE)
+
 /* Return the maximum number of consecutive registers
    needed to represent mode MODE in a register of class CLASS.
 
Index: config/rs6000/rs6000.md
===================================================================
--- config/rs6000/rs6000.md	(revision 203164)
+++ config/rs6000/rs6000.md	(working copy)
@@ -2391,7 +2391,7 @@
 
 ;; Non-power7/cell, fall back to use lwbrx/stwbrx
 (define_insn "*bswapdi2_64bit"
-  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,&r")
 	(bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
    (clobber (match_scratch:DI 2 "=&b,&b,&r"))
    (clobber (match_scratch:DI 3 "=&r,&r,&r"))
Index: config/rs6000/rs6000.opt
===================================================================
--- config/rs6000/rs6000.opt	(revision 203164)
+++ config/rs6000/rs6000.opt	(working copy)
@@ -446,6 +446,10 @@ mlong-double-
 Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
 -mlong-double-<n>	Specify size of long double (64 or 128 bits)
 
+mlra
+Target Report Var(rs6000_lra_flag) Init(1) Save
+Use LRA instead of reload
+
 msched-costly-dep=
 Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
 Determine which dependences between insns are considered costly

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]