[4.8, PATCH 20/26] Backport Power8 and LE support: LRA

Bill Schmidt wschmidt@linux.vnet.ibm.com
Wed Mar 19 19:34:00 GMT 2014


Hi,

This patch (diff-lra) backports the changes to enable -mlra for the
PowerPC back end.

Thanks,
Bill


2014-03-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	Backport from mainline
        2014-02-04  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/rs6000.opt (-mlra): Add switch to enable the LRA
	register allocator.

	* config/rs6000/rs6000.c (TARGET_LRA_P): Add support for -mlra to
	enable the LRA register allocator.  Back port the changes from the
	trunk to enable LRA.
	(rs6000_legitimate_offset_address_p): Likewise.
	(legitimate_lo_sum_address_p): Likewise.
	(use_toc_relative_ref): Likewise.
	(rs6000_legitimate_address_p): Likewise.
	(rs6000_emit_move): Likewise.
	(rs6000_secondary_memory_needed_mode): Likewise.
	(rs6000_alloc_sdmode_stack_slot): Likewise.
	(rs6000_lra_p): Likewise.

	* config/rs6000/sync.md (load_lockedti): Copy TI/PTI variables by
	64-bit parts to force the register allocator to allocate even/odd
	register pairs for the quad word atomic instructions.
	(store_conditionalti): Likewise.


Index: gcc-4_8-test/gcc/config/rs6000/rs6000.c
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000.c
+++ gcc-4_8-test/gcc/config/rs6000/rs6000.c
@@ -1,5 +1,5 @@
 /* Subroutines used for code generation on IBM RS/6000.
-   Copyright (C) 1991-2013 Free Software Foundation, Inc.
+   Copyright (C) 1991-2014 Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
    This file is part of GCC.
@@ -56,6 +56,7 @@
 #include "intl.h"
 #include "params.h"
 #include "tm-constrs.h"
+#include "ira.h"
 #include "opts.h"
 #include "tree-vectorizer.h"
 #include "dumpfile.h"
@@ -1563,6 +1564,9 @@ static const struct attribute_spec rs600
 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
 
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rs6000_lra_p
+
 #undef TARGET_CAN_ELIMINATE
 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
 
@@ -6242,7 +6246,7 @@ rs6000_legitimate_offset_address_p (enum
     return false;
   if (!reg_offset_addressing_ok_p (mode))
     return virtual_stack_registers_memory_p (x);
-  if (legitimate_constant_pool_address_p (x, mode, strict))
+  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
     return true;
   if (GET_CODE (XEXP (x, 1)) != CONST_INT)
     return false;
@@ -6383,9 +6387,21 @@ legitimate_lo_sum_address_p (enum machin
 
   if (TARGET_ELF || TARGET_MACHO)
     {
+      bool large_toc_ok;
+
       if (DEFAULT_ABI == ABI_V4 && flag_pic)
 	return false;
-      if (TARGET_TOC)
+      /* LRA don't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
+	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
+	 recognizes some LO_SUM addresses as valid although this
+	 function says opposite.  In most cases, LRA through different
+	 transformations can generate correct code for address reloads.
+	 It can not manage only some LO_SUM cases.  So we need to add
+	 code analogous to one in rs6000_legitimize_reload_address for
+	 LOW_SUM here saying that some addresses are still valid.  */
+      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
+		      && small_toc_ref (x, VOIDmode));
+      if (TARGET_TOC && ! large_toc_ok)
 	return false;
       if (GET_MODE_NUNITS (mode) != 1)
 	return false;
@@ -6395,7 +6411,7 @@ legitimate_lo_sum_address_p (enum machin
 	       && (mode == DFmode || mode == DDmode)))
 	return false;
 
-      return CONSTANT_P (x);
+      return CONSTANT_P (x) || large_toc_ok;
     }
 
   return false;
@@ -7106,7 +7122,6 @@ use_toc_relative_ref (rtx sym)
 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
 					       get_pool_mode (sym)))
 	  || (TARGET_CMODEL == CMODEL_MEDIUM
-	      && !CONSTANT_POOL_ADDRESS_P (sym)
 	      && SYMBOL_REF_LOCAL_P (sym)));
 }
 
@@ -7394,7 +7409,8 @@ rs6000_legitimate_address_p (enum machin
   if (reg_offset_p && legitimate_small_data_p (mode, x))
     return 1;
   if (reg_offset_p
-      && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+      && legitimate_constant_pool_address_p (x, mode,
+					     reg_ok_strict || lra_in_progress))
     return 1;
   /* For TImode, if we have load/store quad and TImode in VSX registers, only
      allow register indirect addresses.  This will allow the values to go in
@@ -7680,6 +7696,7 @@ rs6000_conditional_register_usage (void)
 	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
     }
 }
+
 

 /* Try to output insns to set TARGET equal to the constant C if it can
    be done in less than N insns.  Do all computations in MODE.
@@ -8112,6 +8129,68 @@ rs6000_emit_move (rtx dest, rtx source,
     cfun->machine->sdmode_stack_slot =
       eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
 
+
+  if (lra_in_progress
+      && mode == SDmode
+      && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+      && (REG_P (operands[1])
+	  || (GET_CODE (operands[1]) == SUBREG
+	      && REG_P (SUBREG_REG (operands[1])))))
+    {
+      int regno = REGNO (GET_CODE (operands[1]) == SUBREG
+			 ? SUBREG_REG (operands[1]) : operands[1]);
+      enum reg_class cl;
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  gcc_assert (cl != NO_REGS);
+	  regno = ira_class_hard_regs[cl][0];
+	}
+      if (FP_REGNO_P (regno))
+	{
+	  if (GET_MODE (operands[0]) != DDmode)
+	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
+	  emit_insn (gen_movsd_store (operands[0], operands[1]));
+	}
+      else if (INT_REGNO_P (regno))
+	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+      else
+	gcc_unreachable();
+      return;
+    }
+  if (lra_in_progress
+      && mode == SDmode
+      && (REG_P (operands[0])
+	  || (GET_CODE (operands[0]) == SUBREG
+	      && REG_P (SUBREG_REG (operands[0]))))
+      && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+    {
+      int regno = REGNO (GET_CODE (operands[0]) == SUBREG
+			 ? SUBREG_REG (operands[0]) : operands[0]);
+      enum reg_class cl;
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  gcc_assert (cl != NO_REGS);
+	  regno = ira_class_hard_regs[cl][0];
+	}
+      if (FP_REGNO_P (regno))
+	{
+	  if (GET_MODE (operands[1]) != DDmode)
+	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
+	  emit_insn (gen_movsd_load (operands[0], operands[1]));
+	}
+      else if (INT_REGNO_P (regno))
+	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+      else
+	gcc_unreachable();
+      return;
+    }
+
   if (reload_in_progress
       && mode == SDmode
       && cfun->machine->sdmode_stack_slot != NULL_RTX
@@ -15501,6 +15580,17 @@ rs6000_secondary_memory_needed_rtx (enum
   return ret;
 }
 
+/* Return the mode to be used for memory when a secondary memory
+   location is needed.  For SDmode values we need to use DDmode, in
+   all other cases we can use the same mode.  */
+enum machine_mode
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+{
+  if (mode == SDmode)
+    return DDmode;
+  return mode;
+}
+
 static tree
 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
 {
@@ -16394,6 +16484,10 @@ rs6000_alloc_sdmode_stack_slot (void)
   gimple_stmt_iterator gsi;
 
   gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+  /* We use a different approach for dealing with the secondary
+     memory in LRA.  */
+  if (ira_use_lra_p)
+    return;
 
   if (TARGET_NO_SDMODE_STACK)
     return;
@@ -16615,7 +16709,7 @@ rs6000_secondary_reload_class (enum reg_
   /* Constants, memory, and FP registers can go into FP registers.  */
   if ((regno == -1 || FP_REGNO_P (regno))
       && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
-    return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
 
   /* Memory, and FP/altivec registers can go into fp/altivec registers under
      VSX.  However, for scalar variables, use the traditional floating point
@@ -30418,6 +30512,13 @@ rs6000_libcall_value (enum machine_mode
 }
 

+/* Return true if we use LRA instead of reload pass.  */
+static bool
+rs6000_lra_p (void)
+{
+  return rs6000_lra_flag;
+}
+
 /* Given FROM and TO register numbers, say whether this elimination is allowed.
    Frame pointer elimination is automatically handled.
 
Index: gcc-4_8-test/gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000.opt
+++ gcc-4_8-test/gcc/config/rs6000/rs6000.opt
@@ -1,6 +1,6 @@
 ; Options for the rs6000 port of the compiler
 ;
-; Copyright (C) 2005-2013 Free Software Foundation, Inc.
+; Copyright (C) 2005-2014 Free Software Foundation, Inc.
 ; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
 ;
 ; This file is part of GCC.
@@ -454,6 +454,10 @@ mlong-double-
 Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
 -mlong-double-<n>	Specify size of long double (64 or 128 bits)
 
+mlra
+Target Report Var(rs6000_lra_flag) Init(0) Save
+Use LRA instead of reload
+
 msched-costly-dep=
 Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
 Determine which dependences between insns are considered costly
Index: gcc-4_8-test/gcc/config/rs6000/sync.md
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/sync.md
+++ gcc-4_8-test/gcc/config/rs6000/sync.md
@@ -1,5 +1,5 @@
 ;; Machine description for PowerPC synchronization instructions.
-;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
+;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
 ;; Contributed by Geoffrey Keating.
 
 ;; This file is part of GCC.
@@ -205,9 +205,12 @@
   [(set_attr "type" "load_l")])
 
 ;; Use PTImode to get even/odd register pairs.
+
 ;; Use a temporary register to force getting an even register for the
-;; lqarx/stqcrx. instructions.  Normal optimizations will eliminate this extra
-;; copy on big endian systems.
+;; lqarx/stqcrx. instructions.  Under AT 7.0, we need use an explicit copy,
+;; even in big endian mode, unless we are using the LRA register allocator.  In
+;; GCC 4.9, the register allocator is smart enough to assign a even/odd
+;; register pair.
 
 ;; On little endian systems where non-atomic quad word load/store instructions
 ;; are not used, the address can be register+offset, so make sure the address
@@ -230,12 +233,26 @@
     }
 
   emit_insn (gen_load_lockedpti (pti, op1));
-  if (WORDS_BIG_ENDIAN)
+  if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
     emit_move_insn (op0, gen_lowpart (TImode, pti));
   else
     {
-      emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti));
-      emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti));
+      rtx op0_lo = gen_lowpart (DImode, op0);
+      rtx op0_hi = gen_highpart (DImode, op0);
+      rtx pti_lo = gen_lowpart (DImode, pti);
+      rtx pti_hi = gen_highpart (DImode, pti);
+
+      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+      if (WORDS_BIG_ENDIAN)
+	{
+	  emit_move_insn (op0_hi, pti_hi);
+	  emit_move_insn (op0_lo, pti_lo);
+	}
+      else
+	{
+	  emit_move_insn (op0_hi, pti_lo);
+	  emit_move_insn (op0_lo, pti_hi);
+	}
     }
   DONE;
 })
@@ -260,8 +277,9 @@
   [(set_attr "type" "store_c")])
 
 ;; Use a temporary register to force getting an even register for the
-;; lqarx/stqcrx. instructions.  Normal optimizations will eliminate this extra
-;; copy on big endian systems.
+;; lqarx/stqcrx. instructions.  Under AT 7.0, we need use an explicit copy,
+;; even in big endian mode.  In GCC 4.9, the register allocator is smart enough
+;; to assign a even/odd register pair.
 
 ;; On little endian systems where non-atomic quad word load/store instructions
 ;; are not used, the address can be register+offset, so make sure the address
@@ -290,12 +308,26 @@
   pti_mem = change_address (op1, PTImode, addr);
   pti_reg = gen_reg_rtx (PTImode);
 
-  if (WORDS_BIG_ENDIAN)
+  if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
     emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
   else
     {
-      emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op2));
-      emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op2));
+      rtx op2_lo = gen_lowpart (DImode, op2);
+      rtx op2_hi = gen_highpart (DImode, op2);
+      rtx pti_lo = gen_lowpart (DImode, pti_reg);
+      rtx pti_hi = gen_highpart (DImode, pti_reg);
+
+      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+      if (WORDS_BIG_ENDIAN)
+	{
+	  emit_move_insn (pti_hi, op2_hi);
+	  emit_move_insn (pti_lo, op2_lo);
+	}
+      else
+	{
+	  emit_move_insn (pti_hi, op2_lo);
+	  emit_move_insn (pti_lo, op2_hi);
+	}
     }
 
   emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));





More information about the Gcc-patches mailing list