Patch for latest LRA changes.

Vladimir Makarov vmakarov@redhat.com
Wed Apr 24 18:07:00 GMT 2013


The following patch incorporates some LRA changes from the lra and 
mike-lra branches.  I am committing them to trunk to get broader 
testing on x86/x86-64.

The patch was successfully tested and bootstrapped on x86/x86-64.

Committed as rev. 198252.

2013-04-24  Vladimir Makarov  <vmakarov@redhat.com>

         * rtl.h (struct rtx_def): Add comment for field jump.
         (LRA_SUBREG_P): New macro.
         * recog.c (register_operand): Check LRA_SUBREG_P.
         * lra.c (lra): Add note at the end of RTL code. Align non-empty
         stack frame.
         * lra-spills.c (lra_spill): Align stack after spilling pseudos.
         (lra_final_code_change): Skip subreg change for operators.
         * lra-eliminations.c (eliminate_regs_in_insn): Return early
         if there are no operand changes.
         * lra-constraints.c (curr_insn_set): New.
         (match_reload): Set LRA_SUBREG_P.
         (emit_spill_move): Ditto.
         (check_and_process_move): Use curr_insn_set. Process only single
         set insns.  Don't initialize sec_mem_p and change_p.
         (simplify_operand_subreg): Use LRA_SUBREG_P.
         (reg_in_class_p): New function.
         (process_alt_operands): Use it.  Use #if HAVE_ATTR_enabled instead
         of #ifdef.  Add code to remove cycling.
         (process_address): Check EXTRA_CONSTRAINT_STR.  Process the
         address even if disp is non-null.  Reload inner instead of disp
         when base and index are null.  Try to put lo_sum into a register.
         (EBB_PROBABILITY_CUTOFF): Redefine the probability in percent.
         (check_and_process_move): Move code for move cost check to
         simple_move_p.  Remove equiv_substitution.
         (simple_move_p): New function.
         (curr_insn_transform): Initialize sec_mem_p and change_p.  Set up
         curr_insn_set.  Call check_and_process_move only for single set
         insns.  Use the new function.  Move call of check_and_process_move
         after operand equiv substitution and address processing.
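
The common thread in the rtl.h, recog.c, match_reload, emit_spill_move,
and simplify_operand_subreg changes is the new LRA_SUBREG_P flag.
Condensed from the hunks below (a sketch, not part of the patch): LRA
marks the subregs it creates for matched-operand reloads, and
register_operand then knows not to reject them:

  /* In match_reload (lra-constraints.c): flag the subreg built for a
     matched operand reload as LRA-generated.  */
  new_out_reg = gen_rtx_SUBREG (outmode, reg, 0);
  LRA_SUBREG_P (new_out_reg) = 1;

  /* In register_operand (recog.c): a subreg that would otherwise be
     rejected because of REG_CANNOT_CHANGE_MODE_P is accepted when it
     carries the flag; such subregs are valid only during LRA.  */
  if (REG_CANNOT_CHANGE_MODE_P (REGNO (sub), GET_MODE (sub), mode)
      && ! LRA_SUBREG_P (op))
    return 0;

simplify_operand_subreg uses the same flag to avoid reloading these
subregs again.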




Index: rtl.h
===================================================================
--- rtl.h	(revision 198251)
+++ rtl.h	(working copy)
@@ -265,7 +265,8 @@ struct GTY((chain_next ("RTX_NEXT (&%h)"
      1 in a SET that is for a return.
      In a CODE_LABEL, part of the two-bit alternate entry field.
      1 in a CONCAT is VAL_EXPR_IS_COPIED in var-tracking.c.
-     1 in a VALUE is SP_BASED_VALUE_P in cselib.c.  */
+     1 in a VALUE is SP_BASED_VALUE_P in cselib.c.
+     1 in a SUBREG generated by LRA for reload insns.  */
   unsigned int jump : 1;
   /* In a CODE_LABEL, part of the two-bit alternate entry field.
      1 in a MEM if it cannot trap.
@@ -1411,6 +1412,11 @@ do {									\
   ((RTL_FLAG_CHECK1("SUBREG_PROMOTED_UNSIGNED_P", (RTX), SUBREG)->volatil) \
    ? -1 : (int) (RTX)->unchanging)
 
+/* True if the subreg was generated by LRA for reload insns.  Such
+   subregs are valid only during LRA.  */
+#define LRA_SUBREG_P(RTX)	\
+  (RTL_FLAG_CHECK1("LRA_SUBREG_P", (RTX), SUBREG)->jump)
+
 /* Access various components of an ASM_OPERANDS rtx.  */
 
 #define ASM_OPERANDS_TEMPLATE(RTX) XCSTR (RTX, 0, ASM_OPERANDS)
Index: recog.c
===================================================================
--- recog.c	(revision 198251)
+++ recog.c	(working copy)
@@ -1065,7 +1065,11 @@ register_operand (rtx op, enum machine_m
 	  && REGNO (sub) < FIRST_PSEUDO_REGISTER
 	  && REG_CANNOT_CHANGE_MODE_P (REGNO (sub), GET_MODE (sub), mode)
 	  && GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_INT
-	  && GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_FLOAT)
+	  && GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_FLOAT
+	  /* LRA can generate some invalid SUBREGs just to represent
+	     matched operand reloads.  LRA needs to treat them as
+	     valid.  */
+	  && ! LRA_SUBREG_P (op))
 	return 0;
 #endif
 
Index: lra.c
===================================================================
--- lra.c	(revision 198251)
+++ lra.c	(working copy)
@@ -2203,6 +2203,10 @@ lra (FILE *f)
 
   timevar_push (TV_LRA);
 
+  /* Make sure that the last insn is a note.  Some subsequent passes
+     need it.  */
+  emit_note (NOTE_INSN_DELETED);
+
   COPY_HARD_REG_SET (lra_no_alloc_regs, ira_no_alloc_regs);
 
   init_reg_info ();
@@ -2259,6 +2263,11 @@ lra (FILE *f)
   bitmap_initialize (&lra_split_regs, &reg_obstack);
   bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
   live_p = false;
+  if (get_frame_size () != 0 && crtl->stack_alignment_needed)
+    /* If we have a stack frame, we must align it now.  The stack size
+       may be a part of the offset computation for register
+       elimination.  */
+    assign_stack_local (BLKmode, 0, crtl->stack_alignment_needed);
   for (;;)
     {
       for (;;)
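
The stack alignment hunk above (and the similar one in lra-spills.c
below) uses a side effect of assign_stack_local: requesting a
zero-sized BLKmode slot with the needed alignment rounds the frame
size up, so the offsets computed for register elimination already see
the final, aligned frame.  Roughly:

  /* The slot itself is empty; only its alignment request matters,
     which forces the frame size to be aligned right away.  */
  if (get_frame_size () != 0 && crtl->stack_alignment_needed)
    assign_stack_local (BLKmode, 0, crtl->stack_alignment_needed);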
Index: lra-spills.c
===================================================================
--- lra-spills.c	(revision 198251)
+++ lra-spills.c	(working copy)
@@ -548,6 +548,11 @@ lra_spill (void)
   for (i = 0; i < n; i++)
     if (pseudo_slots[pseudo_regnos[i]].mem == NULL_RTX)
       assign_mem_slot (pseudo_regnos[i]);
+  if (n > 0 && crtl->stack_alignment_needed)
+    /* If we have a stack frame, we must align it now.  The stack size
+       may be a part of the offset computation for register
+       elimination.  */
+    assign_stack_local (BLKmode, 0, crtl->stack_alignment_needed);
   if (lra_dump_file != NULL)
     {
       for (i = 0; i < slots_num; i++)
@@ -644,10 +649,12 @@ lra_final_code_change (void)
 	    }
 
 	  lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+	  struct lra_static_insn_data *static_id = id->insn_static_data;
 	  bool insn_change_p = false;
 
 	  for (i = id->insn_static_data->n_operands - 1; i >= 0; i--)
-	    if (alter_subregs (id->operand_loc[i], ! DEBUG_INSN_P (insn)))
+	    if ((DEBUG_INSN_P (insn) || ! static_id->operand[i].is_operator)
+		&& alter_subregs (id->operand_loc[i], ! DEBUG_INSN_P (insn)))
 	      {
 		lra_update_dup (id, i);
 		insn_change_p = true;
Index: lra-eliminations.c
===================================================================
--- lra-eliminations.c	(revision 198251)
+++ lra-eliminations.c	(working copy)
@@ -977,6 +977,9 @@ eliminate_regs_in_insn (rtx insn, bool r
 	}
     }
 
+  if (! validate_p)
+    return;
+
   /* Substitute the operands; the new values are in the substed_operand
      array.  */
   for (i = 0; i < static_id->n_operands; i++)
@@ -984,16 +987,13 @@ eliminate_regs_in_insn (rtx insn, bool r
   for (i = 0; i < static_id->n_dups; i++)
     *id->dup_loc[i] = substed_operand[(int) static_id->dup_num[i]];
 
-  if (validate_p)
-    {
-      /* If we had a move insn but now we don't, re-recognize it.
-	 This will cause spurious re-recognition if the old move had a
-	 PARALLEL since the new one still will, but we can't call
-	 single_set without having put new body into the insn and the
-	 re-recognition won't hurt in this rare case.  */
-      id = lra_update_insn_recog_data (insn);
-      static_id = id->insn_static_data;
-    }
+  /* If we had a move insn but now we don't, re-recognize it.
+     This will cause spurious re-recognition if the old move had a
+     PARALLEL since the new one still will, but we can't call
+     single_set without having put new body into the insn and the
+     re-recognition won't hurt in this rare case.  */
+  id = lra_update_insn_recog_data (insn);
+  static_id = id->insn_static_data;
 }
 
 /* Spill pseudos which are assigned to hard registers in SET.  Add
Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 198251)
+++ lra-constraints.c	(working copy)
@@ -135,10 +135,11 @@
    reload insns.  */
 static int bb_reload_num;
 
-/* The current insn being processed and corresponding its data (basic
-   block, the insn data, the insn static data, and the mode of each
-   operand).  */
+/* The current insn being processed, its single set (NULL if the insn
+   is not a single set), and its data (basic block, the insn data,
+   the insn static data, and the mode of each operand).  */
 static rtx curr_insn;
+static rtx curr_insn_set;
 static basic_block curr_bb;
 static lra_insn_recog_data_t curr_id;
 static struct lra_static_insn_data *curr_static_id;
@@ -698,6 +699,7 @@ match_reload (signed char out, signed ch
 	    new_out_reg = gen_lowpart_SUBREG (outmode, reg);
 	  else
 	    new_out_reg = gen_rtx_SUBREG (outmode, reg, 0);
+	  LRA_SUBREG_P (new_out_reg) = 1;
 	  /* If the input reg is dying here, we can use the same hard
 	     register for REG and IN_RTX.  We do it only for original
 	     pseudos as reload pseudos can die although original
@@ -721,6 +723,7 @@ match_reload (signed char out, signed ch
 	     it at the end of LRA work.  */
 	  clobber = emit_clobber (new_out_reg);
 	  LRA_TEMP_CLOBBER_P (PATTERN (clobber)) = 1;
+	  LRA_SUBREG_P (new_in_reg) = 1;
 	  if (GET_CODE (in_rtx) == SUBREG)
 	    {
 	      rtx subreg_reg = SUBREG_REG (in_rtx);
@@ -855,40 +858,34 @@ static rtx
 emit_spill_move (bool to_p, rtx mem_pseudo, rtx val)
 {
   if (GET_MODE (mem_pseudo) != GET_MODE (val))
-    val = gen_rtx_SUBREG (GET_MODE (mem_pseudo),
-			  GET_CODE (val) == SUBREG ? SUBREG_REG (val) : val,
-			  0);
+    {
+      val = gen_rtx_SUBREG (GET_MODE (mem_pseudo),
+			    GET_CODE (val) == SUBREG ? SUBREG_REG (val) : val,
+			    0);
+      LRA_SUBREG_P (val) = 1;
+    }
   return (to_p
-	  ? gen_move_insn (mem_pseudo, val)
-	  : gen_move_insn (val, mem_pseudo));
+          ? gen_move_insn (mem_pseudo, val)
+          : gen_move_insn (val, mem_pseudo));
 }
 
 /* Process a special case insn (register move), return true if we
-   don't need to process it anymore.  Return that RTL was changed
-   through CHANGE_P and macro SECONDARY_MEMORY_NEEDED says to use
-   secondary memory through SEC_MEM_P.	*/
+   don't need to process it anymore.  INSN should be a single set
+   insn.  Report through CHANGE_P that the RTL was changed and
+   through SEC_MEM_P that macro SECONDARY_MEMORY_NEEDED says to
+   use secondary memory.  */
 static bool
-check_and_process_move (bool *change_p, bool *sec_mem_p)
+check_and_process_move (bool *change_p, bool *sec_mem_p ATTRIBUTE_UNUSED)
 {
   int sregno, dregno;
-  rtx set, dest, src, dreg, sreg, old_sreg, new_reg, before, scratch_reg;
+  rtx dest, src, dreg, sreg, old_sreg, new_reg, before, scratch_reg;
   enum reg_class dclass, sclass, secondary_class;
   enum machine_mode sreg_mode;
   secondary_reload_info sri;
 
-  *sec_mem_p = *change_p = false;
-  if ((set = single_set (curr_insn)) == NULL)
-    return false;
-  dreg = dest = SET_DEST (set);
-  sreg = src = SET_SRC (set);
-  /* Quick check on the right move insn which does not need
-     reloads.  */
-  if ((dclass = get_op_class (dest)) != NO_REGS
-      && (sclass = get_op_class (src)) != NO_REGS
-      /* The backend guarantees that register moves of cost 2 never
-	 need reloads.  */
-      && targetm.register_move_cost (GET_MODE (src), dclass, sclass) == 2)
-    return true;
+  lra_assert (curr_insn_set != NULL_RTX);
+  dreg = dest = SET_DEST (curr_insn_set);
+  sreg = src = SET_SRC (curr_insn_set);
   if (GET_CODE (dest) == SUBREG)
     dreg = SUBREG_REG (dest);
   if (GET_CODE (src) == SUBREG)
@@ -896,7 +893,6 @@ check_and_process_move (bool *change_p,
   if (! REG_P (dreg) || ! REG_P (sreg))
     return false;
   sclass = dclass = NO_REGS;
-  dreg = get_equiv_substitution (dreg);
   if (REG_P (dreg))
     dclass = get_reg_class (REGNO (dreg));
   if (dclass == ALL_REGS)
@@ -910,7 +906,6 @@ check_and_process_move (bool *change_p,
     return false;
   sreg_mode = GET_MODE (sreg);
   old_sreg = sreg;
-  sreg = get_equiv_substitution (sreg);
   if (REG_P (sreg))
     sclass = get_reg_class (REGNO (sreg));
   if (sclass == ALL_REGS)
@@ -1007,7 +1002,7 @@ check_and_process_move (bool *change_p,
       if (GET_CODE (src) == SUBREG)
 	SUBREG_REG (src) = new_reg;
       else
-	SET_SRC (set) = new_reg;
+	SET_SRC (curr_insn_set) = new_reg;
     }
   else
     {
@@ -1204,7 +1199,10 @@ simplify_operand_subreg (int nop, enum m
        && (hard_regno_nregs[hard_regno][GET_MODE (reg)]
 	   >= hard_regno_nregs[hard_regno][mode])
        && simplify_subreg_regno (hard_regno, GET_MODE (reg),
-				 SUBREG_BYTE (operand), mode) < 0)
+				 SUBREG_BYTE (operand), mode) < 0
+       /* Don't reload the subreg for a matching reload.  It is
+	  actually a valid subreg in LRA.  */
+       && ! LRA_SUBREG_P (operand))
       || CONSTANT_P (reg) || GET_CODE (reg) == PLUS || MEM_P (reg))
     {
       enum op_type type = curr_static_id->operand[nop].type;
@@ -1311,6 +1309,14 @@ general_constant_p (rtx x)
   return CONSTANT_P (x) && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (x));
 }
 
+static bool
+reg_in_class_p (rtx reg, enum reg_class cl)
+{
+  if (cl == NO_REGS)
+    return get_reg_class (REGNO (reg)) == NO_REGS;
+  return in_class_p (reg, cl, NULL);
+}
+
 /* Major function to choose the current insn alternative and what
    operands should be reloaded and how.	 If ONLY_ALTERNATIVE is not
    negative we should consider only this alternative.  Return false if
@@ -1390,7 +1396,7 @@ process_alt_operands (int only_alternati
   for (nalt = 0; nalt < n_alternatives; nalt++)
     {
       /* Loop over operands for one constraint alternative.  */
-#ifdef HAVE_ATTR_enabled
+#if HAVE_ATTR_enabled
       if (curr_id->alternative_enabled_p != NULL
 	  && ! curr_id->alternative_enabled_p[nalt])
 	continue;
@@ -2054,6 +2060,31 @@ process_alt_operands (int only_alternati
 	  if (early_clobber_p && operand_reg[nop] != NULL_RTX)
 	    early_clobbered_nops[early_clobbered_regs_num++] = nop;
 	}
+      if (curr_insn_set != NULL_RTX && n_operands == 2
+	  && ((! curr_alt_win[0] && ! curr_alt_win[1]
+	       && REG_P (no_subreg_reg_operand[0])
+	       && REG_P (no_subreg_reg_operand[1])
+	       && (reg_in_class_p (no_subreg_reg_operand[0], curr_alt[1])
+		   || reg_in_class_p (no_subreg_reg_operand[1], curr_alt[0])))
+	      || (! curr_alt_win[0] && curr_alt_win[1]
+		  && REG_P (no_subreg_reg_operand[1])
+		  && reg_in_class_p (no_subreg_reg_operand[1], curr_alt[0]))
+	      || (curr_alt_win[0] && ! curr_alt_win[1]
+		  && REG_P (no_subreg_reg_operand[0])
+		  && reg_in_class_p (no_subreg_reg_operand[0], curr_alt[1])
+		  && (! CONST_POOL_OK_P (curr_operand_mode[1],
+					 no_subreg_reg_operand[1])
+		      || (targetm.preferred_reload_class
+			  (no_subreg_reg_operand[1],
+			   (enum reg_class) curr_alt[1]) != NO_REGS))
+		  /* If it is a result of a recent elimination in a
+		     move insn, we can still transform it into an add
+		     by using this alternative.  */
+		  && GET_CODE (no_subreg_reg_operand[1]) != PLUS)))
+	/* We have a move insn and a new reload insn will be similar
+	   to the current insn.  We should avoid such a situation, as
+	   it results in LRA cycling.  */
+	overall += LRA_MAX_REJECT;
       ok_p = true;
       curr_alt_dont_inherit_ops_num = 0;
       for (nop = 0; nop < early_clobbered_regs_num; nop++)
@@ -2425,27 +2456,35 @@ process_address (int nop, rtx *before, r
       && process_addr_reg (ad.index_term, before, NULL, INDEX_REG_CLASS))
     change_p = true;
 
+#ifdef EXTRA_CONSTRAINT_STR
+  /* Target hooks sometimes reject extra constraint addresses -- use
+     EXTRA_CONSTRAINT_STR for the validation.  */
+  if (constraint[0] != 'p'
+      && EXTRA_ADDRESS_CONSTRAINT (constraint[0], constraint)
+      && EXTRA_CONSTRAINT_STR (op, constraint[0], constraint))
+    return change_p;
+#endif
+
   /* There are three cases where the shape of *AD.INNER may now be invalid:
 
      1) the original address was valid, but either elimination or
-	equiv_address_substitution applied a displacement that made
-	it invalid.
+	equiv_address_substitution was applied and that made
+	the address invalid.
 
      2) the address is an invalid symbolic address created by
 	force_const_to_mem.
 
      3) the address is a frame address with an invalid offset.
 
-     All these cases involve a displacement and a non-autoinc address,
-     so there is no point revalidating other types.  */
-  if (ad.disp == NULL || ad.autoinc_p || valid_address_p (&ad))
+     All these cases involve a non-autoinc address, so there is no
+     point revalidating other types.  */
+  if (ad.autoinc_p || valid_address_p (&ad))
     return change_p;
 
   /* Any index existed before LRA started, so we can assume that the
      presence and shape of the index is valid.  */
   push_to_sequence (*before);
-  gcc_assert (ad.segment == NULL);
-  gcc_assert (ad.disp == ad.disp_term);
+  lra_assert (ad.disp == ad.disp_term);
   if (ad.base == NULL)
     {
       if (ad.index == NULL)
@@ -2453,26 +2492,39 @@ process_address (int nop, rtx *before, r
 	  int code = -1;
 	  enum reg_class cl = base_reg_class (ad.mode, ad.as,
 					      SCRATCH, SCRATCH);
-	  rtx disp = *ad.disp;
+	  rtx addr = *ad.inner;
 
-	  new_reg = lra_create_new_reg (Pmode, NULL_RTX, cl, "disp");
+	  new_reg = lra_create_new_reg (Pmode, NULL_RTX, cl, "addr");
 #ifdef HAVE_lo_sum
 	  {
 	    rtx insn;
 	    rtx last = get_last_insn ();
 
-	    /* disp => lo_sum (new_base, disp), case (2) above.  */
+	    /* addr => lo_sum (new_base, addr), case (2) above.  */
 	    insn = emit_insn (gen_rtx_SET
 			      (VOIDmode, new_reg,
-			       gen_rtx_HIGH (Pmode, copy_rtx (disp))));
+			       gen_rtx_HIGH (Pmode, copy_rtx (addr))));
 	    code = recog_memoized (insn);
 	    if (code >= 0)
 	      {
-		*ad.disp = gen_rtx_LO_SUM (Pmode, new_reg, disp);
+		*ad.inner = gen_rtx_LO_SUM (Pmode, new_reg, addr);
 		if (! valid_address_p (ad.mode, *ad.outer, ad.as))
 		  {
-		    *ad.disp = disp;
-		    code = -1;
+		    /* Try to put lo_sum into a register.  */
+		    insn = emit_insn (gen_rtx_SET
+				      (VOIDmode, new_reg,
+				       gen_rtx_LO_SUM (Pmode, new_reg, addr)));
+		    code = recog_memoized (insn);
+		    if (code >= 0)
+		      {
+			*ad.inner = new_reg;
+			if (! valid_address_p (ad.mode, *ad.outer, ad.as))
+			  {
+			    *ad.inner = addr;
+			    code = -1;
+			  }
+		      }
+
 		  }
 	      }
 	    if (code < 0)
@@ -2481,9 +2533,9 @@ process_address (int nop, rtx *before, r
 #endif
 	  if (code < 0)
 	    {
-	      /* disp => new_base, case (2) above.  */
-	      lra_emit_move (new_reg, disp);
-	      *ad.disp = new_reg;
+	      /* addr => new_base, case (2) above.  */
+	      lra_emit_move (new_reg, addr);
+	      *ad.inner = new_reg;
 	    }
 	}
       else
@@ -2650,6 +2702,24 @@ emit_inc (enum reg_class new_rclass, rtx
   return result;
 }
 
+/* Return true if the current move insn does not need processing as we
+   already know that it satisfies its constraints.  */
+static bool
+simple_move_p (void)
+{
+  rtx dest, src;
+  enum reg_class dclass, sclass;
+
+  lra_assert (curr_insn_set != NULL_RTX);
+  dest = SET_DEST (curr_insn_set);
+  src = SET_SRC (curr_insn_set);
+  return ((dclass = get_op_class (dest)) != NO_REGS
+	  && (sclass = get_op_class (src)) != NO_REGS
+	  /* The backend guarantees that register moves of cost 2
+	     never need reloads.  */
+	  && targetm.register_move_cost (GET_MODE (src), dclass, sclass) == 2);
+}
+
 /* Swap operands NOP and NOP + 1. */
 static inline void
 swap_operands (int nop)
@@ -2693,12 +2763,13 @@ curr_insn_transform (void)
   int max_regno_before;
   int reused_alternative_num;
 
+  curr_insn_set = single_set (curr_insn);
+  if (curr_insn_set != NULL_RTX && simple_move_p ())
+    return false;
+
   no_input_reloads_p = no_output_reloads_p = false;
   goal_alt_number = -1;
-
-  if (check_and_process_move (&change_p, &sec_mem_p))
-    return change_p;
-
+  change_p = sec_mem_p = false;
   /* JUMP_INSNs and CALL_INSNs are not allowed to have any output
      reloads; neither are insns that SET cc0.  Insns that use CC0 are
      not allowed to have any input reloads.  */
@@ -2793,6 +2864,10 @@ curr_insn_transform (void)
        we chose previously may no longer be valid.  */
     lra_set_used_insn_alternative (curr_insn, -1);
 
+  if (curr_insn_set != NULL_RTX
+      && check_and_process_move (&change_p, &sec_mem_p))
+    return change_p;
+
  try_swapped:
 
   reused_alternative_num = curr_id->used_insn_alternative;
@@ -4812,7 +4887,7 @@ inherit_in_ebb (rtx head, rtx tail)
 /* This value affects EBB forming.  If probability of edge from EBB to
    a BB is not greater than the following value, we don't add the BB
    to EBB.  */
-#define EBB_PROBABILITY_CUTOFF (REG_BR_PROB_BASE / 2)
+#define EBB_PROBABILITY_CUTOFF ((REG_BR_PROB_BASE * 50) / 100)
 
 /* Current number of inheritance/split iteration.  */
 int lra_inheritance_iter;
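
A note on the EBB_PROBABILITY_CUTOFF change just above: the new form
has the same value as REG_BR_PROB_BASE / 2 but states the cutoff in
percent, so tuning it means changing a single number.  For example
(hypothetical value, not part of the patch):

  /* Extend an EBB over an edge only if its probability is greater
     than 60%.  */
  #define EBB_PROBABILITY_CUTOFF ((REG_BR_PROB_BASE * 60) / 100)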

