This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[new-ra] Fix web splitting (6-fix)


Hi,

this is the final patch for today, if I'm not overlooking some file in
this directory ;-)  It fixes the regressions from introducing web
splitting.  There was also another problem when ignoring conflicts
resulting from copy insns.  When such a def is ignored it must be ensured,
they either the two webs fully overlap (i.e. are coalesced), or not at
all.  Otherwise we would generate a copy instruction which copies for
instance regs {1,2} into regs {0,1}.  This overwrites register 1, thereby
destroying the whole set {1,2}.  If it's still needed we generate wrong
code.

Booted/regtested on i686-linux, all langs except Ada+treelang.  Fixes all
new regressions from patch 6.  Compared to patch 5 it only adds five
regressions for 20001226-1.c, which times out.  This is due to the slow
building of the containment graph.  If allowed to run longer it generates
correct code, so basically no regressions are added.


Ciao,
Michael.
-- 
2003-06-30  Michael Matz  <matz@suse.de>
	* ra-build.c (add_additional_conflicts): Prevent strict partial
	overlap between copy-related webs.  Renamed from ...
	(add_earlyclobber_conflicts): ... this.
	(make_webs): Change call to above.
	* ra-rewrite.c (init_split_costs): Don't ignore refs from copy insns.
	Handle deficiency in generated split code.
	(find_splits): Don't split hardregs.
	(split_insert_load): Deal with modeless slots (constants).

diff -urpN work-gcc.orig/gcc/ra-build.c work-gcc/gcc/ra-build.c
--- work-gcc.orig/gcc/ra-build.c	2003-06-16 17:51:58.000000000 +0200
+++ work-gcc/gcc/ra-build.c	2003-06-16 17:55:49.000000000 +0200
@@ -110,7 +110,7 @@ static void reset_conflicts PARAMS ((voi
 static void check_conflict_numbers PARAMS ((void));
 #endif
 static void conflicts_between_webs PARAMS ((struct df *));
-static void add_earlyclobber_conflicts PARAMS ((void));
+static void add_additional_conflicts PARAMS ((void));
 static void detect_spill_temps PARAMS ((void));
 static int contains_pseudo PARAMS ((rtx));
 static int want_to_remat PARAMS ((rtx x));
@@ -2458,8 +2458,12 @@ conflicts_between_webs (df)
 #endif
 }

+/* This adds conflicts from early-clobber operands and those to prevent
+   coalescable webs connected with moves to be colored so that they only
+   partly overlap (instead of becoming fully coalesced).  */
+
 static void
-add_earlyclobber_conflicts ()
+add_additional_conflicts ()
 {
   rtx insn;
   for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
@@ -2467,6 +2471,7 @@ add_earlyclobber_conflicts ()
       {
 	int uid = INSN_UID (insn);
 	unsigned int n, num_defs = insn_df[uid].num_defs;
+	rtx source, dest;
 	for (n = 0; n < num_defs; n++)
 	  {
 	    struct ref *def = insn_df[uid].defs[n];
@@ -2488,6 +2493,53 @@ add_earlyclobber_conflicts ()
 	    else
 	      DF_REF_FLAGS (def) &= ~DF_REF_EARLYCLOBBER;
 	  }
+	if (copy_insn_p (insn, &source, &dest)
+	    && REG_P (source) && REG_P (dest))
+	  {
+	    /* We want to prevent that the source and target web of this
+	       move become strictly partly overlapped.  Because we ignored
+	       this def while constructing conflicts for the source web
+	       this could otherwise happen.  But that would lead to wrong
+	       code if the full source web still is needed.  For instance:
+	         p1:DI <-- p2:DI     ; the copy insn
+		 p2 <-- -p2
+		 use <-- p2
+		 use <-- p1:[SI+0]
+	       here p1:[SI+0] conflicts with p2:DI.  So we could color
+	       p1 with {0,1} and p2 with {1,2}.  But the copy insn would
+	       then become {0,1} <-- {1,2}, thereby overwriting the lowpart
+	       of p2, which still is needed.  We prevent this by
+	       making conflicts between p1 and p2 such, that either only
+	       full overlap (coalescing) or none at all is possible.  */
+	    struct web *sweb = NULL, *dweb = NULL;
+	    struct ref *ref;
+	    for (n = 0; !dweb && n < num_defs; n++)
+	      if (REGNO (dest) == DF_REF_REGNO (ref = insn_df[uid].defs[n]))
+		dweb = def2web[DF_REF_ID (ref)];
+	    for (n = 0; !sweb && n < insn_df[uid].num_uses; n++)
+	      if (REGNO (source) == DF_REF_REGNO (ref = insn_df[uid].uses[n]))
+		sweb = use2web[DF_REF_ID (ref)];
+	    /* We simply let the first real part of one web conflict with
+	       all non-overlapping parts of the second web.  If one web
+	       has no parts there's no need to do this.  */
+	    if (sweb && dweb)
+	      {
+		sweb = sweb->subreg_next;
+		if (sweb)
+		  for (dweb = dweb->subreg_next, source = sweb->orig_x; dweb;
+		       dweb = dweb->subreg_next)
+		    {
+		      dest = dweb->orig_x;
+		      if (SUBREG_BYTE (source) >= (SUBREG_BYTE (dest)
+						   + GET_MODE_SIZE (GET_MODE
+								    (dest)))
+			  || SUBREG_BYTE (dest) >= (SUBREG_BYTE (source)
+						    + GET_MODE_SIZE (GET_MODE
+								     (source))))
+			record_conflict (sweb, dweb);
+		    }
+	      }
+	  }
       }
 }

@@ -3124,7 +3176,7 @@ make_webs (df)
   /* And finally relate them to each other, meaning to record all possible
      conflicts between webs (see the comment there).  */
   conflicts_between_webs (df);
-  add_earlyclobber_conflicts ();
+  add_additional_conflicts ();

   if (WEBS (SPILLED))
     return;
diff -urpN work-gcc.orig/gcc/ra-rewrite.c work-gcc/gcc/ra-rewrite.c
--- work-gcc.orig/gcc/ra-rewrite.c	2003-06-16 17:54:42.000000000 +0200
+++ work-gcc/gcc/ra-rewrite.c	2003-06-16 17:55:49.000000000 +0200
@@ -3191,6 +3191,7 @@ init_split_costs ()
     live_at_begin[i] = sbitmap_alloc (num_webs);
   live_at_begin += 2;
   any_splits_found = 0;
+
   FOR_EACH_BB (bb)
     {
       int j;
@@ -3267,11 +3268,19 @@ init_split_costs ()
 			  split_costs[web->id].loads += (4 * (e->dest->frequency + 1)) / 3;
 		    }
 		}
-	      /* Even if it's a copy insn info.num_uses might be zero.
-		 This happens if the source of the copy is a fixed hardreg,
-		 because we don't remember those in info.  */
-	      if (info.num_uses && copy_insn_p (insn, NULL, NULL))
-		reset_web_live_s (live, suplive, use2web[DF_REF_ID (info.uses[0])]);
+	      /* If this would construct a normal conflict graph, this
+		 would be the place where we ignore definitions which copy
+		 from us (i.e. basically we would temporarily make uses
+		 of copy insns dead, so they won't conflict with the def).
+		 But this is a containment graph.  We really need to know
+		 all references.  For instance in this situation:
+		   p1 <- ... //  p2 <- p1 // ... // use <- p1 // use <- p2
+		 If we would ignore the def of p2 for containment in p1 we
+		 would think, that we can split p2 around p1.  But that's
+		 not true.  Hence don't ignore copies.
+		 Remember that if you want to merge the containment graph
+		 creation with the normal conflict graph creation.  */
+
 	      /* Ugly hack.  Unused defs need to be handled for the conflict
 	         graph as if there's a use just after the current insn.  I.e
 		 as if they were in fact live.  The real reason is, that simply
@@ -3283,6 +3292,31 @@ init_split_costs ()
 		   checking if they indeed are useless (are not live).  The
 		   effect is the same.  */
 		set_web_live_s (live, suplive, def2web[DF_REF_ID (info.defs[n])]);
+	      /* XXX Argh.  Crap.  We need to make the webs which become live
+		 here due to uses "contain" all the defs in this insn,
+		 although in data-flow sense they don't.  Suppose this
+                 situation (p1 is DImode):
+		   1 p1 <== bla
+		   2 p2 <== bla
+                   3 p1:[SI+0] <== p2
+		   4 use <== p1
+		 Without the following loop, the p2 web wouldn't contain
+		 any refs to p1 (and rightly so).  So we could split p1 around
+		 p2.  Unfortunately our split code inserted only handles full
+		 register, hence we would create a "p1 <= pback" load after
+		 insn 3, thereby overwriting the lower half which just was
+		 set.  This normally isn't an issue with full defs, as there
+		 the two webs wouldn't even conflict, and hence don't have
+		 the problem of spliting one around the other.
+		 The real fix would be to only generate the split loads
+		 and store for the part which actually conflicts with p2,
+		 or which is still live at the point of loads (in the above
+		 case p1:[SI+4]).  For now we simply avoid the situation
+		 by letting the defs be contained in all uses' webs.  */
+
+	      for (n = 0; n < info.num_uses; n++)
+		set_web_live_s (live, suplive, use2web[DF_REF_ID (info.uses[n])]);
+
 	      for (n = 0; n < info.num_defs; n++)
 		{
 		  struct web *web1 = def2web[DF_REF_ID (info.defs[n])];
@@ -3433,7 +3467,8 @@ find_splits (web)
 	    /* Test if we can split web or web2 at all.  */
 	    if (SPILL_SLOT_P (web->regno))
 	      SET_HARD_REG_BIT (wrong_around_color, c1);
-	    if (SPILL_SLOT_P (web2->regno))
+	    if (SPILL_SLOT_P (web2->regno)
+		|| aweb->type == PRECOLORED)
 	      SET_HARD_REG_BIT (wrong_around_name, c1);
 	  }
     }
@@ -3531,8 +3566,11 @@ split_insert_load (web, insn, before, li
 	if (is_partly_live (live, web))
 	  {
 	    rtx reg = copy_rtx (web->orig_x);
+	    enum machine_mode slot_mode = GET_MODE (whole_slot);
+	    if (slot_mode == VOIDmode)
+	      slot_mode = GET_MODE (aweb->orig_x);
 	    rtx slot = simplify_gen_subreg (GET_MODE (reg), whole_slot,
-					    GET_MODE (whole_slot),
+					    slot_mode,
 					    SUBREG_BYTE (reg));
 	    ra_emit_move_insn (reg, slot);
 	  }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]