This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



[new-ra] Cleanup sources, plus costs dump (3-1)


Hi,

this mostly cleans up the sources (deleting #if 0 and commented-out
code).  It also changes the various heuristics a bit: webs live across
basic-block borders are no longer considered that hard to spill, and
there is some more black magic in colorize_one_web.  It also makes the
spill code insertion handle early clobber references.  Additionally it
deletes the fake uses added for the return value register again before
proceeding into the other passes.  And finally it adds a new dump pass
(-dT), which emits the costs of instructions accessing the stack, per
basic block and overall.
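
(For context: an early clobber reference is an output operand whose
constraint carries the "&" modifier, i.e. it is written before the insn
has consumed all of its inputs, and thus must not share a hard register
with any input.  A made-up example:

  asm ("foo %0,%1" : "=&r" (out) : "r" (in));

Here "out" and "in" need different hard registers.  The spill rewriter
now keeps the colors of such outputs marked as in use until the whole
insn has been processed; see rewrite_program2 in the patch below.)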

Bootstrap with this breaks in libjava due to recursively trying to
recolor the same webs over and over.


Ciao,
Michael.
-- 
2003-06-30  Michael Matz  <matz@suse.de>

	* ra-build.c: Include obstack.h.
	(add_earlyclobber_conflicts): New.
	(check_conflict_numbers): Comment out.
	(conflicts_early_clobbered): Delete.
	(undef_table[]): Make const.
	(init_one_web_common): Initialize web->orig_usable_set.
	(parts_to_webs_1): Silence warnings.
	(detect_spill_temps): Ditto.  Webs crossing bbs are not really hard
	to spill.
	(detect_remat_webs): Don't try to remat throwing expressions.
	(make_webs): Call add_earlyclobber_conflicts.
	* ra-colorize.c (aggressive_coalesce_fast): Delete.
	(reset_lists): Delete #if 0 code.
	(combine): Union preferred colors.
	(INV_REG_ALLOC_ORDER): Define.
	(get_free_reg): Use it.  New argument allow_unaligned.
	Adjust callers.
	(get_biased_reg): New argument allow_unaligned.
	(hardregset_to_string): Use HOST_WIDE_INT_PRINT_HEX.
	(colorize_one_web): Adjust calls to get_biased_reg.  Try hard also
	if web is a spill temp.  Add two more magic cases.
	(try_recolor_web): Adjust call to get_free_reg.
	(insert_coalesced_conflicts): Handle fake self-conflicts.
	(break_precolored_alias): Set is_coalesced for all coalesced webs.
	* ra-debug.c: Include tm_p.h.
	(ra_debug_msg): Use VA_OPEN and friends.
	(dump_igraph): Split long lines.
	(dump_graph_cost): Ditto.
	(dump_static_insn_cost): Use memset instead of aggregate initializers.
	* ra-rewrite.c (flag_ra_test, df2ra): Declare.
	(detect_bbs_for_rewrite, detect_deaths_in_bb): Delete.
	(rewrite_program): Delete #if 0 code.
	(insert_stores): Call note_uses_partial instead of note_uses.
	(emit_loads): Reset any_spill_temps_spilled.
	(NEW_SPILL): Don't define.  Delete all code which was
	#ifndef NEW_SPILL.
	(rewrite_program2): Delete changed_bbs (was only used by old spill
	code).  Deal with early clobbers.  Don't clear any_spill_temps_spilled
	here.
	(dump_cost): Split long lines.
	* ra.c (obstack_chunk_alloc, obstack_chunk_free): Don't define.
	(eliminables): Make const.
	(init_ra): Call ORDER_REGS_FOR_LOCAL_ALLOC.
	(reg_alloc): Call cleanup_cfg.  Add fake uses of return value.
	Set/reset while_newra.  Call purge_all_dead_edges.  Delete commented
	code.  Call build_insn_chain.
	* toplev.c: New dump pass "costs".
	(flag_ra_test): New flag, add it to relevant tables.
	(count_stack_refs, count_stack_writes, print_costs_bb): New.
	(rest_of_compilation): Call costs dump.
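
For illustration, here is a minimal, self-contained sketch of the
accounting the new "costs" dump performs (compare print_costs_bb in the
toplev.c hunk below).  The block data is invented; inside GCC the
load/store counts come from walking each insn with note_uses and
note_stores:

#include <stdio.h>

/* Per-block numbers as the dump pass would gather them: block index,
   execution frequency, and the number of insns reading resp. writing
   stack slots.  */
struct bb_cost { int index, frequency, loads, stores; };

int
main (void)
{
  /* Made-up example blocks.  */
  struct bb_cost bbs[] = { {0, 1000, 2, 1}, {1, 100, 5, 3}, {2, 1000, 0, 1} };
  double stack_load = 0.0, stack_store = 0.0;
  int count_load = 0, count_store = 0;
  size_t i;

  for (i = 0; i < sizeof bbs / sizeof bbs[0]; i++)
    {
      /* The per-block cost is the raw reference count weighted by the
	 block's frequency, so hot blocks dominate the total.  */
      double sl = (double) bbs[i].loads * bbs[i].frequency;
      double sw = (double) bbs[i].stores * bbs[i].frequency;
      stack_load += sl;
      stack_store += sw;
      count_load += bbs[i].loads;
      count_store += bbs[i].stores;
      printf ("  BB %4d: freq %5d load %3d=%-6.0f stor %3d=%-6.0f\n",
	      bbs[i].index, bbs[i].frequency, bbs[i].loads, sl,
	      bbs[i].stores, sw);
    }
  printf ("  ALL    :            load %5d=%-.0f stor %5d=%-.0f\n",
	  count_load, stack_load, count_store, stack_store);
  return 0;
}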


diff -urpN work-gcc.orig/gcc/ra-build.c work-gcc/gcc/ra-build.c
--- work-gcc.orig/gcc/ra-build.c	2003-06-16 17:40:50.000000000 +0200
+++ work-gcc/gcc/ra-build.c	2003-06-16 17:42:08.000000000 +0200
@@ -26,6 +26,7 @@
 #include "recog.h"
 #include "function.h"
 #include "regs.h"
+#include "obstack.h"
 #include "hard-reg-set.h"
 #include "basic-block.h"
 #include "df.h"
@@ -102,8 +103,11 @@ static unsigned int parts_to_webs_1 PARA
 					     struct df_link *));
 static void parts_to_webs PARAMS ((struct df *));
 static void reset_conflicts PARAMS ((void));
+#if 0
 static void check_conflict_numbers PARAMS ((void));
+#endif
 static void conflicts_between_webs PARAMS ((struct df *));
+static void add_earlyclobber_conflicts PARAMS ((void));
 static void detect_spill_temps PARAMS ((void));
 static int contains_pseudo PARAMS ((rtx));
 static int want_to_remat PARAMS ((rtx x));
@@ -120,7 +124,6 @@ static void free_bb_info PARAMS ((void))
 static void build_web_parts_and_conflicts PARAMS ((struct df *));
 static void select_regclass PARAMS ((void));
 static void detect_spanned_deaths PARAMS ((unsigned int *spanned_deaths));
-static void conflicts_early_clobbered PARAMS ((void));
 static void web_class PARAMS ((struct web*));
 static int rematerializable_stack_arg_p PARAMS ((rtx, rtx));

@@ -291,7 +294,7 @@ copy_insn_p (insn, source, target)
   s_regno = (unsigned) REGNO (GET_CODE (s) == SUBREG ? SUBREG_REG (s) : s);
   d_regno = (unsigned) REGNO (GET_CODE (d) == SUBREG ? SUBREG_REG (d) : d);

-  /* Copies between hardregs are useless for us, as not coalesable anyway. */
+  /* Copies between hardregs are useless for us, as not coalesable anyway.  */
   if ((s_regno < FIRST_PSEUDO_REGISTER
        && d_regno < FIRST_PSEUDO_REGISTER))
     return 0;
@@ -363,7 +366,7 @@ static struct undef_table_s {
   unsigned int new_undef;
   /* size | (byte << 16)  */
   unsigned int size_word;
-} undef_table [] = {
+} const undef_table [] = {
   { 0, BL_TO_WORD (0, 0)}, /* 0 */
   { 0, BL_TO_WORD (0, 1)},
   { 0, BL_TO_WORD (1, 1)},
@@ -580,7 +583,7 @@ remember_move (insn)
       SET_BIT (move_handled, INSN_UID (insn));
       if (copy_insn_p (insn, &s, &d) == COPY_P_MOVE)
 	{
-	  /* Some sanity test for the copy insn. */
+	  /* Some sanity test for the copy insn.  */
 	  struct df_link *slink = DF_INSN_USES (df, insn);
 	  struct df_link *link = DF_INSN_DEFS (df, insn);
 	  if (!link || !link->ref || !slink || !slink->ref)
@@ -866,7 +869,7 @@ live_out_1 (df, use, insn)
       else
 	{
 	  /* If this insn doesn't completely define the USE, increment also
-	     it's spanned deaths count (if this insn contains a death). */
+	     it's spanned deaths count (if this insn contains a death).  */
 	  if (uid >= death_insns_max_uid)
 	    abort ();
 	  if (TEST_BIT (insns_with_deaths, uid))
@@ -898,7 +901,7 @@ live_out (df, use, insn)
       && (use->undefined & ~visit_trace[uid].undefined) == 0)
     {
       union_web_parts (visit_trace[uid].wp, use->wp);
-      /* Don't search any further, as we already were here with this regno. */
+      /* Don't search any further, as we already were here with this regno.  */
       return 0;
     }
   else
@@ -1293,6 +1296,7 @@ init_one_web_common (web, reg)
       web->add_hardregs = 0;
       CLEAR_HARD_REG_SET (web->usable_regs);
       SET_HARD_REG_BIT (web->usable_regs, web->regno);
+      COPY_HARD_REG_SET (web->orig_usable_regs, web->usable_regs);
       web->num_freedom = 1;
     }
   else
@@ -1821,7 +1825,7 @@ parts_to_webs_1 (df, copy_webs, all_refs
   webnum = 0;
   for (i = 0; i < def_id + use_id; i++)
     {
-      struct web *web, *subweb;
+      struct web *subweb, *web = 0; /* Initialize web to silence warnings.  */
       struct web_part *wp = &web_parts[i];
       struct ref *ref = wp->ref;
       unsigned int ref_id;
@@ -1836,7 +1840,7 @@ parts_to_webs_1 (df, copy_webs, all_refs
       if (! wp->uplink)
 	{
 	  /* If we have a web part root, create a new web.  */
-	  unsigned int newid = ~0U;
+	  unsigned int newid = ~(unsigned)0;
 	  unsigned int old_web = 0;

 	  /* In the first pass, there are no old webs, so unconditionally
@@ -1885,7 +1889,7 @@ parts_to_webs_1 (df, copy_webs, all_refs
 		    }
 		}
 	      /* The id is zeroed in init_one_web().  */
-	      if (newid == ~0U)
+	      if (newid == ~(unsigned)0)
 		newid = web->id;
 	      if (old_web)
 		reinit_one_web (web, GET_CODE (reg) == SUBREG
@@ -2374,6 +2378,39 @@ conflicts_between_webs (df)
 #endif
 }

+static void
+add_earlyclobber_conflicts ()
+{
+  rtx insn;
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    if (INSN_P (insn) && INSN_UID (insn) < insn_df_max_uid)
+      {
+	int uid = INSN_UID (insn);
+	unsigned int n, num_defs = insn_df[uid].num_defs;
+	for (n = 0; n < num_defs; n++)
+	  {
+	    struct ref *def = insn_df[uid].defs[n];
+	    ra_ref *rdef = DF2RA (df2ra, def);
+	    if (rdef && RA_REF_CLOBBER_P (rdef))
+	      {
+		unsigned int u, num_uses = insn_df[uid].num_uses;
+		struct web *web1 = def2web[DF_REF_ID (def)];
+		DF_REF_FLAGS (def) |= DF_REF_EARLYCLOBBER;
+		for (u = 0; u < num_uses; u++)
+		  {
+		    struct web *web2 = use2web[DF_REF_ID
+			                       (insn_df[uid].uses[u])];
+		    if (find_web_for_subweb (web1)->regno
+			!= find_web_for_subweb (web2)->regno)
+		      record_conflict (web1, web2);
+		  }
+	      }
+	    else
+	      DF_REF_FLAGS (def) &= ~DF_REF_EARLYCLOBBER;
+	  }
+      }
+}
+
 /* Look at each web, if it is used as spill web.  Or better said,
    if it will be spillable in this pass.  */

@@ -2382,7 +2419,7 @@ detect_spill_temps ()
 {
   struct dlist *d;
   bitmap already = BITMAP_XMALLOC ();
-  unsigned int *spanned_deaths_from_scratch;
+  unsigned int *spanned_deaths_from_scratch = NULL;

   if (flag_ra_spanned_deaths_from_scratch)
     {
@@ -2412,7 +2449,7 @@ detect_spill_temps ()
          emitted (can happen with IR spilling ignoring sometimes
 	 all deaths).  */
       else if (web->changed)
-	web->spill_temp = 1;
+	web->spill_temp = web->crosses_bb ? 2 : 1 ;
       /* A spill temporary has one def, one or more uses, all uses
 	 are in one insn, and either the def or use insn was inserted
 	 by the allocator.  */
@@ -2476,7 +2513,8 @@ detect_spill_temps ()
 	      /* But mark them specially if they could possibly be spilled,
 		 either because they cross some deaths (without the above
 		 mentioned ones) or calls.  */
-	      if (web->crosses_call || num_deaths > 0)
+	      if (web->crosses_call || web->crosses_bb
+		  || web->live_over_abnormal || num_deaths > 0)
 		web->spill_temp = 1 * 2;
 	    }
 	  /* A web spanning no deaths can't be spilled either.  No loads
@@ -2487,7 +2525,8 @@ detect_spill_temps ()
 	  else if ((flag_ra_spanned_deaths_from_scratch
 		    ? spanned_deaths_from_scratch[web->id] == 0
 		    : web->span_deaths == 0)
-		   && !web->crosses_call && !web->crosses_bb)
+		   && !web->crosses_call && !web->crosses_bb
+		   && !web->live_over_abnormal)
 	    web->spill_temp = 3;
 	}
       web->orig_spill_temp = web->spill_temp;
@@ -2654,6 +2693,11 @@ detect_remat_webs ()
 	     rematerializable.  */
 	  if (!rtx_equal_p (SET_DEST (set), web->orig_x))
 	    break;
+	  /* If this insn throws, and we have a single set, then this must
+	     be the throwing one.  Don't try to rematerialize such things.
+	     Would change the CFG.  */
+	  if (can_throw_internal (insn))
+	    break;
 	  /* If we already have a pattern it must be equal to the current.  */
 	  if (pat && !rtx_equal_p (pat, src))
 	    break;
@@ -2936,7 +2980,7 @@ make_webs (df)
   /* And finally relate them to each other, meaning to record all possible
      conflicts between webs (see the comment there).  */
   conflicts_between_webs (df);
-  conflicts_early_clobbered ();
+  add_earlyclobber_conflicts ();

   if (WEBS (SPILLED))
     return;
@@ -2997,7 +3041,7 @@ moves_to_webs (df)
       if (m->source_web && m->target_web
 	  /* If the usable_regs don't intersect we can't coalesce the two
 	     webs anyway, as this is no simple copy insn (it might even
-	     need an intermediate stack temp to execute this "copy" insn). */
+	     need an intermediate stack temp to execute this "copy" insn).  */
 	  && hard_regs_combinable_p (m->target_web, m->source_web))
 	{
 	  if (!flag_ra_optimistic_coalescing)
@@ -3683,41 +3727,6 @@ detect_spanned_deaths (spanned_deaths)
   sbitmap_free (defs_per_insn);
 }

-static void
-conflicts_early_clobbered ()
-{
-  rtx insn;
-  struct ra_insn_info info;
-
-  for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
-    {
-      unsigned int n;
-
-      if (!INSN_P (insn) || INSN_UID (insn) > insn_df_max_uid)
-	continue;
-
-      info = insn_df[INSN_UID (insn)];
-
-      for (n = 0; n < info.num_defs; n++)
-	{
-	  ra_ref *rref;
-	  struct ref *dref = info.defs[n];
-	  rref = DF2RA (df2ra, dref);
-	  if (rref && RA_REF_CLOBBER_P (rref))
-	    {
-	      unsigned int i;
-	      struct web *web1 = def2web[DF_REF_ID (dref)];
-	      for (i = 0; i < info.num_uses; ++i)
-		{
-		  struct ref *uref = info.uses[i];
-		  struct web *web2 = use2web[DF_REF_ID (uref)];
-		  record_conflict (web1, web2);
-		}
-	    }
-	}
-    }
-}
-
 static int class_ok_for_mode PARAMS ((enum reg_class, enum machine_mode));

 /* Returns true if at least one of the hardregs in CLASS is OK
diff -urpN work-gcc.orig/gcc/ra-colorize.c work-gcc/gcc/ra-colorize.c
--- work-gcc.orig/gcc/ra-colorize.c	2003-02-25 17:44:52.000000000 +0100
+++ work-gcc/gcc/ra-colorize.c	2003-06-16 17:42:08.000000000 +0200
@@ -67,14 +67,15 @@ static void freeze PARAMS ((void));
 static void select_spill PARAMS ((void));
 static int color_usable_p PARAMS ((int, HARD_REG_SET, HARD_REG_SET,
 				   enum machine_mode));
-int get_free_reg PARAMS ((HARD_REG_SET, HARD_REG_SET, enum machine_mode));
+int get_free_reg PARAMS ((HARD_REG_SET, HARD_REG_SET, enum machine_mode, int));
 static int get_biased_reg PARAMS ((HARD_REG_SET, HARD_REG_SET, HARD_REG_SET,
-				   HARD_REG_SET, enum machine_mode));
+				   HARD_REG_SET, enum machine_mode, int));
 static char * hardregset_to_string PARAMS ((HARD_REG_SET));
 static void calculate_dont_begin PARAMS ((struct web *, HARD_REG_SET *));
 static int proper_hard_reg_subset_p PARAMS ((HARD_REG_SET, HARD_REG_SET));
 static void colorize_one_web PARAMS ((struct web *, int));
 static void assign_colors PARAMS ((void));
+
 static void try_recolor_web PARAMS ((struct web *));
 static void insert_coalesced_conflicts PARAMS ((void));
 static void recolor_spills PARAMS ((void));
@@ -88,9 +89,9 @@ static void init_web_pairs PARAMS ((void
 static void add_web_pair_cost PARAMS ((struct web *, struct web *,
 			               unsigned HOST_WIDE_INT, unsigned int));
 static int comp_web_pairs PARAMS ((const void *, const void *));
+
 static void sort_and_combine_web_pairs PARAMS ((int));
 static void aggressive_coalesce PARAMS ((void));
-static void aggressive_coalesce_fast PARAMS ((void));
 static void extended_coalesce_2 PARAMS ((void));
 static void check_uncoalesced_moves PARAMS ((void));

@@ -244,22 +245,6 @@ reset_lists ()
   while ((d = pop_list (&WEBS(COLORED))) != NULL)
     put_web (DLIST_WEB (d), INITIAL);

-#if 0
-    {
-      struct dlist *d_next;
-      for (d = WEBS(INITIAL); d; d = d_next)
-	{
-	  struct web *web = DLIST_WEB (d);
-	  d_next = d->next;
-	  if (web->type != PRECOLORED)
-	    {
-	      remove_list (d, &WEBS(INITIAL));
-	      put_web (DLIST_WEB (d), FREE);
-	    }
-	}
-    }
-#endif
-
   /* All free webs have no conflicts anymore.  */
   for (d = WEBS(FREE); d; d = d->next)
     {
@@ -852,6 +837,8 @@ combine (u, v)
   if (!u->num_freedom)
     abort();

+  IOR_HARD_REG_SET (u->prefer_colors, v->prefer_colors);
+
   if (u->num_conflicts >= NUM_REGS (u)
       && (u->type == FREEZE || simplify_p (u->type)))
     {
@@ -1071,6 +1058,13 @@ color_usable_p (c, dont_begin_colors, fr
   return 0;
 }

+/* I don't want to clutter up the actual code with ifdef's.  */
+#ifdef REG_ALLOC_ORDER
+#define INV_REG_ALLOC_ORDER(c) inv_reg_alloc_order[c]
+#else
+#define INV_REG_ALLOC_ORDER(c) c
+#endif
+
 /* Searches in FREE_COLORS for a block of hardregs of the right length
    for MODE, which doesn't begin at a hardreg mentioned in DONT_BEGIN_COLORS.
    If it needs more than one hardreg it prefers blocks beginning
@@ -1078,9 +1072,10 @@ color_usable_p (c, dont_begin_colors, fr
    block could be found.  */

 int
-get_free_reg (dont_begin_colors, free_colors, mode)
+get_free_reg (dont_begin_colors, free_colors, mode, allow_unaligned)
      HARD_REG_SET dont_begin_colors, free_colors;
      enum machine_mode mode;
+     int allow_unaligned;
 {
   int c;
   int last_resort_reg = -1;
@@ -1105,16 +1100,17 @@ get_free_reg (dont_begin_colors, free_co
 	  {
 	    if (size < 2 || (c & 1) == 0)
 	      {
-		if (inv_reg_alloc_order[c] < pref_reg_order)
+		if (INV_REG_ALLOC_ORDER (c) < pref_reg_order)
 		  {
 		    pref_reg = c;
-		    pref_reg_order = inv_reg_alloc_order[c];
+		    pref_reg_order = INV_REG_ALLOC_ORDER (c);
 		  }
 	      }
-	    else if (inv_reg_alloc_order[c] < last_resort_reg_order)
+	    else if (allow_unaligned
+		     && INV_REG_ALLOC_ORDER (c) < last_resort_reg_order)
 	      {
 		last_resort_reg = c;
-		last_resort_reg_order = inv_reg_alloc_order[c];
+		last_resort_reg_order = INV_REG_ALLOC_ORDER (c);
 	      }
 	  }
 	else
@@ -1129,9 +1125,11 @@ get_free_reg (dont_begin_colors, free_co
    only do the last two steps.  */

 static int
-get_biased_reg (dont_begin_colors, bias, prefer_colors, free_colors, mode)
+get_biased_reg (dont_begin_colors, bias, prefer_colors, free_colors, mode,
+		allow_unaligned)
      HARD_REG_SET dont_begin_colors, bias, prefer_colors, free_colors;
      enum machine_mode mode;
+     int allow_unaligned;
 {
   int c = -1;
   HARD_REG_SET s;
@@ -1140,21 +1138,21 @@ get_biased_reg (dont_begin_colors, bias,
       COPY_HARD_REG_SET (s, dont_begin_colors);
       IOR_COMPL_HARD_REG_SET (s, bias);
       IOR_COMPL_HARD_REG_SET (s, prefer_colors);
-      c = get_free_reg (s, free_colors, mode);
+      c = get_free_reg (s, free_colors, mode, allow_unaligned);
       if (c >= 0)
 	return c;
       COPY_HARD_REG_SET (s, dont_begin_colors);
       IOR_COMPL_HARD_REG_SET (s, bias);
-      c = get_free_reg (s, free_colors, mode);
+      c = get_free_reg (s, free_colors, mode, allow_unaligned);
       if (c >= 0)
 	return c;
     }
   COPY_HARD_REG_SET (s, dont_begin_colors);
   IOR_COMPL_HARD_REG_SET (s, prefer_colors);
-  c = get_free_reg (s, free_colors, mode);
+  c = get_free_reg (s, free_colors, mode, allow_unaligned);
   if (c >= 0)
-      return c;
-  c = get_free_reg (dont_begin_colors, free_colors, mode);
+    return c;
+  c = get_free_reg (dont_begin_colors, free_colors, mode, allow_unaligned);
   return c;
 }

@@ -1193,7 +1191,7 @@ hardregset_to_string (s)
 {
   static char string[/*FIRST_PSEUDO_REGISTER + 30*/1024];
 #if FIRST_PSEUDO_REGISTER <= HOST_BITS_PER_WIDE_INT
-  sprintf (string, "%x", s);
+  sprintf (string, HOST_WIDE_INT_PRINT_HEX, s);
 #else
   char *c = string;
   int i,j;
@@ -1443,10 +1441,10 @@ colorize_one_web (web, hard)
 	c = -1;
       if (c < 0)
 	c = get_biased_reg (dont_begin, bias, web->prefer_colors,
-			    call_clobbered, PSEUDO_REGNO_MODE (web->regno));
+			    call_clobbered, PSEUDO_REGNO_MODE (web->regno), 0);
       if (c < 0)
 	c = get_biased_reg (dont_begin, bias, web->prefer_colors,
-			  colors, PSEUDO_REGNO_MODE (web->regno));
+			  colors, PSEUDO_REGNO_MODE (web->regno), 1);

       if (/*!web->use_my_regs &&*/ c < 0 && !flag_ra_pre_reload)
 	{
@@ -1464,10 +1462,10 @@ colorize_one_web (web, hard)
 	  AND_HARD_REG_SET (call_clobbered, call_used_reg_set);

 	  c = get_biased_reg (dont_begin, bias, web->prefer_colors,
-			    call_clobbered, PSEUDO_REGNO_MODE (web->regno));
+			    call_clobbered, PSEUDO_REGNO_MODE (web->regno), 0);
 	  if (c < 0)
 	    c = get_biased_reg (dont_begin, bias, web->prefer_colors,
-			      colors, PSEUDO_REGNO_MODE (web->regno));
+			      colors, PSEUDO_REGNO_MODE (web->regno), 1);
 	}
       if (c < 0)
 	break;
@@ -1507,7 +1505,7 @@ colorize_one_web (web, hard)
 	break;
     }
   ra_debug_msg (DUMP_COLORIZE, " --> got %d", c < 0 ? bestc : c);
-  if (bestc >= 0 && c < 0 && !web->was_spilled)
+  if (bestc >= 0 && c < 0 && (!web->was_spilled || web->spill_temp))
     {
       /* This is a non-potential-spill web, which got a color, which did
 	 destroy a hardreg block for one of it's neighbors.  We color
@@ -1539,7 +1537,7 @@ colorize_one_web (web, hard)
 	{
 	  unsigned int loop;
 	  struct web *try = NULL;
-	  struct web *candidates[10];
+	  struct web *candidates[12];

 	  ra_debug_msg (DUMP_COLORIZE, "  *** %d spilled, although %s ***\n",
 		     web->id, web->spill_temp ? "spilltemp" : "non-spill");
@@ -1644,10 +1642,34 @@ colorize_one_web (web, hard)
 			  || proper_hard_reg_subset_p (web->usable_regs,
 						       aw->usable_regs)))
 		    set_cand (8, aw);
+		  /* Also, if we are a real spill temp (meaning we already
+		     were spilled), and the other web is just a short one
+		     (meaning it wasn't yet spilled, but was not live over
+		     deaths) try to spill that one.  It can happen that
+		     we forgot to correctly mark a death insn, or in this
+		     situation:
+		       A <- f(X)    (1)
+		       B <- C
+		       B <- f(B, A) (2)
+		         <- C
+		     Suppose, that B already was spilled.  A is short.
+		     Now suppose that constraints are such that A and B
+		     both must be placed into that same reg class (A due to
+		     (1) and B due to (2)), and it only has one register (e.g.
+		     AREG on x86).  In this case it is of no use to spill
+		     B again.  Instead spilling A is helpful, as it separates
+		     the use and def of A, possibly giving the A at (2) a
+		     wider class.  */
+		  if (web->spill_temp == 1 && aw->spill_temp == 3)
+		    set_cand (9, aw);
+		  if (web->spill_temp == 2 && aw->is_coalesced
+		      && flag_ra_optimistic_coalescing)
+		    set_cand (10, aw);
+		  /* This is from Denis.  What does it do?  */
 		  if (aw->spill_temp && aw->span_deaths /* && !aw->changed */
 		      && !web->span_deaths
 		      && !web->is_coalesced)
-		    set_cand (9, aw);
+		    set_cand (11, aw);
 		}
 	    }
 	  for (loop = 0;
@@ -1866,7 +1888,7 @@ try_recolor_web (web)
 	    if (wide_p)
 	      SET_HARD_REG_BIT (wide_seen, c1);
 	    if (get_free_reg (dont_begin, colors,
-			      GET_MODE (web2->orig_x)) < 0)
+			      GET_MODE (web2->orig_x), 1) < 0)
 	      {
 		if (web2->spill_temp)
 		  SET_HARD_REG_BIT (spill_temps, c1);
@@ -2014,6 +2036,11 @@ insert_coalesced_conflicts ()
 		   || !(wl->t->type == PRECOLORED
 		        || TEST_BIT (sup_igraph,
 				     wl->t->id * num_webs + tweb->id)))
+		  /* Under some circumstances it's possible that a coalesced
+		     web conflicts with its alias (see
+		     non_conflicting_for_combine).  In that case we come here
+		     with tweb and wl->t being equal.  */
+		  && tweb->id != wl->t->id
 		  && hard_regs_intersect_p (&tweb->usable_regs,
 					    &wl->t->usable_regs))
 		{
@@ -2319,47 +2346,51 @@ break_precolored_alias (web)
       struct web *aweb;
       struct conflict_link *wl;
       for (aweb = web2->alias; aweb; aweb = aweb->alias)
-	for (wl = web2->conflict_list; wl; wl = wl->next)
-	  {
-	    struct web *tweb = aweb;
-	    int i;
-	    int nregs = 1 + web2->add_hardregs;
-	    if (aweb->type == PRECOLORED)
-	      nregs = HARD_REGNO_NREGS (aweb->color, GET_MODE (web2->orig_x));
-	    for (i = 0; i < nregs; i++)
-	      {
-		if (aweb->type == PRECOLORED)
-		  tweb = hardreg2web[i + aweb->color];
-		/* There might be some conflict edges laying around
-		   where the usable_regs don't intersect.  This can happen
-		   when first some webs were coalesced and conflicts
-		   propagated, then some combining narrowed usable_regs and
-		   further coalescing ignored those conflicts.  Now there are
-		   some edges to COALESCED webs but not to it's alias.
-		   So abort only when they really should conflict.  */
-		if ((!(tweb->type == PRECOLORED
-		       || TEST_BIT (sup_igraph, tweb->id * num_webs + wl->t->id))
-		     || !(wl->t->type == PRECOLORED
-			  || TEST_BIT (sup_igraph,
-				       wl->t->id * num_webs + tweb->id)))
-		    && hard_regs_intersect_p (&tweb->usable_regs,
-					      &wl->t->usable_regs))
-		  {
-		    /*ra_debug_msg (DUMP_COLORIZE, "add conflict %d - %d\n",
-				  tweb->id, wl->t->id);*/
-		    if (wl->sub == NULL)
-		      record_conflict (tweb, wl->t);
-		    else
-		      {
-			struct sub_conflict *sl;
-			for (sl = wl->sub; sl; sl = sl->next)
-			  record_conflict (tweb, sl->t);
-		      }
-		  }
-		if (aweb->type != PRECOLORED)
-		  break;
-	      }
-	  }
+	{
+	  aweb->is_coalesced = 1;
+	  for (wl = web2->conflict_list; wl; wl = wl->next)
+	    {
+	      struct web *tweb = aweb;
+	      int i;
+	      int nregs = 1 + web2->add_hardregs;
+	      if (aweb->type == PRECOLORED)
+		nregs = HARD_REGNO_NREGS (aweb->color, GET_MODE (web2->orig_x));
+	      for (i = 0; i < nregs; i++)
+		{
+		  if (aweb->type == PRECOLORED)
+		    tweb = hardreg2web[i + aweb->color];
+		  /* There might be some conflict edges laying around
+		     where the usable_regs don't intersect.  This can happen
+		     when first some webs were coalesced and conflicts
+		     propagated, then some combining narrowed usable_regs and
+		     further coalescing ignored those conflicts.  Now there are
+		     some edges to COALESCED webs but not to it's alias.
+		     So abort only when they really should conflict.  */
+		  if ((!(tweb->type == PRECOLORED
+			 || TEST_BIT (sup_igraph, tweb->id * num_webs + wl->t->id))
+		       || !(wl->t->type == PRECOLORED
+			    || TEST_BIT (sup_igraph,
+					 wl->t->id * num_webs + tweb->id)))
+		      && tweb->id != wl->t->id
+		      && hard_regs_intersect_p (&tweb->usable_regs,
+						&wl->t->usable_regs))
+		    {
+		      /*ra_debug_msg (DUMP_COLORIZE, "add conflict %d - %d\n",
+			tweb->id, wl->t->id);*/
+		      if (wl->sub == NULL)
+			record_conflict (tweb, wl->t);
+		      else
+			{
+			  struct sub_conflict *sl;
+			  for (sl = wl->sub; sl; sl = sl->next)
+			    record_conflict (tweb, sl->t);
+			}
+		    }
+		  if (aweb->type != PRECOLORED)
+		    break;
+		}
+	    }
+	}
     }
 }

@@ -2432,12 +2463,11 @@ restore_conflicts_from_coalesce (web)
 		for (sub1 = web->subreg_next; sub1; sub1 = sub1->subreg_next)
 		  {
 		    RESET_BIT (igraph, igraph_index (other->id, sub1->id));
-		    for (sub2 = other->subreg_next; sub2; sub2 =
-			 sub2->subreg_next)
+		    for (sub2 = other->subreg_next; sub2;
+			 sub2 = sub2->subreg_next)
 		      RESET_BIT (igraph, igraph_index (sub1->id, sub2->id));
 		  }
-		for (sub2 = other->subreg_next; sub2; sub2 =
-		     sub2->subreg_next)
+		for (sub2 = other->subreg_next; sub2; sub2 = sub2->subreg_next)
 		  RESET_BIT (igraph, igraph_index (web->id, sub2->id));
 	      }
 /*	    for (sl = wl->sub; sl; sl = sl->next)
@@ -2462,6 +2492,10 @@ restore_conflicts_from_coalesce (web)

   /* We must restore usable_regs because record_conflict will use it.  */
   COPY_HARD_REG_SET (web->usable_regs, web->orig_usable_regs);
+  /* We might have deleted some conflicts above, which really are still
+     there (diamond pattern coalescing).  That's because interference
+     edges aren't reference counted, and some of them were the result
+     of different coalesces.  */
   for (wl = web->conflict_list; wl; wl = wl->next)
     if (wl->t->type == COALESCED)
       {
@@ -2874,7 +2908,7 @@ out:
 /* This is the difference between optimistic coalescing and
    optimistic coalescing+.  Extended coalesce tries to coalesce also
    non-conflicting nodes, not related by a move.  The criteria here is,
-   the one web must be a source, the other a destination of the same insn.
+   that one web must be a source, the other a destination of the same insn.
    This actually makes sense, as (because they are in the same insn) they
    share many of their neighbors, and if they are coalesced, reduce the
    number of conflicts of those neighbors by one.  For this we sort the
diff -urpN work-gcc.orig/gcc/ra-debug.c work-gcc/gcc/ra-debug.c
--- work-gcc.orig/gcc/ra-debug.c	2003-02-03 20:30:39.000000000 +0100
+++ work-gcc/gcc/ra-debug.c	2003-06-16 17:42:08.000000000 +0200
@@ -29,6 +29,7 @@
 #include "df.h"
 #include "output.h"
 #include "ra.h"
+#include "tm_p.h"

 /* This file contains various dumping and debug functions for
    the graph coloring register allocator.  */
@@ -47,22 +48,12 @@ static const char *const reg_class_names
 void
 ra_debug_msg VPARAMS ((unsigned int level, const char *format, ...))
 {
-#ifndef ANSI_PROTOTYPES
-  int level;
-  const char *format;
-#endif
-  va_list ap;
+  VA_OPEN (ap, format);
+  VA_FIXEDARG (ap, unsigned int, level);
+  VA_FIXEDARG (ap, const char *, format);
   if ((debug_new_regalloc & level) != 0 && rtl_dump_file != NULL)
-    {
-      VA_START (ap, format);
-
-#ifndef ANSI_PROTOTYPES
-      format = va_arg (ap, const char *);
-#endif
-
-      vfprintf (rtl_dump_file, format, ap);
-      va_end (ap);
-    }
+    vfprintf (rtl_dump_file, format, ap);
+  VA_CLOSE (ap);
 }


@@ -387,11 +378,11 @@ ra_print_rtx (file, x, with_pn)
 	    fprintf (file, "(%s) ", LABEL_NAME (x));
 	  switch (LABEL_KIND (x))
 	    {
-	      case LABEL_NORMAL: break;
-	      case LABEL_STATIC_ENTRY: fputs (" (entry)", file); break;
-	      case LABEL_GLOBAL_ENTRY: fputs (" (global entry)", file); break;
-	      case LABEL_WEAK_ENTRY: fputs (" (weak entry)", file); break;
-	      default: abort();
+	    case LABEL_NORMAL: break;
+	    case LABEL_STATIC_ENTRY: fputs (" (entry)", file); break;
+	    case LABEL_GLOBAL_ENTRY: fputs (" (global entry)", file); break;
+	    case LABEL_WEAK_ENTRY: fputs (" (weak entry)", file); break;
+	    default: abort();
 	    }
 	  fprintf (file, " [%d uses] uid=(", LABEL_NUSES (x));
 	}
@@ -733,10 +724,10 @@ dump_igraph (df)
 	  ra_debug_msg (DUMP_WEBS, " sub %d", SUBREG_BYTE (web->orig_x));
 	  ra_debug_msg (DUMP_WEBS, " par %d", find_web_for_subweb (web)->id);
 	}
-      ra_debug_msg (DUMP_WEBS, " +%d (span %d, cost "
-		 HOST_WIDE_INT_PRINT_DEC ") (%s)",
-	         web->add_hardregs, web->span_deaths, web->spill_cost,
-	         reg_class_names[web->regclass]);
+      ra_debug_msg (DUMP_WEBS, " +%d (span %d, cost ",
+		    web->add_hardregs, web->span_deaths);
+      ra_debug_msg (DUMP_WEBS, HOST_WIDE_INT_PRINT_DEC, web->spill_cost);
+      ra_debug_msg (DUMP_WEBS, ") (%s)", reg_class_names[web->regclass]);
       if (web->spill_temp == 1)
 	ra_debug_msg (DUMP_WEBS, " (spilltemp)");
       else if (web->spill_temp == 2)
@@ -879,7 +870,6 @@ dump_graph_cost (level, msg)
 {
   unsigned int i;
   unsigned HOST_WIDE_INT cost;
-#define LU HOST_WIDE_INT_PRINT_UNSIGNED
   if (!rtl_dump_file || (debug_new_regalloc & level) == 0)
     return;

@@ -890,9 +880,9 @@ dump_graph_cost (level, msg)
       if (alias (web)->type == SPILLED)
 	cost += web->orig_spill_cost;
     }
-  ra_debug_msg (level, " spill cost of graph (%s) = " LU "\n",
-	     msg ? msg : "", cost);
-#undef LU
+  ra_debug_msg (level, " spill cost of graph (%s) = ", msg ? msg : "");
+  ra_debug_msg (level, HOST_WIDE_INT_PRINT_UNSIGNED, cost);
+  ra_debug_msg (level, "\n");
 }

 /* Dump the color assignment per web, the coalesced and spilled webs.  */
@@ -943,12 +933,13 @@ dump_static_insn_cost (file, message, pr
       unsigned HOST_WIDE_INT cost;
       unsigned int count;
     };
-  struct cost load = {0, 0};
-  struct cost store = {0, 0};
-  struct cost regcopy = {0, 0};
-  struct cost selfcopy = {0, 0};
-  struct cost overall = {0, 0};
   basic_block bb;
+  struct cost load, store, regcopy, selfcopy, overall;
+  memset (&load, 0, sizeof(load));
+  memset (&store, 0, sizeof(store));
+  memset (&regcopy, 0, sizeof(regcopy));
+  memset (&selfcopy, 0, sizeof(selfcopy));
+  memset (&overall, 0, sizeof(overall));

   if (!file)
     return;
@@ -1004,16 +995,21 @@ dump_static_insn_cost (file, message, pr
   if (!prefix)
     prefix = "";
   fprintf (file, "static insn cost %s\n", message ? message : "");
-  fprintf (file, "  %soverall:\tnum=%6d\tcost=%8d\n", prefix, overall.count,
-	   overall.cost);
-  fprintf (file, "  %sloads:\tnum=%6d\tcost=%8d\n", prefix, load.count,
-	   load.cost);
-  fprintf (file, "  %sstores:\tnum=%6d\tcost=%8d\n", prefix,
-	   store.count, store.cost);
-  fprintf (file, "  %sregcopy:\tnum=%6d\tcost=%8d\n", prefix, regcopy.count,
-	   regcopy.cost);
-  fprintf (file, "  %sselfcpy:\tnum=%6d\tcost=%8d\n", prefix, selfcopy.count,
-	   selfcopy.cost);
+  fprintf (file, "  %soverall:\tnum=%6d\tcost=", prefix, overall.count);
+  fprintf (file, HOST_WIDE_INT_PRINT_DEC_SPACE, 8, overall.cost);
+  fprintf (file, "\n");
+  fprintf (file, "  %sloads:\tnum=%6d\tcost=", prefix, load.count);
+  fprintf (file, HOST_WIDE_INT_PRINT_DEC_SPACE, 8, load.cost);
+  fprintf (file, "\n");
+  fprintf (file, "  %sstores:\tnum=%6d\tcost=", prefix, store.count);
+  fprintf (file, HOST_WIDE_INT_PRINT_DEC_SPACE, 8, store.cost);
+  fprintf (file, "\n");
+  fprintf (file, "  %sregcopy:\tnum=%6d\tcost=", prefix, regcopy.count);
+  fprintf (file, HOST_WIDE_INT_PRINT_DEC_SPACE, 8, regcopy.cost);
+  fprintf (file, "\n");
+  fprintf (file, "  %sselfcpy:\tnum=%6d\tcost=", prefix, selfcopy.count);
+  fprintf (file, HOST_WIDE_INT_PRINT_DEC_SPACE, 8, selfcopy.cost);
+  fprintf (file, "\n");
 }

 /* Returns nonzero, if WEB1 and WEB2 have some possible
diff -urpN work-gcc.orig/gcc/ra-rewrite.c work-gcc/gcc/ra-rewrite.c
--- work-gcc.orig/gcc/ra-rewrite.c	2003-06-13 14:37:36.000000000 +0200
+++ work-gcc/gcc/ra-rewrite.c	2003-06-16 17:45:04.000000000 +0200
@@ -37,6 +37,9 @@
 #include "reload.h"
 #include "pre-reload.h"

+extern int flag_ra_test;
+extern struct df2ra df2ra;
+
 /* This file is part of the graph coloring register allocator, and
    contains the functions to change the insn stream.  I.e. it adds
    spill code, rewrites insns to use the new registers after
@@ -65,8 +68,6 @@ static unsigned int is_partly_live_1 PAR
 static void update_spill_colors PARAMS ((HARD_REG_SET *, struct web *, int));
 static int spill_is_free PARAMS ((HARD_REG_SET *, struct web *));
 static void emit_loads PARAMS ((struct rewrite_info *, int, rtx));
-static void detect_bbs_for_rewrite PARAMS ((sbitmap));
-static void detect_deaths_in_bb PARAMS ((basic_block, sbitmap, bitmap));
 static void reloads_to_loads PARAMS ((struct rewrite_info *, struct ref **,
 				      unsigned int, struct web **));
 static void rewrite_program2 PARAMS ((bitmap));
@@ -533,26 +534,6 @@ rewrite_program (new_deaths)
 	      source = DF_REF_REG (web->defs[j]);
 	      dest = slot;
 	      if (GET_CODE (source) == SUBREG)
-#if 0
-		{
-		  dest = simplify_gen_subreg (GET_MODE (source), dest,
-					      GET_MODE (dest),
-					      SUBREG_BYTE (source));
-		  ra_emit_move_insn (dest, source);
-		}
-	      else
-		{
-		  /*if (! bitmap_bit_p (useless_defs, DF_REF_ID (web->defs[j]))
-		      || !validate_change (insn, DF_REF_LOC (web->defs[j]),
-					   slot, 0))*/
-		  /*rtx reg = gen_reg_rtx (GET_MODE (source));
-		  if (validate_change (insn, DF_REF_LOC (web->defs[j]),
-				       reg, 0))
-		    emit_insn (gen_move_insn (dest, reg));
-		  else*/
-		    ra_emit_move_insn (dest, source);
-		}
-#endif
 		dest = simplify_gen_subreg (GET_MODE (source), dest,
 					    GET_MODE (dest),
 					    SUBREG_BYTE (source));
@@ -619,7 +600,7 @@ remember_slot (list, x)
 }

 /* Given two rtx' S1 and S2, either being REGs or MEMs (or SUBREGs
-   thereof), return non-zero, if they overlap.  REGs and MEMs don't
+   thereof), return nonzero if they overlap.  REGs and MEMs don't
    overlap, and if they are MEMs they must have an easy address
    (plus (basereg) (const_inst x)), otherwise they overlap.  */

@@ -771,6 +752,7 @@ insert_stores (new_deaths)
 	  struct ra_insn_info info;
 	  rtx following = NEXT_INSN (insn);
 	  basic_block bb = BLOCK_FOR_INSN (insn);
+
 	  info = insn_df[uid];
 	  for (n = 0; n < info.num_defs; n++)
 	    {
@@ -891,7 +873,7 @@ insert_stores (new_deaths)
 	  else
 	    {
 	      /* rtx d = SET_DEST (set); */
-	      note_uses (&set, delete_overlapping_uses, (void *)&slots);
+	      note_uses_partial (&set, delete_overlapping_uses, (void *)&slots);
 	      /*if (1 || GET_CODE (SET_SRC (set)) == MEM)
 		delete_overlapping_slots (&slots, SET_SRC (set));*/
 	      /*if (REG_P (d) || GET_CODE (d) == MEM
@@ -1043,6 +1025,7 @@ emit_loads (ri, nl_first_reload, last_bl
      rtx last_block_insn;
 {
   int j;
+  ri->any_spilltemps_spilled = 0;
   for (j = ri->nl_size; j;)
     {
       struct web *web = ri->needed_loads[--j];
@@ -1156,24 +1139,6 @@ emit_loads (ri, nl_first_reload, last_bl
     ri->nl_size = j;
 }

-static void
-detect_bbs_for_rewrite (changed_bbs)
-     sbitmap changed_bbs;
-{
-  int pass;
-  struct dlist *d;
-  for (pass = 0; pass < 2; pass++)
-    for (d = (pass == 0) ? WEBS(SPILLED) : WEBS(COALESCED); d; d = d->next)
-      {
-        struct web *web = DLIST_WEB (d);
-	unsigned int i;
-	if (pass == 1 && alias (web)->type != SPILLED)
-	  continue;
-	for (i = 0; i < web->num_uses; i++)
-	  SET_BIT (changed_bbs, 2 + DF_REF_BBNO (web->uses[i]));
-      }
-}
-
 /* Test LIVE for partial WEB live.  */
 int
 is_partly_dead (live, web)
@@ -1222,90 +1187,6 @@ reset_web_live (live, web)
     RESET_BIT (live, web->id);
 }

-/* Fast version of rewrite_program2() for one basic block, where
-   no spill code is necessary.  We detect here only insns with deaths.  */
-static void
-detect_deaths_in_bb (bb, live, new_deaths)
-     basic_block bb;
-     sbitmap live;
-     bitmap new_deaths;
-{
-  rtx insn, head_prev;
-  int j;
-
-  insn = bb->end;
-  if (!INSN_P (insn))
-    insn = prev_real_insn (insn);
-  /* Empty block?  */
-  if (BLOCK_FOR_INSN (insn) != bb)
-    return;
-
-  head_prev = PREV_INSN (bb->head);
-  sbitmap_zero (live);
-  EXECUTE_IF_SET_IN_BITMAP (live_at_end[bb->index], 0, j,
-    {
-      struct web *web = use2web[j];
-      struct web *aweb = alias (find_web_for_subweb (web));
-      /* See below in rewrite_program2() for a comment which webs we
-	 consider live at end.  */
-      if (aweb->type != SPILLED /*|| aweb->color >= 0*/)
-	SET_BIT (live, web->id);
-    });
-
-  for (; insn != head_prev; insn = PREV_INSN (insn))
-    {
-      struct ra_insn_info info;
-      unsigned int n;
-
-      if (!INSN_P (insn))
-	continue;
-
-      info = insn_df[INSN_UID (insn)];
-      for (n = 0; n < info.num_defs; n++)
-	{
-	  unsigned int n2;
-	  struct ref *ref = info.defs[n];
-	  struct web *web = def2web[DF_REF_ID (ref)];
-	  struct web *supweb = find_web_for_subweb (web);
-	  int is_non_def = 0;
-
-	  /* Detect rmw webs.  */
-	  for (n2 = 0; n2 < info.num_uses; n2++)
-	    {
-	      struct web *web2 = use2web[DF_REF_ID (info.uses[n2])];
-	      if (supweb == find_web_for_subweb (web2))
-		{
-		  is_non_def = 1;
-		  break;
-		}
-	    }
-	  if (is_non_def)
-	    continue;
-
-	  if (!is_partly_live (live, supweb))
-	    bitmap_set_bit (useless_defs, DF_REF_ID (ref));
-
-	  reset_web_live (live, web);
-	}
-
-      for (n = 0; n < info.num_uses; n++)
-	{
-	  struct web *web = use2web[DF_REF_ID (info.uses[n])];
-	  if (is_partly_dead (live, web))
-	    {
-	      bitmap_set_bit (new_deaths, INSN_UID (insn));
-	      break;
-	    }
-	}
-
-      for (n = 0; n < info.num_uses; n++)
-	{
-	  struct web *web = use2web[DF_REF_ID (info.uses[n])];
-	  set_web_live (live, web);
-	}
-    }
-}
-
 /* Given a set of reloads in RI, an array of NUM_REFS references (either
    uses or defs) in REFS, and REF2WEB to translate ref IDs to webs
    (either use2web or def2web) convert some reloads to loads.
@@ -1368,8 +1249,6 @@ reloads_to_loads (ri, refs, num_refs, re
   ri->num_reloads = num_reloads;
 }

-#define NEW_SPILL
-
 /* This adds loads for spilled webs to the program.  It uses a kind of
    interference region spilling.  If flag_ra_ir_spilling is zero it
    only uses improved chaitin spilling (adding loads only at insns
@@ -1380,7 +1259,6 @@ rewrite_program2 (new_deaths)
      bitmap new_deaths;
 {
   basic_block bb;
-  sbitmap changed_bbs = sbitmap_alloc (3 + last_basic_block);
   int nl_first_reload;
   struct rewrite_info ri;
   rtx insn;
@@ -1390,34 +1268,11 @@ rewrite_program2 (new_deaths)
   ri.live = sbitmap_alloc (num_webs);
   ri.nl_size = 0;
   ri.num_reloads = 0;
-  sbitmap_zero (changed_bbs);
-  detect_bbs_for_rewrite (changed_bbs);
-#ifndef NEW_SPILL
-  FOR_ALL_BB (BB)
-#else
   for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
-#endif
     {
       basic_block last_bb = NULL;
       rtx last_block_insn;
       int i, j;
-#ifndef NEW_SPILL
-      i = bb->index + 2;
-      if (!bb->end)
-	continue;
-      if (!TEST_BIT (changed_bbs, i))
-	{
-	  detect_deaths_in_bb (bb, ri.live, new_deaths);
-	  continue;
-	}
-
-      insn = bb->end;
-      if (!INSN_P (insn))
-        insn = prev_real_insn (insn);
-      /* Empty block?  */
-      if (BLOCK_FOR_INSN (insn) != bb)
-	continue;
-#else
       if (!INSN_P (insn))
 	insn = prev_real_insn (insn);
       while (insn && !(bb = BLOCK_FOR_INSN (insn)))
@@ -1425,7 +1280,6 @@ rewrite_program2 (new_deaths)
       if (!insn)
 	break;
       i = bb->index + 2;
-#endif
       last_block_insn = insn;

       sbitmap_zero (ri.live);
@@ -1481,8 +1335,8 @@ rewrite_program2 (new_deaths)
 	{
 	  struct ra_insn_info info;
 	  unsigned int n;
+	  HARD_REG_SET earlyclobber_colors;

-#ifdef NEW_SPILL
 	  if (INSN_P (insn) && BLOCK_FOR_INSN (insn) != last_bb)
 	    {
 	      int index = BLOCK_FOR_INSN (insn)->index + 2;
@@ -1520,8 +1374,8 @@ rewrite_program2 (new_deaths)
 	      if (!INSN_P (last_block_insn))
 	        last_block_insn = prev_real_insn (last_block_insn);
 	    }
-#endif

+	  CLEAR_HARD_REG_SET (earlyclobber_colors);
 	  ri.need_load = 0;
 	  if (INSN_P (insn))
 	    info = insn_df[INSN_UID (insn)];
@@ -1580,7 +1434,7 @@ rewrite_program2 (new_deaths)
 		else
 		  {
 		    struct web *sweb;
-		    /* If the whole web is defined here, no parts of it are
+		    /* The whole web is defined here, so no parts of it are
 		       live anymore and no reloads are needed for them.  */
 		    for (sweb = supweb->subreg_next; sweb;
 			 sweb = sweb->subreg_next)
@@ -1593,7 +1447,14 @@ rewrite_program2 (new_deaths)
 		      }
 		  }
 		if (alias (supweb)->type != SPILLED)
-		  update_spill_colors (&(ri.colors_in_use), web, 0);
+		  {
+		    /* Colors of early clobber operands don't become
+		       free yet.  */
+		    if (DF_REF_FLAGS (ref) & DF_REF_EARLYCLOBBER)
+		      update_spill_colors (&earlyclobber_colors, web, 1);
+		    else
+		      update_spill_colors (&(ri.colors_in_use), web, 0);
+		  }
 	      }

 	  nl_first_reload = ri.nl_size;
@@ -1649,10 +1510,17 @@ rewrite_program2 (new_deaths)
 		struct web *web = use2web[DF_REF_ID (info.uses[n])];
 		struct web *aweb = alias (find_web_for_subweb (web));
 		if (aweb->type != SPILLED)
-		  update_spill_colors (&(ri.colors_in_use), web, 1);
+		  {
+		    update_spill_colors (&(ri.colors_in_use), web, 1);
+		    /* Make sure we don't accidentally remove this color
+		       from the in_use set again.  */
+		    update_spill_colors (&earlyclobber_colors, web, 0);
+		  }
 	      }

-	  ri.any_spilltemps_spilled = 0;
+	  /* Temporarily mark the early clobber hard regs as in use.  */
+	  IOR_HARD_REG_SET (ri.colors_in_use, earlyclobber_colors);
+
 	  if (INSN_P (insn))
 	    for (n = 0; n < info.num_uses; n++)
 	      {
@@ -1688,11 +1556,10 @@ rewrite_program2 (new_deaths)
 		  web->one_load = 0;
 	      }

-#ifndef NEW_SPILL
-	  if (insn == bb->head)
-#else
+	  /* Now that the effects of this insn are all handled, the colors
+	     of early clobber operands are free.  */
+	  AND_COMPL_HARD_REG_SET (ri.colors_in_use, earlyclobber_colors);
 	  if (GET_CODE (insn) == CODE_LABEL)
-#endif
 	    break;
 	}

@@ -1751,13 +1618,10 @@ rewrite_program2 (new_deaths)
       emit_loads (&ri, nl_first_reload, last_block_insn);
       if (ri.nl_size != 0 /*|| ri.num_reloads != 0*/)
 	abort ();
-#ifdef NEW_SPILL
       if (!insn)
 	break;
-#endif
     }
   free (ri.needed_loads);
-  sbitmap_free (changed_bbs);
   sbitmap_free (ri.live);
   BITMAP_XFREE (ri.scratch);
   BITMAP_XFREE (ri.need_reload);
@@ -1942,8 +1806,8 @@ detect_web_parts_to_rebuild ()
 	    bitmap_set_bit (uses_as_bitmap, DF_REF_ID (link->ref));
       }

-  /* The information in live_at_end[] will be rebuild for all uses
-     we recheck, so clear it here (the uses of spilled webs, might
+  /* The information in live_at_end[] will be rebuilt for all uses
+     we recheck, so clear it here (the uses of spilled webs might
      indeed not become member of it again).  */
   live_at_end -= 2;
   for (i = 0; i < (unsigned int) last_basic_block + 2; i++)
@@ -2890,20 +2754,18 @@ void
 dump_cost (level)
      unsigned int level;
 {
-#define LU HOST_WIDE_INT_PRINT_UNSIGNED
   ra_debug_msg (level, "Instructions for spilling\n added:\n");
-  ra_debug_msg (level, "  loads =%d cost=" LU "\n", emitted_spill_loads,
-	     spill_load_cost);
-  ra_debug_msg (level, "  stores=%d cost=" LU "\n", emitted_spill_stores,
-	     spill_store_cost);
-  ra_debug_msg (level, "  remat =%d cost=" LU "\n", emitted_remat,
-	     spill_remat_cost);
-  ra_debug_msg (level, " removed:\n");
-  ra_debug_msg (level, "  moves =%d cost=" LU "\n", deleted_move_insns,
-	     deleted_move_cost);
-  ra_debug_msg (level, "  others=%d cost=" LU "\n", deleted_def_insns,
-	     deleted_def_cost);
-#undef LU
+  ra_debug_msg (level, "  loads =%d cost=", emitted_spill_loads);
+  ra_debug_msg (level, HOST_WIDE_INT_PRINT_UNSIGNED, spill_load_cost);
+  ra_debug_msg (level, "\n  stores=%d cost=", emitted_spill_stores);
+  ra_debug_msg (level, HOST_WIDE_INT_PRINT_UNSIGNED, spill_store_cost);
+  ra_debug_msg (level, "\n  remat =%d cost=", emitted_remat);
+  ra_debug_msg (level, HOST_WIDE_INT_PRINT_UNSIGNED, spill_remat_cost);
+  ra_debug_msg (level, "\n removed:\n  moves =%d cost=", deleted_move_insns);
+  ra_debug_msg (level, HOST_WIDE_INT_PRINT_UNSIGNED, deleted_move_cost);
+  ra_debug_msg (level, "\n  others=%d cost=", deleted_def_insns);
+  ra_debug_msg (level, HOST_WIDE_INT_PRINT_UNSIGNED, deleted_def_cost);
+  ra_debug_msg (level, "\n");
 }

 /* Initialization of the rewrite phase.  */
diff -urpN work-gcc.orig/gcc/ra.c work-gcc/gcc/ra.c
--- work-gcc.orig/gcc/ra.c	2003-06-16 17:37:31.000000000 +0200
+++ work-gcc/gcc/ra.c	2003-06-16 17:42:08.000000000 +0200
@@ -39,9 +39,6 @@
 #include "pre-reload.h"
 #include "ra.h"

-#define obstack_chunk_alloc xmalloc
-#define obstack_chunk_free free
-
 /* This is the toplevel file of a graph coloring register allocator.
    It is able to act like a George & Appel allocator, i.e. with iterative
    coalescing plus spill coalescing/propagation.
@@ -546,7 +543,7 @@ init_ra ()
   int i;
   HARD_REG_SET rs;
 #ifdef ELIMINABLE_REGS
-  static struct {int from, to; } eliminables[] = ELIMINABLE_REGS;
+  static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
   unsigned int j;
 #endif
   int need_fp
@@ -556,6 +553,11 @@ init_ra ()
 #endif
        || FRAME_POINTER_REQUIRED);

+#ifdef ORDER_REGS_FOR_LOCAL_ALLOC
+  if (1)
+    ORDER_REGS_FOR_LOCAL_ALLOC;
+#endif
+
   ra_colorize_init ();

   /* We can't ever use any of the fixed regs.  */
@@ -842,7 +844,15 @@ reg_alloc ()
 {
   int changed;
   FILE *ra_dump_file = rtl_dump_file;
-  rtx last = get_last_insn ();
+  rtx last;
+  bitmap use_insns = BITMAP_XMALLOC ();
+
+  delete_trivially_dead_insns (get_insns (), max_reg_num ());
+  /* The above might have deleted some trapping insns making some basic
+     blocks unreachable.  So do a simple cleanup pass to remove them.  */
+  cleanup_cfg (0);
+  last = get_last_insn ();
+
   /*
   flag_ra_pre_reload = 0;
   flag_ra_spanned_deaths_from_scratch = 0;
@@ -864,11 +874,13 @@ reg_alloc ()
 	  last = bb->end;
 	  if (!INSN_P (last) || GET_CODE (PATTERN (last)) != USE)
 	    {
-	      rtx insns;
+	      rtx insn, insns;
 	      start_sequence ();
 	      use_return_register ();
 	      insns = get_insns ();
 	      end_sequence ();
+	      for (insn = insns; insn; insn = NEXT_INSN (insn))
+		bitmap_set_bit (use_insns, INSN_UID (insn));
 	      emit_insn_after (insns, last);
 	    }
 	}
@@ -1072,7 +1084,6 @@ reg_alloc ()
 	  if (!flag_ra_pre_reload)
 	    regclass (get_insns (), max_reg_num (), rtl_dump_file);
 	  rtl_dump_file = ra_dump_file;
-
 	  /* Remember the number of defs and uses, so we can distinguish
 	     new from old refs in the next pass.  */
 	  last_def_id = df->def_id;
@@ -1102,7 +1113,6 @@ reg_alloc ()
   /* We are done with allocation, free all memory and output some
      debug info.  */
   free_all_mem (df);
-  /*  free (depths); */
   df_finish (df);
   if ((debug_new_regalloc & DUMP_RESULTS) == 0)
     dump_cost (DUMP_COSTS);
@@ -1116,12 +1126,33 @@ reg_alloc ()
     rtl_dump_file = NULL;
   no_new_pseudos = 0;
   allocate_reg_info (max_reg_num (), FALSE, FALSE);
-  /*compute_bb_for_insn ();*/
-  /*store_motion ();*/
+  while_newra = 1;
   no_new_pseudos = 1;
   newra_in_progress = 0;
   rtl_dump_file = ra_dump_file;

+    {
+      edge e;
+      for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
+	{
+	  basic_block bb = e->src;
+	  last = bb->end;
+	  for (last = bb->end; last; last = PREV_INSN (last))
+	    {
+	      if (last == bb->head)
+		break;
+	      if (bitmap_bit_p (use_insns, INSN_UID (last)))
+		delete_insn (last);
+	    }
+	}
+    }
+  BITMAP_XFREE (use_insns);
+  /* We might have deleted/moved dead stores, which could trap (mem accesses
+     with flag_non_call_exceptions).  This might have made some edges dead.
+     Get rid of them now.  No need to rebuild life info with that call,
+     as we do that anyway a few statements below.  */
+  purge_all_dead_edges (0);
+
   /* Some spill insns could've been inserted after trapping calls, i.e.
      at the end of a basic block, which really ends at that call.
      Fixup that breakages by adjusting basic block boundaries.  */
@@ -1130,28 +1161,15 @@ reg_alloc ()
   /* Cleanup the flow graph.  */
   if ((debug_new_regalloc & DUMP_LAST_FLOW) == 0)
     rtl_dump_file = NULL;
-  /*free_bb_for_insn ();
-  find_basic_blocks (get_insns (), max_reg_num (), rtl_dump_file);*/
-  /*compute_bb_for_insn ();*/
-  /*clear_log_links (get_insns ());*/
   life_analysis (get_insns (), rtl_dump_file,
-		 PROP_DEATH_NOTES | PROP_LOG_LINKS | PROP_REG_INFO);
-/*  recompute_reg_usage (get_insns (), TRUE);
-  life_analysis (get_insns (), rtl_dump_file,
-		 PROP_SCAN_DEAD_CODE | PROP_KILL_DEAD_CODE); */
+		 PROP_DEATH_NOTES | PROP_LOG_LINKS | PROP_REG_INFO);
   cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_UPDATE_LIFE);
   recompute_reg_usage (get_insns (), TRUE);
-/*  delete_trivially_dead_insns (get_insns (), max_reg_num ());*/
   if (rtl_dump_file)
     dump_flow_info (rtl_dump_file);
-
-  /* XXX: reg_scan screws up reg_renumber, and without reg_scan, we can't do
-     regclass. */
-  /*reg_scan (get_insns (), max_reg_num (), 1);
-    regclass (get_insns (), max_reg_num (), rtl_dump_file); */
   rtl_dump_file = ra_dump_file;

-  /* Also update_equiv_regs() can't be called after register allocation.
+  /* update_equiv_regs() can't be called after register allocation.
      It might delete some pseudos, and insert other insns setting
      up those pseudos in different places.  This of course screws up
      the allocation because that may destroy a hardreg for another
@@ -1167,6 +1185,11 @@ reg_alloc ()
   setup_renumber (1);
   sbitmap_free (insns_with_deaths);

+  /* Build the insn chain before deleting some of the REG_DEAD notes.
+     It initializes the chain->live_throughout bitmap, and if we deleted
+     some REG_DEAD notes first we would leave pseudos in those bitmaps for
+     insns where they really are dead already.  This can confuse caller-save.  */
+  build_insn_chain (get_insns ());
   /* Remove REG_DEAD notes which are incorrectly set.  See the docu
      of that function.  */
   remove_suspicious_death_notes ();
diff -urpN work-gcc.orig/gcc/toplev.c work-gcc/gcc/toplev.c
--- work-gcc.orig/gcc/toplev.c	2003-01-23 16:47:48.000000000 +0100
+++ work-gcc/gcc/toplev.c	2003-06-16 17:42:08.000000000 +0200
@@ -252,6 +252,7 @@ enum dump_file_index
   DFI_bbro,
   DFI_mach,
   DFI_dbr,
+  DFI_costs,
   DFI_MAX
 };

@@ -261,7 +262,7 @@ enum dump_file_index
    Remaining -d letters:

 	"              o q         "
-	"       H JK   OPQ  TUV  YZ"
+	"       H JK   O Q   UV  YZ"
 */

 static struct dump_file_info dump_file[DFI_MAX] =
@@ -301,11 +302,13 @@ static struct dump_file_info dump_file[D
   { "bbro",	'B', 1, 0, 0 },
   { "mach",	'M', 1, 0, 0 },
   { "dbr",	'd', 0, 0, 0 },
+  { "costs",	'T', 0, 0, 0 },
 };

 static int open_dump_file PARAMS ((enum dump_file_index, tree));
 static void close_dump_file PARAMS ((enum dump_file_index,
 				     void (*) (FILE *, rtx), rtx));
+static void print_costs_bb PARAMS ((FILE *, rtx));

 /* Other flags saying which kinds of debugging dump have been requested.  */

@@ -883,6 +886,7 @@ int flag_ra_pre_reload = 1;
 /* If nonzero, use separate passs for calculation web deaths in new
    register allocator.  */
 int flag_ra_spanned_deaths_from_scratch = 0;
+int flag_ra_test = 0;

 /* Nonzero if we perform superblock formation.  */

@@ -1197,6 +1201,7 @@ static const lang_independent_options f_
    N_("Use pre-reload in graph coloring register allocation.") },
   { "new-ra-spanned-deaths-pass", &flag_ra_spanned_deaths_from_scratch, 1,
    N_("Use special pass for detect web deaths in graph coloring register allocation.") },
+  { "new-ra-test", &flag_ra_test, 1, "" },
 };

 /* Table of language-specific options.  */
@@ -1894,6 +1899,85 @@ close_dump_file (index, func, insns)
   timevar_pop (TV_DUMP);
 }

+static void count_stack_refs PARAMS ((rtx *, void *));
+static void count_stack_writes PARAMS ((rtx, rtx, void *));
+
+static void
+count_stack_refs (dest, data)
+     rtx *dest;
+     void *data;
+{
+  rtx x = *dest;
+  int *count = (int *) data;
+  int regno;
+  if (GET_CODE (x) == SUBREG)
+    x = SUBREG_REG (x);
+  if (GET_CODE (x) != MEM)
+    return;
+  x = XEXP (x, 0);
+  if (GET_CODE (x) == PLUS)
+    {
+      if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+	return;
+      x = XEXP (x, 0);
+    }
+  if (!REG_P (x))
+    return;
+  regno = REGNO (x);
+  if (!REGNO_PTR_FRAME_P (regno))
+    return;
+  (*count)++;
+}
+
+static void
+count_stack_writes (dest, setter, data)
+     rtx dest, setter ATTRIBUTE_UNUSED;
+     void *data;
+{
+  count_stack_refs (&dest, data);
+}
+
+/* Print the costs of various statements of the function.  */
+
+static void
+print_costs_bb (file, insn)
+     FILE *file;
+     rtx insn;
+{
+  basic_block bb;
+  double stack_load = 0.0;
+  double stack_store = 0.0;
+  int count_load = 0, count_store = 0;
+  if (!insn || !file)
+    return;
+  FOR_EACH_BB (bb)
+    {
+      double sl = 0.0, sw = 0.0;
+      int cl = 0, cw = 0, count = 0;
+      for (insn = bb->head; insn; insn = NEXT_INSN (insn))
+        {
+	  if (INSN_P (insn))
+	    {
+	      count++;
+	      note_stores (PATTERN (insn), count_stack_writes, &cw);
+	      note_uses (&PATTERN (insn), count_stack_refs, &cl);
+	    }
+	  if (insn == bb->end)
+	    break;
+	}
+      sl = cl * bb->frequency;
+      sw = cw * bb->frequency;
+      stack_load += sl;
+      stack_store += sw;
+      count_load += cl;
+      count_store += cw;
+      fprintf (file, "  BB %4d: freq %5d load %3d=%-6.0f stor %3d=%-6.0f\n",
+	       bb->index, bb->frequency, cl, sl, cw, sw);
+    }
+  fprintf (file, "  ALL    :            load %5d=%-.0f stor %5d=%-.0f\n",
+           count_load, stack_load, count_store, stack_store);
+}
+
 /* Do any final processing required for the declarations in VEC, of
    which there are LEN.  We write out inline functions and variables
    that have been deferred until this point, but which are required.
@@ -3241,7 +3325,6 @@ rest_of_compilation (decl)

   if (flag_new_regalloc)
     {
-      delete_trivially_dead_insns (insns, max_reg_num ());
       reg_alloc ();

       timevar_pop (TV_LOCAL_ALLOC);
@@ -3257,7 +3340,6 @@ rest_of_compilation (decl)
       timevar_push (TV_GLOBAL_ALLOC);
       open_dump_file (DFI_greg, decl);

-      build_insn_chain (insns);
       failure = reload (insns, 0);

       timevar_pop (TV_GLOBAL_ALLOC);
@@ -3505,6 +3587,9 @@ rest_of_compilation (decl)
     }
   compute_alignments ();

+  open_dump_file (DFI_costs, decl);
+  close_dump_file (DFI_costs, print_costs_bb, insns);
+
   /* CFG is no longer maintained up-to-date.  */
   free_bb_for_insn ();


