This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

split global and reload


This patch separates global register allocation from reload.  As a
result, the two passes are reported separately in the -Q timing
summary and get separate RTL dump files.  Generated code should not
change; the split simply makes it easier to determine which of the
two passes is misbehaving.

The .greg dump is still -dg.  The new .reload dump is -dh; that's not
mnemonic at all, but the good letters are already taken.

Reload uses some of global's data structures, so it was necessary to
break global_alloc into three functions: setup (global_alloc_init),
allocation (global_alloc), and cleanup (global_alloc_cleanup).
Cleanup is not called until after reload finishes.

zw

	* toplev.c: Always call reload from rest_of_compilation.
	Account time spent in reload separately.  Give reload its own
	dump file.
	* global.c: Break setup and teardown code out of global_alloc
	into global_alloc_init and global_alloc_cleanup respectively.
	Update commentary.  Don't call reload from global_alloc, which
	now returns void.
	* rtl.h: Prototype global_alloc_init and global_alloc_cleanup.
	Change prototype of global_alloc to return void.
	* reload1.c: Update commentary.

===================================================================
Index: toplev.c
--- toplev.c	2000/04/07 09:23:29	1.316
+++ toplev.c	2000/04/10 00:00:54
@@ -266,6 +266,7 @@ enum dump_file_index
   DFI_sched,
   DFI_lreg,
   DFI_greg,
+  DFI_reload,
   DFI_flow2,
   DFI_peephole2,
   DFI_sched2,
@@ -299,6 +300,7 @@ struct dump_file_info dump_file[DFI_MAX]
   { "sched",	'S', 1, 0, 0 },
   { "lreg",	'l', 1, 0, 0 },
   { "greg",	'g', 1, 0, 0 },
+  { "reload",	'h', 1, 0, 0 },
   { "flow2",	'w', 1, 0, 0 },
   { "peephole2", 'z', 1, 0, 0 },
   { "sched2",	'R', 1, 0, 0 },
@@ -1418,6 +1420,7 @@ int regmove_time;
 int sched_time;
 int local_alloc_time;
 int global_alloc_time;
+int reload_time;
 int flow2_time;
 int peephole2_time;
 int sched2_time;
@@ -2170,6 +2173,7 @@ compile_file (name)
   sched_time = 0;
   local_alloc_time = 0;
   global_alloc_time = 0;
+  reload_time = 0;
   flow2_time = 0;
   peephole2_time = 0;
   sched2_time = 0;
@@ -2577,6 +2581,7 @@ compile_file (name)
 #endif
       print_time ("local-alloc", local_alloc_time);
       print_time ("global-alloc", global_alloc_time);
+      print_time ("reload", reload_time);
       print_time ("flow2", flow2_time);
 #ifdef HAVE_peephole2
       print_time ("peephole2", peephole2_time);
@@ -3355,20 +3360,33 @@ rest_of_compilation (decl)
   /* If optimizing, allocate remaining pseudo-regs.  Do the reload
      pass fixing up any insns that are invalid.  */
 
-  TIMEVAR (global_alloc_time,
+  if (optimize)
+    TIMEVAR (global_alloc_time,
+	     {
+	       global_alloc_init ();
+	       global_alloc (rtl_dump_file);
+	     });
+
+  if (dump_file[DFI_greg].enabled)
+    {
+      TIMEVAR (dump_time, dump_global_regs (rtl_dump_file));
+      close_dump_file (DFI_greg, print_rtl_with_bb, insns);
+    }
+
+  open_dump_file (DFI_reload, decl);
+
+  TIMEVAR (reload_time,
 	   {
-	     if (optimize)
-	       failure = global_alloc (rtl_dump_file);
-	     else
-	       {
-		 build_insn_chain (insns);
-		 failure = reload (insns, 0, rtl_dump_file);
-	       }
+	     build_insn_chain (insns);
+	     failure = reload (insns, optimize, rtl_dump_file);
 	   });
 
   if (failure)
     goto exit_rest_of_compilation;
 
+  if (optimize)
+    TIMEVAR (global_alloc_time, global_alloc_cleanup ());
+
   if (ggc_p)
     ggc_collect ();
 
@@ -3387,10 +3405,10 @@ rest_of_compilation (decl)
   if (rebuild_label_notes_after_reload)
     TIMEVAR (jump_time, rebuild_jump_labels (insns));
 
-  if (dump_file[DFI_greg].enabled)
+  if (dump_file[DFI_reload].enabled)
     {
       TIMEVAR (dump_time, dump_global_regs (rtl_dump_file));
-      close_dump_file (DFI_greg, print_rtl_with_bb, insns);
+      close_dump_file (DFI_reload, print_rtl_with_bb, insns);
     }
 
   /* Re-create the death notes which were deleted during reload.  */
===================================================================
Index: global.c
--- global.c	2000/03/25 18:34:02	1.59
+++ global.c	2000/04/10 00:00:55
@@ -43,12 +43,16 @@ Boston, MA 02111-1307, USA.  */
    (Such changes are made by final).  The entry point is
    the function global_alloc.
 
-   After allocation is complete, the reload pass is run as a subroutine
-   of this pass, so that when a pseudo reg loses its hard reg due to
-   spilling it is possible to make a second attempt to find a hard
-   reg for it.  The reload pass is independent in other respects
-   and it is run even when stupid register allocation is in use.
+   There is substantial setup which has to be done first, and the
+   entry point for that is global_alloc_init.  This allocates data
+   used both by global_alloc, and later by the reload pass.  When this
+   data is available, reload can attempt to find another hard reg for
+   a pseudo that it had to spill.  Reload is otherwise independent and
+   is run even when not optimizing.  After reload has finished,
+   global_alloc_cleanup deallocates the shared data.
 
+   This pass obeys the following algorithm:
+
    1. Assign allocation-numbers (allocnos) to the pseudo-registers
    still needing allocations and to the pseudo-registers currently
    allocated by local-alloc which may be spilled by reload.
@@ -307,18 +311,9 @@ static void reg_becomes_live	PARAMS ((rt
 static void reg_dies		PARAMS ((int, enum machine_mode,
 				       struct insn_chain *));
 
-/* Perform allocation of pseudo-registers not allocated by local_alloc.
-   FILE is a file to output debugging information on,
-   or zero if such output is not desired.
-
-   Return value is nonzero if reload failed
-   and we must not do any more for this function.  */
-
-int
-global_alloc (file)
-     FILE *file;
+void
+global_alloc_init (void)
 {
-  int retval;
 #ifdef ELIMINABLE_REGS
   static struct {int from, to; } eliminables[] = ELIMINABLE_REGS;
 #endif
@@ -484,111 +479,108 @@ global_alloc (file)
 				    sizeof (INT_TYPE));
 
   allocnos_live = (INT_TYPE *) xmalloc (allocno_row_words * sizeof (INT_TYPE));
+}
 
-  /* If there is work to be done (at least one reg to allocate),
-     perform global conflict analysis and allocate the regs.  */
+/* Perform allocation of pseudo-registers not allocated by local_alloc.
+   FILE is a file to output debugging information on,
+   or zero if such output is not desired.  */
+void
+global_alloc (file)
+     FILE *file;
+{
+  size_t i;
 
-  if (max_allocno > 0)
+  /* Bail early if there is no work to be done (no regs to allocate).  */
+  if (max_allocno == 0)
+    return;
+
+  /* Scan all the insns and compute the conflicts among allocnos
+     and between allocnos and hard regs.  */
+
+  global_conflicts ();
+  mirror_conflicts ();
+
+  /* Eliminate conflicts between pseudos and eliminable registers.  If
+     the register is not eliminated, the pseudo won't really be able to
+     live in the eliminable register, so the conflict doesn't matter.
+     If we do eliminate the register, the conflict will no longer exist.
+     So in either case, we can ignore the conflict.  Likewise for
+     preferences.  */
+
+  for (i = 0; i < (size_t) max_allocno; i++)
     {
-      /* Scan all the insns and compute the conflicts among allocnos
-	 and between allocnos and hard regs.  */
+      AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_conflicts,
+			      eliminable_regset);
+      AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_copy_preferences,
+			      eliminable_regset);
+      AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_preferences,
+			      eliminable_regset);
+    }
+
+  /* Try to expand the preferences by merging them between allocnos.  */
 
-      global_conflicts ();
+  expand_preferences ();
 
-      mirror_conflicts ();
+  /* Determine the order to allocate the remaining pseudo registers.  */
 
-      /* Eliminate conflicts between pseudos and eliminable registers.  If
-	 the register is not eliminated, the pseudo won't really be able to
-	 live in the eliminable register, so the conflict doesn't matter.
-	 If we do eliminate the register, the conflict will no longer exist.
-	 So in either case, we can ignore the conflict.  Likewise for
-	 preferences.  */
-
-      for (i = 0; i < (size_t) max_allocno; i++)
-	{
-	  AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_conflicts,
-				  eliminable_regset);
-	  AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_copy_preferences,
-				  eliminable_regset);
-	  AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_preferences,
-				  eliminable_regset);
-	}
-
-      /* Try to expand the preferences by merging them between allocnos.  */
-
-      expand_preferences ();
-
-      /* Determine the order to allocate the remaining pseudo registers.  */
-
-      allocno_order = (int *) xmalloc (max_allocno * sizeof (int));
-      for (i = 0; i < (size_t) max_allocno; i++)
-	allocno_order[i] = i;
-
-      /* Default the size to 1, since allocno_compare uses it to divide by.
-	 Also convert allocno_live_length of zero to -1.  A length of zero
-	 can occur when all the registers for that allocno have reg_live_length
-	 equal to -2.  In this case, we want to make an allocno, but not
-	 allocate it.  So avoid the divide-by-zero and set it to a low
-	 priority.  */
-
-      for (i = 0; i < (size_t) max_allocno; i++)
-	{
-	  if (allocno[i].size == 0)
-	    allocno[i].size = 1;
-	  if (allocno[i].live_length == 0)
-	    allocno[i].live_length = -1;
-	}
+  allocno_order = (int *) xmalloc (max_allocno * sizeof (int));
+  for (i = 0; i < (size_t) max_allocno; i++)
+    allocno_order[i] = i;
 
-      qsort (allocno_order, max_allocno, sizeof (int), allocno_compare);
+  /* Default the size to 1, since allocno_compare uses it to divide by.
+     Also convert allocno_live_length of zero to -1.  A length of zero
+     can occur when all the registers for that allocno have reg_live_length
+     equal to -2.  In this case, we want to make an allocno, but not
+     allocate it.  So avoid the divide-by-zero and set it to a low
+     priority.  */
+
+  for (i = 0; i < (size_t) max_allocno; i++)
+    {
+      if (allocno[i].size == 0)
+	allocno[i].size = 1;
+      if (allocno[i].live_length == 0)
+	allocno[i].live_length = -1;
+    }
+
+  qsort (allocno_order, max_allocno, sizeof (int), allocno_compare);
       
-      prune_preferences ();
+  prune_preferences ();
 
-      if (file)
-	dump_conflicts (file);
+  if (file)
+    dump_conflicts (file);
 
-      /* Try allocating them, one by one, in that order,
-	 except for parameters marked with reg_live_length[regno] == -2.  */
+  /* Try allocating them, one by one, in that order,
+     except for parameters marked with reg_live_length[regno] == -2.  */
 
-      for (i = 0; i < (size_t) max_allocno; i++)
-	if (reg_renumber[allocno[allocno_order[i]].reg] < 0
-	    && REG_LIVE_LENGTH (allocno[allocno_order[i]].reg) >= 0)
+  for (i = 0; i < (size_t) max_allocno; i++)
+    if (reg_renumber[allocno[allocno_order[i]].reg] < 0
+	&& REG_LIVE_LENGTH (allocno[allocno_order[i]].reg) >= 0)
+      {
+	/* If we have more than one register class,
+	   first try allocating in the class that is cheapest
+	   for this pseudo-reg.  If that fails, try any reg.  */
+	if (N_REG_CLASSES > 1)
 	  {
-	    /* If we have more than one register class,
-	       first try allocating in the class that is cheapest
-	       for this pseudo-reg.  If that fails, try any reg.  */
-	    if (N_REG_CLASSES > 1)
-	      {
-		find_reg (allocno_order[i], 0, 0, 0, 0);
-		if (reg_renumber[allocno[allocno_order[i]].reg] >= 0)
-		  continue;
-	      }
-	    if (reg_alternate_class (allocno[allocno_order[i]].reg) != NO_REGS)
-	      find_reg (allocno_order[i], 0, 1, 0, 0);
+	    find_reg (allocno_order[i], 0, 0, 0, 0);
+	    if (reg_renumber[allocno[allocno_order[i]].reg] >= 0)
+	      continue;
 	  }
-
-      free (allocno_order);
-    }
+	if (reg_alternate_class (allocno[allocno_order[i]].reg) != NO_REGS)
+	  find_reg (allocno_order[i], 0, 1, 0, 0);
+      }
 
-  /* Do the reloads now while the allocno data still exist, so that we can
-     try to assign new hard regs to any pseudo regs that are spilled.  */
+  free (allocno_order);
+}
 
-#if 0 /* We need to eliminate regs even if there is no rtl code,
-	 for the sake of debugging information.  */
-  if (n_basic_blocks > 0)
-#endif
-    {
-      build_insn_chain (get_insns ());
-      retval = reload (get_insns (), 1, file);
-    }
-
-  /* Clean up.  */
+/* Clean up our data structures, now that reload has completed.  */
+void
+global_alloc_cleanup (void)
+{
   free (reg_allocno);
   free (reg_may_share);
   free (allocno);
   free (conflicts);
   free (allocnos_live);
-
-  return retval;
 }
 
 /* Sort predicate for ordering the allocnos.
===================================================================
Index: rtl.h
--- rtl.h	2000/04/09 01:16:44	1.188
+++ rtl.h	2000/04/10 00:00:55
@@ -1635,8 +1635,10 @@ extern int gcse_main			PARAMS ((rtx, FIL
 
 /* In global.c */
 extern void mark_elimination		PARAMS ((int, int));
+extern void global_alloc_init		PARAMS ((void));
+extern void global_alloc_cleanup	PARAMS ((void));
 #ifdef BUFSIZ
-extern int global_alloc			PARAMS ((FILE *));
+extern void global_alloc		PARAMS ((FILE *));
 extern void dump_global_regs		PARAMS ((FILE *));
 #endif
 #ifdef HARD_CONST
===================================================================
Index: reload1.c
--- reload1.c	2000/04/04 02:24:49	1.206
+++ reload1.c	2000/04/10 00:00:57
@@ -587,16 +587,16 @@ static int failure;
 
    FIRST is the first insn of the function being compiled.
 
-   GLOBAL nonzero means we were called from global_alloc
-   and should attempt to reallocate any pseudoregs that we
-   displace from hard regs we will use for reloads.
-   If GLOBAL is zero, we do not have enough information to do that,
-   so any pseudo reg that is spilled must go to the stack.
+   GLOBAL nonzero means global register allocation has been performed,
+   and we should attempt to reallocate any pseudoregs that we displace
+   from hard regs we will use for reloads.  If GLOBAL is zero, we do
+   not have enough information to do that, so any pseudo reg that is
+   spilled must go to the stack.
 
-   DUMPFILE is the global-reg debugging dump file stream, or 0.
-   If it is nonzero, messages are written to it to describe
-   which registers are seized as reload regs, which pseudo regs
-   are spilled from them, and where the pseudo regs are reallocated to.
+   DUMPFILE is the debugging dump file stream, or 0.  If it is nonzero,
+   messages are written to it to describe which registers are seized
+   as reload regs, which pseudo regs are spilled from them, and where
+   the pseudo regs are reallocated to.
 
    Return value is nonzero if reload failed
    and we must not do any more for this function.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]