This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
split global and reload
- To: gcc-patches at gcc dot gnu dot org
- Subject: split global and reload
- From: Zack Weinberg <zack at wolery dot cumb dot org>
- Date: Sun, 9 Apr 2000 17:09:40 -0700
This patch separates global register allocation from reload. That
means they are reported separately in the -Q timing summary, and get
separate RTL dump files. No code generation should change; it simply
makes it easier to determine which of the two is misbehaving.
The .greg dump is still selected with -dg. The new .reload dump is
selected with -dh; that letter is not mnemonic at all, but the good
letters are already taken.
Reload uses some of global's data structures, so it was necessary to
break global_alloc into three functions: setup (global_alloc_init),
allocation (global_alloc), and cleanup (global_alloc_cleanup).
Cleanup isn't called until after reload finishes.
zw
* toplev.c: Always call reload from rest_of_compilation.
Account time spent in reload separately. Give reload its own
dump file.
* global.c: Break setup and teardown code out of global_alloc
into global_alloc_init and global_alloc_cleanup respectively.
Update commentary. Don't call reload from global_alloc.
* rtl.h: Prototype global_alloc_init and global_alloc_cleanup.
* reload1.c: Update commentary.
===================================================================
Index: toplev.c
--- toplev.c 2000/04/07 09:23:29 1.316
+++ toplev.c 2000/04/10 00:00:54
@@ -266,6 +266,7 @@ enum dump_file_index
DFI_sched,
DFI_lreg,
DFI_greg,
+ DFI_reload,
DFI_flow2,
DFI_peephole2,
DFI_sched2,
@@ -299,6 +300,7 @@ struct dump_file_info dump_file[DFI_MAX]
{ "sched", 'S', 1, 0, 0 },
{ "lreg", 'l', 1, 0, 0 },
{ "greg", 'g', 1, 0, 0 },
+ { "reload", 'h', 1, 0, 0 },
{ "flow2", 'w', 1, 0, 0 },
{ "peephole2", 'z', 1, 0, 0 },
{ "sched2", 'R', 1, 0, 0 },
@@ -1418,6 +1420,7 @@ int regmove_time;
int sched_time;
int local_alloc_time;
int global_alloc_time;
+int reload_time;
int flow2_time;
int peephole2_time;
int sched2_time;
@@ -2170,6 +2173,7 @@ compile_file (name)
sched_time = 0;
local_alloc_time = 0;
global_alloc_time = 0;
+ reload_time = 0;
flow2_time = 0;
peephole2_time = 0;
sched2_time = 0;
@@ -2577,6 +2581,7 @@ compile_file (name)
#endif
print_time ("local-alloc", local_alloc_time);
print_time ("global-alloc", global_alloc_time);
+ print_time ("reload", reload_time);
print_time ("flow2", flow2_time);
#ifdef HAVE_peephole2
print_time ("peephole2", peephole2_time);
@@ -3355,20 +3360,33 @@ rest_of_compilation (decl)
/* If optimizing, allocate remaining pseudo-regs. Do the reload
pass fixing up any insns that are invalid. */
- TIMEVAR (global_alloc_time,
+ if (optimize)
+ TIMEVAR (global_alloc_time,
+ {
+ global_alloc_init ();
+ global_alloc (rtl_dump_file);
+ });
+
+ if (dump_file[DFI_greg].enabled)
+ {
+ TIMEVAR (dump_time, dump_global_regs (rtl_dump_file));
+ close_dump_file (DFI_greg, print_rtl_with_bb, insns);
+ }
+
+ open_dump_file (DFI_reload, decl);
+
+ TIMEVAR (reload_time,
{
- if (optimize)
- failure = global_alloc (rtl_dump_file);
- else
- {
- build_insn_chain (insns);
- failure = reload (insns, 0, rtl_dump_file);
- }
+ build_insn_chain (insns);
+ failure = reload (insns, optimize, rtl_dump_file);
});
if (failure)
goto exit_rest_of_compilation;
+ if (optimize)
+ TIMEVAR (global_alloc_time, global_alloc_cleanup ());
+
if (ggc_p)
ggc_collect ();
@@ -3387,10 +3405,10 @@ rest_of_compilation (decl)
if (rebuild_label_notes_after_reload)
TIMEVAR (jump_time, rebuild_jump_labels (insns));
- if (dump_file[DFI_greg].enabled)
+ if (dump_file[DFI_reload].enabled)
{
TIMEVAR (dump_time, dump_global_regs (rtl_dump_file));
- close_dump_file (DFI_greg, print_rtl_with_bb, insns);
+ close_dump_file (DFI_reload, print_rtl_with_bb, insns);
}
/* Re-create the death notes which were deleted during reload. */
===================================================================
Index: global.c
--- global.c 2000/03/25 18:34:02 1.59
+++ global.c 2000/04/10 00:00:55
@@ -43,12 +43,16 @@ Boston, MA 02111-1307, USA. */
(Such changes are made by final). The entry point is
the function global_alloc.
- After allocation is complete, the reload pass is run as a subroutine
- of this pass, so that when a pseudo reg loses its hard reg due to
- spilling it is possible to make a second attempt to find a hard
- reg for it. The reload pass is independent in other respects
- and it is run even when stupid register allocation is in use.
+ There is substantial setup which has to be done first, and the
+ entry point for that is global_alloc_init. This allocates data
+ used both by global_alloc, and later by the reload pass. When this
+ data is available, reload can attempt to find another hard reg for
+ a pseudo that it had to spill. Reload is otherwise independent and
+ is run even when not optimizing. After reload has finished,
+ global_alloc_cleanup deallocates the shared data.
+ This pass obeys the following algorithm:
+
1. Assign allocation-numbers (allocnos) to the pseudo-registers
still needing allocations and to the pseudo-registers currently
allocated by local-alloc which may be spilled by reload.
@@ -307,18 +311,9 @@ static void reg_becomes_live PARAMS ((rt
static void reg_dies PARAMS ((int, enum machine_mode,
struct insn_chain *));
-/* Perform allocation of pseudo-registers not allocated by local_alloc.
- FILE is a file to output debugging information on,
- or zero if such output is not desired.
-
- Return value is nonzero if reload failed
- and we must not do any more for this function. */
-
-int
-global_alloc (file)
- FILE *file;
+void
+global_alloc_init (void)
{
- int retval;
#ifdef ELIMINABLE_REGS
static struct {int from, to; } eliminables[] = ELIMINABLE_REGS;
#endif
@@ -484,111 +479,108 @@ global_alloc (file)
sizeof (INT_TYPE));
allocnos_live = (INT_TYPE *) xmalloc (allocno_row_words * sizeof (INT_TYPE));
+}
- /* If there is work to be done (at least one reg to allocate),
- perform global conflict analysis and allocate the regs. */
+/* Perform allocation of pseudo-registers not allocated by local_alloc.
+ FILE is a file to output debugging information on,
+ or zero if such output is not desired. */
+void
+global_alloc (file)
+ FILE *file;
+{
+ size_t i;
- if (max_allocno > 0)
+ /* Bail early if there is no work to be done (no regs to allocate). */
+ if (max_allocno == 0)
+ return;
+
+ /* Scan all the insns and compute the conflicts among allocnos
+ and between allocnos and hard regs. */
+
+ global_conflicts ();
+ mirror_conflicts ();
+
+ /* Eliminate conflicts between pseudos and eliminable registers. If
+ the register is not eliminated, the pseudo won't really be able to
+ live in the eliminable register, so the conflict doesn't matter.
+ If we do eliminate the register, the conflict will no longer exist.
+ So in either case, we can ignore the conflict. Likewise for
+ preferences. */
+
+ for (i = 0; i < (size_t) max_allocno; i++)
{
- /* Scan all the insns and compute the conflicts among allocnos
- and between allocnos and hard regs. */
+ AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_conflicts,
+ eliminable_regset);
+ AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_copy_preferences,
+ eliminable_regset);
+ AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_preferences,
+ eliminable_regset);
+ }
+
+ /* Try to expand the preferences by merging them between allocnos. */
- global_conflicts ();
+ expand_preferences ();
- mirror_conflicts ();
+ /* Determine the order to allocate the remaining pseudo registers. */
- /* Eliminate conflicts between pseudos and eliminable registers. If
- the register is not eliminated, the pseudo won't really be able to
- live in the eliminable register, so the conflict doesn't matter.
- If we do eliminate the register, the conflict will no longer exist.
- So in either case, we can ignore the conflict. Likewise for
- preferences. */
-
- for (i = 0; i < (size_t) max_allocno; i++)
- {
- AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_conflicts,
- eliminable_regset);
- AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_copy_preferences,
- eliminable_regset);
- AND_COMPL_HARD_REG_SET (allocno[i].hard_reg_preferences,
- eliminable_regset);
- }
-
- /* Try to expand the preferences by merging them between allocnos. */
-
- expand_preferences ();
-
- /* Determine the order to allocate the remaining pseudo registers. */
-
- allocno_order = (int *) xmalloc (max_allocno * sizeof (int));
- for (i = 0; i < (size_t) max_allocno; i++)
- allocno_order[i] = i;
-
- /* Default the size to 1, since allocno_compare uses it to divide by.
- Also convert allocno_live_length of zero to -1. A length of zero
- can occur when all the registers for that allocno have reg_live_length
- equal to -2. In this case, we want to make an allocno, but not
- allocate it. So avoid the divide-by-zero and set it to a low
- priority. */
-
- for (i = 0; i < (size_t) max_allocno; i++)
- {
- if (allocno[i].size == 0)
- allocno[i].size = 1;
- if (allocno[i].live_length == 0)
- allocno[i].live_length = -1;
- }
+ allocno_order = (int *) xmalloc (max_allocno * sizeof (int));
+ for (i = 0; i < (size_t) max_allocno; i++)
+ allocno_order[i] = i;
- qsort (allocno_order, max_allocno, sizeof (int), allocno_compare);
+ /* Default the size to 1, since allocno_compare uses it to divide by.
+ Also convert allocno_live_length of zero to -1. A length of zero
+ can occur when all the registers for that allocno have reg_live_length
+ equal to -2. In this case, we want to make an allocno, but not
+ allocate it. So avoid the divide-by-zero and set it to a low
+ priority. */
+
+ for (i = 0; i < (size_t) max_allocno; i++)
+ {
+ if (allocno[i].size == 0)
+ allocno[i].size = 1;
+ if (allocno[i].live_length == 0)
+ allocno[i].live_length = -1;
+ }
+
+ qsort (allocno_order, max_allocno, sizeof (int), allocno_compare);
- prune_preferences ();
+ prune_preferences ();
- if (file)
- dump_conflicts (file);
+ if (file)
+ dump_conflicts (file);
- /* Try allocating them, one by one, in that order,
- except for parameters marked with reg_live_length[regno] == -2. */
+ /* Try allocating them, one by one, in that order,
+ except for parameters marked with reg_live_length[regno] == -2. */
- for (i = 0; i < (size_t) max_allocno; i++)
- if (reg_renumber[allocno[allocno_order[i]].reg] < 0
- && REG_LIVE_LENGTH (allocno[allocno_order[i]].reg) >= 0)
+ for (i = 0; i < (size_t) max_allocno; i++)
+ if (reg_renumber[allocno[allocno_order[i]].reg] < 0
+ && REG_LIVE_LENGTH (allocno[allocno_order[i]].reg) >= 0)
+ {
+ /* If we have more than one register class,
+ first try allocating in the class that is cheapest
+ for this pseudo-reg. If that fails, try any reg. */
+ if (N_REG_CLASSES > 1)
{
- /* If we have more than one register class,
- first try allocating in the class that is cheapest
- for this pseudo-reg. If that fails, try any reg. */
- if (N_REG_CLASSES > 1)
- {
- find_reg (allocno_order[i], 0, 0, 0, 0);
- if (reg_renumber[allocno[allocno_order[i]].reg] >= 0)
- continue;
- }
- if (reg_alternate_class (allocno[allocno_order[i]].reg) != NO_REGS)
- find_reg (allocno_order[i], 0, 1, 0, 0);
+ find_reg (allocno_order[i], 0, 0, 0, 0);
+ if (reg_renumber[allocno[allocno_order[i]].reg] >= 0)
+ continue;
}
-
- free (allocno_order);
- }
+ if (reg_alternate_class (allocno[allocno_order[i]].reg) != NO_REGS)
+ find_reg (allocno_order[i], 0, 1, 0, 0);
+ }
- /* Do the reloads now while the allocno data still exist, so that we can
- try to assign new hard regs to any pseudo regs that are spilled. */
+ free (allocno_order);
+}
-#if 0 /* We need to eliminate regs even if there is no rtl code,
- for the sake of debugging information. */
- if (n_basic_blocks > 0)
-#endif
- {
- build_insn_chain (get_insns ());
- retval = reload (get_insns (), 1, file);
- }
-
- /* Clean up. */
+/* Clean up our data structures, now that reload has completed. */
+void
+global_alloc_cleanup (void)
+{
free (reg_allocno);
free (reg_may_share);
free (allocno);
free (conflicts);
free (allocnos_live);
-
- return retval;
}
/* Sort predicate for ordering the allocnos.
===================================================================
Index: rtl.h
--- rtl.h 2000/04/09 01:16:44 1.188
+++ rtl.h 2000/04/10 00:00:55
@@ -1635,8 +1635,10 @@ extern int gcse_main PARAMS ((rtx, FIL
/* In global.c */
extern void mark_elimination PARAMS ((int, int));
+extern void global_alloc_init PARAMS ((void));
+extern void global_alloc_cleanup PARAMS ((void));
#ifdef BUFSIZ
-extern int global_alloc PARAMS ((FILE *));
+extern void global_alloc PARAMS ((FILE *));
extern void dump_global_regs PARAMS ((FILE *));
#endif
#ifdef HARD_CONST
===================================================================
Index: reload1.c
--- reload1.c 2000/04/04 02:24:49 1.206
+++ reload1.c 2000/04/10 00:00:57
@@ -587,16 +587,16 @@ static int failure;
FIRST is the first insn of the function being compiled.
- GLOBAL nonzero means we were called from global_alloc
- and should attempt to reallocate any pseudoregs that we
- displace from hard regs we will use for reloads.
- If GLOBAL is zero, we do not have enough information to do that,
- so any pseudo reg that is spilled must go to the stack.
+ GLOBAL nonzero means global register allocation has been performed,
+ and we should attempt to reallocate any pseudoregs that we displace
+ from hard regs we will use for reloads. If GLOBAL is zero, we do
+ not have enough information to do that, so any pseudo reg that is
+ spilled must go to the stack.
- DUMPFILE is the global-reg debugging dump file stream, or 0.
- If it is nonzero, messages are written to it to describe
- which registers are seized as reload regs, which pseudo regs
- are spilled from them, and where the pseudo regs are reallocated to.
+ DUMPFILE is the debugging dump file stream, or 0. If it is nonzero,
+ messages are written to it to describe which registers are seized
+ as reload regs, which pseudo regs are spilled from them, and where
+ the pseudo regs are reallocated to.
Return value is nonzero if reload failed
and we must not do any more for this function. */