This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[dataflow] speed up by 2%, patch 3/4 - CSE and regscan changes
- From: Paolo Bonzini <paolo dot bonzini at lu dot unisi dot ch>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Kenneth Zadeck <zadeck at naturalbridge dot com>
- Date: Mon, 19 Feb 2007 13:21:20 +0100
- Subject: [dataflow] speed up by 2%, patch 3/4 - CSE and regscan changes
This patch removes cuids from CSE and removes two fields that regscan
sets but now nobody reads (regmove makes a laudable attempt to keep them
up to date ;-) and they're gone too with this patch).
CSE is using REGNO_FIRST_UID and REGNO_LAST_UID to find registers that
are set in more than one basic block. Actually, the heuristic is a bit
more complicated, because it involves the lowest and highest CUID in the
extended basic block being analyzed. However, when the CFG is shaped
like this:
A
/ \
B C
we analyze the path A/C and we will accept registers set only in the
A/B/C region. But when we analyze A/B, we will accept registers set
only in the two basic blocks A/B.
In my opinion, a simpler heuristic like the one I implement here (which
is, just do more or less what the comments say) is more than enough.
But anyway, I've also built a powerpc-darwin cross and done an assembly
language comparison on cc1 .i files, and there is absolutely no change.
Bootstrapped/regtested i686-pc-linux-gnu together with the others. Ok?
Paolo
2007-01-18 Paolo Bonzini <bonzini@gnu.org>
* regs.h (struct reg_info_def): Remove first_uid and last_uid.
(REGNO_FIRST_UID, REGNO_LAST_UID): Remove.
* cse.c (cse_basic_block_start, cse_basic_block_end, uid_cuid,
max_uid, INSN_CUID): Remove.
(struct cse_basic_block_data): Remove low_cuid and high_cuid.
(reg_used_in_multiple_bb, reg_used_in_bb): New.
(make_regs_eqv): Test reg_used_in_multiple_bb instead of cuids.
(cse_prescan_path): Remove low_cuid and high_cuid.
(mark_reg_use_bb): New.
(cse_main): Replace computation of cuids with initialization of
reg_used_in_multiple_bb. Remove references to deleted variables.
* regmove.c (copy_src_to_dest): Don't update REGNO_FIRST_UID and
REGNO_LAST_UID.
* regclass.c (reg_scan_mark_refs): Remove penultimate argument.
Don't track REGNO_FIRST_UID and REGNO_LAST_UID.
(reg_scan, reg_scan_update): Remove penultimate argument to
reg_scan_mark_refs.
Index: regs.h
===================================================================
--- regs.h (revision 122099)
+++ regs.h (working copy)
@@ -49,9 +49,6 @@ extern int max_regno;
/* Register information indexed by register number */
typedef struct reg_info_def
{ /* fields set by reg_scan */
- int first_uid; /* UID of first insn to use (REG n) */
- int last_uid; /* UID of last insn to use (REG n) */
-
/* fields set by reg_scan & flow_analysis */
int sets; /* # of times (REG n) is set */
@@ -177,21 +174,6 @@ extern bool have_regs_of_mode [MAX_MACHI
extern enum machine_mode reg_raw_mode[FIRST_PSEUDO_REGISTER];
-/* Vector indexed by regno; gives uid of first insn using that reg.
- This is computed by reg_scan for use by cse and loop.
- It is sometimes adjusted for subsequent changes during loop,
- but not adjusted by cse even if cse invalidates it. */
-
-#define REGNO_FIRST_UID(N) (VEC_index (reg_info_p, reg_n_info, N)->first_uid)
-
-/* Vector indexed by regno; gives uid of last insn using that reg.
- This is computed by reg_scan for use by cse and loop.
- It is sometimes adjusted for subsequent changes during loop,
- but not adjusted by cse even if cse invalidates it.
- This is harmless since cse won't scan through a loop end. */
-
-#define REGNO_LAST_UID(N) (VEC_index (reg_info_p, reg_n_info, N)->last_uid)
-
/* Flag set by local-alloc or global-alloc if they decide to allocate
something in a call-clobbered register. */
Index: cse.c
===================================================================
--- cse.c (revision 122099)
+++ cse.c (working copy)
@@ -348,27 +348,6 @@ static unsigned int cse_reg_info_timesta
static HARD_REG_SET hard_regs_in_table;
-/* CUID of insn that starts the basic block currently being cse-processed. */
-
-static int cse_basic_block_start;
-
-/* CUID of insn that ends the basic block currently being cse-processed. */
-
-static int cse_basic_block_end;
-
-/* Vector mapping INSN_UIDs to cuids.
- The cuids are like uids but increase monotonically always.
- We use them to see whether a reg is used outside a given basic block. */
-
-static int *uid_cuid;
-
-/* Highest UID in UID_CUID. */
-static int max_uid;
-
-/* Get the cuid of an insn. */
-
-#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
-
/* Nonzero if cse has altered conditional jump insns
in such a way that jump optimization should be redone. */
@@ -539,10 +518,6 @@ static int constant_pool_entries_regcost
struct cse_basic_block_data
{
- /* Lowest CUID value of insns in block. */
- int low_cuid;
- /* Highest CUID value of insns in block. */
- int high_cuid;
/* Total number of SETs in block. */
int nsets;
/* Size of current branch path, if any. */
@@ -555,6 +530,13 @@ struct cse_basic_block_data
} *path;
};
+/* A simple bitmap to track which regs have sets in more than one
+ basic block. */
+static sbitmap reg_used_in_multiple_bb;
+
+/* An array used to initialize REG_SET_IN_MULTIPLE_BB. */
+static basic_block *reg_used_in_bb;
+
/* A simple bitmap to track which basic blocks have been visited
already as part of an already processed extended basic block. */
static sbitmap cse_visited_basic_blocks;
@@ -958,11 +940,7 @@ make_regs_eqv (unsigned int new, unsigne
&& ((new < FIRST_PSEUDO_REGISTER && FIXED_REGNO_P (new))
|| (new >= FIRST_PSEUDO_REGISTER
&& (firstr < FIRST_PSEUDO_REGISTER
- || ((uid_cuid[REGNO_LAST_UID (new)] > cse_basic_block_end
- || (uid_cuid[REGNO_FIRST_UID (new)]
- < cse_basic_block_start))
- && (uid_cuid[REGNO_LAST_UID (new)]
- > uid_cuid[REGNO_LAST_UID (firstr)]))))))
+ || TEST_BIT (reg_used_in_multiple_bb, new)))))
{
reg_eqv_table[firstr].prev = new;
reg_eqv_table[new].next = firstr;
@@ -5948,14 +5926,12 @@ have_eh_succ_edges (basic_block bb)
/* Scan to the end of the path described by DATA. Return an estimate of
- the total number of SETs, and the lowest and highest insn CUID, of all
- insns in the path. */
+ the total number of SETs of all insns in the path. */
static void
cse_prescan_path (struct cse_basic_block_data *data)
{
int nsets = 0;
- int low_cuid = -1, high_cuid = -1; /* FIXME low_cuid not computed correctly */
int path_size = data->path_size;
int path_entry;
@@ -5978,21 +5954,9 @@ cse_prescan_path (struct cse_basic_block
nsets += XVECLEN (PATTERN (insn), 0);
else
nsets += 1;
-
- /* Ignore insns made by CSE in a previous traversal of this
- basic block. They cannot affect the boundaries of the
- basic block.
- FIXME: When we only visit each basic block at most once,
- this can go away. */
- if (INSN_UID (insn) <= max_uid && INSN_CUID (insn) > high_cuid)
- high_cuid = INSN_CUID (insn);
- if (INSN_UID (insn) <= max_uid && INSN_CUID (insn) < low_cuid)
- low_cuid = INSN_CUID (insn);
}
}
- data->low_cuid = low_cuid;
- data->high_cuid = high_cuid;
data->nsets = nsets;
}
@@ -6173,6 +6137,31 @@ cse_extended_basic_block (struct cse_bas
free (qty_table);
}
+
+/* Set a bit in REG_USED_IN_MULTIPLE_BB if X points to a REG rtx, and
+ DATA (a basic block) is not the basic block stored in REG_USED_IN_BB
+ for this REG. Anyway, update REG_USED_IN_BB. */
+static int
+mark_reg_use_bb (rtx *x, void *data)
+{
+ basic_block bb = data;
+ rtx p = *x;
+ int regno;
+
+ if (!p)
+ return -1;
+ if (!REG_P (*x))
+ return 0;
+
+ regno = REGNO (*x);
+ if (reg_used_in_bb[regno] != bb)
+ {
+ if (reg_used_in_bb[regno])
+ SET_BIT (reg_used_in_multiple_bb, regno);
+ reg_used_in_bb[regno] = bb;
+ }
+ return -1;
+}
/* Perform cse on the instructions of a function.
F is the first instruction.
@@ -6212,20 +6201,19 @@ cse_main (rtx f ATTRIBUTE_UNUSED, int nr
/* Set up the table of already visited basic blocks. */
cse_visited_basic_blocks = sbitmap_alloc (last_basic_block);
+ reg_used_in_multiple_bb = sbitmap_alloc (nregs);
+ reg_used_in_bb = XCNEWVEC (basic_block, nregs);
+ sbitmap_zero (reg_used_in_multiple_bb);
sbitmap_zero (cse_visited_basic_blocks);
- /* Compute the mapping from uids to cuids.
- CUIDs are numbers assigned to insns, like uids, except that
- that cuids increase monotonically through the code. */
- max_uid = get_max_uid ();
- uid_cuid = XCNEWVEC (int, max_uid + 1);
- i = 0;
FOR_EACH_BB (bb)
{
rtx insn;
FOR_BB_INSNS (bb, insn)
- INSN_CUID (insn) = ++i;
+ if (INSN_P (insn))
+ for_each_rtx (&PATTERN (insn), mark_reg_use_bb, bb);
}
+ free (reg_used_in_bb);
/* Loop over basic blocks in reverse completion order (RPO),
excluding the ENTRY and EXIT blocks. */
@@ -6256,8 +6244,6 @@ cse_main (rtx f ATTRIBUTE_UNUSED, int nr
needed for this path. For this, we take the number of sets
and multiply that by MAX_RECOG_OPERANDS. */
max_qty = ebb_data.nsets * MAX_RECOG_OPERANDS;
- cse_basic_block_start = ebb_data.low_cuid;
- cse_basic_block_end = ebb_data.high_cuid;
/* Dump the path we're about to process. */
if (dump_file)
@@ -6269,7 +6255,6 @@ cse_main (rtx f ATTRIBUTE_UNUSED, int nr
/* Clean up. */
end_alias_analysis ();
- free (uid_cuid);
free (reg_eqv_table);
free (ebb_data.path);
sbitmap_free (cse_visited_basic_blocks);
Index: regmove.c
===================================================================
--- regmove.c (revision 122099)
+++ regmove.c (working copy)
@@ -890,18 +890,9 @@ copy_src_to_dest (rtx insn, rtx src, rtx
dest_regno = REGNO (dest);
REG_N_SETS (dest_regno) ++;
REG_LIVE_LENGTH (dest_regno)++;
- if (REGNO_FIRST_UID (dest_regno) == insn_uid)
- REGNO_FIRST_UID (dest_regno) = move_uid;
-
src_regno = REGNO (src);
if (! find_reg_note (move_insn, REG_DEAD, src))
REG_LIVE_LENGTH (src_regno)++;
-
- if (REGNO_FIRST_UID (src_regno) == insn_uid)
- REGNO_FIRST_UID (src_regno) = move_uid;
-
- if (REGNO_LAST_UID (src_regno) == insn_uid)
- REGNO_LAST_UID (src_regno) = move_uid;
}
}
Index: regclass.c
===================================================================
--- regclass.c (revision 122099)
+++ regclass.c (working copy)
@@ -885,7 +885,7 @@ static void record_address_regs (enum ma
#ifdef FORBIDDEN_INC_DEC_CLASSES
static int auto_inc_dec_reg_p (rtx, enum machine_mode);
#endif
-static void reg_scan_mark_refs (rtx, rtx, int, unsigned int);
+static void reg_scan_mark_refs (rtx, rtx, unsigned int);
/* Wrapper around REGNO_OK_FOR_INDEX_P, to allow pseudo registers. */
@@ -2362,7 +2362,6 @@ clear_reg_info_regno (unsigned int regno
and again just before loop.
It finds the first and last use of each pseudo-register
- and records them in the vectors regno_first_uid, regno_last_uid
and counts the number of sets in the vector reg_n_sets.
REPEAT is nonzero the second time this is called. */
@@ -2398,10 +2397,10 @@ reg_scan (rtx f, unsigned int nregs)
if (GET_CODE (pat) == PARALLEL
&& XVECLEN (pat, 0) > max_parallel)
max_parallel = XVECLEN (pat, 0);
- reg_scan_mark_refs (pat, insn, 0, 0);
+ reg_scan_mark_refs (pat, insn, 0);
if (REG_NOTES (insn))
- reg_scan_mark_refs (REG_NOTES (insn), insn, 1, 0);
+ reg_scan_mark_refs (REG_NOTES (insn), insn, 0);
}
max_parallel += max_set_parallel;
@@ -2428,10 +2427,10 @@ reg_scan_update (rtx first, rtx last, un
if (GET_CODE (pat) == PARALLEL
&& XVECLEN (pat, 0) > max_parallel)
max_parallel = XVECLEN (pat, 0);
- reg_scan_mark_refs (pat, insn, 0, old_max_regno);
+ reg_scan_mark_refs (pat, insn, old_max_regno);
if (REG_NOTES (insn))
- reg_scan_mark_refs (REG_NOTES (insn), insn, 1, old_max_regno);
+ reg_scan_mark_refs (REG_NOTES (insn), insn, old_max_regno);
}
}
@@ -2441,7 +2440,7 @@ reg_scan_update (rtx first, rtx last, un
greater than or equal to MIN_REGNO. */
static void
-reg_scan_mark_refs (rtx x, rtx insn, int note_flag, unsigned int min_regno)
+reg_scan_mark_refs (rtx x, rtx insn, unsigned int min_regno)
{
enum rtx_code code;
rtx dest;
@@ -2470,10 +2469,6 @@ reg_scan_mark_refs (rtx x, rtx insn, int
if (regno >= min_regno)
{
- if (!note_flag)
- REGNO_LAST_UID (regno) = INSN_UID (insn);
- if (REGNO_FIRST_UID (regno) == 0)
- REGNO_FIRST_UID (regno) = INSN_UID (insn);
/* If we are called by reg_scan_update() (indicated by min_regno
being set), we also need to update the reference count. */
if (min_regno)
@@ -2484,14 +2479,14 @@ reg_scan_mark_refs (rtx x, rtx insn, int
case EXPR_LIST:
if (XEXP (x, 0))
- reg_scan_mark_refs (XEXP (x, 0), insn, note_flag, min_regno);
+ reg_scan_mark_refs (XEXP (x, 0), insn, min_regno);
if (XEXP (x, 1))
- reg_scan_mark_refs (XEXP (x, 1), insn, note_flag, min_regno);
+ reg_scan_mark_refs (XEXP (x, 1), insn, min_regno);
break;
case INSN_LIST:
if (XEXP (x, 1))
- reg_scan_mark_refs (XEXP (x, 1), insn, note_flag, min_regno);
+ reg_scan_mark_refs (XEXP (x, 1), insn, min_regno);
break;
case CLOBBER:
@@ -2504,7 +2499,7 @@ reg_scan_mark_refs (rtx x, rtx insn, int
REG_N_REFS (REGNO (reg))++;
}
else if (MEM_P (reg))
- reg_scan_mark_refs (XEXP (reg, 0), insn, note_flag, min_regno);
+ reg_scan_mark_refs (XEXP (reg, 0), insn, min_regno);
}
break;
@@ -2603,12 +2598,12 @@ reg_scan_mark_refs (rtx x, rtx insn, int
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
{
if (fmt[i] == 'e')
- reg_scan_mark_refs (XEXP (x, i), insn, note_flag, min_regno);
+ reg_scan_mark_refs (XEXP (x, i), insn, min_regno);
else if (fmt[i] == 'E' && XVEC (x, i) != 0)
{
int j;
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
- reg_scan_mark_refs (XVECEXP (x, i, j), insn, note_flag, min_regno);
+ reg_scan_mark_refs (XVECEXP (x, i, j), insn, min_regno);
}
}
}