This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Some performance improvements.
- To: gcc-patches at gcc dot gnu dot org
- Subject: Some performance improvements.
- From: Michael Matz <matzmich at cs dot tu-berlin dot de>
- Date: Thu, 30 Nov 2000 18:47:39 +0100 (MET)
Hi,
the attachment contains patches to three files (flow.c, local-alloc.c and
simplify-rtx.c), together with an old patch for cp/decl.c (see also
http://gcc.gnu.org/ml/gcc-patches/2000-10/msg00628.html) whic all help
performance, sometimes a lot.
The changes to flow.c are more of cosmetical nature, besides the change to
make_edge, which unnecessarily searches the edge list, even if there is a
cache, and the are no flags to set. This only is visible with huge CFG's.
The changes to simplify-rtx.c:clear_table() are more worthy. With big
functions using many registers, clear_table() is one of the top time
users. Taken from a compile (-O3 khtml_part.ii) without the patch:
2.72 25.80 3.87 21718 0.18 0.18 clear_table
compared with the patch:
0.01 134.87 0.02 21718 0.00 0.00 clear_table
(It's similar with other big files)
The changes to local_alloc.c:update_equiv_regs() save even more time by
making the O(|insns| * |Basic Blocks|) clearing of dead regs from live
info a O(|insns| + |Basic Blocks|) variant. Some timings:
without the patch (-O0, Brad's file "_std.i"):
real 0m46.077s
user 0m44.220s
sys 0m1.040s
local alloc : 12.91 (29%) usr 0.03 ( 3%) sys 13.24 (29%) wall
16.08 5.04 5.04 18422665 0.00 0.00 bitmap_find_bit
10.86 8.44 3.40 4 850.00 1559.88 calculate_global_regs_live
10.19 11.62 3.19 3 1063.33 2877.05 update_equiv_regs
[9] 27.6 3.19 5.44 3 update_equiv_regs [9]
while with the patch (still -O0):
real 0m33.947s
user 0m32.700s
sys 0m0.990s
local alloc : 1.65 ( 5%) usr 0.01 ( 1%) sys 1.66 (5%) wall
13.17 2.87 2.87 4 717.50 1498.14 calculate_global_regs_live
11.29 5.33 2.46 7290601 0.00 0.00 bitmap_operation
10.10 7.53 2.20 10 220.00 220.00 mark_critical_edges
...
0.18 19.03 0.04 3 13.33 58.69 update_equiv_regs
[67] 0.8 0.04 0.14 3 update_equiv_regs [67]
13 seconds of 46 saved, not bad (well, it's a profiled build, but the
relative winning should be the same).
This saving in local-alloc doesn't change with optimization, and happens
on all bigger files (not only BBB's - Brad's Big Bastard-functions ;-))
The C++ patch is attached again, because I received no messages regarding
it, despite it's fairly large impact on runtime. See the mail above, when
this patch is safe, and for timings. One of the larger C++ file I used for
the timings above (and the C++ part) is in
http://www.ifh.de/~matz/khtml_part.ii.gz .
All of the patches (alone and together) survive a i386-linux bootstrap,
and give no additional regressions on the test suite.
Ciao,
Michael, hoping to hear this time from someone ;)
2000-30-11 Michael Matz <matzmich@cs.tu-berlin.de>
* flow.c (make_edge): Early out, if no flags to set.
(calculate_global_regs_live): Clear out garbage only when necessary.
* simplify-rtx.c (varray_type used_regs): New.
(clear_table): Use it to only clear necessary items.
(cselib_lookup, cselib_record_set): Remember newly set items.
(cselib_update_varray_sizes, cselib_init): Initialize and grow
used_regs.
* local-alloc.c (update_equiv_regs): New local `cleared_regs'.
Move clearing of dead regs out of insn-loop.
2000-10-18 Michael Matz <matzmich@cs.tu-berlin.de>
* decl.c (store_bindings): Only search in the non modified
old_bindings for duplicates.
Index: flow.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/flow.c,v
retrieving revision 1.354
diff -u -r1.354 flow.c
--- flow.c 2000/11/30 06:31:17 1.354
+++ flow.c 2000/11/30 15:45:53
@@ -1261,6 +1261,12 @@
&& src != ENTRY_BLOCK_PTR
&& dst != EXIT_BLOCK_PTR);
+ /* Make sure we don't add duplicate edges and early out, if we don't need to
+ update the flags. */
+ if (flags == 0 && use_edge_cache
+ && TEST_BIT (edge_cache[src->index], dst->index))
+ return;
+
/* Make sure we don't add duplicate edges. */
if (! use_edge_cache || TEST_BIT (edge_cache[src->index], dst->index))
for (e = src->succ; e; e = e->succ_next)
@@ -3306,15 +3312,15 @@
qtail = queue;
qhead = qend = queue + n_basic_blocks + 2;
- /* Clear out the garbage that might be hanging out in bb->aux. */
- for (i = n_basic_blocks - 1; i >= 0; --i)
- BASIC_BLOCK (i)->aux = NULL;
-
/* Queue the blocks set in the initial mask. Do this in reverse block
number order so that we are more likely for the first round to do
useful work. We use AUX non-null to flag that the block is queued. */
if (blocks_in)
{
+ /* Clear out the garbage that might be hanging out in bb->aux. */
+ for (i = n_basic_blocks - 1; i >= 0; --i)
+ BASIC_BLOCK (i)->aux = NULL;
+
EXECUTE_IF_SET_IN_SBITMAP (blocks_in, 0, i,
{
basic_block bb = BASIC_BLOCK (i);
@@ -3346,7 +3352,7 @@
qhead = queue;
bb->aux = NULL;
- /* Begin by propogating live_at_start from the successor blocks. */
+ /* Begin by propagating live_at_start from the successor blocks. */
CLEAR_REG_SET (new_live_at_end);
for (e = bb->succ; e; e = e->succ_next)
{
Index: simplify-rtx.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/simplify-rtx.c,v
retrieving revision 1.33
diff -u -r1.33 simplify-rtx.c
--- simplify-rtx.c 2000/11/28 16:19:55 1.33
+++ simplify-rtx.c 2000/11/30 15:46:17
@@ -111,7 +111,7 @@
static void unchain_one_value PARAMS ((cselib_val *));
static void unchain_one_elt_list PARAMS ((struct elt_list **));
static void unchain_one_elt_loc_list PARAMS ((struct elt_loc_list **));
-static void clear_table PARAMS ((void));
+static void clear_table PARAMS ((int));
static int discard_useless_locs PARAMS ((void **, void *));
static int discard_useless_values PARAMS ((void **, void *));
static void remove_useless_values PARAMS ((void));
@@ -168,6 +168,10 @@
static varray_type reg_values;
#define REG_VALUES(I) VARRAY_ELT_LIST (reg_values, (I))
+/* Here the set of indices I with REG_VALUES(I) != 0 is saved. This is used
+ in clear_table() for fast emptying. */
+static varray_type used_regs;
+
/* We pass this to cselib_invalidate_mem to invalidate all of
memory for a non-const call instruction. */
static rtx callmem;
@@ -2200,15 +2204,23 @@
}
/* Remove all entries from the hash table. Also used during
- initialization. */
+ initialization. If CLEAR_ALL isn't set, then only clear the entries
+ which are known to have been used. */
static void
-clear_table ()
+clear_table (clear_all)
+ int clear_all;
{
unsigned int i;
+
+ if (clear_all)
+ for (i = 0; i < cselib_nregs; i++)
+ REG_VALUES (i) = 0;
+ else
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (used_regs); i++)
+ REG_VALUES (VARRAY_UINT (used_regs, i)) = 0;
- for (i = 0; i < cselib_nregs; i++)
- REG_VALUES (i) = 0;
+ VARRAY_POP_ALL (used_regs);
htab_empty (hash_table);
obstack_free (&cselib_obstack, cselib_startobj);
@@ -2867,6 +2879,8 @@
e = new_cselib_val (++next_unknown_value, GET_MODE (x));
e->locs = new_elt_loc_list (e->locs, x);
+ if (REG_VALUES (i) == 0)
+ VARRAY_PUSH_UINT (used_regs, i);
REG_VALUES (i) = new_elt_list (REG_VALUES (i), e);
slot = htab_find_slot_with_hash (hash_table, x, e->value, INSERT);
*slot = e;
@@ -3133,6 +3147,9 @@
if (dreg >= 0)
{
+ if (REG_VALUES (dreg) == 0)
+ VARRAY_PUSH_UINT (used_regs, dreg);
+
REG_VALUES (dreg) = new_elt_list (REG_VALUES (dreg), src_elt);
if (src_elt->locs == 0)
n_useless_values--;
@@ -3249,7 +3266,7 @@
&& GET_CODE (PATTERN (insn)) == ASM_OPERANDS
&& MEM_VOLATILE_P (PATTERN (insn))))
{
- clear_table ();
+ clear_table (0);
return;
}
@@ -3309,6 +3326,7 @@
cselib_nregs = nregs;
VARRAY_GROW (reg_values, nregs);
+ VARRAY_GROW (used_regs, nregs);
}
/* Initialize cselib for one pass. The caller must also call
@@ -3329,8 +3347,9 @@
cselib_nregs = max_reg_num ();
VARRAY_ELT_LIST_INIT (reg_values, cselib_nregs, "reg_values");
+ VARRAY_UINT_INIT (used_regs, cselib_nregs, "used_regs");
hash_table = htab_create (31, get_value_hash, entry_and_rtx_equal_p, NULL);
- clear_table ();
+ clear_table (1);
}
/* Called when the current user is done with cselib. */
@@ -3338,6 +3357,6 @@
void
cselib_finish ()
{
- clear_table ();
+ clear_table (0);
htab_delete (hash_table);
}
Index: local-alloc.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/local-alloc.c,v
retrieving revision 1.74
diff -u -r1.74 local-alloc.c
--- local-alloc.c 2000/11/13 21:22:10 1.74
+++ local-alloc.c 2000/11/30 15:46:31
@@ -804,8 +804,11 @@
rtx insn;
int block;
int loop_depth;
+ regset_head cleared_regs;
+ int clear_regnos = 0;
reg_equiv = (struct equivalence *) xcalloc (max_regno, sizeof *reg_equiv);
+ INIT_REG_SET (&cleared_regs);
init_alias_analysis ();
@@ -1135,7 +1138,6 @@
INSN. Update the flow information. */
else if (PREV_INSN (insn) != equiv_insn)
{
- int l;
rtx new_insn;
new_insn = emit_insn_before (copy_rtx (PATTERN (equiv_insn)),
@@ -1156,22 +1158,43 @@
if (block >= 0 && insn == BLOCK_HEAD (block))
BLOCK_HEAD (block) = PREV_INSN (insn);
- for (l = 0; l < n_basic_blocks; l++)
- {
- CLEAR_REGNO_REG_SET (
- BASIC_BLOCK (l)->global_live_at_start,
- regno);
- CLEAR_REGNO_REG_SET (
- BASIC_BLOCK (l)->global_live_at_end,
- regno);
- }
+ /* Remember to clear REGNO from all basic block's live
+ info. */
+ SET_REGNO_REG_SET (&cleared_regs, regno);
+ clear_regnos++;
}
}
}
}
+ /* Clear all dead REGNOs from all basic block's live info. */
+ if (clear_regnos)
+ {
+ int j, l;
+ if (clear_regnos > 8)
+ {
+ for (l = 0; l < n_basic_blocks; l++)
+ {
+ AND_COMPL_REG_SET (BASIC_BLOCK (l)->global_live_at_start,
+ &cleared_regs);
+ AND_COMPL_REG_SET (BASIC_BLOCK (l)->global_live_at_end,
+ &cleared_regs);
+ }
+ }
+ else
+ EXECUTE_IF_SET_IN_REG_SET (&cleared_regs, 0, j,
+ {
+ for (l = 0; l < n_basic_blocks; l++)
+ {
+ CLEAR_REGNO_REG_SET (BASIC_BLOCK (l)->global_live_at_start, j);
+ CLEAR_REGNO_REG_SET (BASIC_BLOCK (l)->global_live_at_end, j);
+ }
+ });
+ }
+
/* Clean up. */
end_alias_analysis ();
+ CLEAR_REG_SET (&cleared_regs);
free (reg_equiv);
}
Index: cp/decl.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cp/decl.c,v
retrieving revision 1.719
diff -u -r1.719 decl.c
--- decl.c 2000/11/28 10:18:22 1.719
+++ decl.c 2000/11/30 15:48:46
@@ -2445,6 +2445,8 @@
tree names, old_bindings;
{
tree t;
+ tree search_bindings = old_bindings;
+
for (t = names; t; t = TREE_CHAIN (t))
{
tree binding, t1, id;
@@ -2461,7 +2463,7 @@
|| !(IDENTIFIER_BINDING (id) || IDENTIFIER_CLASS_VALUE (id)))
continue;
- for (t1 = old_bindings; t1; t1 = TREE_CHAIN (t1))
+ for (t1 = search_bindings; t1; t1 = TREE_CHAIN (t1))
if (TREE_VEC_ELT (t1, 0) == id)
goto skip_it;