This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
squelch dead store elimination
- To: gcc-patches at gcc dot gnu dot org
- Subject: squelch dead store elimination
- From: Richard Henderson <rth at redhat dot com>
- Date: Tue, 16 Jan 2001 06:10:26 -0800
Our current dead store elimination code builds a list of
all non-aliasing sets, and compares each new set in turn
to see if it is killed by a later store.
Consider a function containing one block with 4K provably
non-aliasing stores. Such is the subject of optimization/286.
One might guess that we could build up quite a long list,
and that it might take a bit o time to iterate through it
N times. One might not immediately guess that we'd also
eat up fantastic amounts of memory doing so, but it is true.
The following patch caps the length of mem_set_list to an
arbitrary 100 elements. Any additional non-aliasing memory
sets that we encounter are dropped on the floor.
With the test case from the PR, this improves thinks like so:
BEFORE
i686
MEM = 527M
flow analysis : 122.94 (49%) usr 6.19 (73%) sys 144.54 (42%) wall
flow 2 : 71.03 (29%) usr 0.30 ( 4%) sys 71.44 (21%) wall
TOTAL : 248.79 8.46 343.71
alpha
MEM = 19M
flow analysis : 58.29 (37%) usr 0.00 ( 3%) sys 58.31 (37%) wall
flow 2 : 57.28 (37%) usr 0.00 ( 4%) sys 57.30 (37%) wall
TOTAL : 156.17 0.21 156.40
AFTER
i686
MEM = 32M
flow analysis : 5.48 ( 9%) usr 0.21 (57%) sys 5.69 (10%) wall
flow 2 : 3.29 ( 6%) usr 0.00 ( 0%) sys 3.29 ( 6%) wall
TOTAL : 58.54 0.37 58.91
alpha
MEM = 19M
flow analysis : 2.46 ( 5%) usr 0.00 ( 0%) sys 2.46 ( 5%) wall
flow 2 : 2.44 ( 5%) usr 0.00 ( 0%) sys 2.44 ( 5%) wall
TOTAL : 45.01 0.18 45.21
I'm not sure why Alpha wasn't using the same amounts of memory
as i686. I'm guessing it is related to aliasing information,
in that Alpha wasn't managing to collect it somehow.
r~
* flow.c (struct propagate_block_info): Add mem_set_list_len.
(MAX_MEM_SET_LIST_LEN): New.
(propagate_one_insn): Update mem_set_list_len.
(invalidate_mems_from_autoinc): Likewise.
(invalidate_mems_from_set): Likewise.
(mark_used_regs): Likewise.
(init_propagate_block_info): Likewise. Stop collecting memories
when we reach MAX_MEM_SET_LIST_LEN.
(mark_set_1): Likewise.
Index: flow.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/flow.c,v
retrieving revision 1.368
diff -c -p -d -r1.368 flow.c
*** flow.c 2001/01/11 17:02:44 1.368
--- flow.c 2001/01/16 13:33:49
*************** struct propagate_block_info
*** 316,321 ****
--- 316,324 ----
regset reg_cond_reg;
#endif
+ /* The length of mem_set_list. */
+ int mem_set_list_len;
+
/* Non-zero if the value of CC0 is live. */
int cc0_live;
*************** struct propagate_block_info
*** 323,328 ****
--- 326,335 ----
int flags;
};
+ /* Maximum length of pbi->mem_set_list before we start dropping
+ new elements on the floor. */
+ #define MAX_MEM_SET_LIST_LEN 100
+
/* Store the data structures necessary for depth-first search. */
struct depth_first_search_dsS {
/* stack for backtracking during the algorithm */
*************** propagate_one_insn (pbi, insn)
*** 3877,3883 ****
/* Non-constant calls clobber memory. */
if (! CONST_CALL_P (insn))
! free_EXPR_LIST_list (&pbi->mem_set_list);
/* There may be extra registers to be clobbered. */
for (note = CALL_INSN_FUNCTION_USAGE (insn);
--- 3884,3893 ----
/* Non-constant calls clobber memory. */
if (! CONST_CALL_P (insn))
! {
! free_EXPR_LIST_list (&pbi->mem_set_list);
! pbi->mem_set_list_len = 0;
! }
/* There may be extra registers to be clobbered. */
for (note = CALL_INSN_FUNCTION_USAGE (insn);
*************** init_propagate_block_info (bb, live, loc
*** 3967,3972 ****
--- 3977,3983 ----
pbi->bb = bb;
pbi->reg_live = live;
pbi->mem_set_list = NULL_RTX;
+ pbi->mem_set_list_len = 0;
pbi->local_set = local_set;
pbi->cond_local_set = cond_local_set;
pbi->cc0_live = 0;
*************** init_propagate_block_info (bb, live, loc
*** 4111,4116 ****
--- 4122,4129 ----
mem = shallow_copy_rtx (mem);
#endif
pbi->mem_set_list = alloc_EXPR_LIST (0, mem, pbi->mem_set_list);
+ if (++pbi->mem_set_list_len >= MAX_MEM_SET_LIST_LEN)
+ break;
}
}
}
*************** invalidate_mems_from_autoinc (pbi, insn)
*** 4512,4517 ****
--- 4525,4531 ----
else
pbi->mem_set_list = next;
free_EXPR_LIST_node (temp);
+ pbi->mem_set_list_len--;
}
else
prev = temp;
*************** invalidate_mems_from_set (pbi, exp)
*** 4547,4552 ****
--- 4561,4567 ----
else
pbi->mem_set_list = next;
free_EXPR_LIST_node (temp);
+ pbi->mem_set_list_len--;
}
else
prev = temp;
*************** mark_set_1 (pbi, code, reg, cond, insn,
*** 4743,4749 ****
if (insn && GET_CODE (reg) == MEM)
invalidate_mems_from_autoinc (pbi, insn);
! if (GET_CODE (reg) == MEM && ! side_effects_p (reg)
/* ??? With more effort we could track conditional memory life. */
&& ! cond
/* We do not know the size of a BLKmode store, so we do not track
--- 4758,4765 ----
if (insn && GET_CODE (reg) == MEM)
invalidate_mems_from_autoinc (pbi, insn);
! if (pbi->mem_set_list_len < MAX_MEM_SET_LIST_LEN
! && GET_CODE (reg) == MEM && ! side_effects_p (reg)
/* ??? With more effort we could track conditional memory life. */
&& ! cond
/* We do not know the size of a BLKmode store, so we do not track
*************** mark_set_1 (pbi, code, reg, cond, insn,
*** 4761,4766 ****
--- 4777,4783 ----
reg = shallow_copy_rtx (reg);
#endif
pbi->mem_set_list = alloc_EXPR_LIST (0, reg, pbi->mem_set_list);
+ pbi->mem_set_list_len++;
}
}
*************** mark_used_regs (pbi, x, cond, insn)
*** 5859,5864 ****
--- 5876,5882 ----
else
pbi->mem_set_list = next;
free_EXPR_LIST_node (temp);
+ pbi->mem_set_list_len--;
}
else
prev = temp;
*************** mark_used_regs (pbi, x, cond, insn)
*** 5996,6002 ****
So for now, just clear the memory set list and mark any regs
we can find in ASM_OPERANDS as used. */
if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
! free_EXPR_LIST_list (&pbi->mem_set_list);
/* For all ASM_OPERANDS, we must traverse the vector of input operands.
We can not just fall through here since then we would be confused
--- 6014,6023 ----
So for now, just clear the memory set list and mark any regs
we can find in ASM_OPERANDS as used. */
if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
! {
! free_EXPR_LIST_list (&pbi->mem_set_list);
! pbi->mem_set_list_len = 0;
! }
/* For all ASM_OPERANDS, we must traverse the vector of input operands.
We can not just fall through here since then we would be confused