This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PR 10776 II
> On Sat, Jan 10, 2004 at 09:03:49PM +0100, Jan Hubicka wrote:
> > This patch take care of it in easy way of splitting very large basic
> > blocks. I don't think this will have important impact on perfomrance of
> > something (there are no such blocks in SPEC build) and it cuts memory
> > usage to 50MB and time to 99 seconds.
>
> This seems like a decent idea. I'd like to know how it affects
> fpppp from spec before applying it though.
Actually it has turned out that at least this testcase can be trimmed
down almost completely. The part of problem comes (as usual) from too
many re-canonicalizations of the expressions. Luckily scheudler already
prepares lists of memory expressions and thus it is easy to keep these
in canonicalized form.
The other problem is that the code has cache of existing dependency
links and this cache is allocated as bitmap of maxinsnXmaxinsn. For
40000 instructions this is bit too much. The attached patch make
scheduler to consume only 2% of compilation time and gets the testcase
to 44 seconds overall (after reverting your patch that makes it
uninsteresting).
I am testing it for i686-pc-gnu-linux
OK if it passes?
I will still keep the other patch as backup idea for more other
quadratic behaviours in scheduler.
Honza
2004-01-14 Jan Hubicka <jh@suse.cz>
* sched-deps.c (trye_dependency_cache, anti_dependency_cache,
outptu_dependency_cache, forward_dependency_cahe): Trun to vectors of
bitmaps
(cache_size): New variable
(add_dependence): Update use; canonize early memory locations
(sched_analyze_1): Likewise.
(sched_analyze_2): Likewise.
(init_dependency_caches): Initialize bitmaps.
(free_dependency_caches): Free bitmaps
Index: sched-deps.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/sched-deps.c,v
retrieving revision 1.63
diff -c -3 -p -r1.63 sched-deps.c
*** sched-deps.c 11 Oct 2003 19:00:49 -0000 1.63
--- sched-deps.c 14 Jan 2004 00:39:00 -0000
*************** static enum reg_pending_barrier_mode reg
*** 80,95 ****
has enough entries to represent a dependency on any other insn in
the insn chain. All bitmap for true dependencies cache is
allocated then the rest two ones are also allocated. */
! static sbitmap *true_dependency_cache;
! static sbitmap *anti_dependency_cache;
! static sbitmap *output_dependency_cache;
/* To speed up checking consistency of formed forward insn
dependencies we use the following cache. Another possible solution
could be switching off checking duplication of insns in forward
dependencies. */
#ifdef ENABLE_CHECKING
! static sbitmap *forward_dependency_cache;
#endif
static int deps_may_trap_p (rtx);
--- 80,96 ----
has enough entries to represent a dependency on any other insn in
the insn chain. All bitmap for true dependencies cache is
allocated then the rest two ones are also allocated. */
! static bitmap_head *true_dependency_cache;
! static bitmap_head *anti_dependency_cache;
! static bitmap_head *output_dependency_cache;
! int cache_size;
/* To speed up checking consistency of formed forward insn
dependencies we use the following cache. Another possible solution
could be switching off checking duplication of insns in forward
dependencies. */
#ifdef ENABLE_CHECKING
! static bitmap_head *forward_dependency_cache;
#endif
static int deps_may_trap_p (rtx);
*************** add_dependence (rtx insn, rtx elem, enum
*** 244,256 ****
if (anti_dependency_cache == NULL || output_dependency_cache == NULL)
abort ();
! if (TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
/* Do nothing (present_set_type is already 0). */
;
! else if (TEST_BIT (anti_dependency_cache[INSN_LUID (insn)],
INSN_LUID (elem)))
present_dep_type = REG_DEP_ANTI;
! else if (TEST_BIT (output_dependency_cache[INSN_LUID (insn)],
INSN_LUID (elem)))
present_dep_type = REG_DEP_OUTPUT;
else
--- 245,258 ----
if (anti_dependency_cache == NULL || output_dependency_cache == NULL)
abort ();
! if (bitmap_bit_p (&true_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem)))
/* Do nothing (present_set_type is already 0). */
;
! else if (bitmap_bit_p (&anti_dependency_cache[INSN_LUID (insn)],
INSN_LUID (elem)))
present_dep_type = REG_DEP_ANTI;
! else if (bitmap_bit_p (&output_dependency_cache[INSN_LUID (insn)],
INSN_LUID (elem)))
present_dep_type = REG_DEP_OUTPUT;
else
*************** add_dependence (rtx insn, rtx elem, enum
*** 271,282 ****
if (true_dependency_cache != NULL)
{
if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
! RESET_BIT (anti_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
&& output_dependency_cache)
! RESET_BIT (output_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else
abort ();
}
--- 273,284 ----
if (true_dependency_cache != NULL)
{
if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
! bitmap_clear_bit (&anti_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
&& output_dependency_cache)
! bitmap_clear_bit (&output_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else
abort ();
}
*************** add_dependence (rtx insn, rtx elem, enum
*** 293,306 ****
if (true_dependency_cache != NULL)
{
if ((int) REG_NOTE_KIND (link) == 0)
! SET_BIT (true_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
! SET_BIT (anti_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
! SET_BIT (output_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
}
#endif
return 0;
--- 295,308 ----
if (true_dependency_cache != NULL)
{
if ((int) REG_NOTE_KIND (link) == 0)
! bitmap_set_bit (&true_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
! bitmap_set_bit (&anti_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
! bitmap_set_bit (&output_dependency_cache[INSN_LUID (insn)],
! INSN_LUID (elem));
}
#endif
return 0;
*************** add_dependence (rtx insn, rtx elem, enum
*** 319,329 ****
if (true_dependency_cache != NULL)
{
if ((int) dep_type == 0)
! SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
else if (dep_type == REG_DEP_ANTI)
! SET_BIT (anti_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
else if (dep_type == REG_DEP_OUTPUT)
! SET_BIT (output_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
}
#endif
return 1;
--- 321,331 ----
if (true_dependency_cache != NULL)
{
if ((int) dep_type == 0)
! bitmap_set_bit (&true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
else if (dep_type == REG_DEP_ANTI)
! bitmap_set_bit (&anti_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
else if (dep_type == REG_DEP_OUTPUT)
! bitmap_set_bit (&output_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
}
#endif
return 1;
*************** add_insn_mem_dependence (struct deps *de
*** 395,401 ****
mem = shallow_copy_rtx (mem);
XEXP (mem, 0) = cselib_subst_to_values (XEXP (mem, 0));
}
! link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
*mem_list = link;
deps->pending_lists_length++;
--- 397,403 ----
mem = shallow_copy_rtx (mem);
XEXP (mem, 0) = cselib_subst_to_values (XEXP (mem, 0));
}
! link = alloc_EXPR_LIST (VOIDmode, canon_rtx (mem), *mem_list);
*mem_list = link;
deps->pending_lists_length++;
*************** sched_analyze_1 (struct deps *deps, rtx
*** 543,548 ****
--- 545,551 ----
cselib_lookup (XEXP (t, 0), Pmode, 1);
XEXP (t, 0) = cselib_subst_to_values (XEXP (t, 0));
}
+ XEXP (t, 0) = canon_rtx (XEXP (t, 0));
if (deps->pending_lists_length > MAX_PENDING_LIST_LENGTH)
{
*************** sched_analyze_2 (struct deps *deps, rtx
*** 684,689 ****
--- 687,693 ----
cselib_lookup (XEXP (t, 0), Pmode, 1);
XEXP (t, 0) = cselib_subst_to_values (XEXP (t, 0));
}
+ XEXP (t, 0) = canon_rtx (XEXP (t, 0));
pending = deps->pending_read_insns;
pending_mem = deps->pending_read_mems;
while (pending)
*************** add_forward_dependence (rtx from, rtx to
*** 1345,1358 ****
if (GET_CODE (from) == NOTE
|| INSN_DELETED_P (from)
|| (forward_dependency_cache != NULL
! && TEST_BIT (forward_dependency_cache[INSN_LUID (from)],
! INSN_LUID (to)))
|| (forward_dependency_cache == NULL
&& find_insn_list (to, INSN_DEPEND (from))))
abort ();
if (forward_dependency_cache != NULL)
! SET_BIT (forward_dependency_cache[INSN_LUID (from)],
! INSN_LUID (to));
#endif
new_link = alloc_INSN_LIST (to, INSN_DEPEND (from));
--- 1349,1362 ----
if (GET_CODE (from) == NOTE
|| INSN_DELETED_P (from)
|| (forward_dependency_cache != NULL
! && bitmap_bit_p (&forward_dependency_cache[INSN_LUID (from)],
! INSN_LUID (to)))
|| (forward_dependency_cache == NULL
&& find_insn_list (to, INSN_DEPEND (from))))
abort ();
if (forward_dependency_cache != NULL)
! bitmap_bit_p (&forward_dependency_cache[INSN_LUID (from)],
! INSN_LUID (to));
#endif
new_link = alloc_INSN_LIST (to, INSN_DEPEND (from));
*************** init_dependency_caches (int luid)
*** 1457,1472 ****
what we consider "very high". */
if (luid / n_basic_blocks > 100 * 5)
{
! true_dependency_cache = sbitmap_vector_alloc (luid, luid);
! sbitmap_vector_zero (true_dependency_cache, luid);
! anti_dependency_cache = sbitmap_vector_alloc (luid, luid);
! sbitmap_vector_zero (anti_dependency_cache, luid);
! output_dependency_cache = sbitmap_vector_alloc (luid, luid);
! sbitmap_vector_zero (output_dependency_cache, luid);
#ifdef ENABLE_CHECKING
! forward_dependency_cache = sbitmap_vector_alloc (luid, luid);
! sbitmap_vector_zero (forward_dependency_cache, luid);
#endif
}
}
--- 1461,1483 ----
what we consider "very high". */
if (luid / n_basic_blocks > 100 * 5)
{
! int i;
! true_dependency_cache = xmalloc (luid * sizeof (bitmap_head));
! anti_dependency_cache = xmalloc (luid * sizeof (bitmap_head));
! output_dependency_cache = xmalloc (luid * sizeof (bitmap_head));
! #ifdef ENABLE_CHECKING
! forward_dependency_cache = xmalloc (luid * sizeof (bitmap_head));
! #endif
! for (i = 0; i < luid; i++)
! {
! bitmap_initialize (&true_dependency_cache[i], 0);
! bitmap_initialize (&anti_dependency_cache[i], 0);
! bitmap_initialize (&output_dependency_cache[i], 0);
#ifdef ENABLE_CHECKING
! bitmap_initialize (&forward_dependency_cache[i], 0);
#endif
+ }
+ cache_size = luid;
}
}
*************** free_dependency_caches (void)
*** 1477,1490 ****
{
if (true_dependency_cache)
{
! sbitmap_vector_free (true_dependency_cache);
true_dependency_cache = NULL;
! sbitmap_vector_free (anti_dependency_cache);
anti_dependency_cache = NULL;
! sbitmap_vector_free (output_dependency_cache);
output_dependency_cache = NULL;
#ifdef ENABLE_CHECKING
! sbitmap_vector_free (forward_dependency_cache);
forward_dependency_cache = NULL;
#endif
}
--- 1488,1512 ----
{
if (true_dependency_cache)
{
! int i;
!
! for (i = 0; i < cache_size; i++)
! {
! bitmap_clear (&true_dependency_cache[i]);
! bitmap_clear (&anti_dependency_cache[i]);
! bitmap_clear (&output_dependency_cache[i]);
! #ifdef ENABLE_CHECKING
! bitmap_clear (&forward_dependency_cache[i]);
! #endif
! }
! free (true_dependency_cache);
true_dependency_cache = NULL;
! free (anti_dependency_cache);
anti_dependency_cache = NULL;
! free (output_dependency_cache);
output_dependency_cache = NULL;
#ifdef ENABLE_CHECKING
! free (forward_dependency_cache);
forward_dependency_cache = NULL;
#endif
}