This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[sel-sched] Shrink memory consumption on extra large basic blocks
- From: Andrey Belevantsev <abel at ispras dot ru>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Steve Ellcey <sje at cup dot hp dot com>, "Vladimir N. Makarov" <vmakarov at redhat dot com>
- Date: Thu, 12 Nov 2009 19:35:02 +0300
- Subject: [sel-sched] Shrink memory consumption on extra large basic blocks
Hi,
Steve kindly pointed out to me that on ia64 gcc needs a lot of memory
when compiling limits-fnargs.c with -O3. The reason for this is that we
have a basic block with ~150K instructions, and in the selective scheduler
one of the ideas for speeding it up was to cache the effects of an insn
inside the dependence context created solely for this insn. A
dependence context has the reg_last array that holds information for
each register. It is not a problem with usual regions (at most several
hundred insns), but for a region with ~130K insns and on ia64 with >300
hard registers we need something like 12 KB per insn, which totals around
1.5 GB.
Fortunately, there is an easy way out -- when not pipelining (and we
never pipeline such supersize regions), we only need this information
for insns that are in our lookahead window, as we'd never move anything
through scheduled insns. Thus, lazily allocating the reg_last array and
freeing it right after an insn is scheduled does the trick.
The patch (also with extending per-insn data vector by smaller chunks)
lets me compile the test in under 600 MB. I haven't found other easy
candidates for optimizing memory footprints. Bundling allocates quite a
lot for storing bundle states for each insn, but I see no obvious way to
shrink them.
Bootstrapped and tested on ia64 with selective scheduling enabled at
-O2; regular bootstrap is in progress. Ok for trunk if it succeeds?
This should also be a regression from 4.3.
Andrey
2009-11-12 Andrey Belevantsev <abel@ispras.ru>
* sched-deps.c (init_deps): New parameter lazy_reg_last. Don't
allocate reg_last when lazy_reg_last is true.
(init_deps_reg_last): New.
(free_deps): When max_reg is 0, this context is already freed.
* sched-int.h (init_deps_reg_last): Export.
(init_deps): Update prototype.
* sched-ebb.c (schedule_ebb): Update the call to init_deps.
* sched-rgn.c (sched_rgn_compute_dependencies): Likewise.
* ddg.c (build_intra_loop_deps): Likewise.
* sel-sched-ir.c (copy_deps_context, create_deps_context,
reset_deps_context, deps_init_id): Likewise.
(init_first_time_insn_data): Lazily allocate INSN_DEPS_CONTEXT.
(free_data_for_scheduled_insn): New, break down from ...
(free_first_time_insn_data): ... here.
(has_dependence_p): Allocate reg_last now, when it is needed.
* sel-sched-ir.h (free_data_for_scheduled_insn): Export.
* sel-sched.c (update_seqnos_and_stage): Free INSN_DEPS_CONTEXT
in scheduled insn.
Index: gcc/sched-ebb.c
===================================================================
*** gcc/sched-ebb.c (revision 154088)
--- gcc/sched-ebb.c (working copy)
*************** schedule_ebb (rtx head, rtx tail)
*** 478,484 ****
init_deps_global ();
/* Compute dependencies. */
! init_deps (&tmp_deps);
sched_analyze (&tmp_deps, head, tail);
free_deps (&tmp_deps);
--- 478,484 ----
init_deps_global ();
/* Compute dependencies. */
! init_deps (&tmp_deps, false);
sched_analyze (&tmp_deps, head, tail);
free_deps (&tmp_deps);
Index: gcc/ddg.c
===================================================================
*** gcc/ddg.c (revision 154088)
--- gcc/ddg.c (working copy)
*************** build_intra_loop_deps (ddg_ptr g)
*** 388,394 ****
/* Build the dependence information, using the sched_analyze function. */
init_deps_global ();
! init_deps (&tmp_deps);
/* Do the intra-block data dependence analysis for the given block. */
get_ebb_head_tail (g->bb, g->bb, &head, &tail);
--- 388,394 ----
/* Build the dependence information, using the sched_analyze function. */
init_deps_global ();
! init_deps (&tmp_deps, false);
/* Do the intra-block data dependence analysis for the given block. */
get_ebb_head_tail (g->bb, g->bb, &head, &tail);
Index: gcc/sel-sched.c
===================================================================
*** gcc/sel-sched.c (revision 154088)
--- gcc/sel-sched.c (working copy)
*************** update_seqnos_and_stage (int min_seqno,
*** 7364,7369 ****
--- 7364,7375 ----
gcc_assert (INSN_SEQNO (insn) < 0);
INSN_SEQNO (insn) += highest_seqno_in_use + max_seqno - min_seqno + 2;
gcc_assert (INSN_SEQNO (insn) <= new_hs);
+
+ /* When not pipelining, purge unneeded insn info on the scheduled insns.
+ For example, having reg_last array of INSN_DEPS_CONTEXT in memory may
+ require > 1GB of memory e.g. on limits-fnargs.c. */
+ if (! pipelining_p)
+ free_data_for_scheduled_insn (insn);
}
ilist_clear (pscheduled_insns);
Index: gcc/sel-sched-ir.c
===================================================================
*** gcc/sel-sched-ir.c (revision 154088)
--- gcc/sel-sched-ir.c (working copy)
*************** reset_target_context (tc_t tc, bool clea
*** 433,439 ****
static void
copy_deps_context (deps_t to, deps_t from)
{
! init_deps (to);
deps_join (to, from);
}
--- 433,439 ----
static void
copy_deps_context (deps_t to, deps_t from)
{
! init_deps (to, false);
deps_join (to, from);
}
*************** create_deps_context (void)
*** 450,456 ****
{
deps_t dc = alloc_deps_context ();
! init_deps (dc);
return dc;
}
--- 450,456 ----
{
deps_t dc = alloc_deps_context ();
! init_deps (dc, false);
return dc;
}
*************** static void
*** 484,490 ****
reset_deps_context (deps_t dc)
{
clear_deps_context (dc);
! init_deps (dc);
}
/* This structure describes the dependence analysis hooks for advancing
--- 484,490 ----
reset_deps_context (deps_t dc)
{
clear_deps_context (dc);
! init_deps (dc, false);
}
/* This structure describes the dependence analysis hooks for advancing
*************** deps_init_id (idata_t id, insn_t insn, b
*** 2674,2680 ****
deps_init_id_data.force_unique_p = force_unique_p;
deps_init_id_data.force_use_p = false;
! init_deps (dc);
memcpy (&deps_init_id_sched_deps_info,
&const_deps_init_id_sched_deps_info,
--- 2674,2680 ----
deps_init_id_data.force_unique_p = force_unique_p;
deps_init_id_data.force_use_p = false;
! init_deps (dc, false);
memcpy (&deps_init_id_sched_deps_info,
&const_deps_init_id_sched_deps_info,
*************** init_first_time_insn_data (insn_t insn)
*** 2746,2752 ****
/* These are needed for nops too. */
INSN_LIVE (insn) = get_regset_from_pool ();
INSN_LIVE_VALID_P (insn) = false;
!
if (!INSN_NOP_P (insn))
{
INSN_ANALYZED_DEPS (insn) = BITMAP_ALLOC (NULL);
--- 2746,2752 ----
/* These are needed for nops too. */
INSN_LIVE (insn) = get_regset_from_pool ();
INSN_LIVE_VALID_P (insn) = false;
!
if (!INSN_NOP_P (insn))
{
INSN_ANALYZED_DEPS (insn) = BITMAP_ALLOC (NULL);
*************** init_first_time_insn_data (insn_t insn)
*** 2754,2780 ****
INSN_TRANSFORMED_INSNS (insn)
= htab_create (16, hash_transformed_insns,
eq_transformed_insns, free_transformed_insns);
! init_deps (&INSN_DEPS_CONTEXT (insn));
}
}
! /* Free the same data as above for INSN. */
! static void
! free_first_time_insn_data (insn_t insn)
{
gcc_assert (! first_time_insn_init (insn));
!
BITMAP_FREE (INSN_ANALYZED_DEPS (insn));
BITMAP_FREE (INSN_FOUND_DEPS (insn));
htab_delete (INSN_TRANSFORMED_INSNS (insn));
! return_regset_to_pool (INSN_LIVE (insn));
! INSN_LIVE (insn) = NULL;
! INSN_LIVE_VALID_P (insn) = false;
!
/* This is allocated only for bookkeeping insns. */
if (INSN_ORIGINATORS (insn))
BITMAP_FREE (INSN_ORIGINATORS (insn));
free_deps (&INSN_DEPS_CONTEXT (insn));
}
/* Initialize region-scope data structures for basic blocks. */
--- 2754,2799 ----
INSN_TRANSFORMED_INSNS (insn)
= htab_create (16, hash_transformed_insns,
eq_transformed_insns, free_transformed_insns);
! init_deps (&INSN_DEPS_CONTEXT (insn), true);
}
}
! /* Free almost all above data for INSN that is scheduled already.
! Used for extra-large basic blocks. */
! void
! free_data_for_scheduled_insn (insn_t insn)
{
gcc_assert (! first_time_insn_init (insn));
!
! if (! INSN_ANALYZED_DEPS (insn))
! return;
!
BITMAP_FREE (INSN_ANALYZED_DEPS (insn));
BITMAP_FREE (INSN_FOUND_DEPS (insn));
htab_delete (INSN_TRANSFORMED_INSNS (insn));
!
/* This is allocated only for bookkeeping insns. */
if (INSN_ORIGINATORS (insn))
BITMAP_FREE (INSN_ORIGINATORS (insn));
free_deps (&INSN_DEPS_CONTEXT (insn));
+
+ INSN_ANALYZED_DEPS (insn) = NULL;
+
+ /* Clear the readonly flag so we would ICE when trying to recalculate
+ the deps context (as we believe that it should not happen). */
+ (&INSN_DEPS_CONTEXT (insn))->readonly = 0;
+ }
+
+ /* Free the same data as above for INSN. */
+ static void
+ free_first_time_insn_data (insn_t insn)
+ {
+ gcc_assert (! first_time_insn_init (insn));
+
+ free_data_for_scheduled_insn (insn);
+ return_regset_to_pool (INSN_LIVE (insn));
+ INSN_LIVE (insn) = NULL;
+ INSN_LIVE_VALID_P (insn) = false;
}
/* Initialize region-scope data structures for basic blocks. */
*************** has_dependence_p (expr_t expr, insn_t pr
*** 3211,3216 ****
--- 3230,3240 ----
return false;
dc = &INSN_DEPS_CONTEXT (pred);
+
+ /* We init this field lazily. */
+ if (dc->reg_last == NULL)
+ init_deps_reg_last (dc);
+
if (!dc->readonly)
{
has_dependence_data.pro = NULL;
*************** extend_insn_data (void)
*** 3847,3854 ****
- VEC_length (sel_insn_data_def, s_i_d));
if (reserve > 0
&& ! VEC_space (sel_insn_data_def, s_i_d, reserve))
! VEC_safe_grow_cleared (sel_insn_data_def, heap, s_i_d,
! 3 * sched_max_luid / 2);
}
/* Finalize data structures for insns from current region. */
--- 3871,3887 ----
- VEC_length (sel_insn_data_def, s_i_d));
if (reserve > 0
&& ! VEC_space (sel_insn_data_def, s_i_d, reserve))
! {
! int size;
!
! if (sched_max_luid / 2 > 1024)
! size = sched_max_luid + 1024;
! else
! size = 3 * sched_max_luid / 2;
!
!
! VEC_safe_grow_cleared (sel_insn_data_def, heap, s_i_d, size);
! }
}
/* Finalize data structures for insns from current region. */
Index: gcc/sel-sched-ir.h
===================================================================
*** gcc/sel-sched-ir.h (revision 154088)
--- gcc/sel-sched-ir.h (working copy)
*************** extern void init_lv_sets (void);
*** 1646,1651 ****
--- 1646,1652 ----
extern void free_lv_sets (void);
extern void setup_nop_and_exit_insns (void);
extern void free_nop_and_exit_insns (void);
+ extern void free_data_for_scheduled_insn (insn_t);
extern void setup_nop_vinsn (void);
extern void free_nop_vinsn (void);
extern void sel_set_sched_flags (void);
Index: gcc/sched-deps.c
===================================================================
*** gcc/sched-deps.c (revision 154088)
--- gcc/sched-deps.c (working copy)
*************** sched_free_deps (rtx head, rtx tail, boo
*** 3464,3475 ****
n_bbs is the number of region blocks. */
void
! init_deps (struct deps *deps)
{
int max_reg = (reload_completed ? FIRST_PSEUDO_REGISTER : max_reg_num ());
deps->max_reg = max_reg;
! deps->reg_last = XCNEWVEC (struct deps_reg, max_reg);
INIT_REG_SET (&deps->reg_last_in_use);
INIT_REG_SET (&deps->reg_conditional_sets);
--- 3464,3478 ----
n_bbs is the number of region blocks. */
void
! init_deps (struct deps *deps, bool lazy_reg_last)
{
int max_reg = (reload_completed ? FIRST_PSEUDO_REGISTER : max_reg_num ());
deps->max_reg = max_reg;
! if (lazy_reg_last)
! deps->reg_last = NULL;
! else
! deps->reg_last = XCNEWVEC (struct deps_reg, max_reg);
INIT_REG_SET (&deps->reg_last_in_use);
INIT_REG_SET (&deps->reg_conditional_sets);
*************** init_deps (struct deps *deps)
*** 3490,3495 ****
--- 3493,3510 ----
deps->readonly = 0;
}
+ /* Init only reg_last field of DEPS, which was not allocated before as
+ we inited DEPS lazily. */
+ void
+ init_deps_reg_last (struct deps *deps)
+ {
+ gcc_assert (deps && deps->max_reg > 0);
+ gcc_assert (deps->reg_last == NULL);
+
+ deps->reg_last = XCNEWVEC (struct deps_reg, deps->max_reg);
+ }
+
+
/* Free insn lists found in DEPS. */
void
*************** free_deps (struct deps *deps)
*** 3498,3503 ****
--- 3513,3526 ----
unsigned i;
reg_set_iterator rsi;
+ /* We set max_reg to 0 when this context was already freed. */
+ if (deps->max_reg == 0)
+ {
+ gcc_assert (deps->reg_last == NULL);
+ return;
+ }
+ deps->max_reg = 0;
+
free_INSN_LIST_list (&deps->pending_read_insns);
free_EXPR_LIST_list (&deps->pending_read_mems);
free_INSN_LIST_list (&deps->pending_write_insns);
*************** free_deps (struct deps *deps)
*** 3522,3528 ****
CLEAR_REG_SET (&deps->reg_last_in_use);
CLEAR_REG_SET (&deps->reg_conditional_sets);
! free (deps->reg_last);
deps->reg_last = NULL;
deps = NULL;
--- 3545,3554 ----
CLEAR_REG_SET (&deps->reg_last_in_use);
CLEAR_REG_SET (&deps->reg_conditional_sets);
! /* As we initialize reg_last lazily, it is possible that we didn't allocate
! it at all. */
! if (deps->reg_last)
! free (deps->reg_last);
deps->reg_last = NULL;
deps = NULL;
Index: gcc/sched-int.h
===================================================================
*** gcc/sched-int.h (revision 154088)
--- gcc/sched-int.h (working copy)
*************** extern bool sched_insns_conditions_mutex
*** 1199,1205 ****
extern bool sched_insn_is_legitimate_for_speculation_p (const_rtx, ds_t);
extern void add_dependence (rtx, rtx, enum reg_note);
extern void sched_analyze (struct deps *, rtx, rtx);
! extern void init_deps (struct deps *);
extern void free_deps (struct deps *);
extern void init_deps_global (void);
extern void finish_deps_global (void);
--- 1199,1206 ----
extern bool sched_insn_is_legitimate_for_speculation_p (const_rtx, ds_t);
extern void add_dependence (rtx, rtx, enum reg_note);
extern void sched_analyze (struct deps *, rtx, rtx);
! extern void init_deps (struct deps *, bool);
! extern void init_deps_reg_last (struct deps *);
extern void free_deps (struct deps *);
extern void init_deps_global (void);
extern void finish_deps_global (void);
Index: gcc/sched-rgn.c
===================================================================
*** gcc/sched-rgn.c (revision 154088)
--- gcc/sched-rgn.c (working copy)
*************** sched_rgn_compute_dependencies (int rgn)
*** 3152,3158 ****
/* Initializations for region data dependence analysis. */
bb_deps = XNEWVEC (struct deps, current_nr_blocks);
for (bb = 0; bb < current_nr_blocks; bb++)
! init_deps (bb_deps + bb);
/* Initialize bitmap used in add_branch_dependences. */
insn_referenced = sbitmap_alloc (sched_max_luid);
--- 3152,3158 ----
/* Initializations for region data dependence analysis. */
bb_deps = XNEWVEC (struct deps, current_nr_blocks);
for (bb = 0; bb < current_nr_blocks; bb++)
! init_deps (bb_deps + bb, false);
/* Initialize bitmap used in add_branch_dependences. */
insn_referenced = sbitmap_alloc (sched_max_luid);