This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] more PR33928: DIY dataflow in fwprop, to enable it at -O1 and speed it up at -O2


On Fri, May 8, 2009 at 10:07 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
> This patch modifies fwprop to build its own datastructure for
> use-to-single-def chains instead of using fullblown UD chains. ?This is
> not hard to do at all with a little refactoring of df-problems.c to
> allow simulating RD just like it's done with LR.
>
> Quoting Steven's comment from the PR, "alternatively we could re-work
> fwprop to work on regions and use the partial-CFG dataflow stuff,
> similar to what the RTL loop optimizers (like loop-invariant) do".
> Summarizing advantages and disadvantages we have:
>
> this patch
> ----------
>
> ? ?+ easy, and can easily be reverted
>
> ? ?+ no code difference at -O2 before and after the patch (not tested
> ? ? ?except on the PR testcase), giving more confidence in backporting
>
> ? ?- less extensible to cases where we want to forward propagate
> ? ? ?reaching definitions if they are all of a similar form (Bernd
> ? ? ?had a patch to form widening multiplications in fwprop)
>
> region approach
> ---------------
>
> ? ?+ may save further time in RD solution
>
> ? ?- could give different kinds of pessimization, the semi-global
> ? ? ?approach of fwprop seems to work well from tests made on
> ? ? ?well-known benchmarks who shalt not be named.
>
> ? ?- I don't think anyone is going to work on it soon :-)
>
> Bootstrapped/regtested i686-pc-linux-gnu, checked that the time spent in
> fwprop even for devilish PR26854 testcases is very small. ?Ok for trunk
> and after a while 4.4?

Err - if this patch doesn't make a difference what critical bug
does it fix to support backporting to 4.4?

Thanks,
Richard.

> Paolo
>
> 2009-05-08 ?Paolo Bonzini ?<bonzini@gnu.org>
>
> ? ? ? ?PR rtl-optimization/33928
> ? ? ? ?* fwprop.c (use_def_ref, get_def_for_use, bitmap_only_bit_bitween,
> ? ? ? ?process_uses, build_single_def_use_links): New.
> ? ? ? ?(update_df): Update use_def_ref.
> ? ? ? ?(forward_propagate_into): Use get_def_for_use instead of use-def
> ? ? ? ?chains.
> ? ? ? ?(fwprop_init): Call build_single_def_use_links and let it initialize
> ? ? ? ?dataflow.
> ? ? ? ?(fwprop_done): Free use_def_ref.
> ? ? ? ?(fwprop_addr): Eliminate duplicate call to df_set_flags.
> ? ? ? ?* df-problems.c (df_rd_simulate_artificial_defs_at_top,
> ? ? ? ?df_rd_simulate_one_insn): New.
> ? ? ? ?(df_rd_bb_local_compute_process_def): Update head comment.
> ? ? ? ?(df_chain_create_bb): Use the new RD simulation functions.
> ? ? ? ?* df.h (df_rd_simulate_artificial_defs_at_top,
> ? ? ? ?df_rd_simulate_one_insn): New.
> ? ? ? ?* opts.c (decode_options): Enable fwprop at -O1.
> ? ? ? ?* doc/invoke.texi (-fforward-propagate): Document this.
>
> Index: fwprop.c
> ===================================================================
> --- fwprop.c ? ?(revision 147269)
> +++ fwprop.c ? ?(working copy)
> @@ -105,6 +105,92 @@ along with GCC; see the file COPYING3.
>
> ?static int num_changes;
>
> +DEF_VEC_P(df_ref);
> +DEF_VEC_ALLOC_P(df_ref,heap);
> +VEC(df_ref,heap) *use_def_ref;
> +
> +static inline df_ref
> +get_def_for_use (df_ref use)
> +{
> + ?return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
> +}
> +
> +static inline int
> +bitmap_only_bit_between (const_bitmap b, unsigned first, unsigned last)
> +{
> + ?bitmap_iterator bi;
> + ?unsigned bit, bit2;
> +
> + ?if (last < first)
> + ? ?return -1;
> +
> + ?bmp_iter_set_init (&bi, b, first, &bit);
> + ?if (bmp_iter_set (&bi, &bit) && bit <= last)
> + ? ?{
> + ? ? ?bit2 = bit;
> + ? ? ?bmp_iter_next (&bi, &bit2);
> + ? ? ?if (!bmp_iter_set (&bi, &bit2) || bit2 > last)
> + ? ? ? ?return bit;
> + ? ?}
> + ?return -1;
> +}
> +
> +static void
> +process_uses (bitmap local_rd, df_ref *use_rec, int top_flag)
> +{
> + ?df_ref use;
> + ?while ((use = *use_rec++) != NULL)
> + ? ?if (top_flag == (DF_REF_FLAGS (use) & DF_REF_AT_TOP))
> + ? ? ?{
> + ? ? ? unsigned int uregno = DF_REF_REGNO (use);
> + ? ? ? unsigned int first = DF_DEFS_BEGIN (uregno);
> + ? ? ? unsigned int last = first + DF_DEFS_COUNT (uregno) - 1;
> + ? ? ? int defno = bitmap_only_bit_between (local_rd, first, last);
> + ? ? ? df_ref def = (defno == -1) ? NULL : DF_DEFS_GET (defno);
> +
> + ? ? ? VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), def);
> + ? ? ?}
> +}
> +
> +static void
> +build_single_def_use_links (void)
> +{
> + ?basic_block bb;
> + ?bitmap local_rd = BITMAP_ALLOC (NULL);
> +
> + ?/* We use reaching definitions to compute our restricted use-def chains. ?*/
> + ?df_set_flags (DF_EQ_NOTES);
> + ?df_rd_add_problem ();
> + ?df_analyze ();
> + ?df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
> +
> + ?use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
> + ?VEC_safe_grow (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
> +
> + ?FOR_EACH_BB (bb)
> + ? ?{
> + ? ? ?int bb_index = bb->index;
> + ? ? ?struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
> + ? ? ?rtx insn;
> +
> + ? ? ?bitmap_copy (local_rd, bb_info->in);
> + ? ? ?process_uses (local_rd, df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
> +
> + ? ? ?df_rd_simulate_artificial_defs_at_top (bb, local_rd);
> + ? ? ?FOR_BB_INSNS (bb, insn)
> + ? ? ? ?if (INSN_P (insn))
> + ? ? ? ? ?{
> + ? ? ? ? ? ?unsigned int uid = INSN_UID (insn);
> + ? ? ? ? ? ?process_uses (local_rd, DF_INSN_UID_USES (uid), 0);
> + ? ? ? ? ? ?process_uses (local_rd, DF_INSN_UID_EQ_USES (uid), 0);
> + ? ? ? ? ? ?df_rd_simulate_one_insn (bb, insn, local_rd);
> + ? ? ? ? }
> +
> + ? ? ?process_uses (local_rd, df_get_artificial_uses (bb_index), 0);
> + ? ?}
> +
> + ?BITMAP_FREE (local_rd);
> +}
>
> ?/* Do not try to replace constant addresses or addresses of local and
> ? ?argument slots. ?These MEM expressions are made only once and inserted
> @@ -716,7 +802,8 @@ update_df (rtx insn, rtx *loc, df_ref *u
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? width, offset, mode);
>
> - ? ? ?/* Set up the use-def chain. ?*/
> - ? ? ?df_chain_copy (new_use, DF_REF_CHAIN (orig_use));
> + ? ? ?/* Set up the use-def link. ?*/
> + ? ? ?gcc_assert (DF_REF_ID (new_use) == (int) VEC_length (df_ref, use_def_ref));
> + ? ? ?VEC_safe_push (df_ref, heap, use_def_ref, get_def_for_use (orig_use));
> ? ? ? changed = true;
> ? ? }
> ? if (changed)
> @@ -1035,7 +1122,6 @@ forward_propagate_and_simplify (df_ref u
> ?static void
> ?forward_propagate_into (df_ref use)
> ?{
> - ?struct df_link *defs;
> ? df_ref def;
> ? rtx def_insn, def_set, use_insn;
> ? rtx parent;
> @@ -1046,11 +1132,9 @@ forward_propagate_into (df_ref use)
> ? ? return;
>
> ? /* Only consider uses that have a single definition. ?*/
> - ?defs = DF_REF_CHAIN (use);
> - ?if (!defs || defs->next)
> + ?def = get_def_for_use (use);
> + ?if (!def)
> ? ? return;
> -
> - ?def = defs->ref;
> ? if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
> ? ? return;
> ? if (DF_REF_IS_ARTIFICIAL (def))
> @@ -1096,12 +1180,7 @@ fwprop_init (void)
> ? ? ?insns (sadly) if we are not working in cfglayout mode. ?*/
> ? loop_optimizer_init (0);
>
> - ?/* Now set up the dataflow problem (we only want use-def chains) and
> - ? ? put the dataflow solver to work. ?*/
> - ?df_set_flags (DF_EQ_NOTES);
> - ?df_chain_add_problem (DF_UD_CHAIN);
> - ?df_analyze ();
> - ?df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
> + ?build_single_def_use_links ();
> ? df_set_flags (DF_DEFER_INSN_RESCAN);
> ?}
>
> @@ -1110,6 +1189,7 @@ fwprop_done (void)
> ?{
> ? loop_optimizer_finalize ();
>
> + ?VEC_free (df_ref, heap, use_def_ref);
> ? free_dominance_info (CDI_DOMINATORS);
> ? cleanup_cfg (0);
> ? delete_trivially_dead_insns (get_insns (), max_reg_num ());
> @@ -1187,8 +1267,6 @@ fwprop_addr (void)
>
> ? /* Go through all the uses. ?update_df will create new ones at the
> ? ? ?end, and we'll go through them as well. ?*/
> - ?df_set_flags (DF_DEFER_INSN_RESCAN);
> -
> ? for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
> ? ? {
> ? ? ? df_ref use = DF_USES_GET (i);
> Index: df-problems.c
> ===================================================================
> --- df-problems.c ? ? ? (revision 147269)
> +++ df-problems.c ? ? ? (working copy)
> @@ -316,7 +316,61 @@ df_rd_alloc (bitmap all_blocks)
> ?}
>
>
> -/* Process a list of DEFs for df_rd_bb_local_compute. ?*/
> +/* Add the effect of the top artificial defs of BB to the reaching definitions
> + ? bitmap LOCAL_RD. ?*/
> +
> +void
> +df_rd_simulate_artificial_defs_at_top (basic_block bb, bitmap local_rd)
> +{
> + ?int bb_index = bb->index;
> + ?df_ref *def_rec;
> + ?for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
> + ? ?{
> + ? ? ?df_ref def = *def_rec;
> + ? ? ?if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
> + ? ? ? {
> + ? ? ? ? unsigned int dregno = DF_REF_REGNO (def);
> + ? ? ? ? if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> + ? ? ? ? ? bitmap_clear_range (local_rd,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_BEGIN (dregno),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_COUNT (dregno));
> + ? ? ? ? bitmap_set_bit (local_rd, DF_REF_ID (def));
> + ? ? ? }
> + ? ?}
> +}
> +
> +/* Add the effect of the defs of INSN to the reaching definitions bitmap
> + ? LOCAL_RD. ?*/
> +
> +void
> +df_rd_simulate_one_insn (basic_block bb ATTRIBUTE_UNUSED, rtx insn,
> + ? ? ? ? ? ? ? ? ? ? ? ?bitmap local_rd)
> +{
> + ?unsigned uid = INSN_UID (insn);
> + ?df_ref *def_rec;
> +
> + ?for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
> + ? ?{
> + ? ? ?df_ref def = *def_rec;
> + ? ? ?unsigned int dregno = DF_REF_REGNO (def);
> + ? ? ?if ((!(df->changeable_flags & DF_NO_HARD_REGS))
> + ? ? ? ? ?|| (dregno >= FIRST_PSEUDO_REGISTER))
> + ? ? ? ?{
> + ? ? ? ? ?if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> + ? ? ? ? ? bitmap_clear_range (local_rd,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_BEGIN (dregno),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_COUNT (dregno));
> + ? ? ? ? if (!(DF_REF_FLAGS (def)
> + ? ? ? ? ? ? ? & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
> + ? ? ? ? ? bitmap_set_bit (local_rd, DF_REF_ID (def));
> + ? ? ? }
> + ? ?}
> +}
> +
> +/* Process a list of DEFs for df_rd_bb_local_compute. ?This is a bit
> + ? more complicated than just simulating, because we must produce the
> + ? gen and kill sets and hence deal with the two possible representations
> + ? of kill sets. ? */
>
> ?static void
> ?df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
> @@ -2076,7 +2130,6 @@ df_chain_create_bb (unsigned int bb_inde
> ? struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
> ? rtx insn;
> ? bitmap cpy = BITMAP_ALLOC (NULL);
> - ?df_ref *def_rec;
>
> ? bitmap_copy (cpy, bb_info->in);
> ? bitmap_set_bit (df_chain->out_of_date_transfer_functions, bb_index);
> @@ -2095,57 +2148,23 @@ df_chain_create_bb (unsigned int bb_inde
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?DF_REF_AT_TOP);
> ?#endif
>
> - ?for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
> - ? ?{
> - ? ? ?df_ref def = *def_rec;
> - ? ? ?if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
> - ? ? ? {
> - ? ? ? ? unsigned int dregno = DF_REF_REGNO (def);
> - ? ? ? ? if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> - ? ? ? ? ? bitmap_clear_range (cpy,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_BEGIN (dregno),
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_COUNT (dregno));
> - ? ? ? ? bitmap_set_bit (cpy, DF_REF_ID (def));
> - ? ? ? }
> - ? ?}
> + ?df_rd_simulate_artificial_defs_at_top (bb, cpy);
>
> ? /* Process the regular instructions next. ?*/
> ? FOR_BB_INSNS (bb, insn)
> - ? ?{
> - ? ? ?df_ref *def_rec;
> - ? ? ?unsigned int uid = INSN_UID (insn);
> -
> - ? ? ?if (!INSN_P (insn))
> - ? ? ? continue;
> -
> - ? ? ?/* Now scan the uses and link them up with the defs that remain
> - ? ? ? ?in the cpy vector. ?*/
> -
> - ? ? ?df_chain_create_bb_process_use (cpy, DF_INSN_UID_USES (uid), 0);
> -
> - ? ? ?if (df->changeable_flags & DF_EQ_NOTES)
> - ? ? ? df_chain_create_bb_process_use (cpy, DF_INSN_UID_EQ_USES (uid), 0);
> + ? ?if (INSN_P (insn))
> + ? ? ?{
> + ? ? ? ?unsigned int uid = INSN_UID (insn);
>
> + ? ? ? ?/* First scan the uses and link them up with the defs that remain
> + ? ? ? ? ?in the cpy vector. ?*/
> + ? ? ? ?df_chain_create_bb_process_use (cpy, DF_INSN_UID_USES (uid), 0);
> + ? ? ? ?if (df->changeable_flags & DF_EQ_NOTES)
> + ? ? ? ? df_chain_create_bb_process_use (cpy, DF_INSN_UID_EQ_USES (uid), 0);
>
> - ? ? ?/* Since we are going forwards, process the defs second. ?This
> - ? ? ? ? pass only changes the bits in cpy. ?*/
> - ? ? ?for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
> - ? ? ? {
> - ? ? ? ? df_ref def = *def_rec;
> - ? ? ? ? unsigned int dregno = DF_REF_REGNO (def);
> - ? ? ? ? if ((!(df->changeable_flags & DF_NO_HARD_REGS))
> - ? ? ? ? ? ? || (dregno >= FIRST_PSEUDO_REGISTER))
> - ? ? ? ? ? {
> - ? ? ? ? ? ? if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> - ? ? ? ? ? ? ? bitmap_clear_range (cpy,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_BEGIN (dregno),
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DF_DEFS_COUNT (dregno));
> - ? ? ? ? ? ? if (!(DF_REF_FLAGS (def)
> - ? ? ? ? ? ? ? ? ? & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
> - ? ? ? ? ? ? ? bitmap_set_bit (cpy, DF_REF_ID (def));
> - ? ? ? ? ? }
> - ? ? ? }
> - ? ?}
> + ? ? ? ?/* Since we are going forwards, process the defs second. ?*/
> + ? ? ? ?df_rd_simulate_one_insn (bb, insn, cpy);
> + ? ? ?}
>
> ? /* Create the chains for the artificial uses of the hard registers
> ? ? ?at the end of the block. ?*/
> Index: df.h
> ===================================================================
> --- df.h ? ? ? ?(revision 147269)
> +++ df.h ? ? ? ?(working copy)
> @@ -939,6 +939,8 @@ extern void df_grow_bb_info (struct data
> ?extern void df_chain_dump (struct df_link *, FILE *);
> ?extern void df_print_bb_index (basic_block bb, FILE *file);
> ?extern void df_rd_add_problem (void);
> +extern void df_rd_simulate_artificial_defs_at_top (basic_block, bitmap);
> +extern void df_rd_simulate_one_insn (basic_block, rtx, bitmap);
> ?extern void df_lr_add_problem (void);
> ?extern void df_lr_verify_transfer_functions (void);
> ?extern void df_live_verify_transfer_functions (void);
> Index: opts.c
> ===================================================================
> --- opts.c ? ? ?(revision 147269)
> +++ opts.c ? ? ?(working copy)
> @@ -848,6 +848,7 @@ decode_options (unsigned int argc, const
> ?#endif
> ? flag_guess_branch_prob = opt1;
> ? flag_cprop_registers = opt1;
> + ?flag_forward_propagate = opt1;
> ? flag_if_conversion = opt1;
> ? flag_if_conversion2 = opt1;
> ? flag_ipa_pure_const = opt1;
> @@ -873,7 +874,6 @@ decode_options (unsigned int argc, const
> ? flag_thread_jumps = opt2;
> ? flag_crossjumping = opt2;
> ? flag_optimize_sibling_calls = opt2;
> - ?flag_forward_propagate = opt2;
> ? flag_cse_follow_jumps = opt2;
> ? flag_gcse = opt2;
> ? flag_expensive_optimizations = opt2;
> Index: doc/invoke.texi
> ===================================================================
> --- doc/invoke.texi ? ? (revision 147269)
> +++ doc/invoke.texi ? ? (working copy)
> @@ -5554,8 +5554,8 @@ instructions and checks if the result ca
> ?is active, two passes are performed and the second is scheduled after
> ?loop unrolling.
>
> -This option is enabled by default at optimization levels @option{-O2},
> -@option{-O3}, @option{-Os}.
> +This option is enabled by default at optimization levels @option{-O},
> +@option{-O2}, @option{-O3}, @option{-Os}.
>
> ?@item -fomit-frame-pointer
> ?@opindex fomit-frame-pointer
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]