[kill-loop] Fix a quadratic bottleneck in loop-unroll
Zdenek Dvorak
rakdver@atrey.karlin.mff.cuni.cz
Fri Aug 5 09:44:00 GMT 2005
Hello,
> This patch fixes a problem with -fvariable-expansion-in-unroller. The
> algorithm to analyze instructions was quadratic. Fixed by letting df.c
> do the data flow beforehand. I have bootstrapped and tested this patch
> with "-O2 -funroll{,-all}-loops -fvariable-expansion-in-unroller" on
> x86_64-unknown-linux-gnu. Zdenek, does the patch look good to you?
yes, this seems sane to me (does it mean that someone actually uses
the -fvariable-expansion-in-unroller flag? I have always lived under
impression that anything that is not enabled by default is not used
by anyone and hopelessly broken :-)
Zdenek
> Gr.
> Steven
>
> * loop-unroll.c: Don't include varray.h. Include df.h.
> (referenced_in_one_insn_in_loop_p): Rewrite to use a df object.
> (analyze_insn_to_expand_var): Take a df object as an argument.
> (analyze_insns_in_loop): When expanding accumulators, compute
> a df object for the loop. Pass it to analyze_insn_to_expand_var.
>
> Index: loop-unroll.c
> ===================================================================
> RCS file: /cvs/gcc/gcc/gcc/loop-unroll.c,v
> retrieving revision 1.37
> diff -u -3 -p -w -r1.37 loop-unroll.c
> --- loop-unroll.c 3 Aug 2005 13:34:35 -0000 1.37
> +++ loop-unroll.c 4 Aug 2005 13:32:19 -0000
> @@ -33,7 +33,7 @@ Software Foundation, 51 Franklin Street,
> #include "expr.h"
> #include "hashtab.h"
> #include "recog.h"
> -#include "varray.h"
> +#include "df.h"
>
> /* This pass performs loop unrolling and peeling. We only perform these
> optimizations on innermost loops (with single exception) because
> @@ -135,9 +135,6 @@ static struct opt_info *analyze_insns_in
> static void opt_info_start_duplication (struct opt_info *);
> static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
> static void free_opt_info (struct opt_info *);
> -static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
> -static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
> -static struct iv_to_split *analyze_iv_to_split_insn (rtx);
> static void expand_var_during_unrolling (struct var_to_expand *, rtx);
> static int insert_var_expansion_initialization (void **, void *);
> static int combine_var_copies_in_loop_exit (void **, void *);
> @@ -1509,28 +1506,41 @@ ve_info_eq (const void *ivts1, const voi
> return i1->insn == i2->insn;
> }
>
> -/* Returns true if REG is referenced in one insn in LOOP. */
> +/* Returns true if REG is referenced in one insn in LOOP. We really only have
> + to look at the dataflow object DF created for the loop. */
>
> -bool
> -referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
> -{
> - basic_block *body, bb;
> - unsigned i;
> - int count_ref = 0;
> - rtx insn;
> -
> - body = get_loop_body (loop);
> - for (i = 0; i < loop->num_nodes; i++)
> - {
> - bb = body[i];
> +static bool
> +referenced_in_one_insn_in_loop_p (struct loop *loop ATTRIBUTE_UNUSED,
> + rtx reg,
> + struct df *df)
> +{
> + struct df_link *link;
> + struct reg_info *reg_info;
> + rtx insn = NULL;
> +
> + reg_info = df->regs;
> +
> + /* Is there more than one insn definint REG? */
> + for (link = reg_info[REGNO (reg)].defs; link; link = link->next)
> + {
> + struct ref *ref = link->ref;
> + if (!insn)
> + insn = DF_REF_INSN (ref);
> + else if (insn != DF_REF_INSN (ref))
> + return false;
> + }
>
> - FOR_BB_INSNS (bb, insn)
> + /* Is there more than one insn that uses REG? */
> + for (link = reg_info[REGNO (reg)].uses; link; link = link->next)
> {
> - if (rtx_referenced_p (reg, insn))
> - count_ref++;
> - }
> + struct ref *ref = link->ref;
> + if (!insn)
> + insn = DF_REF_INSN (ref);
> + else if (insn != DF_REF_INSN (ref))
> + return false;
> }
> - return (count_ref == 1);
> +
> + return true;
> }
>
> /* Determine whether INSN contains an accumulator
> @@ -1557,7 +1567,7 @@ referenced_in_one_insn_in_loop_p (struct
> */
>
> static struct var_to_expand *
> -analyze_insn_to_expand_var (struct loop *loop, rtx insn)
> +analyze_insn_to_expand_var (struct loop *loop, rtx insn, struct df *df)
> {
> rtx set, dest, src, op1;
> struct var_to_expand *ves;
> @@ -1588,7 +1598,7 @@ analyze_insn_to_expand_var (struct loop
> if (!rtx_equal_p (dest, op1))
> return NULL;
>
> - if (!referenced_in_one_insn_in_loop_p (loop, dest))
> + if (!referenced_in_one_insn_in_loop_p (loop, dest, df))
> return NULL;
>
> if (rtx_referenced_p (dest, XEXP (src, 1)))
> @@ -1694,6 +1704,7 @@ analyze_insns_in_loop (struct loop *loop
> PTR *slot2;
> edge *edges = get_loop_exit_edges (loop, &num_edges);
> bool can_apply = false;
> + struct df *df = NULL;
>
> iv_analysis_loop_init (loop);
>
> @@ -1719,10 +1730,25 @@ analyze_insns_in_loop (struct loop *loop
> can_apply = true;
> }
>
> - if (flag_variable_expansion_in_unroller
> - && can_apply)
> + if (can_apply && flag_variable_expansion_in_unroller)
> + {
> + bitmap blocks = BITMAP_ALLOC (NULL);
> + for (i = 0; i < loop->num_nodes; i++)
> + bitmap_set_bit (blocks, body[i]->index);
> +
> + df = df_init ();
> + df_analyze_subcfg (df, blocks, DF_RD_CHAIN
> + | DF_RU_CHAIN
> + | DF_HARD_REGS
> + | DF_EQUIV_NOTES);
> +
> + BITMAP_FREE (blocks);
> +
> opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
> - ve_info_hash, ve_info_eq, free);
> + ve_info_hash,
> + ve_info_eq,
> + free);
> + }
>
> for (i = 0; i < loop->num_nodes; i++)
> {
> @@ -1746,7 +1772,7 @@ analyze_insns_in_loop (struct loop *loop
> }
>
> if (opt_info->insns_with_var_to_expand)
> - ves = analyze_insn_to_expand_var (loop, insn);
> + ves = analyze_insn_to_expand_var (loop, insn, df);
>
> if (ves)
> {
> @@ -1758,6 +1784,10 @@ analyze_insns_in_loop (struct loop *loop
>
> free (edges);
> free (body);
> +
> + if (df)
> + df_finish (df);
> +
> return opt_info;
> }
>
More information about the Gcc-patches
mailing list