This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Scheduling complete loop unrolling early and unconditional?
- From: Dorit Naishlos <DORIT at il dot ibm dot com>
- To: Richard Guenther <rguenth at tat dot physik dot uni-tuebingen dot de>
- Cc: David Edelsohn <dje at watson dot ibm dot com>, Diego Novillo <dnovillo at redhat dot com>, <gcc at gcc dot gnu dot org>
- Date: Wed, 22 Dec 2004 19:59:26 +0200
- Subject: Re: Scheduling complete loop unrolling early and unconditional?
- Reply-to:
- Sensitivity:
Just a comment:
for better or for worse, scheduling complete unrolling early would take
away opportunities from autovectorization (at least as long as we're
looking for vectorization opportunities only across loop iterations and not
in straight-line code). Indeed, if a loop gets completely unrolled, then in
many cases it may be too short to gain much, if anything, from
vectorization; but ideally the vectorizer would decide what is worthwhile
to vectorize and what is not before such opportunities are eliminated (of
course, we do not have such a cost model yet).
dorit
From: Richard Guenther <rguenth@tat.physik.uni-tuebingen.de>
Sent by: gcc-owner@gcc.gnu.org
Date: 16/12/2004 19:03
To: David Edelsohn <dje@watson.ibm.com>
cc: Diego Novillo <dnovillo@redhat.com>, <gcc@gcc.gnu.org>
Subject: Re: Scheduling complete loop unrolling early and unconditional?
On Thu, 16 Dec 2004, David Edelsohn wrote:
> >>>>> Diego Novillo writes:
>
> Diego> Scheduling SRA after loop optimizations sounds better. SRA should be
> Diego> able to run more than once.
>
> After some loop optimizations. Increasing the granularity of the
> new loop optimizations could help with this overall goal.
Ok, scheduling another SRA after loop did not help. Instead, I scheduled
a complete unroll pass after the first dce run, which works well for me,
though I get garbled -ftime-report output. Maybe I am not supposed to
re-use any of the struct passes? I thought not. It still works
only with -funroll-loops, although I specified a NULL gate for
pass_loop1 and pass_complete_unroll1.
Anyway, stuff is now scalarized nicely. Patch below for anyone who is
interested.
Richard.
Index: tree-pass.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-pass.h,v
retrieving revision 2.22
diff -u -c -p -3 -r2.22 tree-pass.h
*** tree-pass.h 28 Nov 2004 21:02:31 -0000 2.22
--- tree-pass.h 16 Dec 2004 16:51:28 -0000
*************** extern struct tree_opt_pass pass_sra;
*** 125,130 ****
--- 125,131 ----
extern struct tree_opt_pass pass_tail_recursion;
extern struct tree_opt_pass pass_tail_calls;
extern struct tree_opt_pass pass_loop;
+ extern struct tree_opt_pass pass_loop1;
extern struct tree_opt_pass pass_loop_init;
extern struct tree_opt_pass pass_lim;
extern struct tree_opt_pass pass_unswitch;
*************** extern struct tree_opt_pass pass_iv_cano
*** 132,137 ****
--- 133,139 ----
extern struct tree_opt_pass pass_record_bounds;
extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize;
+ extern struct tree_opt_pass pass_complete_unroll1;
extern struct tree_opt_pass pass_complete_unroll;
extern struct tree_opt_pass pass_iv_optimize;
extern struct tree_opt_pass pass_loop_done;
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.65
diff -u -c -p -3 -r2.65 tree-optimize.c
*** tree-optimize.c 30 Nov 2004 15:38:33 -0000 2.65
--- tree-optimize.c 16 Dec 2004 16:51:28 -0000
*************** init_tree_optimization_passes (void)
*** 351,356 ****
--- 351,357 ----
NEXT_PASS (pass_rename_ssa_copies);
NEXT_PASS (pass_early_warn_uninitialized);
NEXT_PASS (pass_dce);
+ NEXT_PASS (pass_loop1);
NEXT_PASS (pass_dominator);
NEXT_PASS (pass_redundant_phi);
NEXT_PASS (pass_dce);
*************** init_tree_optimization_passes (void)
*** 407,412 ****
--- 408,419 ----
NEXT_PASS (pass_loop_done);
*p = NULL;
+ p = &pass_loop1.sub;
+ NEXT_PASS (pass_loop_init);
+ NEXT_PASS (pass_complete_unroll1);
+ NEXT_PASS (pass_loop_done);
+ *p = NULL;
+
#undef NEXT_PASS
/* Register the passes with the tree dump code. */
Index: tree-ssa-loop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop.c,v
retrieving revision 2.23
diff -u -c -p -3 -r2.23 tree-ssa-loop.c
*** tree-ssa-loop.c 26 Nov 2004 06:42:25 -0000 2.23
--- tree-ssa-loop.c 16 Dec 2004 16:51:28 -0000
*************** struct tree_opt_pass pass_loop =
*** 93,98 ****
--- 93,115 ----
0 /* letter */
};
+ struct tree_opt_pass pass_loop1 =
+ {
+ "loop1", /* name */
+ NULL, /* gate
*/
+ NULL, /*
execute */
+ NULL, /* sub
*/
+ NULL, /* next
*/
+ 0, /*
static_pass_number */
+ TV_TREE_LOOP, /* tv_id */
+ PROP_cfg, /*
properties_required */
+ 0, /*
properties_provided */
+ 0, /*
properties_destroyed */
+ TODO_ggc_collect, /* todo_flags_start
*/
+ TODO_dump_func | TODO_verify_ssa | TODO_ggc_collect, /*
todo_flags_finish */
+ 0 /* letter */
+ };
+
/* Loop optimizer initialization. */
static void
*************** struct tree_opt_pass pass_complete_unrol
*** 360,365 ****
--- 377,399 ----
0 /* letter */
};
+ struct tree_opt_pass pass_complete_unroll1 =
+ {
+ "cunroll1", /* name */
+ NULL, /* gate
*/
+ tree_complete_unroll, /*
execute */
+ NULL, /* sub
*/
+ NULL, /* next
*/
+ 0, /*
static_pass_number */
+ TV_COMPLETE_UNROLL, /* tv_id */
+ PROP_cfg | PROP_ssa, /*
properties_required */
+ 0, /*
properties_provided */
+ 0, /*
properties_destroyed */
+ 0, /*
todo_flags_start */
+ TODO_dump_func, /* todo_flags_finish */
+ 0 /* letter */
+ };
+
/* Induction variable optimizations. */
static void