[patch] enabling vectorization by default at -O3
Daniel Berlin
dberlin@dberlin.org
Thu Sep 6 21:43:00 GMT 2007
I can't approve it; you need a middle-end maintainer.
(Yo guys who can approve, see patch below!)
On 9/6/07, H.J. Lu <hjl@lucon.org> wrote:
> On Thu, Sep 06, 2007 at 10:48:43AM -0400, Daniel Berlin wrote:
> > On 9/6/07, H.J. Lu <hjl@lucon.org> wrote:
> > > On Thu, Sep 06, 2007 at 09:23:31AM -0400, Daniel Berlin wrote:
> > > > On 9/6/07, H.J. Lu <hjl@lucon.org> wrote:
> > > > > On Thu, Sep 06, 2007 at 01:49:52PM +0200, Uros Bizjak wrote:
> > > > > > >
> > > > > > > > * Hmm, why is -ffast-math slower? And with vectorization that much
> > > > >
> > > > > Also see
> > > > >
> > > > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32183
> > > > >
> > > > > > With -O2 -ffast-math, we turn a faster loop:
> > > > >
> > > > > float sf;
> > > > > ...
> > > > > sf = 500 * sf;
> > > > > for (i = 0; i < ceplen; i++)
> > > > > sum[i] *= sf;
> > > > >
> > > > > into a slower loop:
> > > > >
> > > > > for (i = 0; i < ceplen; i++)
> > > > > sum[i] = (sum[i]* 500)*sf;
> > > > >
> > > > > > > slower? I recheck induct (V.F, NV.F) and I could reproduce the timings.
> > > > > > >
> > > > > >
> > > > > > > that is indeed interesting (I'd be happy to look at a testcase)
> > > > > >
> > > > > > This is PR 32084, http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32084
> > > > > >
> > > >
> > > > I still don't remember why we have reassoc2. I'm in favor of removing
> > > > it unless someone can show it's producing performance improvements :)
> > >
> > > I got
> > >
> > > Here are SPEC CPU 2006 -O2 -ffast-math differences between revision
> > > 125281 without the second reassoc and revision 125281 on Intel64:
> >
> > Okay, then I guess we should fix it. I think we should just use
> > Zdenek's patch for now, and if anyone complains about lack of
> > reassociation across loop boundaries, we fix that then.
>
> I have been using this patch for several months on Linux/x86-64,
> Linux/ia64 and Linux/ia32 without any regressions. OK to install?
>
> Thanks.
>
>
> H.J.
> ----
> 2007-06-04 Zdenek Dvorak <ook@ucw.cz>
>
> PR tree-optimization/32183
> * Makefile.in (tree-ssa-reassoc.o): Also depend on $(CFGLOOP_H).
>
> * tree-ssa-reassoc.c: Include cfgloop.h.
> (is_reassociable_op): Add a loop argument and return true only
> for inside loop.
> (linearize_expr): Updated.
> (should_break_up_subtract): Likewise.
> (linearize_expr_tree): Likewise.
> (init_reassoc): Call loop_optimizer_init with
> AVOID_CFG_MODIFICATIONS. Remove calculate_dominance_info call
> with CDI_DOMINATORS.
> (fini_reassoc): Call loop_optimizer_finalize.
>
> --- gcc/Makefile.in.reassoc 2007-09-02 05:27:10.000000000 -0700
> +++ gcc/Makefile.in 2007-09-02 05:27:10.000000000 -0700
> @@ -2202,7 +2202,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c
> $(SYSTEM_H) $(TREE_H) $(GGC_H) $(DIAGNOSTIC_H) errors.h $(TIMEVAR_H) \
> $(TM_H) coretypes.h $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) tree-iterator.h\
> $(BASIC_BLOCK_H) $(TREE_GIMPLE_H) $(TREE_INLINE_H) vec.h \
> - alloc-pool.h pointer-set.h
> + alloc-pool.h pointer-set.h $(CFGLOOP_H)
> tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
> $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
> $(FLAGS_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) toplev.h \
> --- gcc/tree-ssa-reassoc.c.reassoc 2007-08-09 07:12:26.000000000 -0700
> +++ gcc/tree-ssa-reassoc.c 2007-09-02 05:27:10.000000000 -0700
> @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.
> #include "vec.h"
> #include "langhooks.h"
> #include "pointer-set.h"
> +#include "cfgloop.h"
>
> /* This is a simple global reassociation pass. It is, in part, based
> on the LLVM pass of the same name (They do some things more/less
> @@ -344,13 +345,21 @@ add_to_ops_vec (VEC(operand_entry_t, hea
> }
>
> /* Return true if STMT is reassociable operation containing a binary
> - operation with tree code CODE. */
> + operation with tree code CODE, and is inside LOOP. */
>
> static bool
> -is_reassociable_op (tree stmt, enum tree_code code)
> +is_reassociable_op (tree stmt, enum tree_code code, struct loop *loop)
> {
> - if (!IS_EMPTY_STMT (stmt)
> - && TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
> + basic_block bb;
> +
> + if (IS_EMPTY_STMT (stmt))
> + return false;
> +
> + bb = bb_for_stmt (stmt);
> + if (!flow_bb_inside_loop_p (loop, bb))
> + return false;
> +
> + if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
> && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == code
> && has_single_use (GIMPLE_STMT_OPERAND (stmt, 0)))
> return true;
> @@ -929,9 +938,10 @@ linearize_expr (tree stmt)
> tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
> tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
> tree newbinrhs = NULL_TREE;
> + struct loop *loop = loop_containing_stmt (stmt);
>
> - gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs))
> - && is_reassociable_op (binrhs, TREE_CODE (rhs)));
> + gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs), loop)
> + && is_reassociable_op (binrhs, TREE_CODE (rhs), loop));
>
> bsinow = bsi_for_stmt (stmt);
> bsirhs = bsi_for_stmt (binrhs);
> @@ -959,9 +969,8 @@ linearize_expr (tree stmt)
> TREE_VISITED (stmt) = 1;
>
> /* Tail recurse on the new rhs if it still needs reassociation. */
> - if (newbinrhs && is_reassociable_op (newbinrhs, rhscode))
> + if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
> linearize_expr (stmt);
> -
> }
>
> /* If LHS has a single immediate use that is a GIMPLE_MODIFY_STMT, return
> @@ -1046,13 +1055,14 @@ should_break_up_subtract (tree stmt)
> tree binlhs = TREE_OPERAND (rhs, 0);
> tree binrhs = TREE_OPERAND (rhs, 1);
> tree immusestmt;
> + struct loop *loop = loop_containing_stmt (stmt);
>
> if (TREE_CODE (binlhs) == SSA_NAME
> - && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR))
> + && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
> return true;
>
> if (TREE_CODE (binrhs) == SSA_NAME
> - && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR))
> + && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
> return true;
>
> if (TREE_CODE (lhs) == SSA_NAME
> @@ -1096,19 +1106,20 @@ linearize_expr_tree (VEC(operand_entry_t
> bool binlhsisreassoc = false;
> bool binrhsisreassoc = false;
> enum tree_code rhscode = TREE_CODE (rhs);
> + struct loop *loop = loop_containing_stmt (stmt);
>
> TREE_VISITED (stmt) = 1;
>
> if (TREE_CODE (binlhs) == SSA_NAME)
> {
> binlhsdef = SSA_NAME_DEF_STMT (binlhs);
> - binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode);
> + binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
> }
>
> if (TREE_CODE (binrhs) == SSA_NAME)
> {
> binrhsdef = SSA_NAME_DEF_STMT (binrhs);
> - binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode);
> + binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
> }
>
> /* If the LHS is not reassociable, but the RHS is, we need to swap
> @@ -1159,7 +1170,8 @@ linearize_expr_tree (VEC(operand_entry_t
> }
>
> gcc_assert (TREE_CODE (binrhs) != SSA_NAME
> - || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode));
> + || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
> + rhscode, loop));
> bsinow = bsi_for_stmt (stmt);
> bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
> bsi_move_before (&bsilhs, &bsinow);
> @@ -1399,6 +1411,10 @@ init_reassoc (void)
> tree param;
> int *bbs = XNEWVEC (int, last_basic_block + 1);
>
> + /* Find the loops, so that we can prevent moving calculations in
> + them. */
> + loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> +
> memset (&reassociate_stats, 0, sizeof (reassociate_stats));
>
> operand_entry_pool = create_alloc_pool ("operand entry pool",
> @@ -1435,7 +1451,6 @@ init_reassoc (void)
> bb_rank[bbs[i]] = ++rank << 16;
>
> free (bbs);
> - calculate_dominance_info (CDI_DOMINATORS);
> calculate_dominance_info (CDI_POST_DOMINATORS);
> broken_up_subtracts = NULL;
> }
> @@ -1446,7 +1461,6 @@ init_reassoc (void)
> static void
> fini_reassoc (void)
> {
> -
> if (dump_file && (dump_flags & TDF_STATS))
> {
> fprintf (dump_file, "Reassociation stats:\n");
> @@ -1465,6 +1479,7 @@ fini_reassoc (void)
> free (bb_rank);
> VEC_free (tree, heap, broken_up_subtracts);
> free_dominance_info (CDI_POST_DOMINATORS);
> + loop_optimizer_finalize ();
> }
>
> /* Gate and execute functions for Reassociation. */
>
More information about the Gcc-patches mailing list