[patch] enabling vectorization by default at -O3

H.J. Lu hjl@lucon.org
Thu Sep 6 17:18:00 GMT 2007


On Thu, Sep 06, 2007 at 10:48:43AM -0400, Daniel Berlin wrote:
> On 9/6/07, H.J. Lu <hjl@lucon.org> wrote:
> > On Thu, Sep 06, 2007 at 09:23:31AM -0400, Daniel Berlin wrote:
> > > On 9/6/07, H.J. Lu <hjl@lucon.org> wrote:
> > > > On Thu, Sep 06, 2007 at 01:49:52PM +0200, Uros Bizjak wrote:
> > > > > >
> > > > > > * Hmm, why is -ffast-math slower? And with vectorization that much
> > > >
> > > > Also see
> > > >
> > > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32183
> > > >
> > > > With -O2 -ffast-math, we turn a faster loop:
> > > >
> > > >       float sf;
> > > >       ...
> > > >       sf = 500 * sf;
> > > >       for (i = 0; i < ceplen; i++)
> > > >         sum[i] *= sf;
> > > >
> > > > into a slower loop:
> > > >
> > > >       for (i = 0; i < ceplen; i++)
> > > >         sum[i] = (sum[i]* 500)*sf;
> > > >
> > > > > > slower? I recheck induct (V.F, NV.F) and I could reproduce the timings.
> > > > > >
> > > > >
> > > > > > that is indeed interesting (I'd be happy to look at a testcase)
> > > > >
> > > > > This is PR 32084, http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32084
> > > > >
> > >
> > > I still don't remember why we have reassoc2.  I'm in favor of removing
> > > it unless someone can show it's producing performance improvements :)
> >
> > I got
> >
> > Here are SPEC CPU 2006 -O2 -ffast-math differences between revision
> > 125281 without the second reassoc and revision 125281 on Intel64:
> 
> Okay, then i guess we should fix it.  I think we should just use
> zdenek's patch for now, and if anyone complains about lack of
> reassociation across loop boundaries, we fix that then.

I have been using this patch for several months on Linux/x86-64,
Linux/ia64 and Linux/ia32 without any regressions. OK to install?

Thanks.


H.J.
----
2007-06-04  Zdenek Dvorak  <ook@ucw.cz>

	PR tree-optimization/32183
	* Makefile.in (tree-ssa-reassoc.o): Also depend on $(CFGLOOP_H).

	* tree-ssa-reassoc.c: Include cfgloop.h.
	(is_reassociable_op): Add a LOOP argument and return true only
	when the statement is inside LOOP.
	(linearize_expr): Updated.
	(should_break_up_subtract): Likewise.
	(linearize_expr_tree): Likewise.
	(init_reassoc): Call loop_optimizer_init with
	AVOID_CFG_MODIFICATIONS.  Remove calculate_dominance_info call
	with CDI_DOMINATORS.
	(fini_reassoc): Call loop_optimizer_finalize.

--- gcc/Makefile.in.reassoc	2007-09-02 05:27:10.000000000 -0700
+++ gcc/Makefile.in	2007-09-02 05:27:10.000000000 -0700
@@ -2202,7 +2202,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c 
    $(SYSTEM_H) $(TREE_H) $(GGC_H) $(DIAGNOSTIC_H) errors.h $(TIMEVAR_H) \
    $(TM_H) coretypes.h $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) tree-iterator.h\
    $(BASIC_BLOCK_H) $(TREE_GIMPLE_H) $(TREE_INLINE_H) vec.h \
-   alloc-pool.h pointer-set.h
+   alloc-pool.h pointer-set.h $(CFGLOOP_H)
 tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
    $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
    $(FLAGS_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) toplev.h \
--- gcc/tree-ssa-reassoc.c.reassoc	2007-08-09 07:12:26.000000000 -0700
+++ gcc/tree-ssa-reassoc.c	2007-09-02 05:27:10.000000000 -0700
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  
 #include "vec.h"
 #include "langhooks.h"
 #include "pointer-set.h"
+#include "cfgloop.h"
 
 /*  This is a simple global reassociation pass.  It is, in part, based
     on the LLVM pass of the same name (They do some things more/less
@@ -344,13 +345,21 @@ add_to_ops_vec (VEC(operand_entry_t, hea
 }
 
 /* Return true if STMT is reassociable operation containing a binary
-   operation with tree code CODE.  */
+   operation with tree code CODE, and is inside LOOP.  */
 
 static bool
-is_reassociable_op (tree stmt, enum tree_code code)
+is_reassociable_op (tree stmt, enum tree_code code, struct loop *loop)
 {
-  if (!IS_EMPTY_STMT (stmt)
-      && TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
+  basic_block bb;
+
+  if (IS_EMPTY_STMT (stmt))
+    return false;
+
+  bb = bb_for_stmt (stmt);
+  if (!flow_bb_inside_loop_p (loop, bb))
+    return false;
+
+  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
       && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == code
       && has_single_use (GIMPLE_STMT_OPERAND (stmt, 0)))
     return true;
@@ -929,9 +938,10 @@ linearize_expr (tree stmt)
   tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
   tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
   tree newbinrhs = NULL_TREE;
+  struct loop *loop = loop_containing_stmt (stmt);
 
-  gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs))
-	      && is_reassociable_op (binrhs, TREE_CODE (rhs)));
+  gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs), loop)
+	      && is_reassociable_op (binrhs, TREE_CODE (rhs), loop));
 
   bsinow = bsi_for_stmt (stmt);
   bsirhs = bsi_for_stmt (binrhs);
@@ -959,9 +969,8 @@ linearize_expr (tree stmt)
   TREE_VISITED (stmt) = 1;
 
   /* Tail recurse on the new rhs if it still needs reassociation.  */
-  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode))
+  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
     linearize_expr (stmt);
-
 }
 
 /* If LHS has a single immediate use that is a GIMPLE_MODIFY_STMT, return
@@ -1046,13 +1055,14 @@ should_break_up_subtract (tree stmt)
   tree binlhs = TREE_OPERAND (rhs, 0);
   tree binrhs = TREE_OPERAND (rhs, 1);
   tree immusestmt;
+  struct loop *loop = loop_containing_stmt (stmt);
 
   if (TREE_CODE (binlhs) == SSA_NAME
-      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR))
+      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
     return true;
 
   if (TREE_CODE (binrhs) == SSA_NAME
-      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR))
+      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
     return true;
 
   if (TREE_CODE (lhs) == SSA_NAME
@@ -1096,19 +1106,20 @@ linearize_expr_tree (VEC(operand_entry_t
   bool binlhsisreassoc = false;
   bool binrhsisreassoc = false;
   enum tree_code rhscode = TREE_CODE (rhs);
+  struct loop *loop = loop_containing_stmt (stmt);
 
   TREE_VISITED (stmt) = 1;
 
   if (TREE_CODE (binlhs) == SSA_NAME)
     {
       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
-      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode);
+      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
     }
 
   if (TREE_CODE (binrhs) == SSA_NAME)
     {
       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
-      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode);
+      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
     }
 
   /* If the LHS is not reassociable, but the RHS is, we need to swap
@@ -1159,7 +1170,8 @@ linearize_expr_tree (VEC(operand_entry_t
     }
 
   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
-	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode));
+	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
+				      rhscode, loop));
   bsinow = bsi_for_stmt (stmt);
   bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
   bsi_move_before (&bsilhs, &bsinow);
@@ -1399,6 +1411,10 @@ init_reassoc (void)
   tree param;
   int *bbs = XNEWVEC (int, last_basic_block + 1);
 
+  /* Find the loops, so that we can prevent moving calculations in
+     them.  */
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
 
   operand_entry_pool = create_alloc_pool ("operand entry pool",
@@ -1435,7 +1451,6 @@ init_reassoc (void)
     bb_rank[bbs[i]] = ++rank  << 16;
 
   free (bbs);
-  calculate_dominance_info (CDI_DOMINATORS);
   calculate_dominance_info (CDI_POST_DOMINATORS);
   broken_up_subtracts = NULL;
 }
@@ -1446,7 +1461,6 @@ init_reassoc (void)
 static void
 fini_reassoc (void)
 {
-
   if (dump_file && (dump_flags & TDF_STATS))
     {
       fprintf (dump_file, "Reassociation stats:\n");
@@ -1465,6 +1479,7 @@ fini_reassoc (void)
   free (bb_rank);
   VEC_free (tree, heap, broken_up_subtracts);
   free_dominance_info (CDI_POST_DOMINATORS);
+  loop_optimizer_finalize ();
 }
 
 /* Gate and execute functions for Reassociation.  */



More information about the Gcc-patches mailing list