This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RFA: vectorize reductions on minus_expr
- From: Michael Matz <matz at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 6 May 2010 16:52:14 +0200 (CEST)
- Subject: RFA: vectorize reductions on minus_expr
Hello,
Currently the vectorizer can't handle reductions that use subtraction,
such as:
  res = 0
  for (i)
    res -= x[i];
The most trivial way to handle them is by mindlessly rewriting this
pattern into a negation and a plus_expr, which we can already vectorize
just fine, and then let reassoc clean up the negation+plus into a minus
again. Directly supporting reductions for non-associative codes in the
vectorizer turned out to be much harder: for one, because of the deeply
hard-coded assumption that the _second_ argument is the one to reduce into
:-/, and also because of the peculiar split between detecting reduction
patterns, detecting other patterns, and actually emitting the code that
handles reductions.
This requires reassoc to also handle vector types, which is only a trivial
change.
This improves 482.sphinx3 (full of these reductions) by about 9% on
amdfam10.
Regstrapping on x86_64-linux in progress. Okay for trunk?
Ciao,
Michael.
--
* tree-ssa-reassoc.c (undistribute_ops_list): Use create_tmp_reg.
(can_reassociate_p): Use FLOAT_TYPE_P.
* tree-vect-loop.c (vect_is_simple_reduction): Rewrite "a-b" into
"a+(-b)".
testsuite/
* gcc.dg/vect/fast-math-vect-reduc-8.c: New test.
Index: tree-ssa-reassoc.c
===================================================================
--- tree-ssa-reassoc.c (revision 159105)
+++ tree-ssa-reassoc.c (working copy)
@@ -1165,7 +1165,7 @@ undistribute_ops_list (enum tree_code op
fprintf (dump_file, "Building (");
print_generic_expr (dump_file, oe1->op, 0);
}
- tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
+ tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
add_referenced_var (tmpvar);
zero_one_operation (&oe1->op, c->oecode, c->op);
EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
@@ -1840,7 +1840,7 @@ can_reassociate_p (tree op)
tree type = TREE_TYPE (op);
if (INTEGRAL_TYPE_P (type)
|| NON_SAT_FIXED_POINT_TYPE_P (type)
- || (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
+ || (flag_associative_math && FLOAT_TYPE_P (type)))
return true;
return false;
}
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c (revision 159105)
+++ tree-vect-loop.c (working copy)
@@ -1744,6 +1744,21 @@ vect_is_simple_reduction (loop_vec_info
}
code = gimple_assign_rhs_code (def_stmt);
+ if (code == MINUS_EXPR)
+ {
+ tree rhs = gimple_assign_rhs2 (def_stmt);
+ tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+ gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
+ rhs, NULL);
+ gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
+ set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
+ loop_info, NULL));
+ gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
+ gimple_assign_set_rhs2 (def_stmt, negrhs);
+ gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
+ update_stmt (def_stmt);
+ code = PLUS_EXPR;
+ }
if (check_reduction
&& (!commutative_tree_code (code) || !associative_tree_code (code)))
Index: testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c
===================================================================
--- testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c (revision 0)
+++ testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c (revision 0)
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target vect_float } */
+/* { dg-do compile } */
+
+#include "tree-vect.h"
+
+extern float x[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+extern float y[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+extern float z[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+float f (unsigned n)
+{
+ float ret = 0.0;
+ unsigned i;
+ for (i = 0; i < n; i++)
+ {
+ float diff = x[i] - y[i];
+ ret -= diff * diff * z[i];
+ }
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */