This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR66623
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 8 Jun 2017 11:18:34 +0200 (CEST)
- Subject: [PATCH] Fix PR66623
- Authentication-results: sourceware.org; auth=none
The following fixes unsafe vectorization of reductions in outer loop
vectorization. It combines it with a bit of cleanup in the affected
function.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
2017-06-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/66623
* tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
refactor check_reduction into two parts, properly computing
whether we have to check reduction validity for outer loop
vectorization.
* gcc.dg/vect/pr66623.c: New testcase.
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 249003)
+++ gcc/tree-vect-loop.c (working copy)
@@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
- edge latch_e = loop_latch_edge (loop);
- tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
enum tree_code orig_code, code;
tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
@@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
*double_reduc = false;
*v_reduc_type = TREE_CODE_REDUCTION;
- /* Check validity of the reduction only for the innermost loop. */
- bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
- gcc_assert ((check_reduction && loop == vect_loop)
- || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
-
name = PHI_RESULT (phi);
/* ??? If there are no uses of the PHI result the inner loop reduction
won't be detected as possibly double-reduction by vectorizable_reduction
@@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.\n");
+ "reduction value used in loop.\n");
return NULL;
}
phi_use_stmt = use_stmt;
}
+ edge latch_e = loop_latch_edge (loop);
+ tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
if (TREE_CODE (loop_arg) != SSA_NAME)
{
if (dump_enabled_p ())
@@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
}
def_stmt = SSA_NAME_DEF_STMT (loop_arg);
- if (!def_stmt)
+ if (gimple_nop_p (def_stmt))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction: no def_stmt.\n");
+ "reduction: no def_stmt\n");
return NULL;
}
if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
{
if (dump_enabled_p ())
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduction: unhandled reduction operation: ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
+ }
return NULL;
}
@@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
}
nloop_uses = 0;
+ gphi *lcphi = NULL;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
gimple *use_stmt = USE_STMT (use_p);
@@ -2829,6 +2829,11 @@ vect_is_simple_reduction (loop_vec_info
continue;
if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
nloop_uses++;
+ else
+ {
+ gcc_assert (!lcphi);
+ lcphi = as_a <gphi *> (use_stmt);
+ }
if (nloop_uses > 1)
{
if (dump_enabled_p ())
@@ -2873,6 +2878,24 @@ vect_is_simple_reduction (loop_vec_info
return NULL;
}
+ /* If we are vectorizing an inner reduction we are executing that
+ in the original order only in case we are not dealing with a
+ double reduction. */
+ bool check_reduction = true;
+ if (flow_loop_nested_p (vect_loop, loop))
+ {
+ check_reduction = false;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+ if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
+ check_reduction = true;
+ }
+ }
+
+ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
code = orig_code = gimple_assign_rhs_code (def_stmt);
/* We can handle "res -= x[i]", which is non-associative by
@@ -2887,27 +2910,8 @@ vect_is_simple_reduction (loop_vec_info
if (code == COND_EXPR)
{
- if (check_reduction)
+ if (! nested_in_vect_loop)
*v_reduc_type = COND_REDUCTION;
- }
- else if (!commutative_tree_code (code) || !associative_tree_code (code))
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: not commutative/associative: ");
- return NULL;
- }
-
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
- {
- if (code != COND_EXPR)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: not binary operation: ");
-
- return NULL;
- }
op3 = gimple_assign_rhs1 (def_stmt);
if (COMPARISON_CLASS_P (op3))
@@ -2918,30 +2922,35 @@ vect_is_simple_reduction (loop_vec_info
op1 = gimple_assign_rhs2 (def_stmt);
op2 = gimple_assign_rhs3 (def_stmt);
-
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: uses not ssa_names: ");
-
- return NULL;
- }
}
- else
+ else if (!commutative_tree_code (code) || !associative_tree_code (code))
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: not commutative/associative: ");
+ return NULL;
+ }
+ else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
{
op1 = gimple_assign_rhs1 (def_stmt);
op2 = gimple_assign_rhs2 (def_stmt);
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: not handled operation: ");
+ return NULL;
+ }
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: uses not ssa_names: ");
+ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: both uses not ssa_names: ");
- return NULL;
- }
- }
+ return NULL;
+ }
type = TREE_TYPE (gimple_assign_lhs (def_stmt));
if ((TREE_CODE (op1) == SSA_NAME
@@ -3091,7 +3100,7 @@ vect_is_simple_reduction (loop_vec_info
== vect_internal_def
&& !is_loop_header_bb_p (gimple_bb (def2)))))))
{
- if (check_reduction && orig_code != MINUS_EXPR)
+ if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
{
/* Check if we can swap operands (just for simplicity - so that
the rest of the code can assume that the reduction variable
@@ -3145,7 +3154,8 @@ vect_is_simple_reduction (loop_vec_info
}
/* Try to find SLP reduction chain. */
- if (check_reduction && code != COND_EXPR
+ if (! nested_in_vect_loop
+ && code != COND_EXPR
&& vect_is_slp_reduction (loop_info, phi, def_stmt))
{
if (dump_enabled_p ())
Index: gcc/testsuite/gcc.dg/vect/pr66623.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr66623.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+#define OP *
+#define INIT 1.0
+
+float __attribute__((noinline,noclone))
+foo (float *__restrict__ i)
+{
+ float l = INIT;
+ int a;
+ int b;
+
+ for (a = 0; a < 4; a++)
+ for (b = 0; b < 4; b++)
+ l = l OP i[b];
+
+ return l;
+}
+
+float __attribute__((noinline,noclone))
+foo_ref (float *__restrict__ i)
+{
+ float l = INIT;
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ return l;
+}
+
+union u
+{
+ float f;
+ unsigned int u;
+};
+
+int
+main (void)
+{
+ union u res, res2;
+ float a[4];
+
+ if (sizeof (float) != sizeof (unsigned int))
+ return 0;
+
+ check_vect ();
+
+ a[0] = 0.01;
+ a[1] = 0.01;
+ a[2] = 0.01;
+ a[3] = 1.0;
+
+ res.f = foo_ref (a);
+
+ res2.f = foo (a);
+
+ if (res.u != res2.u)
+ abort ();
+
+ return 0;
+}
+
+/* need -ffast-math to vectorize this loop. */
+/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */