This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix PR66623
On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
> On Thu, 8 Jun 2017, Richard Biener wrote:
>
>>
>> The following fixes unsafe vectorization of reductions in outer loop
>> vectorization. It combines this fix with a bit of cleanup in the
>> affected function.
>>
>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Re-testing the following variant -- multiple loop-closed PHI nodes
> for the same SSA name can occur.
>
> Richard.
>
> 2017-06-08 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/66623
> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
> refactor check_reduction into two parts, properly computing
> whether we have to check reduction validity for outer loop
> vectorization.
>
> * gcc.dg/vect/pr66623.c: New testcase.
>
Hi Richard,
The new testcase fails on armeb* targets:
FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
FAIL: gcc.dg/vect/pr66623.c execution test
Christophe
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c (revision 249007)
> +++ gcc/tree-vect-loop.c (working copy)
> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
> {
> struct loop *loop = (gimple_bb (phi))->loop_father;
> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> - edge latch_e = loop_latch_edge (loop);
> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
> enum tree_code orig_code, code;
> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
> *double_reduc = false;
> *v_reduc_type = TREE_CODE_REDUCTION;
>
> - /* Check validity of the reduction only for the innermost loop. */
> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
> - gcc_assert ((check_reduction && loop == vect_loop)
> - || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
> -
> name = PHI_RESULT (phi);
> /* ??? If there are no uses of the PHI result the inner loop reduction
> won't be detected as possibly double-reduction by vectorizable_reduction
> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "reduction used in loop.\n");
> + "reduction value used in loop.\n");
> return NULL;
> }
>
> phi_use_stmt = use_stmt;
> }
>
> + edge latch_e = loop_latch_edge (loop);
> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> if (TREE_CODE (loop_arg) != SSA_NAME)
> {
> if (dump_enabled_p ())
> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
> }
>
> def_stmt = SSA_NAME_DEF_STMT (loop_arg);
> - if (!def_stmt)
> + if (gimple_nop_p (def_stmt))
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "reduction: no def_stmt.\n");
> + "reduction: no def_stmt\n");
> return NULL;
> }
>
> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
> {
> if (dump_enabled_p ())
> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
> + {
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "reduction: unhandled reduction operation: ");
> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
> + }
> return NULL;
> }
>
> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
> }
>
> nloop_uses = 0;
> + auto_vec<gphi *, 3> lcphis;
> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> {
> gimple *use_stmt = USE_STMT (use_p);
> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
> continue;
> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
> nloop_uses++;
> + else
> + /* We can have more than one loop-closed PHI. */
> + lcphis.safe_push (as_a <gphi *> (use_stmt));
> if (nloop_uses > 1)
> {
> if (dump_enabled_p ())
> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
> return NULL;
> }
>
> + /* If we are vectorizing an inner reduction we are executing that
> + in the original order only in case we are not dealing with a
> + double reduction. */
> + bool check_reduction = true;
> + if (flow_loop_nested_p (vect_loop, loop))
> + {
> + gphi *lcphi;
> + unsigned i;
> + check_reduction = false;
> + FOR_EACH_VEC_ELT (lcphis, i, lcphi)
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
> + {
> + gimple *use_stmt = USE_STMT (use_p);
> + if (is_gimple_debug (use_stmt))
> + continue;
> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
> + check_reduction = true;
> + }
> + }
> +
> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
> code = orig_code = gimple_assign_rhs_code (def_stmt);
>
> /* We can handle "res -= x[i]", which is non-associative by
> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>
> if (code == COND_EXPR)
> {
> - if (check_reduction)
> + if (! nested_in_vect_loop)
> *v_reduc_type = COND_REDUCTION;
> - }
> - else if (!commutative_tree_code (code) || !associative_tree_code (code))
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: not commutative/associative: ");
> - return NULL;
> - }
> -
> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> - {
> - if (code != COND_EXPR)
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: not binary operation: ");
> -
> - return NULL;
> - }
>
> op3 = gimple_assign_rhs1 (def_stmt);
> if (COMPARISON_CLASS_P (op3))
> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>
> op1 = gimple_assign_rhs2 (def_stmt);
> op2 = gimple_assign_rhs3 (def_stmt);
> -
> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: uses not ssa_names: ");
> -
> - return NULL;
> - }
> }
> - else
> + else if (!commutative_tree_code (code) || !associative_tree_code (code))
> + {
> + if (dump_enabled_p ())
> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> + "reduction: not commutative/associative: ");
> + return NULL;
> + }
> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
> {
> op1 = gimple_assign_rhs1 (def_stmt);
> op2 = gimple_assign_rhs2 (def_stmt);
> + }
> + else
> + {
> + if (dump_enabled_p ())
> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> + "reduction: not handled operation: ");
> + return NULL;
> + }
>
> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: uses not ssa_names: ");
> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> + {
> + if (dump_enabled_p ())
> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> + "reduction: both uses not ssa_names: ");
>
> - return NULL;
> - }
> - }
> + return NULL;
> + }
>
> type = TREE_TYPE (gimple_assign_lhs (def_stmt));
> if ((TREE_CODE (op1) == SSA_NAME
> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
> == vect_internal_def
> && !is_loop_header_bb_p (gimple_bb (def2)))))))
> {
> - if (check_reduction && orig_code != MINUS_EXPR)
> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> {
> /* Check if we can swap operands (just for simplicity - so that
> the rest of the code can assume that the reduction variable
> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
> }
>
> /* Try to find SLP reduction chain. */
> - if (check_reduction && code != COND_EXPR
> + if (! nested_in_vect_loop
> + && code != COND_EXPR
> && vect_is_slp_reduction (loop_info, phi, def_stmt))
> {
> if (dump_enabled_p ())
> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
> @@ -0,0 +1,86 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +
> +#define OP *
> +#define INIT 1.0
> +
> +float __attribute__((noinline,noclone))
> +foo (float *__restrict__ i)
> +{
> + float l = INIT;
> + int a;
> + int b;
> +
> + for (a = 0; a < 4; a++)
> + for (b = 0; b < 4; b++)
> + l = l OP i[b];
> +
> + return l;
> +}
> +
> +float __attribute__((noinline,noclone))
> +foo_ref (float *__restrict__ i)
> +{
> + float l = INIT;
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + return l;
> +}
> +
> +union u
> +{
> + float f;
> + unsigned int u;
> +};
> +
> +int
> +main (void)
> +{
> + union u res, res2;
> + float a[4];
> +
> + if (sizeof (float) != sizeof (unsigned int))
> + return 0;
> +
> + check_vect ();
> +
> + a[0] = 0.01;
> + a[1] = 0.01;
> + a[2] = 0.01;
> + a[3] = 1.0;
> +
> + res.f = foo_ref (a);
> +
> + res2.f = foo (a);
> +
> + if (res.u != res2.u)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* need -ffast-math to vectorize this loop. */
> +/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */