This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH/RFC] PR30858 - fix vectorizer ICE
Hello,
> There are two ways to fix this:
>
> 1) continue to rely on mark-relevant-stmts to detect whether a reduction is
> used in the loop by a non-reduction stmt.
> To do this we need a way to distinguish between a phi that is used by
> reduction-stmts only, and phis that are used (also) by some other stmts in
> the loop. For this purpose I added another enum value for 'relevant':
> "vect_used_directly_by_reduction", to identify defs that are used only by
> reduction-stmts (stmts that are marked as defining a reduction) [as opposed
> to the already existing enum value "vect_used_by_reduction", which
> identifies defs that feed computations that end up (only) in a reduction
> (i.e. these defs may be used by non-reduction stmts, but eventually, any
> computations/values that are affected by these defs are used to compute a
> reduction (i.e. don't get stored to memory, for example. We use that to
> identify computations that we can change the order in which they are
> computed)].
>
> 2) leave mark-relevant-stmts as is, and explicitly check elsewhere the uses
> of the stmts that are involved in the reduction cycle.
>
> I debated between the two approaches for a while.
>
> The second option is more straight-forward - we explicitly check all the
> uses of all the stmts in the reduction cycle - so nothing wrong can go
> here. The first way is also very simple, but I worry that there may be some
> more involved testcase, that I can't think of right now, that would break
> it (?).
>
> On the other hand, the second option is more expensive - with the first
> scheme we get the answer "for free" (during the bottom-up scan we do
> anyways).
>
> I attach both patches. The first one was bootstrapped on i386-linux, and
> the second one is being bootstrapped now on powerpc-linux. I still haven't
> decided between the two, and would welcome any comments.
> (by the way - the two patches together are much shorter than this email...)
unless you suspect that using the second way could lead to measurable
slowdowns (which seems quite unlikely to me), I think you should prefer
it. Someone sometime in future will be grateful to you for not forcing
him to solve the puzzles like the first way :-)
However you decide, could you please extend the comments at
enum vect_relevant so that they precisely specify the semantics
of the values? At least for vect_used_by_reduction it is not
all that clear, it seems.
Zdenek
> (See attached file: pr30858.fix1.txt) (See attached file: pr30858.fix2.txt)
>
> thanks,
> dorit
> Index: gcc/tree-vectorizer.c
> ===================================================================
> --- gcc/tree-vectorizer.c (revision 122156)
> +++ gcc/tree-vectorizer.c (working copy)
> @@ -1797,7 +1797,8 @@
> of {mult_even,mult_odd} generate the following vectors:
> vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */
>
> - if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
> + if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
> + || STMT_VINFO_RELEVANT (stmt_info) == vect_used_directly_by_reduction)
> ordered_p = false;
> else
> ordered_p = true;
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h (revision 122156)
> +++ gcc/tree-vectorizer.h (working copy)
> @@ -175,6 +175,7 @@
> /* Indicates whether/how a variable is used in the loop. */
> enum vect_relevant {
> vect_unused_in_loop = 0,
> + vect_used_directly_by_reduction,
> vect_used_by_reduction,
> vect_used_in_loop
> };
> Index: gcc/tree-vect-analyze.c
> ===================================================================
> --- gcc/tree-vect-analyze.c (revision 122156)
> +++ gcc/tree-vect-analyze.c (working copy)
> @@ -124,10 +124,11 @@
>
> /* Two cases of "relevant" phis: those that define an
> induction that is used in the loop, and those that
> - define a reduction. */
> + directly define a reduction. */
> if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
> && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
> - || (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
> + || (STMT_VINFO_RELEVANT (stmt_info) ==
> + vect_used_directly_by_reduction
> && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> {
> gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
> @@ -328,8 +329,12 @@
> return false;
> }
>
> - if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
> - && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
> + if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
> + && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
> + || (STMT_VINFO_RELEVANT (stmt_info) >
> + vect_used_directly_by_reduction
> + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> +
> {
> /* Most likely a reduction-like computation that is used
> in the loop. */
> @@ -2313,9 +2318,11 @@
> if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
> {
> gcc_assert (relevant == vect_unused_in_loop && live_p);
> - relevant = vect_used_by_reduction;
> + relevant = vect_used_directly_by_reduction;
> live_p = false;
> }
> + else if (relevant == vect_used_directly_by_reduction)
> + relevant = vect_used_by_reduction;
>
> i = 0;
> FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> Index: tree-vectorizer.c
> ===================================================================
> *** tree-vectorizer.c (revision 122176)
> --- tree-vectorizer.c (working copy)
> *************** vect_is_simple_reduction (struct loop *l
> *** 1935,1948 ****
> int op_type;
> tree operation, op1, op2;
> tree type;
>
> if (TREE_CODE (loop_arg) != SSA_NAME)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! {
> ! fprintf (vect_dump, "reduction: not ssa_name: ");
> ! print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
> ! }
> return NULL_TREE;
> }
>
> --- 1935,1969 ----
> int op_type;
> tree operation, op1, op2;
> tree type;
> + int nloop_uses;
> + tree name;
> + imm_use_iterator imm_iter;
> + use_operand_p use_p;
> +
> + name = PHI_RESULT (phi);
> + nloop_uses = 0;
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> + {
> + tree use_stmt = USE_STMT (use_p);
> + if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
> + && vinfo_for_stmt (use_stmt)
> + && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
> + nloop_uses++;
> + if (nloop_uses > 1)
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "reduction used in loop.");
> + return NULL_TREE;
> + }
> + }
>
> if (TREE_CODE (loop_arg) != SSA_NAME)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! {
> ! fprintf (vect_dump, "reduction: not ssa_name: ");
> ! print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
> ! }
> return NULL_TREE;
> }
>
> *************** vect_is_simple_reduction (struct loop *l
> *** 1950,1968 ****
> if (!def_stmt)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "reduction: no def_stmt.");
> return NULL_TREE;
> }
>
> if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! {
> ! print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
> ! }
> return NULL_TREE;
> }
>
> operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
> code = TREE_CODE (operation);
> if (!commutative_tree_code (code) || !associative_tree_code (code))
> --- 1971,2004 ----
> if (!def_stmt)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! fprintf (vect_dump, "reduction: no def_stmt.");
> return NULL_TREE;
> }
>
> if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
> {
> if (vect_print_dump_info (REPORT_DETAILS))
> ! print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
> return NULL_TREE;
> }
>
> + name = GIMPLE_STMT_OPERAND (def_stmt, 0);
> + nloop_uses = 0;
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> + {
> + tree use_stmt = USE_STMT (use_p);
> + if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
> + && vinfo_for_stmt (use_stmt)
> + && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
> + nloop_uses++;
> + if (nloop_uses > 1)
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "reduction used in loop.");
> + return NULL_TREE;
> + }
> + }
> +
> operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
> code = TREE_CODE (operation);
> if (!commutative_tree_code (code) || !associative_tree_code (code))