[PATCH] Fix PR92324
Christophe Lyon
christophe.lyon@linaro.org
Mon Nov 11 11:31:00 GMT 2019
On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote:
>
>
> I've been sitting on this for a few days since I'm not 100% happy
> with how the code looks like. There's possibly still holes in it
> (chains with mixed signed/unsigned adds for example might pick
> up signed adds in the epilogue), but the wrong-code cases should
> work fine now. I'm probably going to followup with some
> mass renaming of variable/parameter names to make it more clear
> which stmt / type we are actually looking at ...
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
>
> Richard.
>
> 2019-11-08 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/
> * tree-vect-loop.c (vect_create_epilog_for_reduction): Use
> STMT_VINFO_REDUC_VECTYPE for all computations, inserting
> sign-conversions as necessary.
> (vectorizable_reduction): Reject conversions in the chain
> that are not sign-conversions, base analysis on a non-converting
> stmt and its operation sign. Set STMT_VINFO_REDUC_VECTYPE.
> * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything
> for debug stmts.
> * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New.
> (STMT_VINFO_REDUC_VECTYPE): Likewise.
>
> * gcc.dg/vect/pr92205.c: XFAIL.
> * gcc.dg/vect/pr92324-1.c: New testcase.
> * gcc.dg/vect/pr92324-2.c: Likewise.
>
Hi Richard,
This new testcase (pr92324-2.c) fails on arm/aarch64:
FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test
FAIL: gcc.dg/vect/pr92324-2.c execution test
Christophe
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c (revision 277922)
> +++ gcc/tree-vect-loop.c (working copy)
> @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v
> gimple *new_phi = NULL, *phi;
> stmt_vec_info phi_info;
> gimple_stmt_iterator exit_gsi;
> - tree vec_dest;
> tree new_temp = NULL_TREE, new_name, new_scalar_dest;
> gimple *epilog_stmt = NULL;
> gimple *exit_phi;
> @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v
> }
> gcc_assert (!nested_in_vect_loop || double_reduc);
>
> - vectype = STMT_VINFO_VECTYPE (stmt_info);
> + vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
> gcc_assert (vectype);
> mode = TYPE_MODE (vectype);
>
> @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v
> one vector. */
> if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
> {
> + gimple_seq stmts = NULL;
> tree first_vect = PHI_RESULT (new_phis[0]);
> - gassign *new_vec_stmt = NULL;
> - vec_dest = vect_create_destination_var (scalar_dest, vectype);
> + first_vect = gimple_convert (&stmts, vectype, first_vect);
> for (k = 1; k < new_phis.length (); k++)
> {
> gimple *next_phi = new_phis[k];
> tree second_vect = PHI_RESULT (next_phi);
> - tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> - new_vec_stmt = gimple_build_assign (tem, code,
> - first_vect, second_vect);
> - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> - first_vect = tem;
> + second_vect = gimple_convert (&stmts, vectype, second_vect);
> + first_vect = gimple_build (&stmts, code, vectype,
> + first_vect, second_vect);
> }
> + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
> new_phi_result = first_vect;
> - if (new_vec_stmt)
> - {
> - new_phis.truncate (0);
> - new_phis.safe_push (new_vec_stmt);
> - }
> + new_phis.truncate (0);
> + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> }
> /* Likewise if we couldn't use a single defuse cycle. */
> else if (ncopies > 1)
> {
> gcc_assert (new_phis.length () == 1);
> + gimple_seq stmts = NULL;
> tree first_vect = PHI_RESULT (new_phis[0]);
> - gassign *new_vec_stmt = NULL;
> - vec_dest = vect_create_destination_var (scalar_dest, vectype);
> + first_vect = gimple_convert (&stmts, vectype, first_vect);
> stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
> for (int k = 1; k < ncopies; ++k)
> {
> next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
> tree second_vect = PHI_RESULT (next_phi_info->stmt);
> - tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> - new_vec_stmt = gimple_build_assign (tem, code,
> - first_vect, second_vect);
> - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> - first_vect = tem;
> + second_vect = gimple_convert (&stmts, vectype, second_vect);
> + first_vect = gimple_build (&stmts, code, vectype,
> + first_vect, second_vect);
> }
> + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> new_phi_result = first_vect;
> new_phis.truncate (0);
> - new_phis.safe_push (new_vec_stmt);
> + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> }
> else
> new_phi_result = PHI_RESULT (new_phis[0]);
> @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v
> in a vector mode of smaller size and first reduce upper/lower
> halves against each other. */
> enum machine_mode mode1 = mode;
> + tree stype = TREE_TYPE (vectype);
> unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
> unsigned sz1 = sz;
> if (!slp_reduc
> && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
> sz1 = GET_MODE_SIZE (mode1).to_constant ();
>
> - tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
> + tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1);
> reduce_with_shift = have_whole_vector_shift (mode1);
> if (!VECTOR_MODE_P (mode1))
> reduce_with_shift = false;
> @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v
> {
> gcc_assert (!slp_reduc);
> sz /= 2;
> - vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
> + vectype1 = get_vectype_for_scalar_type_and_size (stype, sz);
>
> /* The target has to make sure we support lowpart/highpart
> extraction, either via direct vector extract or through
> @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v
> dump_printf_loc (MSG_NOTE, vect_location,
> "Reduce using vector shifts\n");
>
> - vec_dest = vect_create_destination_var (scalar_dest, vectype1);
> + gimple_seq stmts = NULL;
> + new_temp = gimple_convert (&stmts, vectype1, new_temp);
> for (elt_offset = nelements / 2;
> elt_offset >= 1;
> elt_offset /= 2)
> @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v
> calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
> indices.new_vector (sel, 2, nelements);
> tree mask = vect_gen_perm_mask_any (vectype1, indices);
> - epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
> - new_temp, zero_vec, mask);
> - new_name = make_ssa_name (vec_dest, epilog_stmt);
> - gimple_assign_set_lhs (epilog_stmt, new_name);
> - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> -
> - epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
> - new_temp);
> - new_temp = make_ssa_name (vec_dest, epilog_stmt);
> - gimple_assign_set_lhs (epilog_stmt, new_temp);
> - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> + new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
> + new_temp, zero_vec, mask);
> + new_temp = gimple_build (&stmts, code,
> + vectype1, new_name, new_temp);
> }
> + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
> /* 2.4 Extract the final scalar result. Create:
> s_out3 = extract_field <v_out2, bitpos> */
> @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st
> stmt_vector_for_cost *cost_vec)
> {
> tree scalar_dest;
> - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> tree vectype_in = NULL_TREE;
> loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st
> phi_info = loop_vinfo->lookup_stmt (use_stmt);
> stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> }
> - /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> - element. */
> - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> + }
> +
> + /* PHIs should not participate in patterns. */
> + gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> + gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> +
> + /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> + and compute the reduction chain length. */
> + tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> + loop_latch_edge (loop));
> + unsigned reduc_chain_length = 0;
> + bool only_slp_reduc_chain = true;
> + stmt_info = NULL;
> + while (reduc_def != PHI_RESULT (reduc_def_phi))
> + {
> + stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> + stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> + if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> {
> - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "reduction chain broken by patterns.\n");
> + return false;
> + }
> + if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> + only_slp_reduc_chain = false;
> + /* ??? For epilogue generation live members of the chain need
> + to point back to the PHI via their original stmt for
> + info_for_reduction to work. */
> + if (STMT_VINFO_LIVE_P (vdef))
> + STMT_VINFO_REDUC_DEF (def) = phi_info;
> + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
> + {
> + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
> + TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "conversion in the reduction chain.\n");
> + return false;
> + }
> }
> + else if (!stmt_info)
> + /* First non-conversion stmt. */
> + stmt_info = vdef;
> + reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> + reduc_chain_length++;
> }
> /* PHIs should not participate in patterns. */
> gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st
> nested_cycle = true;
> }
>
> + /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> + element. */
> + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> + {
> + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> + stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> + }
> if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> gcc_assert (slp_node
> && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
> @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st
> inside the loop body. The last operand is the reduction variable,
> which is defined by the loop-header-phi. */
>
> + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> + STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
> gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> enum tree_code code = gimple_assign_rhs_code (stmt);
> bool lane_reduc_code_p
> @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st
> if (!type_has_mode_precision_p (scalar_type))
> return false;
>
> - /* All uses but the last are expected to be defined in the loop.
> - The last use is the reduction variable. In case of nested cycle this
> - assumption is not true: we use reduc_index to record the index of the
> - reduction variable. */
> - gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> -
> - /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> - and compute the reduction chain length. */
> - tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> - loop_latch_edge (loop));
> - unsigned reduc_chain_length = 0;
> - bool only_slp_reduc_chain = true;
> - while (reduc_def != PHI_RESULT (reduc_def_phi))
> - {
> - stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> - stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> - if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "reduction chain broken by patterns.\n");
> - return false;
> - }
> - if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> - only_slp_reduc_chain = false;
> - /* ??? For epilogue generation live members of the chain need
> - to point back to the PHI via their original stmt for
> - info_for_reduction to work. */
> - if (STMT_VINFO_LIVE_P (vdef))
> - STMT_VINFO_REDUC_DEF (def) = phi_info;
> - reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> - reduc_chain_length++;
> - }
> -
> /* For lane-reducing ops we're reducing the number of reduction PHIs
> which means the only use of that may be in the lane-reducing operation. */
> if (lane_reduc_code_p
> @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st
> return false;
> }
>
> + /* All uses but the last are expected to be defined in the loop.
> + The last use is the reduction variable. In case of nested cycle this
> + assumption is not true: we use reduc_index to record the index of the
> + reduction variable. */
> reduc_def = PHI_RESULT (reduc_def_phi);
> for (i = 0; i < op_type; i++)
> {
> @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st
> }
> }
> if (!vectype_in)
> - vectype_in = vectype_out;
> + vectype_in = STMT_VINFO_VECTYPE (phi_info);
> STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
>
> enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
> @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st
> }
> }
>
> - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> - /* We changed STMT to be the first stmt in reduction chain, hence we
> - check that in this case the first element in the chain is STMT. */
> - gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info))
> - == vect_orig_stmt (stmt_info));
> -
> if (STMT_VINFO_LIVE_P (phi_info))
> return false;
>
> @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st
> && code != SAD_EXPR
> && reduction_type != FOLD_LEFT_REDUCTION)
> {
> - STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
> - STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
> + stmt_vec_info tem
> + = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
> + {
> + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
> + tem = REDUC_GROUP_FIRST_ELEMENT (tem);
> + }
> + STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
> + STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
> }
> else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
> {
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c (revision 277922)
> +++ gcc/tree-vect-stmts.c (working copy)
> @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt
> basic_block bb = gimple_bb (USE_STMT (use_p));
> if (!flow_bb_inside_loop_p (loop, bb))
> {
> + if (is_gimple_debug (USE_STMT (use_p)))
> + continue;
> +
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> "vec_stmt_relevant_p: used out of loop.\n");
>
> - if (is_gimple_debug (USE_STMT (use_p)))
> - continue;
> -
> /* We expect all such uses to be in the loop exit phis
> (because of loop closed form) */
> gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h (revision 277922)
> +++ gcc/tree-vectorizer.h (working copy)
> @@ -1050,6 +1050,9 @@ public:
> /* The vector input type relevant for reduction vectorization. */
> tree reduc_vectype_in;
>
> + /* The vector type for performing the actual reduction. */
> + tree reduc_vectype;
> +
> /* Whether we force a single cycle PHI during reduction vectorization. */
> bool force_single_cycle;
>
> @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
> #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code
> #define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn
> #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
> +#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype
> #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in
> #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p
>
> Index: gcc/testsuite/gcc.dg/vect/pr92205.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922)
> +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy)
> @@ -10,4 +10,4 @@ int b(int n, unsigned char *a)
> return d;
> }
>
> -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr92324-1.c (nonexistent)
> +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c (working copy)
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +
> +unsigned a, b;
> +int c, d;
> +unsigned e(int f) {
> + if (a > f)
> + return a;
> + return f;
> +}
> +void g() {
> + for (; c; c++)
> + d = e(d);
> + b = d;
> +}
> Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr92324-2.c (nonexistent)
> +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c (working copy)
> @@ -0,0 +1,21 @@
> +#include "tree-vect.h"
> +
> +unsigned b[1024];
> +
> +int __attribute__((noipa))
> +foo (int n)
> +{
> + int res = 0;
> + for (int i = 0; i < n; ++i)
> + res = res > b[i] ? res : b[i];
> + return res;
> +}
> +
> +int main ()
> +{
> + check_vect ();
> + b[15] = (unsigned)__INT_MAX__ + 1;
> + if (foo (16) != -__INT_MAX__ - 1)
> + __builtin_abort ();
> + return 0;
> +}
More information about the Gcc-patches
mailing list