This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Re: Fix PR66251 (wrong code with strided group stores)
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Michael Matz <matz at suse dot de>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 22 May 2015 18:31:56 +0200
- Subject: Re: Fix PR66251 (wrong code with strided group stores)
- Authentication-results: sourceware.org; auth=none
- References: <alpine dot LSU dot 2 dot 20 dot 1505221709220 dot 27315 at wotan dot suse dot de>
On May 22, 2015 5:13:16 PM GMT+02:00, Michael Matz <matz@suse.de> wrote:
>Hi,
>
>between Richi's improvements to grouped accesses and mine to strided
>stores there is an interaction that now leads to ICEs and wrong code
>after both are in, for instance PR66251.  The added testcase reflects
>this situation and uses both narrowing and widening (narrowing would
>still ICE, widening currently produces only wrong code).  The patch
>fixes the testcase(s).
>
>It's currently regstrapping on x86_64-linux, okay for trunk if that
>passes?
OK.
Thanks,
Richard.
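
For readers arriving from the PR: the affected shape is a strided group
store combined with a type conversion.  A distilled sketch of the
widening case (essentially test1 from the attached pr66251.c; the
function name here is illustrative):

    /* Widening int -> double store of a size-2 group (__real__ and
       __imag__) at a runtime stride -- the shape that was
       mis-vectorized before this fix.  */
    void
    widen_store (_Complex double *a, _Complex int *b, int stride, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i * stride] = b[i * stride];
    }

A schematic of the code the new strided-group path emits follows after
the patch.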
>
>Ciao,
>Michael.
>
> PR middle-end/66251
>
> * tree-vect-stmts.c (vect_model_store_cost): Handle strided group
> stores.
> (vect_create_vectorized_demotion_stmts): Always set
> STMT_VINFO_VEC_STMT, also with SLP.
> (vectorizable_store): Handle strided group stores.
>
>testsuite/:
> PR middle-end/66251
> * gcc.dg/vect/pr66251.c: New test.
>
>Index: tree-vect-stmts.c
>===================================================================
>--- tree-vect-stmts.c (revision 223577)
>+++ tree-vect-stmts.c (working copy)
>@@ -1000,7 +1000,8 @@ vect_model_store_cost (stmt_vec_info stm
> equivalent to the cost of GROUP_SIZE separate stores. If a grouped
> access is instead being provided by a permute-and-store operation,
> include the cost of the permutes. */
>- if (!store_lanes_p && group_size > 1)
>+ if (!store_lanes_p && group_size > 1
>+ && !STMT_VINFO_STRIDED_P (stmt_info))
> {
> /* Uses a high and low interleave or shuffle operations for each
> needed permute. */
>@@ -1014,21 +1015,24 @@ vect_model_store_cost (stmt_vec_info stm
> group_size);
> }
>
>+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> /* Costs of the stores. */
>- if (STMT_VINFO_STRIDED_P (stmt_info))
>+ if (STMT_VINFO_STRIDED_P (stmt_info)
>+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
> {
> /* N scalar stores plus extracting the elements. */
>- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> inside_cost += record_stmt_cost (body_cost_vec,
> ncopies * TYPE_VECTOR_SUBPARTS (vectype),
> scalar_store, stmt_info, 0, vect_body);
>- inside_cost += record_stmt_cost (body_cost_vec,
>- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>- vec_to_scalar, stmt_info, 0, vect_body);
> }
> else
> vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
>
>+ if (STMT_VINFO_STRIDED_P (stmt_info))
>+ inside_cost += record_stmt_cost (body_cost_vec,
>+ ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>+ vec_to_scalar, stmt_info, 0, vect_body);
>+
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> "vect_model_store_cost: inside_cost = %d, "
>@@ -3377,15 +3381,13 @@ vect_create_vectorized_demotion_stmts (v
> (or in STMT_VINFO_RELATED_STMT chain). */
> if (slp_node)
> SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
>+
>+ if (!*prev_stmt_info)
>+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
> else
>- {
>- if (!*prev_stmt_info)
>- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
>- else
>- STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
>+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
>
>- *prev_stmt_info = vinfo_for_stmt (new_stmt);
>- }
>+ *prev_stmt_info = vinfo_for_stmt (new_stmt);
> }
> }
>
>@@ -5155,15 +5157,27 @@ vectorizable_store (gimple stmt, gimple_
> {
> grouped_store = true;
> first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>- if (!slp && !PURE_SLP_STMT (stmt_info))
>+ group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
>+ if (!slp
>+ && !PURE_SLP_STMT (stmt_info)
>+ && !STMT_VINFO_STRIDED_P (stmt_info))
> {
>- group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> if (vect_store_lanes_supported (vectype, group_size))
> store_lanes_p = true;
> else if (!vect_grouped_store_supported (vectype, group_size))
> return false;
> }
>
>+ if (STMT_VINFO_STRIDED_P (stmt_info)
>+ && (slp || PURE_SLP_STMT (stmt_info))
>+ && (group_size > nunits
>+ || nunits % group_size != 0))
>+ {
>+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>+ "unhandled strided group store\n");
>+ return false;
>+ }
>+
> if (first_stmt == stmt)
> {
> /* STMT is the leader of the group. Check the operands of all the
>@@ -5286,10 +5300,23 @@ vectorizable_store (gimple stmt, gimple_
> ...
> */
>
>+ unsigned nstores = nunits;
>+ tree ltype = elem_type;
>+ if (slp)
>+ {
>+ nstores = nunits / group_size;
>+ if (group_size < nunits)
>+ ltype = build_vector_type (elem_type, group_size);
>+ else
>+ ltype = vectype;
>+ ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
>+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
>+ }
>+
> ivstep = stride_step;
> ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
> build_int_cst (TREE_TYPE (ivstep),
>- ncopies * nunits));
>+ ncopies * nstores));
>
> standard_iv_increment_position (loop, &incr_gsi, &insert_after);
>
>@@ -5315,22 +5342,22 @@ vectorizable_store (gimple stmt, gimple_
> else
> vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
>
>- for (i = 0; i < nunits; i++)
>+ for (i = 0; i < nstores; i++)
> {
> tree newref, newoff;
> gimple incr, assign;
>- tree size = TYPE_SIZE (elem_type);
>+ tree size = TYPE_SIZE (ltype);
> /* Extract the i'th component. */
> tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
> size);
>- tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
>+ tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
> size, pos);
>
> elem = force_gimple_operand_gsi (gsi, elem, true,
> NULL_TREE, true,
> GSI_SAME_STMT);
>
>- newref = build2 (MEM_REF, TREE_TYPE (vectype),
>+ newref = build2 (MEM_REF, ltype,
> running_off, alias_off);
>
> /* And store it to *running_off. */
>Index: testsuite/gcc.dg/vect/pr66251.c
>===================================================================
>--- testsuite/gcc.dg/vect/pr66251.c (revision 0)
>+++ testsuite/gcc.dg/vect/pr66251.c (working copy)
>@@ -0,0 +1,79 @@
>+/* { dg-require-effective-target vect_int } */
>+/* { dg-require-effective-target vect_double } */
>+/* { dg-require-effective-target vect_floatint_cvt } */
>+/* { dg-require-effective-target vect_intfloat_cvt } */
>+/* { dg-require-effective-target vect_pack_trunc } */
>+/* { dg-require-effective-target vect_unpack } */
>+/* { dg-require-effective-target vect_hw_misalign } */
>+
>+#include "tree-vect.h"
>+
>+void __attribute__((noinline,noclone))
>+test1(_Complex double *a, _Complex int *b, int stride, int n)
>+{
>+ int i;
>+ for (i = 0; i < n; i++)
>+ {
>+ a[i*stride] = b[i*stride];
>+ }
>+}
>+
>+void __attribute__((noinline,noclone))
>+test2(_Complex int *a, _Complex double *b, int stride, int n)
>+{
>+ int i;
>+ for (i = 0; i < n; i++)
>+ {
>+ a[i*stride] = b[i*stride];
>+ }
>+}
>+
>+_Complex int ia[256];
>+_Complex double da[256];
>+
>+extern void abort (void);
>+
>+int main ()
>+{
>+ int i;
>+ int stride;
>+
>+ check_vect ();
>+
>+ for (stride = 1; stride < 15; stride++)
>+ {
>+ for (i = 0; i < 256; i++)
>+ {
>+ __real__ ia[i] = (i + stride) % 19;
>+ __imag__ ia[i] = (i + stride) % 23;
>+ __asm__ volatile ("");
>+ }
>+
>+ test1(da, ia, stride, 256/stride);
>+
>+ for (i = 0; i < 256/stride; i++)
>+ {
>+ if (da[i*stride] != ia[i*stride])
>+ abort ();
>+ }
>+
>+ for (i = 0; i < 256; i++)
>+ {
>+ __real__ da[i] = (i + stride + 1) % 29;
>+ __imag__ da[i] = (i + stride + 1) % 31;
>+ __asm__ volatile ("");
>+ }
>+
>+ test2(ia, da, stride, 256/stride);
>+
>+ for (i = 0; i < 256/stride; i++)
>+ {
>+ if (da[i*stride] != ia[i*stride])
>+ abort ();
>+ }
>+ }
>+ return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
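
To illustrate the new SLP strided path in vectorizable_store: instead
of one vector store per copy, the vector operand is split into
group_size-wide pieces with BIT_FIELD_REF and each piece is stored at
the next strided offset.  A plain-C rendering of the sequence emitted
for one vector copy, assuming a V4SI operand and group_size == 2 (so
nstores == 2 and ltype is a 2 x int vector); all names below are
illustrative, not from the patch:

    typedef int v2si __attribute__ ((vector_size (8)));
    typedef int v4si __attribute__ ((vector_size (16)));

    static void
    store_pieces (v4si vec_oprnd, char *running_off, long stride_step)
    {
      /* BIT_FIELD_REF <vec_oprnd, 64, 0>: the first group of two.  */
      v2si piece0 = { vec_oprnd[0], vec_oprnd[1] };
      __builtin_memcpy (running_off, &piece0, sizeof piece0);
      running_off += stride_step;
      /* BIT_FIELD_REF <vec_oprnd, 64, 64>: the second group of two.  */
      v2si piece1 = { vec_oprnd[2], vec_oprnd[3] };
      __builtin_memcpy (running_off, &piece1, sizeof piece1);
      running_off += stride_step;
    }

This also explains the ivstep change from ncopies * nunits to
ncopies * nstores: the induction variable must advance by the number of
group stores actually emitted per copy, not by the number of vector
elements.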