This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix wrong-code with permuted strided group loads
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 11 Jun 2015 15:55:56 +0200 (CEST)
- Subject: [PATCH] Fix wrong-code with permuted strided group loads
- Authentication-results: sourceware.org; auth=none
Just noticed this when playing with some testcases.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
2015-06-11 Richard Biener <rguenther@suse.de>
* tree-vect-stmts.c (vectorizable_load): Properly start loads
with the first element if this is a grouped load.
* gcc.dg/vect/slp-perm-11.c: New testcase.
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 224324)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -6247,13 +6247,19 @@ vectorizable_load (gimple stmt, gimple_s
gcc_assert (!nested_in_vect_loop);
+ if (grouped_load)
+ first_dr = STMT_VINFO_DATA_REF
+ (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+ else
+ first_dr = dr;
+
stride_base
= fold_build_pointer_plus
- (unshare_expr (DR_BASE_ADDRESS (dr)),
+ (DR_BASE_ADDRESS (first_dr),
size_binop (PLUS_EXPR,
- convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
- convert_to_ptrofftype (DR_INIT (dr))));
- stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+ convert_to_ptrofftype (DR_OFFSET (first_dr)),
+ convert_to_ptrofftype (DR_INIT (first_dr))));
+ stride_step = fold_convert (sizetype, DR_STEP (first_dr));
/* For a load with loop-invariant (but other than power-of-2)
stride (i.e. not a grouped access) like so:
@@ -6271,25 +6277,26 @@ vectorizable_load (gimple stmt, gimple_s
vectemp = {tmp1, tmp2, ...}
*/
- ivstep = stride_step;
- ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
- build_int_cst (TREE_TYPE (ivstep), vf));
+ /* ivstep is not set yet here, so take the type from stride_step. */
+ ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
+ build_int_cst (TREE_TYPE (stride_step), vf));
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
- create_iv (stride_base, ivstep, NULL,
+ create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
loop, &incr_gsi, insert_after,
&offvar, NULL);
incr = gsi_stmt (incr_gsi);
set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
- stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+ stride_step = force_gimple_operand (unshare_expr (stride_step),
+ &stmts, true, NULL_TREE);
if (stmts)
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
prev_stmt_info = NULL;
running_off = offvar;
- alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+ alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
int nloads = nunits;
tree ltype = TREE_TYPE (vectype);
auto_vec<tree> dr_chain;
Index: gcc/testsuite/gcc.dg/vect/slp-perm-11.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-perm-11.c (revision 0)
+++ gcc/testsuite/gcc.dg/vect/slp-perm-11.c (working copy)
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int a[64];
+int b[128];
+
+/* Load pairs of adjacent elements from B at runtime stride S and store
+ them to consecutive slots of A with the two elements swapped. */
+void __attribute__((noinline, noclone))
+foo (int s)
+{
+ int i;
+ for (i = 0; i < 32; ++i)
+ {
+ a[2*i] = b[i*s+1];
+ a[2*i+1] = b[i*s];
+ }
+}
+
+int main ()
+{
+ int i;
+ check_vect ();
+ for (i = 0; i < 128; ++i)
+ {
+ b[i] = i;
+ __asm__ volatile ("");
+ }
+ foo (4);
+ /* With b[i] == i and s == 4, a[i] must equal 4*(i/2) plus the
+ inverted low bit of i (the pair is stored swapped). */
+ for (i = 0; i < 64; ++i)
+ if (a[i] != 4*(i/2) + ((i & 1) ^ 1))
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */