This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Handle SLP group loads with trailing gaps
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 27 Mar 2013 14:12:16 +0100 (CET)
- Subject: [PATCH] Handle SLP group loads with trailing gaps
This makes the vectorizer handle loads of SLP groups with
"trailing gaps". That is, a (for now known) gap between
the groups between loop iterations as in the testcase
+ double self[1024];
+ double a[1024][1024];
+ double b[1024];
+
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+ int i, j;
+ for (i = 0; i < 1024; i+=6)
+ for (j = 0; j < 1024; j+=6)
+ {
+ self[i] = self[i] + a[i][j]*b[j];
+ self[i+1] = self[i+1] + a[i][j+1]*b[j+1];
+ }
+ }
reduced to this issue from the complex multiplication vectorization
issue in PR37021.
A next step will eventually be to allow an unknown gap between
the groups between iterations (including possible overlap).
Bootstrapped and tested on x86_64-unknown-linux-gnu.
Richard.
2013-03-27 Richard Biener <rguenther@suse.de>
PR tree-optimization/37021
* tree-vect-slp.c (vect_build_slp_tree): When not unrolling
do not restrict gaps between groups.
* tree-vect-stmts.c (vectorizable_load): Properly account for
a gap between groups.
* gcc.dg/vect/fast-math-slp-38.c: New testcase.
* gcc.dg/vect/O3-pr36098.c: Un-XFAIL.
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig 2013-03-26 13:09:18.000000000 +0100
--- gcc/tree-vect-slp.c 2013-03-26 14:27:14.135697847 +0100
*************** vect_build_slp_tree (loop_vec_info loop_
*** 740,750 ****
else
{
/* Load. */
! /* FORNOW: Check that there is no gap between the loads. */
! if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
! && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
! || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
! && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
{
if (dump_enabled_p ())
{
--- 750,765 ----
else
{
/* Load. */
! /* FORNOW: Check that there is no gap between the loads
! and no gap between the groups when we need to load
! multiple groups at once.
! ??? We should enhance this to only disallow gaps
! inside vectors. */
! if ((ncopies > 1
! && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
! && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
! || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
! && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
{
if (dump_enabled_p ())
{
*************** vect_build_slp_tree (loop_vec_info loop_
*** 762,768 ****
/* Check that the size of interleaved loads group is not
greater than the SLP group size. */
if (loop_vinfo
! && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
{
if (dump_enabled_p ())
{
--- 777,786 ----
/* Check that the size of interleaved loads group is not
greater than the SLP group size. */
if (loop_vinfo
! && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
! && ((GROUP_SIZE (vinfo_for_stmt (stmt))
! - GROUP_GAP (vinfo_for_stmt (stmt)))
! > ncopies * group_size))
{
if (dump_enabled_p ())
{
Index: gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c 2013-03-26 13:09:20.860002059 +0100
***************
*** 0 ****
--- 1,22 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_double } */
+
+ double self[1024];
+ double a[1024][1024];
+ double b[1024];
+
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+ int i, j;
+ for (i = 0; i < 1024; i+=6)
+ for (j = 0; j < 1024; j+=6)
+ {
+ self[i] = self[i] + a[i][j]*b[j];
+ self[i+1] = self[i+1] + a[i][j+1]*b[j+1];
+ }
+ }
+
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/O3-pr36098.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/O3-pr36098.c.orig 2013-03-26 13:02:00.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/O3-pr36098.c 2013-03-26 13:09:20.860002059 +0100
*************** void foo (int ncons, t_sortblock *sb, in
*** 17,22 ****
iatom[m]=sb[i].iatom[m];
}
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
-
--- 17,21 ----
iatom[m]=sb[i].iatom[m];
}
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig 2013-03-26 13:02:00.000000000 +0100
--- gcc/tree-vect-stmts.c 2013-03-26 13:09:20.861002070 +0100
*************** vectorizable_load (gimple stmt, gimple_s
*** 4316,4322 ****
gimple ptr_incr;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree realignment_token = NULL_TREE;
--- 4316,4322 ----
gimple ptr_incr;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size, group_gap;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree realignment_token = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 4766,4780 ****
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
slp_perm = true;
}
else
! vec_num = group_size;
}
else
{
first_stmt = stmt;
first_dr = dr;
group_size = vec_num = 1;
}
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
--- 4766,4785 ----
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
slp_perm = true;
+ group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
}
else
! {
! vec_num = group_size;
! group_gap = 0;
! }
}
else
{
first_stmt = stmt;
first_dr = dr;
group_size = vec_num = 1;
+ group_gap = 0;
}
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
*************** vectorizable_load (gimple stmt, gimple_s
*** 5134,5139 ****
--- 5139,5153 ----
if (slp && !slp_perm)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
+ /* Bump the vector pointer to account for a gap. */
+ if (slp && group_gap != 0)
+ {
+ tree bump = size_binop (MULT_EXPR,
+ TYPE_SIZE_UNIT (elem_type),
+ size_int (group_gap));
+ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ stmt, bump);
+ }
}
if (slp && !slp_perm)