This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] BB vectorization with intermediate aliasing stores
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 24 Oct 2018 12:03:06 +0200 (CEST)
- Subject: [PATCH] BB vectorization with intermediate aliasing stores
The following fixes data ref analysis giving up forming group accesses
when it encounters duplicates. This mostly happens during BB vectorization
when inside a single BB two store groups are separated by a stmt that
prevents DSE (or CSE for loads) of duplicates. This is what we for
example see in PR87105 though there are more issues in that PR.
I've not found a PR that mentions this specific limitation or
that would be fixed by this patch - if you know one please let me know.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
From b6b2cd3399872818befcc95fdd0bc0a12f0bff25 Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Wed, 24 Oct 2018 11:51:16 +0200
Subject: [PATCH] bb-slp-dups
2018-10-24 Richard Biener <rguenther@suse.de>
* tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust
dump classification.
(vect_analyze_data_ref_accesses): Handle duplicate loads and
stores by splitting the affected group after the fact.
* tree-vect-slp.c (vect_build_slp_tree_2): Dump when we
fail the SLP build because of size constraints.
* gcc.dg/vect/bb-slp-39.c: New testcase.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-39.c b/gcc/testsuite/gcc.dg/vect/bb-slp-39.c
new file mode 100644
index 00000000000..255bb1095dc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-39.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double x[1024];
+
+void foo (double *p)
+{
+ x[0] = 1.;
+ x[1] = 2.;
+ *p = 7.; // aliasing store
+ x[0] = x[0] + 1;
+ x[1] = x[1] + 1;
+ *p = 8.; // aliasing store
+ x[1] = x[1] + 1;
+ x[0] = x[0] + 1;
+}
+
+/* See that we vectorize three SLP instances. */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp2" } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index a24e1853e03..9185b1bd1c0 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2472,7 +2472,7 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info)
}
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ dump_printf_loc (MSG_NOTE, vect_location,
"Two or more load stmts share the same dr.\n");
/* For load use the same data-ref load. */
@@ -2838,6 +2838,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
determining what dependencies are reversed. */
vec<data_reference_p> datarefs_copy = datarefs.copy ();
datarefs_copy.qsort (dr_group_sort_cmp);
+ hash_set<stmt_vec_info> to_fixup;
/* Build the interleaving chains. */
for (i = 0; i < datarefs_copy.length () - 1;)
@@ -2920,36 +2921,32 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
{
gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
< gimple_uid (DR_STMT (drb)));
- /* ??? For now we simply "drop" the later reference which is
- otherwise the same rather than finishing off this group.
- In the end we'd want to re-process duplicates forming
- multiple groups from the refs, likely by just collecting
- all candidates (including duplicates and split points
- below) in a vector and then process them together. */
- continue;
+ /* Simply link in duplicates and fix up the chain below. */
}
-
- /* If init_b == init_a + the size of the type * k, we have an
- interleaving, and DRA is accessed before DRB. */
- HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
- if (type_size_a == 0
- || (init_b - init_a) % type_size_a != 0)
- break;
-
- /* If we have a store, the accesses are adjacent. This splits
- groups into chunks we support (we don't support vectorization
- of stores with gaps). */
- if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
- break;
-
- /* If the step (if not zero or non-constant) is greater than the
- difference between data-refs' inits this splits groups into
- suitable sizes. */
- if (tree_fits_shwi_p (DR_STEP (dra)))
+ else
{
- HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
- if (step != 0 && step <= (init_b - init_a))
+ /* If init_b == init_a + the size of the type * k, we have an
+ interleaving, and DRA is accessed before DRB. */
+ HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
+ if (type_size_a == 0
+ || (init_b - init_a) % type_size_a != 0)
break;
+
+ /* If we have a store, the accesses are adjacent. This splits
+ groups into chunks we support (we don't support vectorization
+ of stores with gaps). */
+ if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
+ break;
+
+ /* If the step (if not zero or non-constant) is greater than the
+ difference between data-refs' inits this splits groups into
+ suitable sizes. */
+ if (tree_fits_shwi_p (DR_STEP (dra)))
+ {
+ HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
+ if (step != 0 && step <= (init_b - init_a))
+ break;
+ }
}
if (dump_enabled_p ())
@@ -2968,9 +2965,64 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
lastinfo = stmtinfo_b;
+
+ if (init_b == init_prev
+ && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
+ && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Queuing group with duplicate access for fixup\n");
}
}
+ /* Fixup groups with duplicate entries by splitting it. */
+ while (1)
+ {
+ hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
+ if (!(it != to_fixup.end ()))
+ break;
+ stmt_vec_info grp = *it;
+ to_fixup.remove (grp);
+
+ /* Find the earliest duplicate group member. */
+ unsigned first_duplicate = -1u;
+ stmt_vec_info next, g = grp;
+ while ((next = DR_GROUP_NEXT_ELEMENT (g)))
+ {
+ if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr)
+ == DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
+ && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
+ first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
+ g = next;
+ }
+ if (first_duplicate == -1U)
+ continue;
+
+ /* Then move all stmts after the first duplicate to a new group.
+ Note this is a heuristic but one with the property that *it
+ is fixed up completely. */
+ g = grp;
+ stmt_vec_info newgroup = NULL, ng;
+ while ((next = DR_GROUP_NEXT_ELEMENT (g)))
+ {
+ if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
+ {
+ DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
+ if (!newgroup)
+ newgroup = next;
+ else
+ DR_GROUP_NEXT_ELEMENT (ng) = next;
+ ng = next;
+ DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
+ }
+ else
+ g = DR_GROUP_NEXT_ELEMENT (g);
+ }
+ DR_GROUP_NEXT_ELEMENT (ng) = NULL;
+
+ /* Fixup the new group which still may contain duplicates. */
+ to_fixup.add (newgroup);
+ }
+
FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
{
dr_vec_info *dr_info = vinfo->lookup_dr (dr);
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f60fea0a581..3aae1776ef9 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1191,6 +1191,10 @@ vect_build_slp_tree_2 (vec_info *vinfo,
if (++this_tree_size > max_tree_size)
{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "Build SLP failed: SLP tree too large\n");
FOR_EACH_VEC_ELT (children, j, child)
vect_free_slp_tree (child, false);
vect_free_oprnd_info (oprnds_info);