[PATCH] tree-optimization/98550 - fix BB vect unrolling check

Richard Biener rguenther@suse.de
Tue Jan 12 14:17:17 GMT 2021


This fixes the check that disqualifies BB vectorization because of
required unrolling to match up with the later exact_div we do.  To
not disable the ability to split groups that do not match up
exactly with a chosen vector type this also introduces a soft-fail
mechanism to vect_build_slp_tree_1 which delays failing until after
the matches[] array is populated from other checks and only then
determines the split point according to the vector type.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-01-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/98550
	* tree-vect-slp.c (vect_record_max_nunits): Check whether
	the group size is a multiple of the vector element count.
	(vect_build_slp_tree_1): When we need to fail because
	the vector type chosen causes unrolling do so lazily
	without affecting matches only at the end to guide group splitting.

	* g++.dg/opt/pr98550.C: New testcase.
---
 gcc/testsuite/g++.dg/opt/pr98550.C | 96 ++++++++++++++++++++++++++++++
 gcc/tree-vect-slp.c                | 40 ++++++++++---
 2 files changed, 128 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/pr98550.C

diff --git a/gcc/testsuite/g++.dg/opt/pr98550.C b/gcc/testsuite/g++.dg/opt/pr98550.C
new file mode 100644
index 00000000000..49102e6c1a1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr98550.C
@@ -0,0 +1,96 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++11 } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-march=z13" { target s390x-*-* } } */
+
+template <int a> struct k { static constexpr int c = a; };
+template <bool, bool, typename...> struct o;
+template <typename f, typename... g> struct o<false, false, f, g...> {
+  typedef decltype(0) h;
+};
+template <typename... g> struct p : o<k<false>::c, k<false>::c, g...> {};
+class q;
+class r {
+public:
+  void ap(q);
+};
+template <typename, typename aw> void ax(aw ay) { ay(); }
+template <typename az> void ba(az bb) {
+  using bc = p<az>;
+  using bd = typename bc::h;
+  ax<bd>(bb);
+}
+template <unsigned> class s;
+class t {
+public:
+  s<8> br();
+  template <typename...> void operator()() { ba(br()); }
+};
+class q {
+public:
+  template <typename az> q(az) { H(); }
+  struct H {
+    t cc;
+    H() { cc(); }
+  };
+};
+template <unsigned long> struct I {};
+template <unsigned long cl, typename j> void cm(j cn, I<cl>) {
+  cm(cn, I<cl - 1>());
+  cn(cl);
+}
+template <typename j> void cm(j, I<0>) {}
+template <unsigned co> struct u {
+  long cp[co];
+  void cq(const u &);
+  void cs(int);
+  void operator<(u);
+};
+template <unsigned co> void u<co>::cq(const u &l) {
+  cm([&](int i) { cp[i] &= l.cp[i]; }, I<co>());
+}
+template <unsigned co> void u<co>::cs(int m) {
+  cm([&](int i) { cp[i] >>= m; }, I<co - 2>());
+}
+template <unsigned> class K;
+template <unsigned co> class v {
+  int cv;
+  friend K<co>;
+
+public:
+  void cx(int, unsigned char *, unsigned long long);
+};
+template <unsigned co> class K {
+public:
+  static void cx(v<co> &);
+};
+template <unsigned co>
+void v<co>::cx(int, unsigned char *, unsigned long long) {
+  K<co>::cx(*this);
+}
+template <unsigned co> void K<co>::cx(v<co> &cz) {
+  u<co> a, b, d;
+  int e, n = cz.cv;
+  for (; e;)
+    if (cz.cv)
+      a.cs(cz.cv);
+  a.cq(d);
+  a < b;
+}
+template <unsigned co> class s {
+  v<co> *dh;
+
+public:
+  void operator()();
+};
+template <unsigned co> void s<co>::operator()() {
+  int f;
+  unsigned char g;
+  long h;
+  dh->cx(f, &g, h);
+}
+void d() {
+  r i;
+  t j;
+  i.ap(j);
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 877d44b2257..65b7a27e1e8 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -873,11 +873,8 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
 
   /* If populating the vector type requires unrolling then fail
      before adjusting *max_nunits for basic-block vectorization.  */
-  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
-  unsigned HOST_WIDE_INT const_nunits;
   if (is_a <bb_vec_info> (vinfo)
-      && (!nunits.is_constant (&const_nunits)
-	  || const_nunits > group_size))
+      && !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -928,6 +925,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
   stmt_vec_info first_load = NULL, prev_first_load = NULL;
   bool first_stmt_load_p = false, load_p = false;
   bool first_stmt_phi_p = false, phi_p = false;
+  bool maybe_soft_fail = false;
+  tree soft_fail_nunits_vectype = NULL_TREE;
 
   /* For every stmt in NODE find its def stmt/s.  */
   stmt_vec_info stmt_info;
@@ -977,10 +976,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 
       tree nunits_vectype;
       if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
-					   &nunits_vectype, group_size)
-	  || (nunits_vectype
-	      && !vect_record_max_nunits (vinfo, stmt_info, group_size,
-					  nunits_vectype, max_nunits)))
+					   &nunits_vectype, group_size))
 	{
 	  if (is_a <bb_vec_info> (vinfo) && i != 0)
 	    continue;
@@ -988,6 +984,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	  matches[0] = false;
 	  return false;
 	}
+      /* Record nunits required but continue analysis, producing matches[]
+	 as if nunits was not an issue.  This allows splitting of groups
+	 to happen.  */
+      if (nunits_vectype
+	  && !vect_record_max_nunits (vinfo, stmt_info, group_size,
+				      nunits_vectype, max_nunits))
+	{
+	  gcc_assert (is_a <bb_vec_info> (vinfo));
+	  maybe_soft_fail = true;
+	  soft_fail_nunits_vectype = nunits_vectype;
+	}
 
       gcc_assert (vectype);
 
@@ -1340,6 +1347,23 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
       *two_operators = true;
     }
 
+  if (maybe_soft_fail)
+    {
+      unsigned HOST_WIDE_INT const_nunits;
+      if (!TYPE_VECTOR_SUBPARTS
+	    (soft_fail_nunits_vectype).is_constant (&const_nunits)
+	  || const_nunits > group_size)
+	matches[0] = false;
+      else
+	{
+	  /* With constant vector elements simulate a mismatch at the
+	     point we need to split.  */
+	  unsigned tail = group_size & (const_nunits - 1);
+	  memset (&matches[group_size - tail], 0, sizeof (bool) * tail);
+	}
+      return false;
+    }
+
   return true;
 }
 
-- 
2.26.2


More information about the Gcc-patches mailing list