[PATCH v2] middle-end slp: fix sharing of SLP only patterns.

Tamar Christina tamar.christina@arm.com
Tue Feb 23 16:16:57 GMT 2021


Hi Richi,

The attached testcase ICEs due to a couple of issues.
In the testcase you have two SLP instances that share the majority of their
definition with each other.  One tree defines a COMPLEX_MUL sequence and the
other tree a COMPLEX_FMA.

The ice happens because:

1. the refcounts are wrong, in particular the FMA case doesn't correctly count
the references for the COMPLEX_MUL that it consumes.

2. when the FMA is created it incorrectly assumes it can just tear apart the MUL
node that it's consuming.  This is wrong and should only be done when there is
no more uses of the node, in which case the vector only pattern is no longer
relevant.

To fix the last part the SLP only pattern reset code was moved into
vect_free_slp_tree which results in cleaner code.  I also think it does belong
there since that function knows when there are no more uses of the node and so
the pattern should be unmarked, so when the the vectorizer is inspecting the BB
it doesn't find the now invalid vector only patterns.

The patch also clears the SLP_TREE_REPRESENTATIVE when stores are removed such
that we don't hit an error later trying to free the stmt_vec_info again.

Lastly it also tweaks the results of whether a pattern has been detected or not
to return true when another SLP instance has created a pattern that is only used
by a different instance (due to the trees being unshared).

Instead of ICEing this code now produces

        adrp    x1, .LANCHOR0
        add     x2, x1, :lo12:.LANCHOR0
        movi    v1.2s, 0
        mov     w0, 0
        ldr     x4, [x1, #:lo12:.LANCHOR0]
        ldrsw   x3, [x2, 16]
        ldr     x1, [x2, 8]
        ldrsw   x2, [x2, 20]
        ldr     d0, [x4]
        ldr     d2, [x1, x3, lsl 3]
        fcmla   v2.2s, v0.2s, v0.2s, #0
        fcmla   v2.2s, v0.2s, v0.2s, #90
        str     d2, [x1, x3, lsl 3]
        fcmla   v1.2s, v0.2s, v0.2s, #0
        fcmla   v1.2s, v0.2s, v0.2s, #90
        str     d1, [x1, x2, lsl 3]
        ret

PS. This testcase actually shows that the codegen we get in these cases is not
optimal. It should generate a MUL + ADD instead MUL + FMA.

But that's for GCC 12.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/99149
	* tree-vect-slp-patterns.c (vect_detect_pair_op): Don't recreate the
	buffer.
	(vect_slp_reset_pattern): Remove.
	(complex_fma_pattern::matches): Remove call to vect_slp_reset_pattern.
	(complex_mul_pattern::build, complex_fma_pattern::build,
	complex_fms_pattern::build): Fix ref counts.
	* tree-vect-slp.c (vect_free_slp_tree): Undo SLP only pattern relevancy
	when node is being deleted.
	(vect_match_slp_patterns_2): Correct result of cache hit on patterns.
	(vect_schedule_slp): Invalidate SLP_TREE_REPRESENTATIVE of removed
	stores.
	* tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize value.

gcc/testsuite/ChangeLog:

	PR tree-optimization/99149
	* g++.dg/vect/pr99149.cc: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc b/gcc/testsuite/g++.dg/vect/pr99149.cc
new file mode 100755
index 0000000000000000000000000000000000000000..902a26f576fcc79d2802bec093668674cca1c84f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-slp-all" { target { aarch64*-*-* } } } */
+
+class a {
+  float b;
+  float c;
+
+public:
+  a(float d, float e) : b(d), c(e) {}
+  a operator+(a d) { return a(b + d.b, c + d.c); }
+  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
+};
+int f, g;
+class {
+  a *h;
+  a *i;
+
+public:
+  void j() {
+    a k = h[0], l = i[g], m = k * i[f];
+    i[g] = l + m;
+    i[f] = m;
+  }
+} n;
+main() { n.j(); }
+
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_FMA" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
index f0817da9f622d22e3df2e30410d1cf610b4ffa1d..1e2769662a54229ab8e24390f97dfe206f17ab57 100644
--- a/gcc/tree-vect-slp-patterns.c
+++ b/gcc/tree-vect-slp-patterns.c
@@ -407,9 +407,8 @@ vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
 
   if (result != CMPLX_NONE && ops != NULL)
     {
-      ops->create (2);
-      ops->quick_push (node1);
-      ops->quick_push (node2);
+      ops->safe_push (node1);
+      ops->safe_push (node2);
     }
   return result;
 }
@@ -1090,15 +1089,17 @@ complex_mul_pattern::build (vec_info *vinfo)
 {
   slp_tree node;
   unsigned i;
+  slp_tree newnode
+    = vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
+  SLP_TREE_REF_COUNT (this->m_ops[2])++;
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
     vect_free_slp_tree (node);
 
   /* First re-arrange the children.  */
   SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
   SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
-  SLP_TREE_CHILDREN (*this->m_node)[1] =
-    vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
-  SLP_TREE_REF_COUNT (this->m_ops[2])++;
+  SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
 
   /* And then rewrite the node itself.  */
   complex_pattern::build (vinfo);
@@ -1133,18 +1134,6 @@ class complex_fma_pattern : public complex_pattern
     }
 };
 
-/* Helper function to "reset" a previously matched node and undo the changes
-   made enough so that the node is treated as an irrelevant node.  */
-
-static inline void
-vect_slp_reset_pattern (slp_tree node)
-{
-  stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
-  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
-  STMT_SLP_TYPE (stmt_info) = pure_slp;
-  SLP_TREE_REPRESENTATIVE (node) = stmt_info;
-}
-
 /* Pattern matcher for trying to match complex multiply and accumulate
    and multiply and subtract patterns in SLP tree.
    If the operation matches then IFN is set to the operation it matched and
@@ -1208,15 +1197,6 @@ complex_fma_pattern::matches (complex_operation_t op,
   if (!vect_pattern_validate_optab (ifn, vnode))
     return IFN_LAST;
 
-  /* FMA matched ADD + CMUL.  During the matching of CMUL the
-     stmt that starts the pattern is marked as being in a pattern,
-     namely the CMUL.  When replacing this with a CFMA we have to
-     unmark this statement as being in a pattern.  This is because
-     vect_mark_pattern_stmts will only mark the current stmt as being
-     in a pattern.  Later on when the scalar stmts are examined the
-     old statement which is supposed to be irrelevant will point to
-     CMUL unless we undo the pattern relationship here.  */
-  vect_slp_reset_pattern (node);
   ops->truncate (0);
   ops->create (3);
 
@@ -1259,10 +1239,17 @@ complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
 void
 complex_fma_pattern::build (vec_info *vinfo)
 {
+  slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1];
+
   SLP_TREE_CHILDREN (*this->m_node).release ();
   SLP_TREE_CHILDREN (*this->m_node).create (3);
   SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
 
+  SLP_TREE_REF_COUNT (this->m_ops[1])++;
+  SLP_TREE_REF_COUNT (this->m_ops[2])++;
+
+  vect_free_slp_tree (node);
+
   complex_pattern::build (vinfo);
 }
 
@@ -1427,6 +1414,11 @@ complex_fms_pattern::build (vec_info *vinfo)
 {
   slp_tree node;
   unsigned i;
+  slp_tree newnode =
+    vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
+  SLP_TREE_REF_COUNT (this->m_ops[0])++;
+  SLP_TREE_REF_COUNT (this->m_ops[1])++;
+
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
     vect_free_slp_tree (node);
 
@@ -1436,10 +1428,7 @@ complex_fms_pattern::build (vec_info *vinfo)
   /* First re-arrange the children.  */
   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
-  SLP_TREE_CHILDREN (*this->m_node).quick_push (
-    vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node));
-  SLP_TREE_REF_COUNT (this->m_ops[0])++;
-  SLP_TREE_REF_COUNT (this->m_ops[1])++;
+  SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
 
   /* And then rewrite the node itself.  */
   complex_pattern::build (vinfo);
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index ea8a97b01c6371791ac66de3e1dabfedee69cb67..605873714a5cafaaf822f61f1f769f96b3876694 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -146,6 +146,16 @@ vect_free_slp_tree (slp_tree node)
     if (child)
       vect_free_slp_tree (child);
 
+  /* If the node defines any SLP only patterns then those patterns are no
+     longer valid and should be removed.  */
+  stmt_vec_info rep_stmt_info = SLP_TREE_REPRESENTATIVE (node);
+  if (rep_stmt_info && STMT_VINFO_SLP_VECT_ONLY_PATTERN (rep_stmt_info))
+    {
+      stmt_vec_info stmt_info = vect_orig_stmt (rep_stmt_info);
+      STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
+      STMT_SLP_TYPE (stmt_info) = STMT_SLP_TYPE (rep_stmt_info);
+    }
+
   delete node;
 }
 
@@ -2395,7 +2405,9 @@ vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo,
   slp_tree node = *ref_node;
   bool found_p = false;
   if (!node || visited->add (node))
-    return false;
+    return node
+	   && SLP_TREE_REPRESENTATIVE (node)
+	   && STMT_VINFO_SLP_VECT_ONLY_PATTERN (SLP_TREE_REPRESENTATIVE (node));
 
   slp_tree child;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
@@ -6451,6 +6463,11 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance> slp_instances)
 	  store_info = vect_orig_stmt (store_info);
 	  /* Free the attached stmt_vec_info and remove the stmt.  */
 	  vinfo->remove_stmt (store_info);
+
+	  /* Invalidate SLP_TREE_REPRESENTATIVE in case we released it
+	     to not crash in vect_free_slp_tree later.  */
+	  if (SLP_TREE_REPRESENTATIVE (root) == store_info)
+	    SLP_TREE_REPRESENTATIVE (root) = NULL;
         }
     }
 }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 5b45df3a4e00266b7530eb4da6985f0d940cb05b..63ba594f2276850a00fc372072d98326891f19e6 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -695,6 +695,7 @@ vec_info::new_stmt_vec_info (gimple *stmt)
   STMT_VINFO_REDUC_FN (res) = IFN_LAST;
   STMT_VINFO_REDUC_IDX (res) = -1;
   STMT_VINFO_SLP_VECT_ONLY (res) = false;
+  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
   STMT_VINFO_VEC_STMTS (res) = vNULL;
 
   if (is_a <loop_vec_info> (this)


-- 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: rb14220.patch
Type: text/x-diff
Size: 7500 bytes
Desc: not available
URL: <https://gcc.gnu.org/pipermail/gcc-patches/attachments/20210223/91596864/attachment-0001.bin>


More information about the Gcc-patches mailing list