[PATCH] slp: remove unneeded permute calculation (PR99656)

Tamar Christina tamar.christina@arm.com
Fri Mar 19 14:03:30 GMT 2021


Hi Richi,

The attach testcase ICEs because as you showed on the PR we have one child
which is an internal with a PERM of EVENEVEN and one with TOP.

The problem is while we can conceptually merge the permute itself into EVENEVEN,
merging the lanes don't really make sense.

That said, we no longer even require the merged lanes as we create the permutes
based on the KIND directly.

This patch just removes all of that code.

Unfortunately it still won't vectorize with the cost model enabled due to the
blend that's created combining the load and the external

	note: node 0x51f2ce8 (max_nunits=1, refcnt=1)
	note: op: VEC_PERM_EXPR
	note:       { }
	note:       lane permutation { 0[0] 1[1] }
	note:       children 0x51f23e0 0x51f2578
	note: node 0x51f23e0 (max_nunits=2, refcnt=1)
	note: op template: _16 = REALPART_EXPR <*t1_9(D)>;
	note:       stmt 0 _16 = REALPART_EXPR <*t1_9(D)>;
	note:       stmt 1 _16 = REALPART_EXPR <*t1_9(D)>;
	note:       load permutation { 0 0 }
	note: node (external) 0x51f2578 (max_nunits=1, refcnt=1)
	note:       { _18, _18 }

which costs the cost for the load-and-split and the cost of the external splat,
and the one for blending them while in reality it's just a scalar load and
insert.

The compiler (with the cost model disabled) generates

	ldr     q1, [x19]
	dup     v1.2d, v1.d[0]
	ldr     d0, [x19, 8]
	fneg    d0, d0
	ins     v1.d[1], v0.d[0]

while really it should be

	ldp     d1, d0, [x19]
	fneg    d0, d0
	ins     v1.d[1], v0.d[0]

but that's for another time.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/99656
	* tree-vect-slp-patterns.c (linear_loads_p,
	complex_add_pattern::matches, is_eq_or_top,
	vect_validate_multiplication, complex_mul_pattern::matches, 
	complex_fms_pattern::matches): Remove complex_perm_kinds_t.
	* tree-vectorizer.h: (complex_load_perm_t): Removed.
	(slp_tree_to_load_perm_map_t): Use complex_perm_kinds_t instead of
	complex_load_perm_t.

gcc/testsuite/ChangeLog:

	PR tree-optimization/99656
	* gfortran.dg/vect/pr99656.f90: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gfortran.dg/vect/pr99656.f90 b/gcc/testsuite/gfortran.dg/vect/pr99656.f90
new file mode 100644
index 0000000000000000000000000000000000000000..59a28ee19e8f352d534e51558a7fce5c4d78100e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr99656.f90
@@ -0,0 +1,24 @@
+! { dg-do compile { target { aarch64*-*-* } } }
+! { dg-require-effective-target le }
+! { dg-additional-options "-march=armv8.3-a -O1 -ftree-slp-vectorize" }
+
+SUBROUTINE ZLAHQR2(H, LDH, H22, T1)
+
+      INTEGER            LDH
+      COMPLEX*16         H(LDH, *)
+
+      INTEGER            NR
+      COMPLEX*16         H22, SUM, T1, V2
+
+      COMPLEX*16         V( 3 )
+
+      EXTERNAL           ZLARFG
+      INTRINSIC          DCONJG
+
+      V2 = H22
+      CALL ZLARFG(T1)
+      SUM = DCONJG(T1) * H(1, 1)
+      H(1, 1) = SUM * V2
+
+      RETURN
+END
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
index 1e2769662a54229ab8e24390f97dfe206f17ab57..85f2d03754d3ed87e4e34befdca417f2dd4ea21d 100644
--- a/gcc/tree-vect-slp-patterns.c
+++ b/gcc/tree-vect-slp-patterns.c
@@ -203,68 +203,49 @@ vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
 /* Check to see if all loads rooted in ROOT are linear.  Linearity is
    defined as having no gaps between values loaded.  */
 
-static complex_load_perm_t
+static complex_perm_kinds_t
 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
 {
   if (!root)
-    return std::make_pair (PERM_UNKNOWN, vNULL);
+    return PERM_UNKNOWN;
 
   unsigned i;
-  complex_load_perm_t *tmp;
+  complex_perm_kinds_t *tmp;
 
   if ((tmp = perm_cache->get (root)) != NULL)
     return *tmp;
 
-  complex_load_perm_t retval = std::make_pair (PERM_UNKNOWN, vNULL);
+  complex_perm_kinds_t retval = PERM_UNKNOWN;
   perm_cache->put (root, retval);
 
   /* If it's a load node, then just read the load permute.  */
   if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
     {
-      retval.first = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
-      retval.second = SLP_TREE_LOAD_PERMUTATION (root);
+      retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
       perm_cache->put (root, retval);
       return retval;
     }
   else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
     {
-      retval.first = PERM_TOP;
+      retval = PERM_TOP;
       perm_cache->put (root, retval);
       return retval;
     }
 
-  auto_vec<load_permutation_t> all_loads;
   complex_perm_kinds_t kind = PERM_TOP;
 
   slp_tree child;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
     {
-      complex_load_perm_t res = linear_loads_p (perm_cache, child);
-      kind = vect_merge_perms (kind, res.first);
+      complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
+      kind = vect_merge_perms (kind, res);
       /* Unknown and Top are not valid on blends as they produce no permute.  */
-      retval.first = kind;
+      retval = kind;
       if (kind == PERM_UNKNOWN || kind == PERM_TOP)
 	return retval;
-      all_loads.safe_push (res.second);
     }
 
-  if (SLP_TREE_LANE_PERMUTATION (root).exists ())
-    {
-      lane_permutation_t perm = SLP_TREE_LANE_PERMUTATION (root);
-      load_permutation_t nloads;
-      nloads.create (SLP_TREE_LANES (root));
-      nloads.quick_grow (SLP_TREE_LANES (root));
-      for (i = 0; i < SLP_TREE_LANES (root); i++)
-	nloads[i] = all_loads[perm[i].first][perm[i].second];
-
-      retval.first = kind;
-      retval.second = nloads;
-    }
-  else
-    {
-      retval.first = kind;
-      retval.second = all_loads[0];
-    }
+  retval = kind;
 
   perm_cache->put (root, retval);
   return retval;
@@ -704,11 +685,11 @@ complex_add_pattern::matches (complex_operation_t op,
   vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
 
   /* First node must be unpermuted.  */
-  if (linear_loads_p (perm_cache, children[0]).first != PERM_EVENODD)
+  if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
     return IFN_LAST;
 
   /* Second node must be permuted.  */
-  if (linear_loads_p (perm_cache, children[1]).first != PERM_ODDEVEN)
+  if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
     return IFN_LAST;
 
   if (!vect_pattern_validate_optab (ifn, *node))
@@ -795,9 +776,9 @@ vect_normalize_conj_loc (vec<slp_tree> args, bool *neg_first_p = NULL)
 /* Helper function to check if PERM is KIND or PERM_TOP.  */
 
 static inline bool
-is_eq_or_top (complex_load_perm_t perm, complex_perm_kinds_t kind)
+is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
 {
-  return perm.first == kind || perm.first == PERM_TOP;
+  return perm == kind || perm == PERM_TOP;
 }
 
 /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR
@@ -828,7 +809,7 @@ vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
       /* Canonicalization for fms is not consistent. So have to test both
 	 variants to be sure.  This needs to be fixed in the mid-end so
 	 this part can be simpler.  */
-      kind = linear_loads_p (perm_cache, right_op[0]).first;
+      kind = linear_loads_p (perm_cache, right_op[0]);
       if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
 	   && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
 			     PERM_ODDEVEN))
@@ -839,7 +820,7 @@ vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
     }
   else
     {
-      if (linear_loads_p (perm_cache, right_op[1]).first != PERM_ODDODD
+      if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
 	  && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
 			    PERM_ODDEVEN))
 	return false;
@@ -852,15 +833,15 @@ vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
   /* Check if the conjugate is on the second first or second operand.  The
      order of the node with the conjugate value determines this, and the dup
      node must be one of lane 0 of the same DR as the neg node.  */
-  kind = linear_loads_p (perm_cache, left_op[index1]).first;
+  kind = linear_loads_p (perm_cache, left_op[index1]);
   if (kind == PERM_TOP)
     {
-      if (linear_loads_p (perm_cache, left_op[index2]).first == PERM_EVENODD)
+      if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
 	return true;
     }
   else if (kind == PERM_EVENODD)
     {
-      if ((kind = linear_loads_p (perm_cache, left_op[index2]).first) == PERM_EVENODD)
+      if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD)
 	return false;
       return true;
     }
@@ -1003,7 +984,7 @@ complex_mul_pattern::matches (complex_operation_t op,
   left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
   right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
 
-  if (linear_loads_p (perm_cache, left_op[1]).first == PERM_ODDEVEN)
+  if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
     return IFN_LAST;
 
   bool neg_first = false;
@@ -1035,7 +1016,7 @@ complex_mul_pattern::matches (complex_operation_t op,
   ops->truncate (0);
   ops->create (3);
 
-  complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]).first;
+  complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
   if (kind == PERM_EVENODD)
     {
       ops->quick_push (left_op[1]);
@@ -1356,7 +1337,7 @@ complex_fms_pattern::matches (complex_operation_t op,
   ops->truncate (0);
   ops->create (4);
 
-  complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]).first;
+  complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
   if (kind == PERM_EVENODD)
     {
       ops->quick_push (child);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b861c97ab3aef179ba9b2900701cf09e75a847a5..9861d9e88102138c0e2de8dfc34422ff65a0e9e0 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2059,13 +2059,8 @@ typedef enum _complex_perm_kinds {
    PERM_TOP
 } complex_perm_kinds_t;
 
-/* A pair with a load permute and a corresponding complex_perm_kind which gives
-   information about the load it represents.  */
-typedef std::pair<complex_perm_kinds_t, load_permutation_t>
-  complex_load_perm_t;
-
 /* Cache from nodes to the load permutation they represent.  */
-typedef hash_map <slp_tree, complex_load_perm_t>
+typedef hash_map <slp_tree, complex_perm_kinds_t>
   slp_tree_to_load_perm_map_t;
 
 /* Vector pattern matcher base class.  All SLP pattern matchers must inherit


-- 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: rb14297.patch
Type: text/x-diff
Size: 8419 bytes
Desc: not available
URL: <https://gcc.gnu.org/pipermail/gcc-patches/attachments/20210319/68380958/attachment-0001.bin>


More information about the Gcc-patches mailing list