This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, rs6000] Allow swap removal for convert-splat idiom


Hi,

A fairly common idiom on Power for vector floating-point computation
converts a double-precision value to single precision and copies it to
all elements of a vector float.  For this we see a specific convert
UNSPEC feeding an xxspltw pattern that copies from BE element zero.
Since all elements of the result are the same regardless of whether
swaps are present, this idiom should not prevent the vector swap removal
optimization for the containing computation.  This patch permits that.

The issue was reported privately to me, and I have created a test case
that reduces and anonymizes the original code.

Is this ok for trunk after GCC 5 branches?  I would also like to
backport it to GCC 5 subsequently.

Thanks,
Bill


[gcc]

2015-01-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (rtx_is_swappable_p): Commentary
	adjustments.
	(insn_is_swappable_p): Return 1 for a convert from double to
	single precision when all of its uses are splats of BE element
	zero.

[gcc/testsuite]

2015-01-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/swaps-p8-18.c: New test.


Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 219191)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -34046,7 +34046,8 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	   order-dependent element, so additional fixup code would be
 	   needed to make those work.  Vector set and non-immediate-form
 	   vector splat are element-order sensitive.  A few of these
-	   cases might be workable with special handling if required.  */
+	   cases might be workable with special handling if required.
+	   Adding cost modeling would be appropriate in some cases.  */
 	int val = XINT (op, 1);
 	switch (val)
 	  {
@@ -34085,12 +34086,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLPX:
 	  case UNSPEC_VUPKLS_V4SF:
 	  case UNSPEC_VUPKLU_V4SF:
-	  /* The following could be handled as an idiom with XXSPLTW.
-	     These place a scalar in BE element zero, but the XXSPLTW
-	     will currently expect it in BE element 2 in a swapped
-	     region.  When one of these feeds an XXSPLTW with no other
-	     defs/uses either way, we can avoid the lane change for
-	     XXSPLTW and things will be correct.  TBD.  */
 	  case UNSPEC_VSX_CVDPSPN:
 	  case UNSPEC_VSX_CVSPDP:
 	  case UNSPEC_VSX_CVSPDPN:
@@ -34179,6 +34174,36 @@ insn_is_swappable_p (swap_web_entry *insn_entry, r
 	return 0;
     }
 
+  /* A convert to single precision can be left as is provided that
+     all of its uses are in xxspltw instructions that splat BE element
+     zero.  */
+  if (GET_CODE (body) == SET
+      && GET_CODE (SET_SRC (body)) == UNSPEC
+      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
+    {
+      df_ref def;
+      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+
+      FOR_EACH_INSN_INFO_DEF (def, insn_info)
+	{
+	  struct df_link *link = DF_REF_CHAIN (def);
+	  if (!link)
+	    return 0;
+
+	  for (; link; link = link->next) {
+	    rtx use_insn = DF_REF_INSN (link->ref);
+	    rtx use_body = PATTERN (use_insn);
+	    if (GET_CODE (use_body) != SET
+		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
+		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
+		|| XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
+	      return 0;
+	  }
+	}
+
+      return 1;
+    }
+
   /* Otherwise check the operands for vector lane violations.  */
   return rtx_is_swappable_p (body, special);
 }
Index: gcc/testsuite/gcc.target/powerpc/swaps-p8-18.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/swaps-p8-18.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/swaps-p8-18.c	(working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3" } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* This is a test for a specific convert-splat permute removal.  */
+
+void compute (float*, float*, float*, int, int);
+double test (void);
+double gorp;
+
+int main (void)  /* Driver: calls compute repeatedly on local buffers.  */
+{
+  float X[10000], Y[256], Z[2000];  /* Never initialized here; the test is compile-only (dg-do compile).  */
+  int i;
+  for (i = 0; i < 2500; i++)
+    compute (X, Y, Z, 256, 2000);  /* m = 256, n = 2000.  */
+  gorp = test ();  /* test is only declared in this file, never defined.  */
+}
+
+void compute(float *X, float *Y, float *Z, int m, int n)  /* For each i < n: Z[i] = sum over j < m of X[i + j] * Y[j].  */
+{
+  int i, j;
+  float w, *x, *y;
+
+  for (i = 0; i < n; i++)
+    {
+      w = 0.0;  /* Reset the accumulator for this output element.  */
+      x = X++;  /* The window into X starts one element later on each outer iteration.  */
+      y = Y;  /* Always restart at the beginning of Y.  */
+      for (j = 0; j < m; j++)
+	w += (*x++) * (*y++);  /* Accumulate the product of the paired elements.  */
+      Z[i] = w;
+    }
+}



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]