[PATCH] rs6000: Add MMA __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins

Peter Bergner bergner@linux.ibm.com
Wed Jun 30 21:56:04 GMT 2021


LLVM added the __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins.
The following patch adds support for them to GCC so that we stay in sync
with LLVM.

The following patch is bootstrapping and regtesting on powerpc64le-linux.
Ok for trunk if there are no regressions?  Ok to backport to GCC 11 and
GCC 10 after baking on trunk for a while?

Peter


gcc/
	* config/rs6000/rs6000-builtin.def (BU_MMA_LD, BU_MMA_ST): New macros.
	(__builtin_vsx_lxvp, __builtin_vsx_stxvp): New built-ins.
	* config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Expand
	lxvp and stxvp built-ins.
	(mma_init_builtins): Handle lxvp and stxvp built-ins.
	(builtin_function_type): Likewise.

gcc/testsuite/
	* gcc.target/powerpc/mma-builtin-7.c: New test.
	* gcc.target/powerpc/mma-builtin-8.c: New test.

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index d7ce4de421e..a42c3cbd55b 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -484,6 +484,25 @@
 		     | RS6000_BTC_SENARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
+#define BU_MMA_LD(ENUM, NAME, ATTR)					\
+  RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_vsx_" NAME,		/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY				\
+		     | RS6000_BTC_GIMPLE),				\
+		    CODE_FOR_nothing)			/* ICODE */
+
+#define BU_MMA_ST(ENUM, NAME, ATTR)					\
+  RS6000_BUILTIN_M (VSX_BUILTIN_ ## ENUM,		/* ENUM */	\
+		    "__builtin_vsx_" NAME,		/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_TERNARY				\
+		     | RS6000_BTC_VOID					\
+		     | RS6000_BTC_GIMPLE),				\
+		    CODE_FOR_nothing)			/* ICODE */
+
 /* ISA 2.05 (power6) convenience macros. */
 /* For functions that depend on the CMPB instruction */
 #define BU_P6_2(ENUM, NAME, ATTR, ICODE)				\
@@ -3253,6 +3272,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS,
 BU_P10V_VSX_1 (XVCVBF16SPN,	 "xvcvbf16spn",	MISC, vsx_xvcvbf16spn)
 BU_P10V_VSX_1 (XVCVSPBF16,	    "xvcvspbf16",	MISC, vsx_xvcvspbf16)
 
+BU_MMA_LD (LXVP,	  "lxvp",	MISC)
+BU_MMA_ST (STXVP,	  "stxvp",	PAIR)
+
 BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b67789845a5..ebc03da9b24 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -11913,6 +11913,30 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
       gsi_replace_with_seq (gsi, new_seq, true);
       return true;
     }
+  else if (fncode == VSX_BUILTIN_LXVP)
+    {
+      push_gimplify_context (true);
+      tree offset = gimple_call_arg (stmt, 0);
+      tree ptr = gimple_call_arg (stmt, 1);
+      tree lhs = gimple_call_lhs (stmt);
+      tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, offset));
+      gimplify_assign (lhs, mem, &new_seq);
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }
+  else if (fncode == VSX_BUILTIN_STXVP)
+    {
+      push_gimplify_context (true);
+      tree src = gimple_call_arg (stmt, 0);
+      tree offset = gimple_call_arg (stmt, 1);
+      tree ptr = gimple_call_arg (stmt, 2);
+      tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, offset));
+      gimplify_assign (mem, src, &new_seq);
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
@@ -14264,11 +14288,14 @@ mma_init_builtins (void)
       if (gimple_func)
 	{
 	  gcc_assert (icode == CODE_FOR_nothing);
-	  op[nopnds++] = void_type_node;
 	  /* Some MMA built-ins that are expanded into gimple are converted
 	     into internal MMA built-ins that are expanded into rtl.
 	     The internal built-in follows immediately after this built-in.  */
-	  icode = d[1].icode;
+	  if (d[1].icode != CODE_FOR_nothing)
+	    {
+	      op[nopnds++] = void_type_node;
+	      icode = d[1].icode;
+	    }
 	}
       else
 	{
@@ -14291,6 +14318,19 @@ mma_init_builtins (void)
 	  else
 	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	}
+      else if (d->code == VSX_BUILTIN_LXVP)
+	{
+	  op[nopnds++] = vector_pair_type_node;
+	  op[nopnds++] = sizetype;
+	  op[nopnds++] = build_pointer_type (vector_pair_type_node);
+	}
+      else if (d->code == VSX_BUILTIN_STXVP)
+	{
+	  op[nopnds++] = void_type_node;
+	  op[nopnds++] = vector_pair_type_node;
+	  op[nopnds++] = sizetype;
+	  op[nopnds++] = build_pointer_type (vector_pair_type_node);
+	}
       else
 	{
 	  /* This is a normal MMA built-in function.  */
@@ -14781,6 +14821,16 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
       h.uns_p[2] = 1;
       break;
 
+    case VSX_BUILTIN_LXVP:
+      h.uns_p[0] = 1;
+      h.uns_p[2] = 1;
+      break;
+
+    case VSX_BUILTIN_STXVP:
+      h.uns_p[1] = 1;
+      h.uns_p[3] = 1;
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c
new file mode 100644
index 00000000000..c661a4b84bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+void
+foo (__vector_pair *dst, __vector_pair *src, long idx)
+{
+  dst[0] = __builtin_vsx_lxvp (0, src);
+  dst[2] = __builtin_vsx_lxvp (32, src);
+  dst[4] = __builtin_vsx_lxvp (64, src);
+  /* Non-constant offset should generate a lxvpx.  */
+  dst[6] = __builtin_vsx_lxvp (idx, src);
+  /* Non-aligned offset should generate a plxvp.  */
+  dst[8] = __builtin_vsx_lxvp (257, src);
+}
+
+#if !__has_builtin (__builtin_vsx_lxvp)
+#  error "__has_builtin (__builtin_vsx_lxvp) failed"
+#endif
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mlxvpx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mplxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 5 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c
new file mode 100644
index 00000000000..af29e479f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+void
+foo (__vector_pair *dst, __vector_pair *src, long idx)
+{
+  __vector_pair pair = *src;
+  __builtin_vsx_stxvp (pair, 0, dst);
+  __builtin_vsx_stxvp (pair, 32, dst);
+  __builtin_vsx_stxvp (pair, 64, dst);
+  /* Non-constant offset should generate a stxvpx.  */
+  __builtin_vsx_stxvp (pair, idx, dst);
+  /* Non-aligned offset should generate a pstxvp.  */
+  __builtin_vsx_stxvp (pair, 257, dst);
+}
+
+#if !__has_builtin (__builtin_vsx_stxvp)
+#  error "__has_builtin (__builtin_vsx_stxvp) failed"
+#endif
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mstxvpx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpstxvp\M} 1 } } */


More information about the Gcc-patches mailing list