[PATCH] rs6000: Add xxgenpcvwm and xxgenpcvdm instructions

Bill Schmidt wschmidt@linux.ibm.com
Sat May 9 17:05:08 GMT 2020


From: Carl Love <cel@us.ibm.com>

Add support for the xxgenpcv[bhwd]m instructions, along with individual
and overloaded built-in functions for access.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a POWER9 compiler.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-09  Carl Love  <cel@us.ibm.com>

	* config/rs6000/altivec.h (vec_genpcvm): New #define.
	* config/rs6000/rs6000-builtin.def (XXGENPCVM_V16QI): New built-in
	instantiation.
	(XXGENPCVM_V8HI): Likewise.
	(XXGENPCVM_V4SI): Likewise.
	(XXGENPCVM_V2DI): Likewise.
	(XXGENPCVM): New overloaded built-in instantiation.
	* config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add
	entries for FUTURE_BUILTIN_VEC_XXGENPCVM.
	(altivec_expand_builtin): Add special handling for
	FUTURE_BUILTIN_VEC_XXGENPCVM.
	(builtin_function_type): Add handling for
	FUTURE_BUILTIN_XXGENPCVM_{V16QI,V8HI,V4SI,V2DI}.
	* config/rs6000/vsx.md (VSX_EXTRACT_I4): New mode iterator.
	(UNSPEC_XXGENPCV): New constant.
	(xxgenpcvm_<mode>): New insn.
	(xxgenpcvm<mode>): New expansion.
	* doc/extend.texi: Add documentation for vec_genpcvm built-ins.

[gcc/testsuite]

2020-05-09  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/xxgenpc-runnable.c: New.
---
 gcc/config/rs6000/altivec.h                   |   1 +
 gcc/config/rs6000/rs6000-builtin.def          |   5 +
 gcc/config/rs6000/rs6000-call.c               |  31 +++
 gcc/config/rs6000/vsx.md                      |  31 +++
 gcc/doc/extend.texi                           |  12 +
 .../gcc.target/powerpc/xxgenpc-runnable.c     | 231 ++++++++++++++++++
 6 files changed, 311 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index b29413deb6d..3729ceaf336 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -694,6 +694,7 @@ __altivec_scalar_pred(vec_any_nle,
 #define vec_pdep(a, b)	__builtin_altivec_vpdepd (a, b)
 #define vec_pext(a, b)	__builtin_altivec_vpextd (a, b)
 #define vec_cfuge(a, b)	__builtin_altivec_vcfuged (a, b)
+#define vec_genpcvm(a, b)	__builtin_vec_xxgenpcvm (a, b)
 
 /* Overloaded built-in functions for future architecture.  */
 #define vec_gnb(a, b)	__builtin_vec_gnb (a, b)
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 1f86293d0e2..b5b08be512a 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2611,6 +2611,10 @@ BU_FUTURE_V_2 (VPDEPD, "vpdepd", CONST, vpdepd)
 BU_FUTURE_V_2 (VPEXTD, "vpextd", CONST, vpextd)
 BU_FUTURE_V_2 (VGNB, "vgnb", CONST, vgnb)
 BU_FUTURE_V_4 (XXEVAL, "xxeval", CONST, xxeval)
+BU_FUTURE_V_2 (XXGENPCVM_V16QI, "xxgenpcvm_v16qi", CONST, xxgenpcvmv16qi)
+BU_FUTURE_V_2 (XXGENPCVM_V8HI, "xxgenpcvm_v8hi", CONST, xxgenpcvmv8hi)
+BU_FUTURE_V_2 (XXGENPCVM_V4SI, "xxgenpcvm_v4si", CONST, xxgenpcvmv4si)
+BU_FUTURE_V_2 (XXGENPCVM_V2DI, "xxgenpcvm_v2di", CONST, xxgenpcvmv2di)
 
 BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
 BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
@@ -2627,6 +2631,7 @@ BU_FUTURE_OVERLOAD_2 (CLRL, "clrl")
 BU_FUTURE_OVERLOAD_2 (CLRR, "clrr")
 BU_FUTURE_OVERLOAD_2 (GNB, "gnb")
 BU_FUTURE_OVERLOAD_4 (XXEVAL, "xxeval")
+BU_FUTURE_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
 
 BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
 BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 64a9ba2818d..0b9ed7ef018 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5532,6 +5532,15 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI, 0 },
   { FUTURE_BUILTIN_VEC_GNB, FUTURE_BUILTIN_VGNB, RS6000_BTI_unsigned_long_long,
     RS6000_BTI_unsigned_V1TI, RS6000_BTI_UINTQI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_INTSI, 0 },
 
   /* The overloaded XXEVAL definitions are handled specially because the
      fourth unsigned char operand is not encoded in this table.  */
@@ -10384,6 +10393,24 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
 	}
       break;
 
+    case FUTURE_BUILTIN_VEC_XXGENPCVM:
+      arg1 = CALL_EXPR_ARG (exp, 1);
+      STRIP_NOPS (arg1);
+
+      /* Generate a normal call if it is invalid.  */
+      if (arg1 == error_mark_node)
+	return expand_call (exp, target, false);
+
+      if (TREE_CODE (arg1) != INTEGER_CST
+	  || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 3))
+	{
+	  size_t uns_fcode = (size_t) fcode;
+	  const char *name = rs6000_builtin_info[uns_fcode].name;
+	  error ("Second argument of %qs must be in the range [0, 3].", name);
+	  return expand_call (exp, target, false);
+	}
+      break;
+
     default:
       break;
       /* Fall through.  */
@@ -13202,6 +13229,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case FUTURE_BUILTIN_VGNB:
     case FUTURE_BUILTIN_VPDEPD:
     case FUTURE_BUILTIN_VPEXTD:
+    case FUTURE_BUILTIN_XXGENPCVM_V16QI:
+    case FUTURE_BUILTIN_XXGENPCVM_V8HI:
+    case FUTURE_BUILTIN_XXGENPCVM_V4SI:
+    case FUTURE_BUILTIN_XXGENPCVM_V2DI:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 1fcc1b03096..c555f5d42aa 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -217,6 +217,7 @@ (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
 ;; done on ISA 2.07 and not just ISA 3.0.
 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
+(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])
 
 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
 		  		     (V8HI "h")
@@ -342,6 +343,7 @@ (define_c_enum "unspec"
    UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
    UNSPEC_VSX_FIRST_MISMATCH_INDEX
    UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
+   UNSPEC_XXGENPCV
   ])
 
 ;; VSX moves
@@ -2998,6 +3000,35 @@ (define_insn "xxswapd_<mode>"
   "xxpermdi %x0,%x1,%x1,2"
   [(set_attr "type" "vecperm")])
 
+(define_insn "xxgenpcvm_<mode>"
+  [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
+	(unspec:VSX_EXTRACT_I4
+	 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
+	  (match_operand:QI 2 "const_0_to_3_operand" "n")]
+	 UNSPEC_XXGENPCV))]
+    "TARGET_FUTURE && TARGET_64BIT"
+    "xxgenpcv<wd>m %x0,%1,%2"
+    [(set_attr "type" "vecsimple")])
+
+(define_expand "xxgenpcvm<mode>"
+  [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
+   (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
+   (use (match_operand:QI 2 "immediate_operand"))]
+  "TARGET_FUTURE"
+{
+  if (!BYTES_BIG_ENDIAN)
+    {
+      /* gen_xxgenpcvm assumes Big Endian order.  If LE,
+	 swap the upper and lower double words.  */
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
+      operands[1] = tmp;
+    }
+    emit_insn (gen_xxgenpcvm_<mode> (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 ;; lxvd2x for little endian loads.  We need several of
 ;; these since the form of the PARALLEL differs by mode.
 (define_insn "*vsx_lxvd2x2_le_<mode>"
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e35db4387dc..c352c512285 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20889,6 +20889,18 @@ little-endian targets.  Choose this built-in to check for presence of
 zero element if the same argument is also passed to @code{vec_strir}.
 @findex vec_strir_p
 
+@smallexample
+@exdent vector unsigned char vec_genpcvm (vector unsigned char, const int)
+@exdent vector unsigned short vec_genpcvm (vector unsigned short, const int)
+@exdent vector unsigned int vec_genpcvm (vector unsigned int, const int)
+@exdent vector unsigned long long int vec_genpcvm (vector unsigned long long int,
+                                                   const int)
+@end smallexample
+Generate PCV from specified Mask size, as if implemented by the Future
+@code{xxgenpcvbm}, @code{xxgenpcvhm}, @code{xxgenpcvwm}, @code{xxgenpcvdm}
+instructions, where the immediate value is either 0, 1, 2 or 3.
+@findex vec_genpcvm
+
 @node PowerPC Hardware Transactional Memory Built-in Functions
 @subsection PowerPC Hardware Transactional Memory Built-in Functions
 GCC provides two interfaces for accessing the Hardware Transactional
diff --git a/gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c b/gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c
new file mode 100644
index 00000000000..de309efd449
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c
@@ -0,0 +1,231 @@
+/* { dg-do run } */
+/* { dg-options "-mcpu=future -O2" } */
+/* { dg-require-effective-target powerpc_future_hw } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define IMM0 0
+#define IMM1 1
+#define IMM2 2
+#define IMM3 3
+
+void abort (void);
+
+int main ()
+{
+  int i;
+  vector unsigned char vec_byte_arg;
+  vector unsigned char vec_byte_result, vec_byte_expected0, vec_byte_expected1;
+  vector unsigned char vec_byte_expected2, vec_byte_expected3;
+
+  vector unsigned short vec_hword_arg;
+  vector unsigned short vec_hword_result, vec_hword_expected0;
+  vector unsigned short vec_hword_expected1, vec_hword_expected2;
+  vector unsigned short vec_hword_expected3;
+
+  vector unsigned int vec_word_arg;
+  vector unsigned int vec_word_result, vec_word_expected0, vec_word_expected1;
+  vector unsigned int vec_word_expected2, vec_word_expected3;
+
+  vec_byte_arg = (vector unsigned char ){ 0xFF, 0xF0, 0x7F, 0xFF,
+					  0xFF, 0xFF, 0xFF, 0xFF,
+					  0x00, 0x00, 0x01, 0x23,
+					  0x45, 0x67, 0x00, 0x00 };
+
+  vec_byte_result = (vector unsigned char ){ 0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF };
+
+  vec_byte_expected0 = (vector unsigned char){ 0x1F, 0x1E, 0x1D, 0x1C,
+					       0x1B, 0x1A, 0x19, 0x18,
+					       0x06, 0x05, 0x15, 0x04,
+					       0x03, 0x02, 0x01, 0x00 };
+
+  vec_byte_expected1 = (vector unsigned char){ 0x00, 0x00, 0x00, 0x00,
+					       0x00, 0x00, 0x00, 0x00,
+					       0x00, 0x07, 0x06, 0x04,
+					       0x03, 0x02, 0x01, 0x00 };
+
+  vec_byte_expected2 = (vector unsigned char){ 0x10, 0x11, 0x12, 0x13,
+					       0x14, 0x15, 0x16, 0x17,
+					       0x00, 0x01, 0x1a, 0x02,
+					       0x03, 0x04, 0x05, 0x06 };
+
+  vec_byte_expected3 = (vector unsigned char){ 0x08, 0x09, 0x0B, 0x0C,
+					       0x0D, 0x0E, 0x0F, 0x00,
+					       0x00, 0x00, 0x00, 0x00,
+					       0x00, 0x00, 0x00, 0x00 };
+
+  vec_hword_arg = (vector unsigned short) { 0x0004, 0xF003, 0x0002, 0x0001,
+					    0xF004, 0x1003, 0xF002, 0x0001 };
+  vec_hword_expected0 = (vector unsigned short int){ 0x405, 0x1c1d, 0x203,
+						     0x1819, 0x1617, 0x1,
+						     0x1213, 0x1011 };
+  vec_hword_expected1 = (vector unsigned short int){ 0x0, 0x0, 0x0, 0x0,
+						     0x0, 0xe0f, 0xa0b, 0x405 };
+  vec_hword_expected2 = (vector unsigned short int){ 0x100, 0x1312, 0x302,
+						     0x1716, 0x1918, 0x504,
+						     0x1d1c, 0x1f1e };
+  vec_hword_expected3 = (vector unsigned short int){ 0x100, 0x504, 0xb0a, 0x0,
+						     0x0, 0x0, 0x0, 0x0 };
+
+  vec_word_arg = (vector unsigned int){ 0xFEDCBA90, 0xF101, 0xF0000202, 0xF303 };
+  vec_word_expected0 = (vector unsigned int){ 0x4050607, 0x18191a1b,
+					      0x10203, 0x10111213 };
+  vec_word_expected1 = (vector unsigned int){ 0x0, 0x0, 0xc0d0e0f, 0x4050607 };
+  vec_word_expected2 = (vector unsigned int){ 0x3020100, 0x17161514,
+					      0x7060504, 0x1f1e1d1c };
+  vec_word_expected3 = (vector unsigned int){ 0x3020100, 0xb0a0908, 0x0, 0x0 };
+
+  vec_byte_result = vec_genpcvm (vec_byte_arg, IMM0);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_byte_expected0[i] != vec_byte_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 0), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n",
+	     i, vec_byte_expected0[i], i, vec_byte_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_byte_result = vec_genpcvm (vec_byte_arg, IMM1);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_byte_expected1[i] != vec_byte_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 1), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n",
+	     i, vec_byte_expected1[i], i, vec_byte_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_byte_result = vec_genpcvm (vec_byte_arg, IMM2);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_byte_expected2[i] != vec_byte_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvmbm(IMM = 2), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n",
+	     i, vec_byte_expected2[i], i, vec_byte_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_byte_result = vec_genpcvm (vec_byte_arg, IMM3);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_byte_expected3[i] != vec_byte_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 3), vec_byte_expected[%d] = 0x%x does not match vec_byte_result[%d] = 0x%x\n",
+	     i, vec_byte_expected3[i], i, vec_byte_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_hword_result = vec_genpcvm (vec_hword_arg, IMM0);
+
+  for (i = 0; i < 8; i++) {
+    if (vec_hword_expected0[i] != vec_hword_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvmhm(IMM = 0), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n",
+	     i, vec_hword_expected0[i], i, vec_hword_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_hword_result = vec_genpcvm (vec_hword_arg, IMM1);
+
+  for (i = 0; i < 8; i++) {
+    if (vec_hword_expected1[i] != vec_hword_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 1), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n",
+	     i, vec_hword_expected1[i], i, vec_hword_result[i]);
+#else
+     abort();
+#endif
+  }
+
+  vec_hword_result = vec_genpcvm (vec_hword_arg, IMM2);
+
+  for (i = 0; i < 8; i++) {
+    if (vec_hword_expected2[i] != vec_hword_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 2), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n",
+	     i, vec_hword_expected2[i], i, vec_hword_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_hword_result = vec_genpcvm (vec_hword_arg, IMM3);
+
+  for (i = 0; i < 8; i++) {
+    if (vec_hword_expected3[i] != vec_hword_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 3), vec_hword_expected[%d] = 0x%x does not match vec_hword_result[%d] = 0x%x\n",
+	     i, vec_hword_expected3[i], i, vec_hword_result[i]);
+#else
+    abort();
+#endif
+  }
+
+
+  vec_word_result = vec_genpcvm (vec_word_arg, IMM0);
+
+  for (i = 0; i < 4; i++) {
+    if (vec_word_expected0[i] != vec_word_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 0), vec_word_expected[%d] = 0x%x does not match vec_word_result[%d] = 0x%x\n",
+	     i, vec_word_expected0[i], i, vec_word_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_word_result = vec_genpcvm (vec_word_arg, IMM1);
+
+  for (i = 0; i < 4; i++) {
+    if (vec_word_expected1[i] != vec_word_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 1), vec_word_expected[%d] = 0%x does not match vec_word_result[%d] = 0x%x\n",
+	     i, vec_word_expected1[i], i, vec_word_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_word_result = vec_genpcvm (vec_word_arg, IMM2);
+
+  for (i = 0; i < 4; i++) {
+    if (vec_word_expected2[i] != vec_word_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 2), vec_word_expected[%d] = 0x%x does not match vec_word_result[%d] = 0x%x\n",
+	     i, vec_word_expected2[i], i, vec_word_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  vec_word_result = vec_genpcvm (vec_word_arg, IMM3);
+
+  for (i = 0; i < 4; i++) {
+    if (vec_word_expected3[i] != vec_word_result[i])
+#if DEBUG
+      printf("ERROR: vec_genpcvm(IMM = 3), vec_word_expected[%d] = 0x%x does not match vec_word_result[%d] = 0x%x\n",
+	     i, vec_word_expected3[i], i, vec_word_result[i]);
+#else
+    abort();
+#endif
+  }
+
+  return 0;
+}
-- 
2.17.1



More information about the Gcc-patches mailing list