This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH,rs6000] Add vec_permxor support.


GCC maintainers:

The following patch adds support for the vec_permxor builtins.

Note that the change in rs6000-p8swap.c is actually a fix for the
existing define_insn "crypto_vpermxor" in crypto.md.  The fix disables
the swap optimization for the vpermxor instruction.  Without this fix,
compiling with -O1 and above swaps the upper and lower 64 bits of the
three operands, which breaks the byte indexing specified by the third
operand.  This change will need to be backported to GCC 6 and 7.

The patch has been tested on:

  powerpc64-unknown-linux-gnu (Power 8 BE)
  powerpc64le-unknown-linux-gnu (Power 8 LE)
  powerpc64le-unknown-linux-gnu (Power 9 LE)

with no regressions.

Let me know if the patch looks OK or not. Thanks.

               Carl Love

----------------------------------------------------------------------

gcc/ChangeLog:

2018-03-12  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add entries
	for P8V_BUILTIN_VEC_VPERMXOR.
	* config/rs6000/altivec.h (vec_permxor): Add #define for the builtin.
	* config/rs6000/rs6000-builtin.def: Add macro expansions for VPERMXOR.
	* config/rs6000/altivec.md (UNSPEC_VNOR): New unspec.
	(altivec_vnor_v16qi3): New define_insn.
	(altivec_vpermxor): New define_expand.
	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Add case
	UNSPEC_VPERMXOR.
	* doc/extend.texi: Add prototypes for vec_permxor.

gcc/testsuite/ChangeLog:

2018-03-12  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/builtins-7-runnable.c: New test file for
	vec_permxor.
---
 gcc/config/rs6000/altivec.h                        |   1 +
 gcc/config/rs6000/altivec.md                       |  33 ++++++
 gcc/config/rs6000/rs6000-builtin.def               |   3 +
 gcc/config/rs6000/rs6000-c.c                       |  10 ++
 gcc/config/rs6000/rs6000-p8swap.c                  |   1 +
 gcc/doc/extend.texi                                |   6 ++
 .../gcc.target/powerpc/builtins-7-runnable.c       | 112 +++++++++++++++++++++
 7 files changed, 166 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 1e495e6..5a34162 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -76,6 +76,7 @@
 #define vec_vor vec_or
 #define vec_vpkpx vec_packpx
 #define vec_vperm vec_perm
+#define vec_permxor __builtin_vec_vpermxor
 #define vec_vrefp vec_re
 #define vec_vrfin vec_round
 #define vec_vrsqrtefp vec_rsqrte
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2759f2d..7770743 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -44,6 +44,7 @@
    UNSPEC_VMULOSH
    UNSPEC_VMULOUW
    UNSPEC_VMULOSW
+   UNSPEC_VNOR
    UNSPEC_VPKPX
    UNSPEC_VPACK_SIGN_SIGN_SAT
    UNSPEC_VPACK_SIGN_UNS_SAT
@@ -3827,6 +3828,38 @@
   DONE;
 })
 
+(define_insn "altivec_vnor_v16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+   (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+		  (match_operand:V16QI 2 "register_operand" "v")]
+		 UNSPEC_VNOR))]
+  ""
+  "vnor %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vpermxor"
+  [(use (match_operand:V16QI 0 "register_operand"))
+   (use (match_operand:V16QI 1 "register_operand"))
+   (use (match_operand:V16QI 2 "register_operand"))
+   (use (match_operand:V16QI 3 "register_operand"))]
+  "TARGET_P8_VECTOR"
+{
+  if (!BYTES_BIG_ENDIAN)
+    {
+      /* vpermxor indexes the bytes using Big Endian numbering.  If LE,
+	 change indexing in operand[3] to BE index.  */
+      rtx be_index = gen_reg_rtx (V16QImode);
+
+      emit_insn (gen_altivec_vnor_v16qi3 (be_index, operands[3], operands[3]));
+      emit_insn (gen_crypto_vpermxor_v16qi (operands[0], operands[1],
+					    operands[2], be_index));
+    }
+  else
+    emit_insn (gen_crypto_vpermxor_v16qi (operands[0], operands[1],
+					  operands[2], operands[3]));
+  DONE;
+})
+
 (define_expand "altivec_negv4sf2"
   [(use (match_operand:V4SF 0 "register_operand"))
    (use (match_operand:V4SF 1 "register_operand"))]
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f9548a0..25d45a9 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2008,6 +2008,8 @@ BU_P8V_AV_P (VCMPEQUD_P,	"vcmpequd_p",	CONST,	vector_eq_v2di_p)
 BU_P8V_AV_P (VCMPGTSD_P,	"vcmpgtsd_p",	CONST,	vector_gt_v2di_p)
 BU_P8V_AV_P (VCMPGTUD_P,	"vcmpgtud_p",	CONST,	vector_gtu_v2di_p)
 
+BU_P8V_AV_3 (VPERMXOR,		"vpermxor",	CONST, 	altivec_vpermxor)
+
 /* ISA 2.05 overloaded 2 argument functions.  */
 BU_P6_OVERLOAD_2 (CMPB, "cmpb")
 
@@ -2069,6 +2071,7 @@ BU_P8V_OVERLOAD_3 (VADDECUQ,	"vaddecuq")
 BU_P8V_OVERLOAD_3 (VADDEUQM,	"vaddeuqm")
 BU_P8V_OVERLOAD_3 (VSUBECUQ,	"vsubecuq")
 BU_P8V_OVERLOAD_3 (VSUBEUQM,	"vsubeuqm")
+BU_P8V_OVERLOAD_3 (VPERMXOR,   "vpermxor")
 
 /* ISA 3.0 vector overloaded 2-argument functions. */
 BU_P9V_AV_2 (VSLV,		"vslv",			CONST, vslv)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index cc8e4e1..9ffb253 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3585,6 +3585,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
   { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
     RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI },
+
+  { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR,
+    RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI,
+    RS6000_BTI_bool_V16QI },
+  { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI },
+  { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+
   { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
     RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI },
   { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index ffcbba9..d2b39f3 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -753,6 +753,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VPERM_UNS:
 	  case UNSPEC_VPERMHI:
 	  case UNSPEC_VPERMSI:
+	  case UNSPEC_VPERMXOR:
 	  case UNSPEC_VPKPX:
 	  case UNSPEC_VSLDOI:
 	  case UNSPEC_VSLO:
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1379502..d10fb46 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18448,6 +18448,12 @@ vector double vec_perm (vector double, vector double, vector unsigned char);
 vector long vec_perm (vector long, vector long, vector unsigned char);
 vector unsigned long vec_perm (vector unsigned long, vector unsigned long,
                                vector unsigned char);
+vector bool char vec_permxor (vector bool char, vector bool char,
+                              vector bool char);
+vector signed char vec_permxor (vector signed char, vector signed char,
+                                vector signed char);
+vector unsigned char vec_permxor (vector unsigned char, vector unsigned char,
+                                  vector unsigned char);
 vector double vec_rint (vector double);
 vector double vec_recip (vector double, vector double);
 vector double vec_rsqrt (vector double);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c
new file mode 100644
index 0000000..b77b1e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c
@@ -0,0 +1,112 @@
+/* { dg-do run { target { powerpc*-*-* && { lp64 && p8vector_hw } } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+int main() {
+  int i;
+  vector bool char ubc_arg1, ubc_arg2, ubc_arg3;
+  vector unsigned char uc_arg1, uc_arg2, uc_arg3;
+  vector signed char sc_arg1, sc_arg2, sc_arg3;
+
+  vector bool char vec_ubc_expected1, vec_ubc_result1;
+  vector unsigned char vec_uc_expected1, vec_uc_result1;
+  vector signed char vec_sc_expected1, vec_sc_result1;
+
+  /* vec_permxor: bool char args, result */
+  ubc_arg1 = (vector bool char){0xA, 0x2, 0xB0, 0x4,
+				0x5, 0x6, 0x7, 0x8,
+				0x9, 0x10, 0x11, 0x12,
+				0x13, 0x15, 0x15, 0x16};
+  ubc_arg2 = (vector bool char){0x5, 0x20, 0xC, 0x40,
+				0x55, 0x66, 0x77, 0x88,
+				0x9, 0xFF, 0x0, 0xED,
+				0x4, 0x5, 0x6, 0x7};
+  ubc_arg3 = (vector bool char){0x08, 0x19, 0x2A, 0x3B,
+				0x4D, 0x5C, 0x6D, 0x7E,
+				0x8F, 0x90, 0xA1, 0xB2,
+				0xC3, 0xD4, 0xE5, 0xF6};
+  vec_ubc_expected1 = (vector bool char){0x3, 0xFD, 0xB0, 0xE9,
+					 0x0, 0x2, 0x2, 0xE,
+					 0xE, 0x15, 0x31, 0x1E,
+					 0x53, 0x40, 0x73, 0x61};
+  vec_ubc_result1 = vec_permxor (ubc_arg1, ubc_arg2, ubc_arg3);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_ubc_expected1[i] != vec_ubc_result1[i])
+#ifdef DEBUG
+      printf("ERROR vec_permxor (ubc, ubc, ubc) result[%d]=0x%x != expected[%d]=0x%x\n",
+	     i, vec_ubc_result1[i],  i, vec_ubc_expected1[i]);
+#else
+      abort();
+#endif
+  }
+
+  /* vec_permxor: signed char args, result */
+  sc_arg1 = (vector signed char){0x1, 0x2, 0x3, 0x4,
+				 0x5, 0x6, 0x7, 0x8,
+				 0x9, 0x10, 0xA, 0xB,
+				 0xC, 0xD, 0xE, 0xF};
+  sc_arg2 = (vector signed char){0x5, 0x5, 0x7, 0x8,
+				 0x9, 0xA, 0xB, 0xC,
+				 0xD, 0xE, 0xF, 0x0,
+				 0x1, 0x2, 0x3, 0x4};
+  sc_arg3 = (vector signed char){0x08, 0x19, 0x2A, 0x3B,
+				0x4D, 0x5C, 0x6D, 0x7E,
+				0x8F, 0x90, 0xA1, 0xB2,
+				0xC3, 0xD4, 0xE5, 0xF6};
+  vec_sc_expected1 = (vector signed char){0xC, 0xC, 0xC, 0x4,
+					  0x7, 0x7, 0x5, 0xB,
+					  0xD, 0x15, 0xF, 0xC,
+					  0x4, 0x4, 0x4, 0x4};
+  vec_sc_result1 = vec_permxor (sc_arg1, sc_arg2, sc_arg3);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_sc_expected1[i] != vec_sc_result1[i])
+#ifdef DEBUG
+      printf("ERROR vec_permxor (sc, sc, sc) result[%d]=0x%x != expected[%d]=0x%x\n",
+	     i, vec_sc_result1[i],  i, vec_sc_expected1[i]);
+#else
+      abort();
+#endif
+  }
+
+  /* vec_permxor: unsigned char args, result */
+  uc_arg1 = (vector unsigned char){0xA, 0xB, 0xC, 0xD,
+				   0xE, 0xF, 0x0, 0x1,
+				   0x2, 0x3, 0x4, 0x5,
+				   0x6, 0x7, 0x8, 0x9};
+  uc_arg2 = (vector unsigned char){0x5, 0x6, 0x7, 0x8,
+				   0x9, 0xA, 0xB, 0xC,
+				   0xD, 0xE, 0xF, 0x0,
+				   0x1, 0x2, 0x3, 0x4};
+  uc_arg3 = (vector unsigned char){0x08, 0x19, 0x2A, 0x3B,
+				   0x4D, 0x5C, 0x6D, 0x7E,
+				   0x8F, 0x90, 0xA1, 0xB2,
+				   0xC3, 0xD4, 0xE5, 0xF6};
+  vec_uc_expected1 = (vector unsigned char){0x7, 0x5, 0x3, 0xD,
+					    0xC, 0xE, 0x2, 0x2,
+					    0x6, 0x6, 0x2, 0x2,
+					    0xE, 0xE, 0x2, 0x2};
+  vec_uc_result1 = vec_permxor (uc_arg1, uc_arg2, uc_arg3);
+
+  for (i = 0; i < 16; i++) {
+    if (vec_uc_expected1[i] != vec_uc_result1[i])
+#ifdef DEBUG
+      printf("ERROR vec_permxor (uc, uc, uc) result[%d]=0x%x != expected[%d]=0x%x\n",
+	     i, vec_uc_result1[i],  i, vec_uc_expected1[i]);
+#else
+      abort();
+#endif
+  }
+}
-- 
2.7.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]