This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH,rs6000] Add vec_permxor support.
- From: Carl Love <cel at us dot ibm dot com>
- To: Segher Boessenkool <segher at kernel dot crashing dot org>, gcc-patches at gcc dot gnu dot org, David Edelsohn <dje dot gcc at gmail dot com>
- Cc: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>, cel at us dot ibm dot com
- Date: Mon, 12 Mar 2018 08:21:08 -0700
- Subject: [PATCH,rs6000] Add vec_permxor support.
- Authentication-results: sourceware.org; auth=none
GCC maintainers:
The following patch adds support for the vec_permxor builtins.
Note that the change in file rs6000-p8swap.c is actually a fix for the
existing define_insn "crypto_vpermxor" in crypto.md. The fix disables
the swap optimization for the vpermxor instruction. Without this fix,
optimization at -O1 and above swaps the upper and lower 64 bits of the
three operands, which messes up the indexing specified by the third
operand. This change will need to be backported to GCC 6
and 7.
The patch has been tested on
powerpc64-unknown-linux-gnu (Power 8 BE)
powerpc64le-unknown-linux-gnu (Power 8 LE)
powerpc64le-unknown-linux-gnu (Power 9 LE)
with no regressions.
Let me know if the patch looks OK or not. Thanks.
Carl Love
----------------------------------------------------------------------
gcc/ChangeLog:
2018-03-12 Carl Love <cel@us.ibm.com>
* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add entries
for P8V_BUILTIN_VEC_VPERMXOR.
* config/rs6000/altivec.h: Add #define for vec_permxor builtin.
* config/rs6000/rs6000-builtin.def: Add macro expansions for VPERMXOR.
* config/rs6000/altivec.md: Add define_expand altivec_vpermxor.
Add UNSPEC_VNOR, define_insn altivec_vnor_v16qi3.
* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Add case
UNSPEC_VPERMXOR.
* doc/extend.texi: Add prototypes for vec_permxor.
gcc/testsuite/ChangeLog:
2018-03-12 Carl Love <cel@us.ibm.com>
* gcc.target/powerpc/builtins-7-runnable.c: New test for vec_permxor.
---
gcc/config/rs6000/altivec.h | 1 +
gcc/config/rs6000/altivec.md | 33 ++++++
gcc/config/rs6000/rs6000-builtin.def | 3 +
gcc/config/rs6000/rs6000-c.c | 10 ++
gcc/config/rs6000/rs6000-p8swap.c | 1 +
gcc/doc/extend.texi | 6 ++
.../gcc.target/powerpc/builtins-7-runnable.c | 112 +++++++++++++++++++++
7 files changed, 166 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 1e495e6..5a34162 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -76,6 +76,7 @@
#define vec_vor vec_or
#define vec_vpkpx vec_packpx
#define vec_vperm vec_perm
+#define vec_permxor __builtin_vec_vpermxor
#define vec_vrefp vec_re
#define vec_vrfin vec_round
#define vec_vrsqrtefp vec_rsqrte
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 2759f2d..7770743 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -44,6 +44,7 @@
UNSPEC_VMULOSH
UNSPEC_VMULOUW
UNSPEC_VMULOSW
+ UNSPEC_VNOR
UNSPEC_VPKPX
UNSPEC_VPACK_SIGN_SIGN_SAT
UNSPEC_VPACK_SIGN_UNS_SAT
@@ -3827,6 +3828,38 @@
DONE;
})
+(define_insn "altivec_vnor_v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VNOR))]
+ ""
+ "vnor %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_expand "altivec_vpermxor"
+ [(use (match_operand:V16QI 0 "register_operand"))
+ (use (match_operand:V16QI 1 "register_operand"))
+ (use (match_operand:V16QI 2 "register_operand"))
+ (use (match_operand:V16QI 3 "register_operand"))]
+ "TARGET_P8_VECTOR"
+{
+ if (!BYTES_BIG_ENDIAN)
+ {
+ /* vpermxor indexes the bytes using Big Endian numbering. If LE,
+ change indexing in operand[3] to BE index. */
+ rtx be_index = gen_reg_rtx (V16QImode);
+
+ emit_insn (gen_altivec_vnor_v16qi3 (be_index, operands[3], operands[3]));
+ emit_insn (gen_crypto_vpermxor_v16qi (operands[0], operands[1],
+ operands[2], be_index));
+ }
+ else
+ emit_insn (gen_crypto_vpermxor_v16qi (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
(define_expand "altivec_negv4sf2"
[(use (match_operand:V4SF 0 "register_operand"))
(use (match_operand:V4SF 1 "register_operand"))]
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f9548a0..25d45a9 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2008,6 +2008,8 @@ BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p)
BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p)
BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p)
+BU_P8V_AV_3 (VPERMXOR, "vpermxor", CONST, altivec_vpermxor)
+
/* ISA 2.05 overloaded 2 argument functions. */
BU_P6_OVERLOAD_2 (CMPB, "cmpb")
@@ -2069,6 +2071,7 @@ BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq")
BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm")
BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq")
BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm")
+BU_P8V_OVERLOAD_3 (VPERMXOR, "vpermxor")
/* ISA 3.0 vector overloaded 2-argument functions. */
BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index cc8e4e1..9ffb253 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3585,6 +3585,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI },
+
+ { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_bool_V16QI },
+ { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI },
+ { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+
{ ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI },
{ ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index ffcbba9..d2b39f3 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -753,6 +753,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VPERM_UNS:
case UNSPEC_VPERMHI:
case UNSPEC_VPERMSI:
+ case UNSPEC_VPERMXOR:
case UNSPEC_VPKPX:
case UNSPEC_VSLDOI:
case UNSPEC_VSLO:
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1379502..d10fb46 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18448,6 +18448,12 @@ vector double vec_perm (vector double, vector double, vector unsigned char);
vector long vec_perm (vector long, vector long, vector unsigned char);
vector unsigned long vec_perm (vector unsigned long, vector unsigned long,
vector unsigned char);
+vector bool char vec_permxor (vector bool char, vector bool char,
+ vector bool char);
+vector signed char vec_permxor (vector signed char, vector signed char,
+ vector signed char);
+vector unsigned char vec_permxor (vector unsigned char, vector unsigned char,
+ vector unsigned char);
vector double vec_rint (vector double);
vector double vec_recip (vector double, vector double);
vector double vec_rsqrt (vector double);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c
new file mode 100644
index 0000000..b77b1e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c
@@ -0,0 +1,112 @@
+/* { dg-do run { target { powerpc*-*-* && { lp64 && p8vector_hw } } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+int main() {
+ int i;
+ vector bool char ubc_arg1, ubc_arg2, ubc_arg3;
+ vector unsigned char uc_arg1, uc_arg2, uc_arg3;
+ vector signed char sc_arg1, sc_arg2, sc_arg3;
+
+ vector bool char vec_ubc_expected1, vec_ubc_result1;
+ vector unsigned char vec_uc_expected1, vec_uc_result1;
+ vector signed char vec_sc_expected1, vec_sc_result1;
+
+ /* vec_permxor: bool char args, result */
+ ubc_arg1 = (vector bool char){0xA, 0x2, 0xB0, 0x4,
+ 0x5, 0x6, 0x7, 0x8,
+ 0x9, 0x10, 0x11, 0x12,
+ 0x13, 0x15, 0x15, 0x16};
+ ubc_arg2 = (vector bool char){0x5, 0x20, 0xC, 0x40,
+ 0x55, 0x66, 0x77, 0x88,
+ 0x9, 0xFF, 0x0, 0xED,
+ 0x4, 0x5, 0x6, 0x7};
+ ubc_arg3 = (vector bool char){0x08, 0x19, 0x2A, 0x3B,
+ 0x4D, 0x5C, 0x6D, 0x7E,
+ 0x8F, 0x90, 0xA1, 0xB2,
+ 0xC3, 0xD4, 0xE5, 0xF6};
+ vec_ubc_expected1 = (vector bool char){0x3, 0xFD, 0xB0, 0xE9,
+ 0x0, 0x2, 0x2, 0xE,
+ 0xE, 0x15, 0x31, 0x1E,
+ 0x53, 0x40, 0x73, 0x61};
+ vec_ubc_result1 = vec_permxor (ubc_arg1, ubc_arg2, ubc_arg3);
+
+ for (i = 0; i < 16; i++) {
+ if (vec_ubc_expected1[i] != vec_ubc_result1[i])
+#ifdef DEBUG
+ printf("ERROR vec_permxor (ubc, ubc, ubc) result[%d]=0x%x != expected[%d]=0x%x\n",
+ i, vec_ubc_result1[i], i, vec_ubc_expected1[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* vec_permxor: signed char args, result */
+ sc_arg1 = (vector signed char){0x1, 0x2, 0x3, 0x4,
+ 0x5, 0x6, 0x7, 0x8,
+ 0x9, 0x10, 0xA, 0xB,
+ 0xC, 0xD, 0xE, 0xF};
+ sc_arg2 = (vector signed char){0x5, 0x5, 0x7, 0x8,
+ 0x9, 0xA, 0xB, 0xC,
+ 0xD, 0xE, 0xF, 0x0,
+ 0x1, 0x2, 0x3, 0x4};
+ sc_arg3 = (vector signed char){0x08, 0x19, 0x2A, 0x3B,
+ 0x4D, 0x5C, 0x6D, 0x7E,
+ 0x8F, 0x90, 0xA1, 0xB2,
+ 0xC3, 0xD4, 0xE5, 0xF6};
+ vec_sc_expected1 = (vector signed char){0xC, 0xC, 0xC, 0x4,
+ 0x7, 0x7, 0x5, 0xB,
+ 0xD, 0x15, 0xF, 0xC,
+ 0x4, 0x4, 0x4, 0x4};
+ vec_sc_result1 = vec_permxor (sc_arg1, sc_arg2, sc_arg3);
+
+ for (i = 0; i < 16; i++) {
+ if (vec_sc_expected1[i] != vec_sc_result1[i])
+#ifdef DEBUG
+ printf("ERROR vec_permxor (sc, sc, sc) result[%d]=0x%x != expected[%d]=0x%x\n",
+ i, vec_sc_result1[i], i, vec_sc_expected1[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* vec_permxor: unsigned char args, result */
+ uc_arg1 = (vector unsigned char){0xA, 0xB, 0xC, 0xD,
+ 0xE, 0xF, 0x0, 0x1,
+ 0x2, 0x3, 0x4, 0x5,
+ 0x6, 0x7, 0x8, 0x9};
+ uc_arg2 = (vector unsigned char){0x5, 0x6, 0x7, 0x8,
+ 0x9, 0xA, 0xB, 0xC,
+ 0xD, 0xE, 0xF, 0x0,
+ 0x1, 0x2, 0x3, 0x4};
+ uc_arg3 = (vector unsigned char){0x08, 0x19, 0x2A, 0x3B,
+ 0x4D, 0x5C, 0x6D, 0x7E,
+ 0x8F, 0x90, 0xA1, 0xB2,
+ 0xC3, 0xD4, 0xE5, 0xF6};
+ vec_uc_expected1 = (vector unsigned char){0x7, 0x5, 0x3, 0xD,
+ 0xC, 0xE, 0x2, 0x2,
+ 0x6, 0x6, 0x2, 0x2,
+ 0xE, 0xE, 0x2, 0x2};
+ vec_uc_result1 = vec_permxor (uc_arg1, uc_arg2, uc_arg3);
+
+ for (i = 0; i < 16; i++) {
+ if (vec_uc_expected1[i] != vec_uc_result1[i])
+#ifdef DEBUG
+ printf("ERROR vec_permxor (uc, uc, uc) result[%d]=0x%x != expected[%d]=0x%x\n",
+ i, vec_uc_result1[i], i, vec_uc_expected1[i]);
+#else
+ abort();
+#endif
+ }
+}
--
2.7.4