This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, rs6000] Add Power 8 support to vec_revb
- From: Carl Love <cel at us dot ibm dot com>
- To: Segher Boessenkool <segher at kernel dot crashing dot org>
- Cc: gcc-patches at gcc dot gnu dot org, David Edelsohn <dje dot gcc at gmail dot com>, Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>, cel at us dot ibm dot com
- Date: Tue, 17 Oct 2017 09:56:43 -0700
- Subject: Re: [PATCH, rs6000] Add Power 8 support to vec_revb
- Authentication-results: sourceware.org; auth=none
- References: <1507563417.12239.171.camel@us.ibm.com> <20171009165345.GD4406@gate.crashing.org>
GCC maintainers:
I have addressed the issues with the vec_revb patch mentioned by Segher.
I have retested the updated patch on:
powerpc64-unknown-linux-gnu (Power 8 BE),
powerpc64le-unknown-linux-gnu (Power 8 LE),
powerpc64le-unknown-linux-gnu (Power 9 LE)
without regressions.
Please let me know if the following patch is acceptable. Thanks.
Carl Love
-----------------------------------------------------------------------
gcc/ChangeLog:
2017-10-17 Carl Love <cel@us.ibm.com>
* config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB):
Add power 8 definitions for the builtin instances.
(P9V_BUILTIN_VEC_REVB): Remove the power 9 instance
definitions.
* config/rs6000/altivec.h (vec_revb): Change the
#define from power 9 to power 8.
* config/rs6000/rs6000-protos.h (swap_selector_for_mode): Add extern
declaration.
* config/rs6000/rs6000.c (swap_selector_for_mode): Add
endian option to function.
* config/rs6000/rs6000-builtin.def (BU_P8V_VSX_1,
BU_P8V_OVERLOAD_1): Add power 8 macro expansions.
(BU_P9V_OVERLOAD_1): Remove power 9 overload expansion.
* config/rs6000/vsx.md (revb_<mode>): Add define_expand
to generate power 8 instructions for the vec_revb builtin.
gcc/testsuite/ChangeLog:
2017-10-17 Carl Love <cel@us.ibm.com>
* gcc.target/powerpc/builtins-revb-runnable.c: New
runnable test file for the vec_revb builtin.
---
gcc/config/rs6000/altivec.h | 3 +-
gcc/config/rs6000/rs6000-builtin.def | 10 +-
gcc/config/rs6000/rs6000-c.c | 44 +--
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000.c | 102 ++++--
gcc/config/rs6000/vsx.md | 45 +++
.../gcc.target/powerpc/builtins-revb-runnable.c | 352 +++++++++++++++++++++
7 files changed, 500 insertions(+), 58 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index c8e508c..a05e23a 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -415,6 +415,7 @@
#define vec_vsubuqm __builtin_vec_vsubuqm
#define vec_vupkhsw __builtin_vec_vupkhsw
#define vec_vupklsw __builtin_vec_vupklsw
+#define vec_revb __builtin_vec_revb
#endif
#ifdef __POWER9_VECTOR__
@@ -476,8 +477,6 @@
#define vec_xlx __builtin_vec_vextulx
#define vec_xrx __builtin_vec_vexturx
-
-#define vec_revb __builtin_vec_revb
#endif
/* Predicates.
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 850164a..7ca2974 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1853,6 +1853,13 @@ BU_P6_64BIT_2 (CMPB, "cmpb", CONST, cmpbdi3)
/* 1 argument VSX instructions added in ISA 2.07. */
BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
+BU_P8V_VSX_1 (REVB_V1TI, "revb_v1ti", CONST, revb_v1ti)
+BU_P8V_VSX_1 (REVB_V2DI, "revb_v2di", CONST, revb_v2di)
+BU_P8V_VSX_1 (REVB_V4SI, "revb_v4si", CONST, revb_v4si)
+BU_P8V_VSX_1 (REVB_V8HI, "revb_v8hi", CONST, revb_v8hi)
+BU_P8V_VSX_1 (REVB_V16QI, "revb_v16qi", CONST, revb_v16qi)
+BU_P8V_VSX_1 (REVB_V2DF, "revb_v2df", CONST, revb_v2df)
+BU_P8V_VSX_1 (REVB_V4SF, "revb_v4sf", CONST, revb_v4sf)
/* 1 argument altivec instructions added in ISA 2.07. */
BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
@@ -1962,6 +1969,7 @@ BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh")
BU_P8V_OVERLOAD_1 (VPOPCNTUW, "vpopcntuw")
BU_P8V_OVERLOAD_1 (VPOPCNTUD, "vpopcntud")
BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
+BU_P8V_OVERLOAD_1 (REVB, "revb")
/* ISA 2.07 vector overloaded 2 argument functions. */
BU_P8V_OVERLOAD_2 (EQV, "eqv")
@@ -2073,8 +2081,6 @@ BU_P9V_OVERLOAD_1 (VSTDCNQP, "scalar_test_neg_qp")
BU_P9V_OVERLOAD_1 (VSTDCNDP, "scalar_test_neg_dp")
BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp")
-BU_P9V_OVERLOAD_1 (REVB, "revb")
-
BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth")
BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 897306c..0706319 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -5532,36 +5532,38 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
RS6000_BTI_unsigned_V16QI, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI,
- RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI,
- RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI,
- RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI,
- RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V1TI,
RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DF,
- RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SF,
- RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI,
RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
- { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI,
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index f9be5d3..648c198 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -142,6 +142,8 @@ extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
extern void rs6000_split_signbit (rtx, rtx);
extern void rs6000_expand_atomic_compare_and_swap (rtx op[]);
+extern rtx swap_selector_for_mode (machine_mode mode, bool endian);
+
extern void rs6000_expand_atomic_exchange (rtx op[]);
extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6d613c3..2c8fb6c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -14288,39 +14288,75 @@ paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
return target;
}
-/* Return a constant vector for use as a little-endian permute control vector
- to reverse the order of elements of the given vector mode. */
-static rtx
-swap_selector_for_mode (machine_mode mode)
+
+/* Return a constant vector for use as a big-endian or little-endian
+ permute control vector to reverse the order of elements of the
+ given vector mode. */
+rtx
+swap_selector_for_mode (machine_mode mode, bool endian)
{
/* These are little endian vectors, so their elements are reversed
from what you would normally expect for a permute control vector. */
- unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
- unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
- unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
- unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ unsigned int le_swap1[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ unsigned int le_swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
+ unsigned int le_swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
+ unsigned int le_swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
+ unsigned int le_swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+ unsigned int be_swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+ unsigned int be_swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
+ unsigned int be_swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
+ unsigned int be_swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
+ unsigned int be_swap16[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
unsigned int *swaparray, i;
rtx perm[16];
- switch (mode)
- {
- case E_V2DFmode:
- case E_V2DImode:
- swaparray = swap2;
- break;
- case E_V4SFmode:
- case E_V4SImode:
- swaparray = swap4;
- break;
- case E_V8HImode:
- swaparray = swap8;
- break;
- case E_V16QImode:
- swaparray = swap16;
- break;
- default:
- gcc_unreachable ();
- }
+ if ( endian == VECTOR_ELT_ORDER_BIG)
+ switch (mode)
+ {
+ case E_V1TImode:
+ swaparray = be_swap1;
+ break;
+ case E_V2DFmode:
+ case E_V2DImode:
+ swaparray = be_swap2;
+ break;
+ case E_V4SFmode:
+ case E_V4SImode:
+ swaparray = be_swap4;
+ break;
+ case E_V8HImode:
+ swaparray = be_swap8;
+ break;
+ case E_V16QImode:
+ swaparray = be_swap16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else
+ switch (mode)
+ {
+ case E_V1TImode:
+ swaparray = le_swap1;
+ break;
+ case E_V2DFmode:
+ case E_V2DImode:
+ swaparray = le_swap2;
+ break;
+ case E_V4SFmode:
+ case E_V4SImode:
+ swaparray = le_swap4;
+ break;
+ case E_V8HImode:
+ swaparray = le_swap8;
+ break;
+ case E_V16QImode:
+ swaparray = le_swap16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
for (i = 0; i < 16; ++i)
perm[i] = GEN_INT (swaparray[i]);
@@ -14338,7 +14374,7 @@ altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
rtx load = gen_rtx_SET (tmp, op1);
rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
- rtx sel = swap_selector_for_mode (mode);
+ rtx sel = swap_selector_for_mode (mode, !VECTOR_ELT_ORDER_BIG);
rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
gcc_assert (REG_P (op0));
@@ -14356,7 +14392,7 @@ altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
rtx store = gen_rtx_SET (op0, tmp);
rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
- rtx sel = swap_selector_for_mode (mode);
+ rtx sel = swap_selector_for_mode (mode, !VECTOR_ELT_ORDER_BIG);
rtx vperm;
gcc_assert (REG_P (op1));
@@ -14373,7 +14409,7 @@ altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
machine_mode inner_mode = GET_MODE_INNER (mode);
rtx tmp = gen_reg_rtx (mode);
rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
- rtx sel = swap_selector_for_mode (mode);
+ rtx sel = swap_selector_for_mode (mode, !VECTOR_ELT_ORDER_BIG);
rtx vperm;
gcc_assert (REG_P (op1));
@@ -14437,7 +14473,7 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
rtx temp = gen_reg_rtx (tmode);
emit_insn (gen_rtx_SET (temp, addr));
- rtx sel = swap_selector_for_mode (tmode);
+ rtx sel = swap_selector_for_mode (tmode, !VECTOR_ELT_ORDER_BIG);
rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
UNSPEC_VPERM);
emit_insn (gen_rtx_SET (target, vperm));
@@ -14509,7 +14545,7 @@ altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool b
/* Reverse element order of elements if in LE mode */
if (!VECTOR_ELT_ORDER_BIG)
{
- rtx sel = swap_selector_for_mode (tmode);
+ rtx sel = swap_selector_for_mode (tmode, !VECTOR_ELT_ORDER_BIG);
rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel),
UNSPEC_VPERM);
emit_insn (gen_rtx_SET (target, vperm));
@@ -14647,7 +14683,7 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
{
rtx temp = gen_reg_rtx (tmode);
- rtx sel = swap_selector_for_mode (tmode);
+ rtx sel = swap_selector_for_mode (tmode, !VECTOR_ELT_ORDER_BIG);
rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
UNSPEC_VPERM);
emit_insn (gen_rtx_SET (temp, vperm));
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b47eeac..953b57e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -61,6 +61,24 @@
(KF "FLOAT128_VECTOR_P (KFmode)")
(TF "FLOAT128_VECTOR_P (TFmode)")])
+;; Iterator for the vector modes handled by the revb_<mode> expander
+(define_mode_iterator XXBR_L [V16QI
+ V8HI
+ V4SI
+ V2DI
+ V4SF
+ V2DF
+ V1TI])
+
+;; Attribute for xxbr instructions
+(define_mode_attr VSX_XXBR [(V16QI "q_v16qi")
+ (V8HI "h_v8hi")
+ (V4SI "w_v4si")
+ (V2DI "d_v2di")
+ (V4SF "w_v4sf")
+ (V2DF "d_v2df")
+ (V1TI "q_v1ti")])
+
;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
V8HI
@@ -4712,6 +4730,33 @@
"xxbrw %x0,%x1"
[(set_attr "type" "vecperm")])
+
+;; Swap all bytes in each element
+(define_expand "revb_<mode>"
+ [(set (match_operand:XXBR_L 0 "vsx_register_operand")
+ (bswap:XXBR_L (match_operand:XXBR_L 1 "vsx_register_operand")))]
+ "TARGET_P9_VECTOR"
+{
+ rtx sel;
+
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_p9_xxbr<VSX_XXBR> (operands[0], operands[1]));
+ else
+ {
+ /* Want to have the elements in reverse order relative
+ to the endian mode in use, i.e. in LE mode, put elements
+ in BE order. */
+ sel = swap_selector_for_mode(<MODE>mode, VECTOR_ELT_ORDER_BIG);
+ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
+ operands[1], sel));
+ }
+
+ DONE;
+}
+
+;; [(set_attr "type" "vecperm")])
+)
+
;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
[(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c
new file mode 100644
index 0000000..21f4f94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c
@@ -0,0 +1,352 @@
+/* { dg-do run { target { powerpc*-*-* && p8vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3" } */
+
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+/* Verify vec_revb builtins */
+
+int
+main()
+{
+ int i;
+ vector bool char arg_bc, result_bc, expected_bc;
+ vector unsigned char arg_uc, result_uc, expected_uc;
+ vector signed char arg_sc, result_sc, expected_sc;
+
+ vector bool short int arg_bsi, result_bsi, expected_bsi;
+ vector unsigned short int arg_usi, result_usi, expected_usi;
+ vector short int arg_si, result_si, expected_si;
+
+ vector bool int arg_bi, result_bi, expected_bi;
+ vector unsigned int arg_ui, result_ui, expected_ui;
+ vector int arg_int, result_int, expected_int;
+
+ vector bool long long int arg_blli, result_blli, expected_blli;
+ vector unsigned long long int arg_ulli, result_ulli, expected_ulli;
+ vector long long int arg_lli, result_lli, expected_lli;
+
+ vector __uint128_t arg_uint128, result_uint128, expected_uint128;
+ vector __int128_t arg_int128, result_int128, expected_int128;
+
+ vector float arg_f, result_f, expected_f;
+ vector double arg_d, result_d, expected_d;
+
+ /* 8-bit ints */
+ arg_bc = (vector bool char) {0x01, 0x23, 0x45, 0x67,
+ 0x7E, 0x7C, 0x7A, 0x78,
+ 0x02, 0x46, 0x7A, 0x7E,
+ 0x13, 0x57, 0x7B, 0x7F};
+ expected_bc = (vector bool char) {0x7F, 0x7b, 0x57, 0x13,
+ 0x7E, 0x7A, 0x46, 0x02,
+ 0x78, 0x7A, 0x7C, 0x7E,
+ 0x67, 0x45, 0x23, 0x01};
+#if 0
+ result_bc = vec_revb (arg_bc);
+
+ for (i = 0; i < 16; i++) {
+ if (result_bc[i] != expected_bc[i])
+#ifdef DEBUG
+ printf("arg_bc[%d] = 0x%x, result_bc[%d] = 0x%x, expected_bc[%d] = 0x%x\n",
+ i, arg_bc[i], i, result_bc[i], i, expected_bc[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_uc = (vector unsigned char) {0x01, 0x23, 0x45, 0x67,
+ 0x7E, 0x7C, 0x7A, 0x78,
+ 0x02, 0x46, 0x7A, 0x7E,
+ 0x13, 0x57, 0x7B, 0x7F};
+ expected_uc = (vector unsigned char) {0x7F, 0x7b, 0x57, 0x13,
+ 0x7E, 0x7A, 0x46, 0x02,
+ 0x78, 0x7A, 0x7C, 0x7E,
+ 0x67, 0x45, 0x23, 0x01};
+
+ result_uc = vec_revb (arg_uc);
+
+ for (i = 0; i < 16; i++) {
+ if (result_uc[i] != expected_uc[i])
+#ifdef DEBUG
+ printf("arg_uc[%d] = 0x%x, result_uc[%d] = 0x%x, expected_uc[%d] = 0x%x\n",
+ i, arg_uc[i], i, result_uc[i], i, expected_uc[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_sc = (vector signed char) {0x01, 0x23, 0x45, 0x67,
+ 0x7E, 0x7C, 0x7A, 0x78,
+ 0x02, 0x46, 0x7A, 0x7E,
+ 0x13, 0x57, 0x7B, 0x7F};
+ expected_sc = (vector signed char) {0x7F, 0x7b, 0x57, 0x13,
+ 0x7E, 0x7A, 0x46, 0x02,
+ 0x78, 0x7A, 0x7C, 0x7E,
+ 0x67, 0x45, 0x23, 0x01};
+
+ result_sc = vec_revb (arg_sc);
+
+ for (i = 0; i < 16; i++) {
+ if (result_sc[i] != expected_sc[i])
+#ifdef DEBUG
+ printf("arg_sc[%d] = 0x%x, result_sc[%d] = 0x%x, expected_sc[%d] = 0x%x\n",
+ i, arg_sc[i], i, result_sc[i], i, expected_sc[i]);
+#else
+ abort();
+#endif
+ }
+#endif
+
+ /* 16-bit ints */
+ arg_bsi = (vector bool short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, 0x0246,
+ 0x8ACE, 0x1357, 0x9BDF};
+ expected_bsi = (vector bool short int) {0x2301, 0x6745, 0xDCFE, 0x98BA,
+ 0x4602, 0xCE8A, 0x5713, 0xDF9B};
+
+ result_bsi = vec_revb (arg_bsi);
+
+ for (i = 0; i < 8; i++) {
+ if (result_bsi[i] != expected_bsi[i])
+#ifdef DEBUG
+ printf("arg_bsi[%d] = 0x%x, result_bsi[%d] = 0x%x, expected_bsi[%d] = 0x%x\n",
+ i, arg_bsi[i], i, result_bsi[i], i, expected_bsi[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_usi = (vector unsigned short int) {0x0123, 0x4567, 0xFEDC, 0xBA98,
+ 0x0246, 0x8ACE, 0x1357, 0x9BDF};
+ expected_usi = (vector unsigned short int) {0x2301, 0x6745, 0xDCFE, 0x98BA,
+ 0x4602, 0xCE8A, 0x5713, 0xDF9B};
+
+ result_usi = vec_revb (arg_usi);
+
+ for (i = 0; i < 8; i++) {
+ if (result_usi[i] != expected_usi[i])
+#ifdef DEBUG
+ printf("arg_usi[%d] = 0x%x, result_usi[%d] = 0x%x, expected_usi[%d] = 0x%x\n",
+ i, arg_usi[i], i, result_usi[i], i, expected_usi[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_si = (vector short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, 0x0246, 0x8ACE,
+ 0x1357, 0x9BDF};
+ expected_si = (vector short int) {0x2301, 0x6745, 0xDCFE, 0x98BA, 0x4602,
+ 0xCE8A, 0x5713, 0xDF9B};
+
+ result_si = vec_revb (arg_si);
+
+ for (i = 0; i < 8; i++) {
+ if (result_si[i] != expected_si[i])
+#ifdef DEBUG
+ printf("arg_si[%d] = 0x%x, result_si[%d] = 0x%x, expected_si[%d] = 0x%x\n",
+ i, arg_si[i], i, result_si[i], i, expected_si[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 32-bit ints */
+ arg_bi = (vector bool int) {0x01234567, 0xFEDCBA98, 0x02468ACE, 0x13579BDF};
+ expected_bi = (vector bool int) {0x67452301, 0x98BADCFE, 0xCE8A4602,
+ 0xDF9B5713};
+
+ result_bi = vec_revb (arg_bi);
+
+ for (i = 0; i < 4; i++) {
+ if (result_bi[i] != expected_bi[i])
+#ifdef DEBUG
+ printf("arg_bi[%d] = 0x%x, result_bi[%d] = 0x%x, expected_bi[%d] = 0x%x\n",
+ i, arg_bi[i], i, result_bi[i], i, expected_bi[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_ui = (vector unsigned int) {0x01234567, 0xFEDCBA98, 0x02468ACE,
+ 0x13579BDF};
+ expected_ui = (vector unsigned int) {0x67452301, 0x98BADCFE, 0xCE8A4602,
+ 0xDF9B5713};
+
+ result_ui = vec_revb (arg_ui);
+
+ for (i = 0; i < 4; i++) {
+ if (result_ui[i] != expected_ui[i])
+#ifdef DEBUG
+ printf("arg_ui[%d] = 0x%x, result_ui[%d] = 0x%x, expected_ui[%d] = 0x%x\n",
+ i, arg_ui[i], i, result_ui[i], i, expected_ui[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_int = (vector int) {0x01234567, 0xFEDCBA98, 0x02468ACE, 0x13579BDF};
+ expected_int = (vector int) {0x67452301, 0x98BADCFE, 0xCE8A4602, 0xDF9B5713};
+
+ result_int = vec_revb (arg_int);
+
+ for (i = 0; i < 4; i++) {
+ if (result_int[i] != expected_int[i])
+#ifdef DEBUG
+ printf("arg_int[%d] = 0x%x, result_int[%d] = 0x%x, expected_int[%d] = 0x%x\n",
+ i, arg_int[i], i, result_int[i], i, expected_int[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 64-bit ints */
+ arg_blli = (vector bool long long int) {0x01234567FEDCBA98,
+ 0x02468ACE13579BDF};
+ expected_blli = (vector bool long long int) {0x98BADCFE67452301,
+ 0xDF9B5713CE8A4602};
+
+ result_blli = vec_revb (arg_blli);
+
+ for (i = 0; i < 2; i++) {
+ if (result_blli[i] != expected_blli[i])
+#ifdef DEBUG
+ printf("arg_blli[%d] = 0x%x, result_blli[%d] = 0x%llx, expected_blli[%d] = 0x%llx\n",
+ i, arg_blli[i], i, result_blli[i], i, expected_blli[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_ulli = (vector unsigned long long int) {0x01234567FEDCBA98,
+ 0x02468ACE13579BDF};
+ expected_ulli = (vector unsigned long long int) {0x98BADCFE67452301,
+ 0xDF9B5713CE8A4602};
+
+ result_ulli = vec_revb (arg_ulli);
+
+ for (i = 0; i < 2; i++) {
+ if (result_ulli[i] != expected_ulli[i])
+#ifdef DEBUG
+ printf("arg_ulli[%d] = 0x%x, result_ulli[%d] = 0x%llx, expected_ulli[%d] = 0x%llx\n",
+ i, arg_ulli[i], i, result_ulli[i], i, expected_ulli[i]);
+#else
+ abort();
+#endif
+ }
+
+ arg_lli = (vector long long int) {0x01234567FEDCBA98, 0x02468ACE13579BDF};
+ expected_lli = (vector long long int) {0x98BADCFE67452301,
+ 0xDF9B5713CE8A4602};
+
+ result_lli = vec_revb (arg_lli);
+
+ for (i = 0; i < 2; i++) {
+ if (result_lli[i] != expected_lli[i])
+#ifdef DEBUG
+ printf("arg_lli[%d] = 0x%x, result_lli[%d] = 0x%llx, expected_lli[%d] = 0x%llx\n",
+ i, arg_lli[i], i, result_lli[i], i, expected_lli[i]);
+#else
+ abort();
+#endif
+ }
+
+ /* 128-bit ints */
+ arg_uint128[0] = 0x1627384950617243;
+ arg_uint128[0] = arg_uint128[0] << 64;
+ arg_uint128[0] |= 0x9405182930415263;
+ expected_uint128[0] = 0x6352413029180594;
+ expected_uint128[0] = expected_uint128[0] << 64;
+ expected_uint128[0] |= 0x4372615049382716;
+
+ result_uint128 = vec_revb (arg_uint128);
+
+ if (result_uint128[0] != expected_uint128[0])
+ {
+#ifdef DEBUG
+ printf("result_uint128[0] doesn't match expected_u128[0]\n");
+ printf("arg_uint128[0] = %llx ", arg_uint128[0] >> 64);
+ printf(" %llx\n", arg_uint128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("result_uint128[0] = %llx ", result_uint128[0] >> 64);
+ printf(" %llx\n", result_uint128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("expected_uint128[0] = %llx ", expected_uint128[0] >> 64);
+ printf(" %llx\n", expected_uint128[0] & 0xFFFFFFFFFFFFFFFF);
+#else
+ abort();
+#endif
+ }
+
+ arg_int128[0] = 0x1627384950617283;
+ arg_int128[0] = arg_int128[0] << 64;
+ arg_int128[0] |= 0x9405182930415263;
+ expected_int128[0] = 0x6352413029180594;
+ expected_int128[0] = expected_int128[0] << 64;
+ expected_int128[0] |= 0x8372615049382716;;
+
+ result_int128 = vec_revb (arg_int128);
+
+ if (result_int128[0] != expected_int128[0])
+ {
+#ifdef DEBUG
+ printf("result_int128[0] doesn't match expected128[0]\n");
+ printf("arg_int128[0] = %llx ", arg_int128[0] >> 64);
+ printf(" %llx\n", arg_int128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("result_int128[0] = %llx ", result_int128[0] >> 64);
+ printf(" %llx\n", result_int128[0] & 0xFFFFFFFFFFFFFFFF);
+
+ printf("expected_int128[0] = %llx ", expected_int128[0] >> 64);
+ printf(" %llx\n", expected_int128[0] & 0xFFFFFFFFFFFFFFFF);
+#else
+ abort();
+#endif
+ }
+
+ /* 32-bit floats */
+ /* 0x42f7224e, 0x43e471ec, 0x49712062, 0x4a0f2b38 */
+ arg_f = (vector float) {123.567, 456.89, 987654.123456, 2345678.0};
+ /* 0x4e22F742, 0xec71e443, 0x62207149, 0x382b0f4a */
+ expected_f = (vector float) {683528320.0,
+ -1169716232068291395011477504.0,
+ 739910526898278498304.0,
+ 0.0000407838160754181444644927978515625};
+
+ result_f = vec_revb (arg_f);
+
+ for (i = 0; i < 4; i++) {
+ if (result_f[i] != expected_f[i])
+ {
+#ifdef DEBUG
+ printf(" arg_f[%d] = %f, result_f[%d] = %f, expected_f[%d] = %f\n",
+ i, arg_f[i], i, result_f[i], i, expected_f[i]);
+#else
+ abort();
+#endif
+ }
+ }
+
+ /* 64-bit floats */
+ /* 0x419D6F34547E6B75 0x4194E5FEC781948B */
+ arg_d = (vector double) {123456789.123456789, 87654321.87654321};
+ /* 0x756B7E54346F9D41 0x8B9481C7FEE59441 */
+ expected_d = (vector double) {4.12815412905659550518671402044E257,
+ -6.99269992046390236552018719554E-253};
+
+ result_d = vec_revb (arg_d);
+
+ for (i = 0; i < 2; i++) {
+ if (result_d[i] != expected_d[i])
+#ifdef DEBUG
+ printf("arg_d[%d] = %f, result_d[%d] = %f, expected_d[%d] = %f\n",
+ i, arg_d[i], i, result_d[i], i, expected_d[i]);
+#else
+ abort();
+#endif
+ }
+}
--
2.7.4