This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, RS6000] Add support for the vec_cmpne altivec builtins
- From: Bill Seurer <seurer at linux dot vnet dot ibm dot com>
- To: gcc Patches <gcc-patches at gcc dot gnu dot org>
- Cc: David Edelsohn <dje dot gcc at gmail dot com>, Segher Boessenkool <segher at kernel dot crashing dot org>
- Date: Wed, 25 May 2016 10:37:43 -0500
- Subject: [PATCH, RS6000] Add support for the vec_cmpne altivec builtins
- Authentication-results: sourceware.org; auth=none
This patch adds support for the vec_cmpne altivec builtins from the Power
Architecture 64-Bit ELF V2 ABI OpenPOWER ABI for Linux Supplement (16 July
2015 Version 1.1). Many of the builtins described there are still missing, and
this is part of a series of patches to add them.
There are no instructions that implement vec_cmpne directly, so the output code
is built from other built-ins that do have instructions. In this case the
expansion is the following.
vec_cmpne (va, vb) == vec_nor (vec_cmpeq (va, vb), vec_cmpeq (va, vb))
The new test cases are executable tests which verify that the generated
code produces expected values. C macros were used so that the same
test case could be used for both the signed and unsigned versions of various
basic types. A separate executable test case is used for the long long versions
of vec_cmpne because of some differences in loading and storing the vectors.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu (power8) and
powerpc64-unknown-linux-gnu (both power7 and power8) with no regressions. Is
this ok for trunk?
[gcc]
2016-05-25 Bill Seurer <seurer@linux.vnet.ibm.com>
* config/rs6000/altivec.h (vec_cmpne): Add #define for vec_cmpne.
* config/rs6000/rs6000-builtin.def (vec_cmpne): Add vec_cmpne as a
special case builtin.
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): Add
code for ALTIVEC_BUILTIN_VEC_CMPNE.
* config/rs6000/rs6000.c (altivec_init_builtins): Add definition
for __builtin_vec_cmpne.
[gcc/testsuite]
2016-05-25 Bill Seurer <seurer@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-cmpne.c: New test.
* gcc.target/powerpc/vec-cmpne-long.c: New test.
Index: gcc/config/rs6000/altivec.h
===================================================================
--- gcc/config/rs6000/altivec.h (revision 236702)
+++ gcc/config/rs6000/altivec.h (working copy)
@@ -196,6 +196,7 @@
#define vec_andc __builtin_vec_andc
#define vec_avg __builtin_vec_avg
#define vec_cmpeq __builtin_vec_cmpeq
+#define vec_cmpne __builtin_vec_cmpne
#define vec_cmpgt __builtin_vec_cmpgt
#define vec_ctf __builtin_vec_ctf
#define vec_dst __builtin_vec_dst
Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def (revision 236702)
+++ gcc/config/rs6000/rs6000-builtin.def (working copy)
@@ -1282,6 +1282,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p")
/* Overloaded Altivec builtins that are handled as special cases. */
BU_ALTIVEC_OVERLOAD_X (ADDE, "adde")
BU_ALTIVEC_OVERLOAD_X (ADDEC, "addec")
+BU_ALTIVEC_OVERLOAD_X (CMPNE, "cmpne")
BU_ALTIVEC_OVERLOAD_X (CTF, "ctf")
BU_ALTIVEC_OVERLOAD_X (CTS, "cts")
BU_ALTIVEC_OVERLOAD_X (CTU, "ctu")
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c (revision 236702)
+++ gcc/config/rs6000/rs6000-c.c (working copy)
@@ -4675,6 +4675,66 @@ assignment for unaligned loads and stores");
warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \
assignment for unaligned loads and stores");
+
+ if (fcode == ALTIVEC_BUILTIN_VEC_CMPNE)
+ {
+ /* vec_cmpne needs to be special cased because there are no instructions
+ for it (prior to power 9). */
+ if (nargs != 2)
+ {
+ error ("vec_cmpne only accepts 2 arguments");
+ return error_mark_node;
+ }
+
+ tree arg0 = (*arglist)[0];
+ tree arg0_type = TREE_TYPE (arg0);
+ tree arg1 = (*arglist)[1];
+ tree arg1_type = TREE_TYPE (arg1);
+
+ /* Both arguments must be vectors and the types must match. */
+ if (arg0_type != arg1_type)
+ goto bad;
+ if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+ goto bad;
+
+ switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+ {
+ /* vec_cmpne (va, vb) == vec_nor (vec_cmpeq (va, vb),
+ vec_cmpeq (va, vb)). */
+ /* Note: vec_nand also works but opt changes vec_nand's to vec_nor's
+ anyway. */
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case TImode:
+ case SFmode:
+ case DFmode:
+ {
+ /* call = vec_cmpeq (va, vb)
+ result = vec_nor (call, call)
+ */
+ vec<tree, va_gc> *params = make_tree_vector ();
+ vec_safe_push (params, arg0);
+ vec_safe_push (params, arg1);
+ tree call = altivec_resolve_overloaded_builtin
+ (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_CMPEQ], params);
+ /* Use save_expr to ensure that operands used more than once
+ that may have side effects (like calls) are only evaluated
+ once. */
+ call = save_expr (call);
+ params = make_tree_vector ();
+ vec_safe_push (params, call);
+ vec_safe_push (params, call);
+ return altivec_resolve_overloaded_builtin
+ (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_NOR], params);
+ }
+ /* Other types are errors. */
+ default:
+ goto bad;
+ }
+ }
+
if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
{
/* vec_adde needs to be special cased because there is no instruction
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 236702)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -16293,6 +16293,10 @@ altivec_init_builtins (void)
= build_function_type_list (opaque_V4SI_type_node,
opaque_V4SI_type_node, opaque_V4SI_type_node,
opaque_V4SI_type_node, NULL_TREE);
+ tree opaque_ftype_opaque_opaque
+ = build_function_type_list (opaque_V4SI_type_node,
+ opaque_V4SI_type_node, opaque_V4SI_type_node,
+ NULL_TREE);
tree int_ftype_int_opaque_opaque
= build_function_type_list (integer_type_node,
integer_type_node, opaque_V4SI_type_node,
@@ -16567,6 +16571,8 @@ altivec_init_builtins (void)
ALTIVEC_BUILTIN_VEC_ADDE);
def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
ALTIVEC_BUILTIN_VEC_ADDEC);
+ def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
+ ALTIVEC_BUILTIN_VEC_CMPNE);
/* Cell builtins. */
def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
Index: gcc/testsuite/gcc.target/powerpc/vec-cmpne-long.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-cmpne-long.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-cmpne-long.c (working copy)
@@ -0,0 +1,110 @@
+/* { dg-do run { target { powerpc64*-*-* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -mpower8-vector -O3" } */
+
+/* Test that the vec_cmpne builtin works as expected for long long
+ and double vectors. */
+
+#include "altivec.h"
+
+#define N 4096
+
+void abort ();
+
+#define define_test_functions(VBTYPE, RTYPE, STYPE, NAME) \
+\
+RTYPE result_ne_##NAME[N] __attribute__((aligned(16))); \
+RTYPE result_eq_##NAME[N] __attribute__((aligned(16))); \
+STYPE operand1_##NAME[N] __attribute__((aligned(16))); \
+STYPE operand2_##NAME[N] __attribute__((aligned(16))); \
+RTYPE expected_##NAME[N] __attribute__((aligned(16))); \
+\
+__attribute__((noinline)) void vector_tests_##NAME () \
+{ \
+ vector STYPE v1_##NAME, v2_##NAME; \
+ vector bool VBTYPE tmp_##NAME; \
+ int i; \
+ for (i = 0; i < N; i+=16/sizeof (STYPE)) \
+ { \
+ /* result_ne = operand1!=operand2. */ \
+ v1_##NAME = (vector STYPE) { operand1_##NAME[i], \
+ operand1_##NAME[i+1] }; \
+ v2_##NAME = (vector STYPE) { operand2_##NAME[i], \
+ operand2_##NAME[i+1] }; \
+\
+ tmp_##NAME = vec_cmpeq (v1_##NAME, v2_##NAME); \
+ result_eq_##NAME[i] = tmp_##NAME[0]; \
+ result_eq_##NAME[i+1] = tmp_##NAME[1]; \
+\
+ tmp_##NAME = vec_cmpne (v1_##NAME, v2_##NAME); \
+ result_ne_##NAME[i] = tmp_##NAME[0]; \
+ result_ne_##NAME[i+1] = tmp_##NAME[1]; \
+ } \
+} \
+\
+__attribute__((noinline)) void init_##NAME () \
+{ \
+ int i; \
+ for (i = 0; i < N; ++i) \
+ { \
+ result_ne_##NAME[i] = 7; \
+ result_eq_##NAME[i] = 15; \
+ if (i%3 == 0) \
+ { \
+ /* op1 < op2. */ \
+ operand1_##NAME[i] = 1; \
+ operand2_##NAME[i] = 2; \
+ } \
+ else if (i%3 == 1) \
+ { \
+ /* op1 > op2. */ \
+ operand1_##NAME[i] = 2; \
+ operand2_##NAME[i] = 1; \
+ } \
+ else if (i%3 == 2) \
+ { \
+ /* op1 == op2. */ \
+ operand1_##NAME[i] = 3; \
+ operand2_##NAME[i] = 3; \
+ } \
+ /* For vector comparisons: "For each element of the result_ne, the \
+ value of each bit is 1 if the corresponding elements of ARG1 and \
+ ARG2 are equal." {or whatever the comparison is} "Otherwise, the \
+ value of each bit is 0." */ \
+ expected_##NAME[i] = -1 * (RTYPE)(operand1_##NAME[i] != operand2_##NAME[i]); \
+ } \
+} \
+\
+__attribute__((noinline)) void verify_results_##NAME () \
+{ \
+ int i; \
+ for (i = 0; i < N; ++i) \
+ { \
+ if ( ((result_ne_##NAME[i] != expected_##NAME[i]) || \
+ (result_ne_##NAME[i] == result_eq_##NAME[i]))) \
+ abort (); \
+ } \
+}
+
+
+#define execute_test_functions(VBTYPE, RTYPE, STYPE, NAME) \
+{ \
+ init_##NAME (); \
+ vector_tests_##NAME (); \
+ verify_results_##NAME (); \
+}
+
+
+define_test_functions (long long, signed long long, signed long long, si);
+define_test_functions (long long, signed long long, double, dd);
+
+int main ()
+{
+ execute_test_functions (long long, signed long long, signed long long, si);
+ execute_test_functions (long long, signed long long, double, dd);
+
+ return 0;
+}
+
+
Index: gcc/testsuite/gcc.target/powerpc/vec-cmpne.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-cmpne.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-cmpne.c (working copy)
@@ -0,0 +1,114 @@
+/* { dg-do run { target { powerpc64*-*-* } } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mvsx -O3" } */
+
+/* Test that the vec_cmpne builtin works as expected. */
+
+#include "altivec.h"
+
+#define N 4096
+
+void abort ();
+
+#define define_test_functions(VBTYPE, RTYPE, STYPE, NAME) \
+\
+RTYPE result_ne_##NAME[N] __attribute__((aligned(16))); \
+RTYPE result_eq_##NAME[N] __attribute__((aligned(16))); \
+STYPE operand1_##NAME[N] __attribute__((aligned(16))); \
+STYPE operand2_##NAME[N] __attribute__((aligned(16))); \
+RTYPE expected_##NAME[N] __attribute__((aligned(16))); \
+\
+__attribute__((noinline)) void vector_tests_##NAME () \
+{ \
+ vector STYPE v1_##NAME, v2_##NAME; \
+ vector bool VBTYPE tmp_##NAME; \
+ int i; \
+ for (i = 0; i < N; i+=16/sizeof (STYPE)) \
+ { \
+ /* result_ne = operand1!=operand2. */ \
+ v1_##NAME = vec_vsx_ld (0, (const vector STYPE*)&operand1_##NAME[i]); \
+ v2_##NAME = vec_vsx_ld (0, (const vector STYPE*)&operand2_##NAME[i]); \
+\
+ tmp_##NAME = vec_cmpeq (v1_##NAME, v2_##NAME); \
+ vec_vsx_st (tmp_##NAME, 0, &result_eq_##NAME[i]); \
+\
+ tmp_##NAME = vec_cmpne (v1_##NAME, v2_##NAME); \
+ vec_vsx_st (tmp_##NAME, 0, &result_ne_##NAME[i]); \
+ } \
+} \
+\
+__attribute__((noinline)) void init_##NAME () \
+{ \
+ int i; \
+ for (i = 0; i < N; ++i) \
+ { \
+ result_ne_##NAME[i] = 7; \
+ result_eq_##NAME[i] = 15; \
+ if (i%3 == 0) \
+ { \
+ /* op1 < op2. */ \
+ operand1_##NAME[i] = 1; \
+ operand2_##NAME[i] = 2; \
+ } \
+ else if (i%3 == 1) \
+ { \
+ /* op1 > op2. */ \
+ operand1_##NAME[i] = 2; \
+ operand2_##NAME[i] = 1; \
+ } \
+ else if (i%3 == 2) \
+ { \
+ /* op1 == op2. */ \
+ operand1_##NAME[i] = 3; \
+ operand2_##NAME[i] = 3; \
+ } \
+ /* For vector comparisons: "For each element of the result_ne, the \
+ value of each bit is 1 if the corresponding elements of ARG1 and \
+ ARG2 are equal." {or whatever the comparison is} "Otherwise, the \
+ value of each bit is 0." */ \
+ expected_##NAME[i] = -1 * (RTYPE)(operand1_##NAME[i] != operand2_##NAME[i]); \
+ } \
+} \
+\
+__attribute__((noinline)) void verify_results_##NAME () \
+{ \
+ int i; \
+ for (i = 0; i < N; ++i) \
+ { \
+ if ( ((result_ne_##NAME[i] != expected_##NAME[i]) || \
+ (result_ne_##NAME[i] == result_eq_##NAME[i]))) \
+ abort (); \
+ } \
+}
+
+
+#define execute_test_functions(VBTYPE, RTYPE, STYPE, NAME) \
+{ \
+ init_##NAME (); \
+ vector_tests_##NAME (); \
+ verify_results_##NAME (); \
+}
+
+
+define_test_functions (int, signed int, signed int, si);
+define_test_functions (int, unsigned int, unsigned int, ui);
+define_test_functions (short, signed short, signed short, ss);
+define_test_functions (short, unsigned short, unsigned short, us);
+define_test_functions (char, signed char, signed char, sc);
+define_test_functions (char, unsigned char, unsigned char, uc);
+define_test_functions (int, signed int, float, ff);
+
+int main ()
+{
+ execute_test_functions (int, signed int, signed int, si);
+ execute_test_functions (int, unsigned int, unsigned int, ui);
+ execute_test_functions (short, signed short, signed short, ss);
+ execute_test_functions (short, unsigned short, unsigned short, us);
+ execute_test_functions (char, signed char, signed char, sc);
+ execute_test_functions (char, unsigned char, unsigned char, uc);
+ execute_test_functions (int, signed int, float, ff);
+
+ return 0;
+}
+
+
--
-Bill Seurer