This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, rs6000] Add more overloaded built-ins for vec_cmpge and vec_cmple
- From: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: dje dot gcc at gmail dot com
- Date: Mon, 29 Jun 2015 21:05:13 -0500
- Subject: [PATCH, rs6000] Add more overloaded built-ins for vec_cmpge and vec_cmple
- Authentication-results: sourceware.org; auth=none
Hi,
This patch adds built-in functions for vec_cmpge and vec_cmple required
by the ELFv2 ABI but not yet present in gcc. These make use of the
existing patterns for gt:VEC_I and gtu:VEC_I, applying the "not"
operator and reversing the order of operands as needed. The result is
generation of a vmpgt[su][bhwd] instruction followed by an xxlnor.
Since it is likely that a vec_cmp[gl]e will be followed by a vec_sel, it
will be important to optimize the xxlnor/xxsel* to remove the xxlnor by
reversing the sense of the select. A separate patch will follow to add
this pattern to simplify-rtx.c.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions. Ok for trunk?
Thanks,
Bill
[gcc]
2015-06-29 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000-builtin.def (CMPGE_16QI): New built-in
definition.
(CMPGE_8HI): Likewise.
(CMPGE_4SI): Likewise.
(CMPGE_2DI): Likewise.
(CMPGE_U16QI): Likewise.
(CMPGE_U8HI): Likewise.
(CMPGE_U4SI): Likewise.
(CMPGE_U2DI): Likewise.
(CMPLE_16QI): Likewise.
(CMPLE_8HI): Likewise.
(CMPLE_4SI): Likewise.
(CMPLE_2DI): Likewise.
(CMPLE_U16QI): Likewise.
(CMPLE_U8HI): Likewise.
(CMPLE_U4SI): Likewise.
(CMPLE_U2DI): Likewise.
* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
overloads for ALTIVEC_BUILTIN_VEC_CMPGE and
ALTIVEC_BUILTIN_VEC_CMPLE.
* config/rs6000/vector.md (vector_ge<mode>): Restrict to
floating-point vector modes.
(vector_nlt<mode>): New define_expand.
(vector_nltu<mode>): Likewise.
(vector_ngt<mode>): Likewise.
(vector_ngtu<mode>): Likewise.
[gcc/testsuite]
2015-06-29 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-cmp.c: New test.
Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def (revision 224924)
+++ gcc/config/rs6000/rs6000-builtin.def (working copy)
@@ -1284,6 +1284,24 @@ BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", C
BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale)
BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale)
+BU_VSX_2 (CMPGE_16QI, "cmpge_16qi", CONST, vector_nltv16qi)
+BU_VSX_2 (CMPGE_8HI, "cmpge_8hi", CONST, vector_nltv8hi)
+BU_VSX_2 (CMPGE_4SI, "cmpge_4si", CONST, vector_nltv4si)
+BU_VSX_2 (CMPGE_2DI, "cmpge_2di", CONST, vector_nltv2di)
+BU_VSX_2 (CMPGE_U16QI, "cmpge_u16qi", CONST, vector_nltuv16qi)
+BU_VSX_2 (CMPGE_U8HI, "cmpge_u8hi", CONST, vector_nltuv8hi)
+BU_VSX_2 (CMPGE_U4SI, "cmpge_u4si", CONST, vector_nltuv4si)
+BU_VSX_2 (CMPGE_U2DI, "cmpge_u2di", CONST, vector_nltuv2di)
+
+BU_VSX_2 (CMPLE_16QI, "cmple_16qi", CONST, vector_ngtv16qi)
+BU_VSX_2 (CMPLE_8HI, "cmple_8hi", CONST, vector_ngtv8hi)
+BU_VSX_2 (CMPLE_4SI, "cmple_4si", CONST, vector_ngtv4si)
+BU_VSX_2 (CMPLE_2DI, "cmple_2di", CONST, vector_ngtv2di)
+BU_VSX_2 (CMPLE_U16QI, "cmple_u16qi", CONST, vector_ngtuv16qi)
+BU_VSX_2 (CMPLE_U8HI, "cmple_u8hi", CONST, vector_ngtuv8hi)
+BU_VSX_2 (CMPLE_U4SI, "cmple_u4si", CONST, vector_ngtuv4si)
+BU_VSX_2 (CMPLE_U2DI, "cmple_u2di", CONST, vector_ngtuv2di)
+
/* VSX abs builtin functions. */
BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2)
BU_VSX_A (XVNABSDP, "xvnabsdp", CONST, vsx_nabsv2df2)
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c (revision 224924)
+++ gcc/config/rs6000/rs6000-c.c (working copy)
@@ -1096,6 +1096,26 @@ const struct altivec_builtin_types altivec_overloa
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_XVCMPGEDP,
RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0},
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB,
RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB,
@@ -1146,6 +1166,26 @@ const struct altivec_builtin_types altivec_overloa
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_XVCMPGEDP,
RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0},
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0},
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB,
RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB,
Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md (revision 224924)
+++ gcc/config/rs6000/vector.md (working copy)
@@ -446,12 +446,25 @@
"")
(define_expand "vector_ge<mode>"
- [(set (match_operand:VEC_C 0 "vlogical_operand" "")
- (ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
- (match_operand:VEC_C 2 "vlogical_operand" "")))]
+ [(set (match_operand:VEC_F 0 "vlogical_operand" "")
+ (ge:VEC_F (match_operand:VEC_F 1 "vlogical_operand" "")
+ (match_operand:VEC_F 2 "vlogical_operand" "")))]
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
+; >= for integer vectors: swap operands and apply not-greater-than
+(define_expand "vector_nlt<mode>"
+ [(set (match_operand:VEC_I 3 "vlogical_operand" "")
+ (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "")
+ (match_operand:VEC_I 1 "vlogical_operand" "")))
+ (set (match_operand:VEC_I 0 "vlogical_operand" "")
+ (not:VEC_I (match_dup 3)))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+}")
+
(define_expand "vector_gtu<mode>"
[(set (match_operand:VEC_I 0 "vint_operand" "")
(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
@@ -459,6 +472,19 @@
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
+; >= for integer vectors: swap operands and apply not-greater-than
+(define_expand "vector_nltu<mode>"
+ [(set (match_operand:VEC_I 3 "vlogical_operand" "")
+ (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "")
+ (match_operand:VEC_I 1 "vlogical_operand" "")))
+ (set (match_operand:VEC_I 0 "vlogical_operand" "")
+ (not:VEC_I (match_dup 3)))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+}")
+
(define_expand "vector_geu<mode>"
[(set (match_operand:VEC_I 0 "vint_operand" "")
(geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
@@ -466,6 +492,31 @@
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
+; <= for integer vectors: apply not-greater-than
+(define_expand "vector_ngt<mode>"
+ [(set (match_operand:VEC_I 3 "vlogical_operand" "")
+ (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "")
+ (match_operand:VEC_I 2 "vlogical_operand" "")))
+ (set (match_operand:VEC_I 0 "vlogical_operand" "")
+ (not:VEC_I (match_dup 3)))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+}")
+
+(define_expand "vector_ngtu<mode>"
+ [(set (match_operand:VEC_I 3 "vlogical_operand" "")
+ (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "")
+ (match_operand:VEC_I 2 "vlogical_operand" "")))
+ (set (match_operand:VEC_I 0 "vlogical_operand" "")
+ (not:VEC_I (match_dup 3)))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ operands[3] = gen_reg_rtx_and_attrs (operands[0]);
+}")
+
(define_insn_and_split "*vector_uneq<mode>"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
Index: gcc/testsuite/gcc.target/powerpc/vec-cmp.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-cmp.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-cmp.c (working copy)
@@ -0,0 +1,113 @@
+/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc64*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+/* { dg-final { scan-assembler-times "vcmpgtsb" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtub" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsh" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuh" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsw" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuw" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsd" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpgtud" 2 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 16 } } */
+
+#include <altivec.h>
+
+vector bool char
+cmple_sc (vector signed char x, vector signed char y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool char
+cmple_uc (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool short
+cmple_ss (vector signed short x, vector signed short y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool short
+cmple_us (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool int
+cmple_si (vector signed int x, vector signed int y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool int
+cmple_ui (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool long long
+cmple_sl (vector signed long long x, vector signed long long y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool long long
+cmple_ul (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool char
+cmpge_sc (vector signed char x, vector signed char y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool char
+cmpge_uc (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool short
+cmpge_ss (vector signed short x, vector signed short y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool short
+cmpge_us (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool int
+cmpge_si (vector signed int x, vector signed int y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool int
+cmpge_ui (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool long long
+cmpge_sl (vector signed long long x, vector signed long long y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool long long
+cmpge_ul (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmpge (x, y);
+}
+