[PATCH] PR target/66144, PowerPC improve vector compare
Michael Meissner
meissner@linux.vnet.ibm.com
Fri Feb 3 21:25:00 GMT 2017
This patch improves the code generation for:
vector char
compare_vc (vector char a, vector char b)
{
return a == b;
}
Previously it generated:
vcmpequb 2,2,3
vspltisw 1,-1
vspltisw 0,0
xxsel 34,32,33,34
and now it generates:
vcmpequb 2,2,3
I changed the vector conditional support for integer vectors to allow constant
all 0's or all 1's in addition to registers, and it knows that after a vector
comparison, the vector element will be all 1's if the comparison was true, and
all 0's if the comparison was false.
I did the normal bootstrap build and make check. There were no regressions.
I built Spec 2006 with the compiler, and I noticed that 15 out of the 29
benchmarks had code changes with this fix (bzip2, gcc, gamess, milc, gromacs,
gobmk, dealII, calculix, hmmer, sjeng, libquantum, h264ref, tonto, wrf,
sphinx3). I did a quick run (one pass instead of the normal 3) of the
benchmarks that had code changes. There were no regressions in performance,
and 2 runs that were faster (gcc and tonto) by 3-4%.
Can I check this fix into the GCC trunk?
Note, I will be starting vacation on Wednesday (February 8th), and I will
return to work on Friday February 24th. If I don't get the approval by Monday,
I will likely hold the patches until I return to work.
[gcc]
2017-02-03 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/66144
* config/rs6000/vector.md (vcond<mode><mode>): Allow the true and
false values to be constant vectors with all 0 or all 1 bits set.
(vcondu<mode><mode>): Likewise.
* config/rs6000/predicates.md (vector_int_same_bit): New
predicate.
(vector_int_reg_or_same_bit): Likewise.
(fpmask_comparison_operator): Update comment.
(vecint_comparison_operator): New predicate.
* config/rs6000/rs6000.c (rs6000_emit_vector_cond_expr): Optimize
vector conditionals when the true and false values are constant
vectors with all 0 bits or all 1 bits set.
[gcc/testsuite]
2017-02-03 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/66144
* gcc.target/powerpc/pr66144-1.c: New test.
* gcc.target/powerpc/pr66144-2.c: Likewise.
* gcc.target/powerpc/pr66144-3.c: Likewise.
--
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.vnet.ibm.com, phone: +1 (978) 899-4797
-------------- next part --------------
Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md (revision 245101)
+++ gcc/config/rs6000/vector.md (working copy)
@@ -390,13 +390,13 @@ (define_expand "vcond<mode><mode>"
}")
(define_expand "vcond<mode><mode>"
- [(set (match_operand:VEC_I 0 "vint_operand" "")
+ [(set (match_operand:VEC_I 0 "vint_operand")
(if_then_else:VEC_I
(match_operator 3 "comparison_operator"
- [(match_operand:VEC_I 4 "vint_operand" "")
- (match_operand:VEC_I 5 "vint_operand" "")])
- (match_operand:VEC_I 1 "vint_operand" "")
- (match_operand:VEC_I 2 "vint_operand" "")))]
+ [(match_operand:VEC_I 4 "vint_operand")
+ (match_operand:VEC_I 5 "vint_operand")])
+ (match_operand:VEC_I 1 "vector_int_reg_or_same_bit")
+ (match_operand:VEC_I 2 "vector_int_reg_or_same_bit")))]
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
@@ -446,13 +446,13 @@ (define_expand "vcondv4siv4sf"
}")
(define_expand "vcondu<mode><mode>"
- [(set (match_operand:VEC_I 0 "vint_operand" "")
+ [(set (match_operand:VEC_I 0 "vint_operand")
(if_then_else:VEC_I
(match_operator 3 "comparison_operator"
- [(match_operand:VEC_I 4 "vint_operand" "")
- (match_operand:VEC_I 5 "vint_operand" "")])
- (match_operand:VEC_I 1 "vint_operand" "")
- (match_operand:VEC_I 2 "vint_operand" "")))]
+ [(match_operand:VEC_I 4 "vint_operand")
+ (match_operand:VEC_I 5 "vint_operand")])
+ (match_operand:VEC_I 1 "vector_int_reg_or_same_bit")
+ (match_operand:VEC_I 2 "vector_int_reg_or_same_bit")))]
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md (revision 245101)
+++ gcc/config/rs6000/predicates.md (working copy)
@@ -808,6 +808,33 @@ (define_predicate "all_ones_constant"
(and (match_code "const_int,const_double,const_wide_int,const_vector")
(match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)")))
+;; Return 1 if operand is either a vector constant of all 0 bits or a vector
+;; constant of all 1 bits.
+(define_predicate "vector_int_same_bit"
+ (match_code "const_vector")
+{
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ return 0;
+
+ else
+ return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode);
+})
+
+;; Return 1 if operand is a vector int register or is either a vector constant
+;; of all 0 bits or a vector constant of all 1 bits.
+(define_predicate "vector_int_reg_or_same_bit"
+ (match_code "reg,subreg,const_vector")
+{
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ return 0;
+
+ else if (REG_P (op) || SUBREG_P (op))
+ return vint_operand (op, mode);
+
+ else
+ return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode);
+})
+
;; Return 1 if operand is 0.0.
(define_predicate "zero_fp_constant"
(and (match_code "const_double")
@@ -1260,8 +1287,8 @@ (define_predicate "scc_rev_comparison_op
(and (match_operand 0 "branch_comparison_operator")
(match_code "ne,le,ge,leu,geu,ordered")))
-;; Return 1 if OP is a comparison operator suitable for vector/scalar
-;; comparisons that generate a -1/0 mask.
+;; Return 1 if OP is a comparison operator suitable for floating point
+;; vector/scalar comparisons that generate a -1/0 mask.
(define_predicate "fpmask_comparison_operator"
(match_code "eq,gt,ge"))
@@ -1271,6 +1298,11 @@ (define_predicate "fpmask_comparison_ope
(define_predicate "invert_fpmask_comparison_operator"
(match_code "ne,unlt,unle"))
+;; Return 1 if OP is a comparison operation suitable for integer vector/scalar
+;; comparisons that generate a -1/0 mask.
+(define_predicate "vecint_comparison_operator"
+ (match_code "eq,gt,gtu"))
+
;; Return 1 if OP is a comparison operation that is valid for a branch
;; insn, which is true if the corresponding bit in the CC register is set.
(define_predicate "branch_positive_comparison_operator"
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 245101)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -25122,8 +25122,9 @@ rs6000_emit_vector_cond_expr (rtx dest,
machine_mode cc_mode = CCmode;
rtx mask;
rtx cond2;
- rtx tmp;
bool invert_move = false;
+ rtx constant_0;
+ rtx constant_m1;
if (VECTOR_UNIT_NONE_P (dest_mode))
return 0;
@@ -25166,15 +25167,40 @@ rs6000_emit_vector_cond_expr (rtx dest,
if (!mask)
return 0;
+ constant_0 = CONST0_RTX (dest_mode);
+ constant_m1 = CONSTM1_RTX (dest_mode);
+
if (invert_move)
+ std::swap (op_true, op_false);
+
+ /* Optimize vec1 == vec2, to know the mask generates -1/0. */
+ if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT)
{
- tmp = op_true;
- op_true = op_false;
- op_false = tmp;
+ if (op_true == constant_m1 && op_false == constant_0)
+ {
+ emit_move_insn (dest, mask);
+ return 1;
+ }
+
+ else if (op_true == constant_0 && op_false == constant_m1)
+ {
+ emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
+ return 1;
+ }
}
+ if (op_true == constant_m1 && dest_mode == mask_mode)
+ op_true = mask;
+ else if (!REG_P (op_true) && !SUBREG_P (op_true))
+ op_true = force_reg (dest_mode, op_true);
+
+ if (op_false == constant_0 && dest_mode == mask_mode)
+ op_false = mask;
+ else if (!REG_P (op_false) && !SUBREG_P (op_false))
+ op_false = force_reg (dest_mode, op_false);
+
cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
- CONST0_RTX (dest_mode));
+ constant_0);
emit_insn (gen_rtx_SET (dest,
gen_rtx_IF_THEN_ELSE (dest_mode,
cond2,
Index: gcc/testsuite/gcc.target/powerpc/pr66144-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr66144-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr66144-1.c (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+/* Verify that we optimize vector1 = (vector2 == vector3) by not loading up
+ 0/-1. */
+
+vector int
+test (vector int a, vector int b)
+{
+ return a == b;
+}
+
+/* { dg-final { scan-assembler {\mvcmpequw\M} } } */
+/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
+/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlxor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlxorc\M} } } */
+/* { dg-final { scan-assembler-not {\mxxsel\M} } } */
Index: gcc/testsuite/gcc.target/powerpc/pr66144-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr66144-2.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr66144-2.c (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Verify that we optimize vector1 = (vector2 != vector3) by not loading up
+ 0/-1. */
+
+vector unsigned char
+test (vector unsigned char a, vector unsigned char b)
+{
+ return a != b;
+}
+
+/* { dg-final { scan-assembler {\mvcmpequb\M} } } */
+/* { dg-final { scan-assembler {\mxxlnor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
+/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlxor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlxorc\M} } } */
+/* { dg-final { scan-assembler-not {\mxxsel\M} } } */
Index: gcc/testsuite/gcc.target/powerpc/pr66144-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr66144-3.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr66144-3.c (revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { powerpc64*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize" } */
+
+/* Verify that we can optimize a vector conditional move, where one of the arms
+ is all 1's into using the mask as one of the inputs to XXSEL. */
+
+#include <altivec.h>
+
+static int a[1024], b[1024], c[1024];
+
+int *p_a = a, *p_b = b, *p_c = c;
+
+void
+test (void)
+{
+ unsigned long i;
+
+ for (i = 0; i < 1024; i++)
+ a[i] = (b[i] == c[i]) ? -1 : a[i];
+}
+
+/* { dg-final { scan-assembler {\mvcmpequw\M} } } */
+/* { dg-final { scan-assembler {\mxxsel\M} } } */
+/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlorc\M} } } */
More information about the Gcc-patches
mailing list