[PATCH 2/4] S/390: Merge compare of compare results

Andreas Krebbel krebbel@linux.vnet.ibm.com
Tue Nov 29 09:43:00 GMT 2016


With this patch EQ and NE compares on CC mode reader patterns are
folded.  This allows using the result of the vec_all_* and vec_any_*
builtins directly in a conditional jump instruction as in the attached
testcase.

gcc/ChangeLog:

2016-11-29  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390-protos.h (s390_reverse_condition): New
	prototype.
	* config/s390/s390.c (s390_canonicalize_comparison): Fold compares
	of CC mode values.
	(s390_reverse_condition): New function.
	* config/s390/s390.h (REVERSE_CC_MODE, REVERSE_CONDITION): Define
	target macros.

gcc/testsuite/ChangeLog:

2016-11-29  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/zvector/vec-cmp-2.c: New test.
---
 gcc/config/s390/s390-protos.h                     |   1 +
 gcc/config/s390/s390.c                            |  42 +++++
 gcc/config/s390/s390.h                            |  12 ++
 gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c | 203 ++++++++++++++++++++++
 4 files changed, 258 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 7ae98d4..000a677 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -119,6 +119,7 @@ extern void s390_expand_atomic (machine_mode, enum rtx_code,
 extern void s390_expand_tbegin (rtx, rtx, rtx, bool);
 extern void s390_expand_vec_compare (rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
+extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
 extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_init (rtx, rtx);
 extern rtx s390_return_addr_rtx (int, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 445c147..dab4f43 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1722,6 +1722,31 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 	}
       tmp = *op0; *op0 = *op1; *op1 = tmp;
     }
+
+  /* A comparison result is compared against zero.  Replace it with
+     the (perhaps inverted) original comparison.
+     This probably should be done by simplify_relational_operation.  */
+  if ((*code == EQ || *code == NE)
+      && *op1 == const0_rtx
+      && COMPARISON_P (*op0)
+      && CC_REG_P (XEXP (*op0, 0)))
+    {
+      enum rtx_code new_code;
+
+      if (*code == EQ)
+	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
+						   XEXP (*op0, 0),
+						   XEXP (*op0, 1), NULL);
+      else
+	new_code = GET_CODE (*op0);
+
+      if (new_code != UNKNOWN)
+	{
+	  *code = new_code;
+	  *op1 = XEXP (*op0, 1);
+	  *op0 = XEXP (*op0, 0);
+	}
+    }
 }
 
 /* Helper function for s390_emit_compare.  If possible emit a 64 bit
@@ -6343,6 +6368,23 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
 					tmp_reg, target));
 }
 
+/* Invert the comparison CODE applied to a CC mode.  This is only safe
+   if we know whether the result was created by a floating point
+   compare or not.  For the CCV modes this is encoded as part of the
+   mode.  */
+enum rtx_code
+s390_reverse_condition (machine_mode mode, enum rtx_code code)
+{
+  /* Reversal of FP compares needs care -- an ordered compare
+     becomes an unordered compare and vice versa.  */
+  if (mode == CCVFALLmode || mode == CCVFANYmode)
+    return reverse_condition_maybe_unordered (code);
+  else if (mode == CCVIALLmode || mode == CCVIANYmode)
+    return reverse_condition (code);
+  else
+    gcc_unreachable ();
+}
+
 /* Generate a vector comparison expression loading either elements of
    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
    and CMP_OP2.  */
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 6be4d34..1d6d7b2 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -513,6 +513,18 @@ extern const char *s390_host_detect_local_cpu (int argc, const char **argv);
 #define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		        \
   s390_cannot_change_mode_class ((FROM), (TO), (CLASS))
 
+/* We can reverse a CC mode safely if we know whether it comes from a
+   floating point compare or not.  With the vector modes it is encoded
+   as part of the mode.
+   FIXME: It might make sense to do this for other cc modes as well.  */
+#define REVERSIBLE_CC_MODE(MODE)				\
+  ((MODE) == CCVIALLmode || (MODE) == CCVIANYmode		\
+   || (MODE) == CCVFALLmode || (MODE) == CCVFANYmode)
+
+/* Given a condition code and a mode, return the inverse condition.  */
+#define REVERSE_CONDITION(CODE, MODE) s390_reverse_condition (MODE, CODE)
+
+
 /* Register classes.  */
 
 /* We use the following register classes:
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
new file mode 100644
index 0000000..0711f9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
@@ -0,0 +1,203 @@
+/* Similar to vec-cmp-1.c but requires that
+   s390_canonicalize_comparison is able to merge the two nested
+   compares.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13 -mzvector -fno-asynchronous-unwind-tables" } */
+
+#include <vecintrin.h>
+
+extern void foo (void);
+
+int __attribute__((noinline,noclone))
+all_eq_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ne_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_gt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_lt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ge_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_le_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+any_eq_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ne_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+
+int __attribute__((noinline,noclone))
+any_gt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_lt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ge_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_le_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_eq_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ne_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_gt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_lt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ge_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_le_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_eq_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ne_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+
+int __attribute__((noinline,noclone))
+any_gt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_lt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ge_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tje 1 } } */
+
+int __attribute__((noinline,noclone))
+any_le_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+
-- 
2.9.1



More information about the Gcc-patches mailing list