This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Adapt a couple of scalar comparison match.pd optimizations for vector comparisons against uniform vectors (PR target/88152)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 29 Nov 2018 08:54:39 +0100
- Subject: [PATCH] Adapt a couple of scalar comparison match.pd optimizations for vector comparisons against uniform vectors (PR target/88152)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The following patch adapts a couple of scalar comparison against INTEGER_CST
optimizations to vector comparison against uniform_vector_p VECTOR_CST.
The PR was specifically asking for the a > INT_MAX, a >= INT_MAX etc.
to (signed) a < 0, the first two hunks are prerequisites of that though
in order not to have to duplicate everything for the boundary values +/- 1.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2018-11-29 Jakub Jelinek <jakub@redhat.com>
PR target/88152
* match.pd: For lt/le/gt/ge against uniform vector VECTOR_CST,
perform similar simplifications like for scalar comparisons.
* g++.dg/tree-ssa/pr88152.C: New test.
--- gcc/match.pd.jj 2018-11-14 17:42:53.000000000 +0100
+++ gcc/match.pd 2018-11-28 16:57:28.377978794 +0100
@@ -3109,14 +3109,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(simplify
(cmp @0 INTEGER_CST@1)
(if (tree_int_cst_sgn (@1) == -1)
- (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) + 1); }))))
+ (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) + 1); })))
+ (simplify
+ (cmp @0 VECTOR_CST@1)
+ (with { tree cst = uniform_vector_p (@1); }
+ (if (cst && TREE_CODE (cst) == INTEGER_CST && tree_int_cst_sgn (cst) == -1)
+ (acmp @0 { build_vector_from_val (TREE_TYPE (@1),
+ wide_int_to_tree (TREE_TYPE (cst),
+ wi::to_wide (cst)
+ + 1)); })))))
(for cmp (ge lt)
acmp (gt le)
(simplify
(cmp @0 INTEGER_CST@1)
(if (tree_int_cst_sgn (@1) == 1)
- (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) - 1); }))))
-
+ (acmp @0 { wide_int_to_tree (TREE_TYPE (@1), wi::to_wide (@1) - 1); })))
+ (simplify
+ (cmp @0 VECTOR_CST@1)
+ (with { tree cst = uniform_vector_p (@1); }
+ (if (cst && TREE_CODE (cst) == INTEGER_CST && tree_int_cst_sgn (cst) == 1)
+ (acmp @0 { build_vector_from_val (TREE_TYPE (@1),
+ wide_int_to_tree (TREE_TYPE (cst),
+ wi::to_wide (cst)
+ - 1)); })))))
/* We can simplify a logical negation of a comparison to the
inverted comparison. As we cannot compute an expression
@@ -3993,7 +4008,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(with { tree st = signed_type_for (arg1_type); }
(if (cmp == LE_EXPR)
(ge (convert:st @0) { build_zero_cst (st); })
- (lt (convert:st @0) { build_zero_cst (st); }))))))))))
+ (lt (convert:st @0) { build_zero_cst (st); })))))))))
+ /* And the same for vector comparisons against uniform vector csts. */
+ (simplify
+ (cmp (convert?@2 @0) VECTOR_CST@1)
+ (if (VECTOR_TYPE_P (TREE_TYPE (@1))
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (@1)))
+ && uniform_vector_p (@1)
+ && tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0)))
+ (with
+ {
+ tree arg1_type = TREE_TYPE (TREE_TYPE (@1));
+ tree cst = uniform_vector_p (@1);
+ unsigned int prec = TYPE_PRECISION (arg1_type);
+ wide_int max = wi::max_value (arg1_type);
+ wide_int signed_max = wi::max_value (prec, SIGNED);
+ wide_int min = wi::min_value (arg1_type);
+ }
+ (switch
+ (if (wi::to_wide (cst) == max)
+ (switch
+ (if (cmp == GT_EXPR)
+ { constant_boolean_node (false, type); })
+ (if (cmp == GE_EXPR)
+ (eq @2 @1))
+ (if (cmp == LE_EXPR)
+ { constant_boolean_node (true, type); })
+ (if (cmp == LT_EXPR)
+ (ne @2 @1))))
+ (if (wi::to_wide (cst) == min)
+ (switch
+ (if (cmp == LT_EXPR)
+ { constant_boolean_node (false, type); })
+ (if (cmp == LE_EXPR)
+ (eq @2 @1))
+ (if (cmp == GE_EXPR)
+ { constant_boolean_node (true, type); })
+ (if (cmp == GT_EXPR)
+ (ne @2 @1))))
+ (if (wi::to_wide (cst) == max - 1)
+ (switch
+ (if (cmp == GT_EXPR)
+ (eq @2 { build_vector_from_val (type,
+ wide_int_to_tree (TREE_TYPE (cst),
+ wi::to_wide (cst)
+ + 1)); }))
+ (if (cmp == LE_EXPR)
+ (ne @2 { build_vector_from_val (type,
+ wide_int_to_tree (TREE_TYPE (cst),
+ wi::to_wide (cst)
+ + 1)); }))))
+ (if (wi::to_wide (cst) == min + 1)
+ (switch
+ (if (cmp == GE_EXPR)
+ (ne @2 { build_vector_from_val (type,
+ wide_int_to_tree (TREE_TYPE (cst),
+ wi::to_wide (cst)
+ - 1)); }))
+ (if (cmp == LT_EXPR)
+ (eq @2 { build_vector_from_val (type,
+ wide_int_to_tree (TREE_TYPE (cst),
+ wi::to_wide (cst)
+ - 1)); }))))
+ (if (wi::to_wide (cst) == signed_max
+ && TYPE_UNSIGNED (arg1_type)
+ /* We will flip the signedness of the comparison operator
+ associated with the mode of @1, so the sign bit is
+ specified by this mode. Check that @1 is the signed
+ max associated with this sign bit. */
+ && prec == GET_MODE_PRECISION (SCALAR_INT_TYPE_MODE (arg1_type))
+ /* signed_type does not work on pointer types. */
+ && INTEGRAL_TYPE_P (arg1_type))
+ /* The following case also applies to X < signed_max+1
+ and X >= signed_max+1 because previous transformations. */
+ (if (cmp == LE_EXPR || cmp == GT_EXPR)
+ (with { tree st = signed_type_for (TREE_TYPE (@1)); }
+ (if (cmp == LE_EXPR)
+ (ge (view_convert:st @0) { build_zero_cst (st); })
+ (lt (view_convert:st @0) { build_zero_cst (st); }))))))))))
(for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
/* If the second operand is NaN, the result is constant. */
--- gcc/testsuite/g++.dg/tree-ssa/pr88152.C.jj 2018-11-28 17:06:41.282815253 +0100
+++ gcc/testsuite/g++.dg/tree-ssa/pr88152.C 2018-11-28 17:07:37.202886556 +0100
@@ -0,0 +1,55 @@
+// PR target/88152
+// { dg-do compile }
+// { dg-options "-O2 -std=c++14 -fdump-tree-forwprop1" }
+// { dg-final { scan-tree-dump-times " \(?:<|>=\) \{ 0\[, ]" 120 "forwprop1" } }
+
+template <typename T, int N>
+using V [[gnu::vector_size (sizeof (T) * N)]] = T;
+
+void *foo ();
+
+template <typename T, int N, T max, T maxp1>
+__attribute__((noipa)) void
+test_uns ()
+{
+ V<T, N> *x = (V<T, N> *) foo ();
+ x[1] = x[0] > max;
+ x[3] = x[2] >= maxp1;
+ x[5] = x[4] <= max;
+ x[7] = x[6] < maxp1;
+}
+
+template <typename T, int N>
+__attribute__((noipa)) void
+test ()
+{
+ V<T, N> *x = (V<T, N> *) foo ();
+ x[1] = x[0] >= 0;
+ x[3] = x[2] > -1;
+ x[5] = x[4] < 0;
+ x[7] = x[6] <= -1;
+}
+
+template <int N>
+__attribute__((noipa)) void
+tests ()
+{
+ test_uns<unsigned char, N, __SCHAR_MAX__, 1U + __SCHAR_MAX__> ();
+ test<signed char, N> ();
+ test_uns<unsigned short int, N, __SHRT_MAX__, 1U + __SHRT_MAX__> ();
+ test<short int, N> ();
+ test_uns<unsigned int, N, __INT_MAX__, 1U + __INT_MAX__> ();
+ test<int, N> ();
+ test_uns<unsigned long int, N, __LONG_MAX__, 1UL + __LONG_MAX__> ();
+ test<long int, N> ();
+ test_uns<unsigned long long int, N, __LONG_LONG_MAX__, 1ULL + __LONG_LONG_MAX__> ();
+ test<long long int, N> ();
+}
+
+void
+all_tests ()
+{
+ tests<1> ();
+ tests<2> ();
+ tests<8> ();
+}
Jakub