[PATCH] Fix X % -Y => X % Y optimization (PR tree-optimization/69097, PR middle-end/50865)
Richard Biener
rguenther@suse.de
Sat Jan 9 07:23:00 GMT 2016
On January 8, 2016 9:07:11 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>As mentioned in the PRs, the X % -Y to X % Y optimization for signed
>modulo is not valid unless we can prove that it won't be INT_MIN %
>-(-1),
>which is valid, but where INT_MIN % -1 is invalid.
>
>The following patch use range info to figure that out.
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
Thanks,
Richard.
>2016-01-08 Jakub Jelinek <jakub@redhat.com>
>
> PR middle-end/50865
> PR tree-optimization/69097
> * fold-const.h (expr_not_equal_to): New prototype.
> * fold-const.c: Include stringpool.h and tree-ssanames.h.
> (expr_not_equal_to): New function.
> * match.pd (X % -Y is the same as X % Y): Don't optimize
> unless X is known not to be equal to minimum or Y is known
> not to be equal to -1.
> * tree-vrp.c (simplify_div_or_mod_using_ranges): Add GSI argument.
> fold TRUNC_MOD_EXPR if the second argument is not a power of two.
> (simplify_stmt_using_ranges): Adjust caller.
> (vrp_finalize): Call set_value_range on SSA_NAMEs before calling
> substitute_and_fold.
>
> * gcc.c-torture/execute/pr50865.c: New test.
> * gcc.c-torture/execute/pr69097-1.c: New test.
> * gcc.c-torture/execute/pr69097-2.c: New test.
> * gcc.dg/pr69097-1.c: New test.
> * gcc.dg/pr69097-2.c: New test.
>
>--- gcc/fold-const.h.jj 2016-01-05 12:38:17.020555325 +0100
>+++ gcc/fold-const.h 2016-01-08 13:22:25.921536489 +0100
>@@ -177,6 +177,7 @@ extern bool merge_ranges (int *, tree *,
> tree, tree);
> extern tree sign_bit_p (tree, const_tree);
> extern tree exact_inverse (tree, tree);
>+extern bool expr_not_equal_to (tree t, const wide_int &);
> extern tree const_unop (enum tree_code, tree, tree);
> extern tree const_binop (enum tree_code, tree, tree, tree);
> extern bool negate_mathfn_p (combined_fn);
>--- gcc/fold-const.c.jj 2016-01-05 12:38:17.011555454 +0100
>+++ gcc/fold-const.c 2016-01-08 13:22:25.925536432 +0100
>@@ -74,6 +74,8 @@ along with GCC; see the file COPYING3.
> #include "tree-into-ssa.h"
> #include "md5.h"
> #include "case-cfn-macros.h"
>+#include "stringpool.h"
>+#include "tree-ssanames.h"
>
> #ifndef LOAD_EXTEND_OP
> #define LOAD_EXTEND_OP(M) UNKNOWN
>@@ -9101,6 +9103,45 @@ tree_expr_nonzero_p (tree t)
> return ret;
> }
>
>+/* Return true if T is known not to be equal to an integer W. */
>+
>+bool
>+expr_not_equal_to (tree t, const wide_int &w)
>+{
>+ wide_int min, max, nz;
>+ value_range_type rtype;
>+ switch (TREE_CODE (t))
>+ {
>+ case INTEGER_CST:
>+ return wi::ne_p (t, w);
>+
>+ case SSA_NAME:
>+ if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
>+ return false;
>+ rtype = get_range_info (t, &min, &max);
>+ if (rtype == VR_RANGE)
>+ {
>+ if (wi::lt_p (max, w, TYPE_SIGN (TREE_TYPE (t))))
>+ return true;
>+ if (wi::lt_p (w, min, TYPE_SIGN (TREE_TYPE (t))))
>+ return true;
>+ }
>+ else if (rtype == VR_ANTI_RANGE
>+ && wi::le_p (min, w, TYPE_SIGN (TREE_TYPE (t)))
>+ && wi::le_p (w, max, TYPE_SIGN (TREE_TYPE (t))))
>+ return true;
>+ /* If T has some known zero bits and W has any of those bits
>set,
>+ then T is known not to be equal to W. */
>+ if (wi::ne_p (wi::zext (wi::bit_and_not (w, get_nonzero_bits
>(t)),
>+ TYPE_PRECISION (TREE_TYPE (t))), 0))
>+ return true;
>+ return false;
>+
>+ default:
>+ return false;
>+ }
>+}
>+
> /* Fold a binary expression of code CODE and type TYPE with operands
> OP0 and OP1. LOC is the location of the resulting expression.
> Return the folded expression if folding is successful. Otherwise,
>--- gcc/match.pd.jj 2016-01-05 12:38:16.979555912 +0100
>+++ gcc/match.pd 2016-01-08 13:23:28.815653802 +0100
>@@ -295,7 +295,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (trunc_mod @0 (convert? (negate @1)))
> (if (!TYPE_UNSIGNED (type)
> && !TYPE_OVERFLOW_TRAPS (type)
>- && tree_nop_conversion_p (type, TREE_TYPE (@1)))
>+ && tree_nop_conversion_p (type, TREE_TYPE (@1))
>+ /* Avoid this transformation if X might be INT_MIN or
>+ Y might be -1, because we would then change valid
>+ INT_MIN % -(-1) into invalid INT_MIN % -1. */
>+ && (expr_not_equal_to (@0, TYPE_MIN_VALUE (type))
>+ || expr_not_equal_to (@1, wi::minus_one (TYPE_PRECISION
>+ (TREE_TYPE (@1))))))
> (trunc_mod @0 (convert @1))))
>
> /* X - (X / Y) * Y is the same as X % Y. */
>--- gcc/tree-vrp.c.jj 2016-01-04 14:55:51.000000000 +0100
>+++ gcc/tree-vrp.c 2016-01-08 13:53:11.101943765 +0100
>@@ -8942,7 +8942,7 @@ simplify_truth_ops_using_ranges (gimple_
> modulo. */
>
> static bool
>-simplify_div_or_mod_using_ranges (gimple *stmt)
>+simplify_div_or_mod_using_ranges (gimple_stmt_iterator *gsi, gimple
>*stmt)
> {
> enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
> tree val = NULL;
>@@ -8971,12 +8971,19 @@ simplify_div_or_mod_using_ranges (gimple
> }
>
> if (!integer_pow2p (op1))
>- return false;
>-
>- if (TYPE_UNSIGNED (TREE_TYPE (op0)))
> {
>- val = integer_one_node;
>+ /* X % -Y can be only optimized into X % Y either if
>+ X is not INT_MIN, or Y is not -1. Fold it now, as after
>+ remove_range_assertions the range info might be not available
>+ anymore. */
>+ if (rhs_code == TRUNC_MOD_EXPR
>+ && fold_stmt (gsi, follow_single_use_edges))
>+ return true;
>+ return false;
> }
>+
>+ if (TYPE_UNSIGNED (TREE_TYPE (op0)))
>+ val = integer_one_node;
> else
> {
> bool sop = false;
>@@ -9890,7 +9897,7 @@ simplify_stmt_using_ranges (gimple_stmt_
> case TRUNC_MOD_EXPR:
> if (TREE_CODE (rhs1) == SSA_NAME
> && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
>- return simplify_div_or_mod_using_ranges (stmt);
>+ return simplify_div_or_mod_using_ranges (gsi, stmt);
> break;
>
> /* Transform ABS (X) into X or -X as appropriate. */
>@@ -10200,16 +10207,6 @@ vrp_finalize (bool warn_array_bounds_p)
> fprintf (dump_file, "\n");
> }
>
>- substitute_and_fold (op_with_constant_singleton_value_range,
>- vrp_fold_stmt, false);
>-
>- if (warn_array_bounds && warn_array_bounds_p)
>- check_all_array_refs ();
>-
>- /* We must identify jump threading opportunities before we release
>- the datastructures built by VRP. */
>- identify_jump_threads ();
>-
> /* Set value range to non pointer SSA_NAMEs. */
> for (i = 0; i < num_vr_values; i++)
> if (vr_value[i])
>@@ -10230,6 +10227,16 @@ vrp_finalize (bool warn_array_bounds_p)
> vr_value[i]->max);
> }
>
>+ substitute_and_fold (op_with_constant_singleton_value_range,
>+ vrp_fold_stmt, false);
>+
>+ if (warn_array_bounds && warn_array_bounds_p)
>+ check_all_array_refs ();
>+
>+ /* We must identify jump threading opportunities before we release
>+ the datastructures built by VRP. */
>+ identify_jump_threads ();
>+
> /* Free allocated memory. */
> for (i = 0; i < num_vr_values; i++)
> if (vr_value[i])
>--- gcc/testsuite/gcc.c-torture/execute/pr50865.c.jj 2016-01-08
>14:44:28.749265388 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr50865.c 2016-01-08
>14:52:07.000000000 +0100
>@@ -0,0 +1,27 @@
>+/* PR middle-end/50865 */
>+
>+#define INT64_MIN (-__LONG_LONG_MAX__ - 1)
>+
>+int
>+main ()
>+{
>+ volatile long long l1 = 1;
>+ volatile long long l2 = -1;
>+ volatile long long l3 = -1;
>+
>+ if ((INT64_MIN % 1LL) != 0)
>+ __builtin_abort ();
>+ if ((INT64_MIN % l1) != 0)
>+ __builtin_abort ();
>+ if (l2 == -1)
>+ {
>+ if ((INT64_MIN % 1LL) != 0)
>+ __builtin_abort ();
>+ }
>+ else if ((INT64_MIN % -l2) != 0)
>+ __builtin_abort ();
>+ if ((INT64_MIN % -l3) != 0)
>+ __builtin_abort ();
>+
>+ return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr69097-1.c.jj 2016-01-08
>14:08:11.577422788 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr69097-1.c 2016-01-08
>14:06:51.000000000 +0100
>@@ -0,0 +1,14 @@
>+/* PR tree-optimization/69097 */
>+
>+int a, b;
>+unsigned int c;
>+
>+int
>+main ()
>+{
>+ int d = b;
>+ b = ~(~a + (~d | b));
>+ a = ~(~c >> b);
>+ c = a % b;
>+ return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr69097-2.c.jj 2016-01-08
>14:08:14.726378977 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr69097-2.c 2016-01-08
>14:07:49.000000000 +0100
>@@ -0,0 +1,30 @@
>+/* PR tree-optimization/69097 */
>+
>+__attribute__((noinline, noclone)) int
>+f1 (int x, int y)
>+{
>+ return x % y;
>+}
>+
>+__attribute__((noinline, noclone)) int
>+f2 (int x, int y)
>+{
>+ return x % -y;
>+}
>+
>+__attribute__((noinline, noclone)) int
>+f3 (int x, int y)
>+{
>+ int z = -y;
>+ return x % z;
>+}
>+
>+int
>+main ()
>+{
>+ if (f1 (-__INT_MAX__ - 1, 1) != 0
>+ || f2 (-__INT_MAX__ - 1, -1) != 0
>+ || f3 (-__INT_MAX__ - 1, -1) != 0)
>+ __builtin_abort ();
>+ return 0;
>+}
>--- gcc/testsuite/gcc.dg/pr69097-1.c.jj 2016-01-08 14:23:01.631064387
>+0100
>+++ gcc/testsuite/gcc.dg/pr69097-1.c 2016-01-08 14:02:45.000000000
>+0100
>@@ -0,0 +1,140 @@
>+/* PR tree-optimization/69097 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* All the x % -y below should be optimized into x % y, as
>+ it should never be INT_MIN % -(-1). */
>+/* { dg-final { scan-tree-dump-not "-y" "optimized" } } */
>+
>+int
>+f1 (int x, int y)
>+{
>+ if (x == -__INT_MAX__ - 1)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f2 (int x, int y)
>+{
>+ if (x < -__INT_MAX__)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f3 (int x, int y)
>+{
>+ if (y == -1)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f4 (int x, int y)
>+{
>+ if (y < 0)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f5 (int x, int y)
>+{
>+ if (y >= -1)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f6 (int x, int y)
>+{
>+ if (y < 0 || y > 24)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f7 (int x, int y)
>+{
>+ if (y <= -17 || y >= -1)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f8 (int x, int y)
>+{
>+ if (y >= -13 && y <= 15)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f9 (int x, int y)
>+{
>+ return x % -(y & ~4);
>+}
>+
>+int
>+f10 (int x, int y)
>+{
>+ if (x != -__INT_MAX__ - 1)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f11 (int x, int y)
>+{
>+ if (x >= -__INT_MAX__)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f12 (int x, int y)
>+{
>+ if (y != -1)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f13 (int x, int y)
>+{
>+ if (y >= 0)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f14 (int x, int y)
>+{
>+ if (y < -1)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f15 (int x, int y)
>+{
>+ if (y >= 0 && y <= 24)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f16 (int x, int y)
>+{
>+ if (y > -17 && y < -1)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f17 (int x, int y)
>+{
>+ if (y < -13 || y > 15)
>+ return x % -y;
>+ return 34;
>+}
>--- gcc/testsuite/gcc.dg/pr69097-2.c.jj 2016-01-08 14:23:14.032892362
>+0100
>+++ gcc/testsuite/gcc.dg/pr69097-2.c 2016-01-08 14:29:59.550327493
>+0100
>@@ -0,0 +1,138 @@
>+/* PR tree-optimization/69097 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump-times "-y" 17 "optimized" } } */
>+
>+int
>+f1 (int x, int y)
>+{
>+ if (x == -__INT_MAX__)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f2 (int x, int y)
>+{
>+ if (x >= -__INT_MAX__ + 1)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f3 (int x, int y)
>+{
>+ if (y == -2)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f4 (int x, int y)
>+{
>+ if (y < -1)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f5 (int x, int y)
>+{
>+ if (y >= 0)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f6 (int x, int y)
>+{
>+ if (y < -1 || y > 24)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f7 (int x, int y)
>+{
>+ if (y <= -17 || y >= 0)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f8 (int x, int y)
>+{
>+ if (y >= -13 && y <= -2)
>+ __builtin_unreachable ();
>+ return x % -y;
>+}
>+
>+int
>+f9 (int x, int y)
>+{
>+ return x % -y;
>+}
>+
>+int
>+f10 (int x, int y)
>+{
>+ if (x != -__INT_MAX__)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f11 (int x, int y)
>+{
>+ if (x < -__INT_MAX__ + 2)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f12 (int x, int y)
>+{
>+ if (y != -2)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f13 (int x, int y)
>+{
>+ if (y >= -1)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f14 (int x, int y)
>+{
>+ if (y < 0)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f15 (int x, int y)
>+{
>+ if (y >= -1 && y <= 24)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f16 (int x, int y)
>+{
>+ if (y > -17 && y < 0)
>+ return x % -y;
>+ return 34;
>+}
>+
>+int
>+f17 (int x, int y)
>+{
>+ if (y < -13 || y > -4)
>+ return x % -y;
>+ return 34;
>+}
>
> Jakub
More information about the Gcc-patches
mailing list