[PATCH] Optimize (A / (cast) (1 << B)) -> (A >> B) (PR middle-end/91680)

Jakub Jelinek jakub@redhat.com
Tue Sep 10 07:45:00 GMT 2019


Hi!

The following patch optimizes (A / (cast) (1 << B)) -> (A >> B)
in addition to already supported (A / (1 << B)) -> (A >> B).

The patch only supports same precision cast (i.e. a sign change),
or widening cast, in that case either zero extension (then the extended
value is known to be a power of two and all is fine), or sign extension
if the first operand has all upper bits starting with the sign bit of the
narrower type clear.  That is because (unsigned long long) (1 << 31)
is 0xffffffff80000000ULL and we need to make sure that dividing by that
huge value is equal to >> 31.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-09-10  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/91680
	* match.pd ((A / (1 << B)) -> (A >> B)): Allow widening cast from
	the shift type to type.

	* gcc.dg/tree-ssa/pr91680.c: New test.
	* g++.dg/torture/pr91680.C: New test.

--- gcc/match.pd.jj	2019-09-09 17:33:08.551582878 +0200
+++ gcc/match.pd	2019-09-09 20:05:25.966107441 +0200
@@ -305,13 +305,29 @@ (define_operator_list COND_TERNARY
 /* (A / (1 << B)) -> (A >> B).
    Only for unsigned A.  For signed A, this would not preserve rounding
    toward zero.
-   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.
+   Also also widening conversions, like:
+   (A / (unsigned long long) (1U << B)) -> (A >> B)
+   or
+   (A / (unsigned long long) (1 << B)) -> (A >> B).
+   If the left shift is signed, it can be done only if the upper bits
+   of A starting from shift's type sign bit are zero, as
+   (unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL,
+   so it is valid only if A >> 31 is zero.  */
 (simplify
- (trunc_div @0 (lshift integer_onep@1 @2))
+ (trunc_div @0 (convert? (lshift integer_onep@1 @2)))
  (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
       && (!VECTOR_TYPE_P (type)
 	  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
-	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
+      && (useless_type_conversion_p (type, TREE_TYPE (@1))
+	  || (element_precision (type) >= element_precision (TREE_TYPE (@1))
+	      && (TYPE_UNSIGNED (TREE_TYPE (@1))
+		  || (element_precision (type)
+		      == element_precision (TREE_TYPE (@1)))
+		  || (get_nonzero_bits (@0)
+		      & wi::mask (element_precision (TREE_TYPE (@1)) - 1, true,
+				  element_precision (type))) == 0))))
   (rshift @0 @2)))
 
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
--- gcc/testsuite/gcc.dg/tree-ssa/pr91680.c.jj	2019-09-09 20:18:04.349619827 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr91680.c	2019-09-09 20:18:33.922171856 +0200
@@ -0,0 +1,37 @@
+/* PR middle-end/91680 */
+/* { dg-do compile { target { ilp32 || lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* { dg-final { scan-tree-dump-times " / " 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times " >> " 3 "forwprop1" } } */
+
+__attribute__((noipa)) unsigned long long
+foo (unsigned char x)
+{
+  unsigned long long q = 1 << x;
+  return 256 / q;
+}
+
+__attribute__((noipa)) unsigned long long
+bar (unsigned char x)
+{
+  unsigned long long q = 1U << x;
+  return 256 / q;
+}
+
+__attribute__((noipa)) unsigned long long
+baz (unsigned char x, unsigned long long y)
+{
+  /* This can't be optimized, at least not in C++ and maybe not
+     in C89, because for x 31 q is -2147483648ULL, not
+     2147483648ULL, and e.g. 2147483648ULL >> 31 is 1, while
+     2147483648ULL / -2147483648ULL is 0.  */
+  unsigned long long q = 1 << x;
+  return y / q;
+}
+
+__attribute__((noipa)) unsigned long long
+qux (unsigned char x, unsigned long long y)
+{
+  unsigned long long q = 1U << x;
+  return y / q;
+}
--- gcc/testsuite/g++.dg/torture/pr91680.C.jj	2019-09-09 20:25:51.775539166 +0200
+++ gcc/testsuite/g++.dg/torture/pr91680.C	2019-09-09 20:26:18.610132667 +0200
@@ -0,0 +1,35 @@
+/* PR middle-end/91680 */
+/* { dg-do run { target { ilp32 || lp64 } } } */
+
+extern "C" void abort ();
+
+#include "../../gcc.dg/tree-ssa/pr91680.c"
+
+int
+main ()
+{
+  unsigned char i;
+  for (i = 0; i < __SIZEOF_INT__ * __CHAR_BIT__; i++)
+    {
+      volatile unsigned long long q = 1 << i;
+      if (foo (i) != 256 / q)
+	abort ();
+      q = 1U << i;
+      if (bar (i) != 256 / q)
+	abort ();
+      q = 1 << i;
+      if (baz (i, (1U << i) - 1) != ((1U << i) - 1) / q)
+	abort ();
+      if (baz (i, 1U << i) != (1U << i) / q)
+	abort ();
+      if (baz (i, -1) != -1 / q)
+	abort ();
+      q = 1U << i;
+      if (qux (i, (1U << i) - 1) != ((1U << i) - 1) / q)
+	abort ();
+      if (qux (i, 1U << i) != (1U << i) / q)
+	abort ();
+      if (qux (i, -1) != -1 / q)
+	abort ();
+    }
+}

	Jakub



More information about the Gcc-patches mailing list