This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

[PATCH] Improve (x >> c) << c match.pd optimization (PR tree-optimization/93118)


Hi!

As can be seen in the testcase, for the (x >> c) << c optimization into
x & (-1<<c) we don't really care whether the right shift is arithmetic or
logical, as the affected bits are shifted away again.
Furthermore, while match.pd can handle
((unsigned long long)(unsigned)(x >> 32)) << 32
for unsigned long long x - we figure out that after the logical right shift
the upper 32 bits are already zero and optimize the two casts away - we
don't handle that for an arithmetic shift, nor e.g. for
((unsigned long long)(int)(x >> 32)) << 32.
Still, the upper 32 bits don't really matter for the result and can be
anything, so we can fold these the same way.
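
For illustration (not part of the patch; foo_folded is just a made-up name,
assuming 64-bit unsigned long long and 32-bit int), the testcase's foo below
should end up equivalent to:

  unsigned long long
  foo_folded (unsigned long long a)
  {
    /* The intermediate (int) cast only changes bits that the final << 32
       shifts out again, so the whole expression is just a mask.  */
    return a & (-1ULL << 32);  /* i.e. a & 0xffffffff00000000ULL */
  }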

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2020-01-03  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/93118
	* match.pd ((x >> c) << c -> x & (-1<<c)): Add nop_convert?.  Add new
	simplifier with two intermediate conversions.

	* gcc.dg/tree-ssa/pr93118.c: New test.

--- gcc/match.pd.jj	2020-01-01 12:15:50.000000000 +0100
+++ gcc/match.pd	2020-01-02 10:00:49.213022408 +0100
@@ -2738,9 +2738,26 @@ (define_operator_list COND_TERNARY
 
 /* Optimize (x >> c) << c into x & (-1<<c).  */
 (simplify
- (lshift (rshift @0 INTEGER_CST@1) @1)
+ (lshift (nop_convert? (rshift @0 INTEGER_CST@1)) @1)
  (if (wi::ltu_p (wi::to_wide (@1), element_precision (type)))
-  (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))
+  /* It doesn't matter if the right shift is arithmetic or logical.  */
+  (bit_and (view_convert @0) (lshift { build_minus_one_cst (type); } @1))))
+
+(simplify
+ (lshift (convert (convert@2 (rshift @0 INTEGER_CST@1))) @1)
+ (if (wi::ltu_p (wi::to_wide (@1), element_precision (type))
+      /* Allow intermediate conversion to integral type with whatever sign, as
+	 long as the low TYPE_PRECISION (type)
+	 - TYPE_PRECISION (TREE_TYPE (@2)) bits are preserved.  */
+      && INTEGRAL_TYPE_P (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+      && (TYPE_PRECISION (TREE_TYPE (@2)) >= TYPE_PRECISION (type)
+	  || wi::geu_p (wi::to_wide (@1),
+			TYPE_PRECISION (type)
+			- TYPE_PRECISION (TREE_TYPE (@2)))))
+  (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1))))
 
 /* Optimize (x << c) >> c into x & ((unsigned)-1 >> c) for unsigned
    types.  */
--- gcc/testsuite/gcc.dg/tree-ssa/pr93118.c.jj	2020-01-02 09:58:21.186274254 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93118.c	2020-01-02 09:57:44.959825348 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/93118 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not ">>" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
+
+#if __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4 && __CHAR_BIT__ == 8
+unsigned long long
+foo (unsigned long long a)
+{
+  unsigned long long b = a >> 32;
+  int c = b;
+  unsigned long long d = c;
+  return d << 32;
+}
+
+unsigned long long
+bar (unsigned long long a)
+{
+  unsigned long long b = a >> 32;
+  unsigned c = b;
+  unsigned long long d = c;
+  return d << 32;
+}
+
+unsigned long long
+baz (long long a)
+{
+  long long b = a >> 32;
+  unsigned long long c = b;
+  return c << 32;
+}
+
+typedef unsigned V __attribute__((vector_size (2 * sizeof (int))));
+typedef int W __attribute__((vector_size (2 * sizeof (int))));
+
+void
+quux (W *w, V *v)
+{
+  W a = (W) (*v >> 16);
+  *w = a << 16;
+}
+#else
+int i;
+#endif

	Jakub

