[PATCH] Improve (x >> c) << c match.pd optimization (PR tree-optimization/93118)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Fri, 3 Jan 2020 09:16:07 +0100
- Subject: [PATCH] Improve (x >> c) << c match.pd optimization (PR tree-optimization/93118)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
As can be seen in the testcase, for the (x >> c) << c optimization into
x & (-1<<c) we don't really care whether the right shift is arithmetic or
logical, as the affected bits are shifted away again.
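For instance (a hypothetical illustration, not taken from the testcase
below), after this patch both of the following should fold to the same
masking operation:

unsigned long long
f1 (unsigned long long x)
{
  return (x >> 13) << 13;	/* logical right shift */
}

unsigned long long
f2 (long long x)
{
  /* Arithmetic right shift followed by a sign-changing (nop) conversion.  */
  return (unsigned long long) (x >> 13) << 13;
}

The first case was already handled; the second now becomes
x & (-1ULL << 13) as well, because the sign bits the arithmetic shift
brings in are exactly the bits the left shift discards again.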
Furthermore, while match.pd can handle
((unsigned long long)(unsigned)(x >> 32))<<32
for unsigned long long x - we figure out that after the logical right shift
the upper 32 bits are already zero and optimize away those two casts -
we don't handle that for an arithmetic shift, or e.g. for
((unsigned long long)(int)(x >> 32))<<32.
Still, the upper 32 bits don't really matter for the result and can be
anything.
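To make that concrete, with the patch all three scalar functions in the
new testcase should end up as a plain mask, roughly (a sketch of the
expected folded form, not literal dump output):

unsigned long long
foo_folded (unsigned long long a)
{
  return a & 0xffffffff00000000ULL;	/* i.e. a & (-1ULL << 32) */
}

so no shift survives into the optimized dump, which is what the
scan-tree-dump-not directives in the test verify.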
Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?
2020-01-03  Jakub Jelinek  <jakub@redhat.com>
PR tree-optimization/93118
* match.pd ((x >> c) << c -> x & (-1<<c)): Add nop_convert?. Add new
simplifier with two intermediate conversions.
* gcc.dg/tree-ssa/pr93118.c: New test.
--- gcc/match.pd.jj 2020-01-01 12:15:50.000000000 +0100
+++ gcc/match.pd 2020-01-02 10:00:49.213022408 +0100
@@ -2738,9 +2738,26 @@ (define_operator_list COND_TERNARY
 /* Optimize (x >> c) << c into x & (-1<<c).  */
 (simplify
- (lshift (rshift @0 INTEGER_CST@1) @1)
+ (lshift (nop_convert? (rshift @0 INTEGER_CST@1)) @1)
  (if (wi::ltu_p (wi::to_wide (@1), element_precision (type)))
-  (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))
+  /* It doesn't matter if the right shift is arithmetic or logical.  */
+  (bit_and (view_convert @0) (lshift { build_minus_one_cst (type); } @1))))
+
+(simplify
+ (lshift (convert (convert@2 (rshift @0 INTEGER_CST@1))) @1)
+ (if (wi::ltu_p (wi::to_wide (@1), element_precision (type))
+      /* Allow intermediate conversion to integral type with whatever sign, as
+         long as the low TYPE_PRECISION (type)
+         - TYPE_PRECISION (TREE_TYPE (@2)) bits are preserved.  */
+      && INTEGRAL_TYPE_P (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+      && (TYPE_PRECISION (TREE_TYPE (@2)) >= TYPE_PRECISION (type)
+          || wi::geu_p (wi::to_wide (@1),
+                        TYPE_PRECISION (type)
+                        - TYPE_PRECISION (TREE_TYPE (@2)))))
+  (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1))))

 /* Optimize (x << c) >> c into x & ((unsigned)-1 >> c) for unsigned
    types.  */
--- gcc/testsuite/gcc.dg/tree-ssa/pr93118.c.jj 2020-01-02 09:58:21.186274254 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93118.c 2020-01-02 09:57:44.959825348 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/93118 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not ">>" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
+
+#if __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4 && __CHAR_BIT__ == 8
+unsigned long long
+foo (unsigned long long a)
+{
+  unsigned long long b = a >> 32;
+  int c = b;
+  unsigned long long d = c;
+  return d << 32;
+}
+
+unsigned long long
+bar (unsigned long long a)
+{
+  unsigned long long b = a >> 32;
+  unsigned c = b;
+  unsigned long long d = c;
+  return d << 32;
+}
+
+unsigned long long
+baz (long long a)
+{
+  long long b = a >> 32;
+  unsigned long long c = b;
+  return c << 32;
+}
+
+typedef unsigned V __attribute__((vector_size (2 * sizeof (int))));
+typedef int W __attribute__((vector_size (2 * sizeof (int))));
+
+void
+quux (W *w, V *v)
+{
+  W a = (W) (*v >> 16);
+  *w = a << 16;
+}
+#else
+int i;
+#endif
Jakub