This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] [match.pd]: missing optimization on comparison


Hi,

this patch implements some folding patterns about integral comparison of
condition.  I noticed the lack of folding in gcc by comparing gcc with llvm
result. These optimizations are interesting as they help to linearize
some conditional expressions.  The following patterns are implemented:

I)      (a < 0) & (b < 0) -> (a & b) < 0
II)     (a >= 0) & (b >= 0) -> (a | b) >= 0
III)    (a < 0) | (b < 0) -> (a | b) < 0
IV)     (a >= 0) | (b >= 0) -> (a & b) >= 0
V)      (a == -1) & (b == -1) -> (a & b) == -1
VI)     (a != -1) | (b != -1) -> (a & b) != -1

Additionally this patch implements some foldings of such new
combined condition, like:
        ((a | b) < 0) | (a == 0) -> (a <= 0) | (b < 0)

These break-ups of combined conditions might be good candidates for the
tree-ssa-reassoc pass. But for simplicity of this patch, I kept them
in match.pd.

I also ran SPEC 2017 and 2006 on the aarch64 architecture for it. For some
tests I could measure an improvement of up to 2 percent. There was no regression.

ChangeLog

        Kai Tietz <kai.tietz@linaro.org>

        * match.pd: New optimization for integral comparison
        of condition.
        (INTEGRAL_TYPES_P): New helper macro.
        (INTEGRAL_SAME_SIGN_P): Likewise.
        (INTEGRALS_SIGN_PREC_MATCH): Likewise.

ChangeLog testsuite

        * gcc.dg/fold-compare-9.c: New file.
        * gcc.dg/fold-compare-10.c: Likewise.
        * gcc.dg/fold-compare-11.c: Likewise.
        * gcc.dg/fold-compare-12.c: Likewise.
        * gcc.dg/fold-compare-13.c: Likewise.
        * gcc.dg/fold-compare-14.c: Likewise.

Regards,
Kai
Hi,

this patch implements some folding patterns about integral comparison of
condition.  I noticed the lack of folding in gcc by comparing gcc with llvm
result. These optimizations are interesting as they help to linearize
some conditional expressions.  The following patterns are implemented:

I)	(a < 0) & (b < 0) -> (a & b) < 0
II)	(a >= 0) & (b >= 0) -> (a | b) >= 0
III)	(a < 0) | (b < 0) -> (a | b) < 0
IV)	(a >= 0) | (b >= 0) -> (a & b) >= 0
V)	(a == -1) & (b == -1) -> (a & b) == -1
VI)	(a != -1) | (b != -1) -> (a & b) != -1

Additionally this patch implements some foldings of such new
combined condition, like:
	((a | b) < 0) | (a == 0) -> (a <= 0) | (b < 0)

These break-ups of combined conditions might be good candidates for the
tree-ssa-reassoc pass. But for simplicity of this patch, I kept them
in match.pd.

I also ran SPEC 2017 and 2006 on the aarch64 architecture for it. For some
tests I could measure an improvement of up to 2 percent. There was no regression.

Regards,
Kai
 
ChangeLog

	Kai Tietz <kai.tietz@linaro.org>

	* match.pd: New optimization for integral comparison
	of condition.
	(INTEGRAL_TYPES_P): New helper macro.
	(INTEGRAL_SAME_SIGN_P): Likewise.
	(INTEGRALS_SIGN_PREC_MATCH): Likewise.

ChangeLog testsuite

	* gcc.dg/fold-compare-9.c: New file.
	* gcc.dg/fold-compare-10.c: Likewise.
	* gcc.dg/fold-compare-11.c: Likewise.
	* gcc.dg/fold-compare-12.c: Likewise.
	* gcc.dg/fold-compare-13.c: Likewise.
	* gcc.dg/fold-compare-14.c: Likewise.

Index: trunk/gcc/match.pd
===================================================================
--- trunk.orig/gcc/match.pd
+++ trunk/gcc/match.pd
@@ -679,11 +679,124 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (if (tem)
      (rdiv { tem; } @1)))))
 
+#define INTEGRAL_TYPES_P(A, B) \
+  INTEGRAL_TYPE_P (TREE_TYPE (A)) && INTEGRAL_TYPE_P (TREE_TYPE (B))
+#define INTEGRAL_SAME_SIGN_P(A, B) \
+  TYPE_UNSIGNED (TREE_TYPE (A)) == TYPE_UNSIGNED (TREE_TYPE (B))
+#define INTEGRALS_SIGN_PREC_MATCH(A, B) \
+  TYPE_PRECISION (TREE_TYPE (A)) == TYPE_PRECISION (TREE_TYPE (B)) \
+  (TYPE_PRECISION (TREE_TYPE (A)) > TYPE_PRECISION (TREE_TYPE (B)) \
+   && !TYPE_UNSIGNED (TREE_TYPE (B)))
+
 /* Simplify ~X & X as zero.  */
 (simplify
  (bit_and:c (convert? @0) (convert? (bit_not @0)))
   { build_zero_cst (type); })
 
+/* These operation need to be processed without side-effect.  For diagnostic
+   messages these operation have not to be performed on AST.  */
+#if GIMPLE
+/* Combine (a == -1) & (b == -1) into (a & b) == -1 */
+(simplify (bit_and:c (eq @0 integer_minus_onep@2) (eq @1 integer_minus_onep))
+  (if (INTEGRAL_TYPES_P (@0, @1)
+       && INTEGRALS_SIGN_PREC_MATCH (@0, @1))
+    (eq (bit_and @0 (convert @1)) @2)))
+/* Combine (a != -1) | (b != -1) into (a & b) != -1 */
+(simplify (bit_ior:c (ne @0 integer_minus_onep@2) (ne @1 integer_minus_onep))
+  (if (INTEGRAL_TYPES_P (@0, @1)
+       && INTEGRALS_SIGN_PREC_MATCH (@0, @1))
+    (ne (bit_and @0 (convert @1)) @2)))
+/* Sink redundant compare into (a & b) == -1 for */
+/*  ... & (a <= cst) -or- ... & (a >= cst) */
+(for cmp (le ge lt gt)
+ (simplify
+  (bit_and:c (eq (bit_and:c @0 @1) integer_minus_onep@2) (cmp @0 INTEGER_CST@4))
+  (if (INTEGRAL_TYPES_P (@0, @1)
+       && !TYPE_UNSIGNED (TREE_TYPE (@0))
+       && tree_fits_shwi_p (@4))
+    (with { HOST_WIDE_INT cst4 = tree_to_shwi (@4); }
+      (if ((cst4 >= -1 && cmp == LE_EXPR)
+           || (cst4 < 0 && cmp == GE_EXPR)
+           || (cst4 > -1 && cmp == LT_EXPR)
+           || (cst4 < -1 && cmp == GT_EXPR))
+        (eq (bit_and @0 @1) @2)))
+    (with { HOST_WIDE_INT cst4 = tree_to_shwi (@4); }
+/*   for cst < -1 into false. */
+      (if ((cst4 < -1 && cmp == LE_EXPR) || (cst4 > -1 && cmp == GE_EXPR))
+        { constant_boolean_node (false, type); })))))
+/* ... & (a == cst) -or- ... & (a != cst). */
+(for cmp (eq ne)
+ (simplify
+  (bit_and:c (eq (bit_and:c @0 @1) integer_minus_onep@2) (cmp @0 INTEGER_CST@4))
+  (if (INTEGRAL_TYPES_P (@0, @1)
+       && !TYPE_UNSIGNED (TREE_TYPE (@0))
+       && tree_fits_shwi_p (@4))
+    (with { HOST_WIDE_INT cst4 = tree_to_shwi (@4); }
+      (if ((cst4 == -1) == (cmp == EQ_EXPR))
+        (eq (bit_and @0 @1) @2)))
+    (with { HOST_WIDE_INT cst4 = tree_to_shwi (@4); }
+      (if ((cst4 != -1) == (cmp == EQ_EXPR))
+        { constant_boolean_node (false, type); })))))
+
+/* Combine (a < 0) | (b < 0) into (a | b) < 0 */
+(simplify
+ (bit_ior:c (lt @0 integer_zerop@2) (lt @1 integer_zerop))
+ (if (INTEGRAL_TYPES_P (@0, @1)
+      && INTEGRAL_SAME_SIGN_P (@0, @1)
+      && TYPE_PRECISION (TREE_TYPE (@0)) >= TYPE_PRECISION (TREE_TYPE (@1)))
+  (lt (bit_ior @0 (convert @1)) @2)))
+
+/* Combine (a >= 0) & (b >= 0) into (a | b) >= 0 */
+(simplify
+ (bit_and:c (ge @0 integer_zerop@2) (ge @1 integer_zerop))
+ (if (INTEGRAL_TYPES_P (@0, @1)
+      && INTEGRAL_SAME_SIGN_P (@0, @1)
+      && TYPE_PRECISION (TREE_TYPE (@0)) >= TYPE_PRECISION (TREE_TYPE (@1)))
+  (ge (bit_ior @0 (convert @1)) @2)))
+
+/* Combine (a < 0) & (b < 0) into (a & b) < 0 */
+(simplify
+ (bit_and:c (lt @0 integer_zerop@2) (lt @1 integer_zerop))
+ (if (INTEGRAL_TYPES_P (@0, @1)
+      && INTEGRAL_SAME_SIGN_P (@0, @1)
+      && TYPE_PRECISION (TREE_TYPE (@0)) >= TYPE_PRECISION (TREE_TYPE (@1)))
+  (lt (bit_and @0 (convert @1)) @2)))
+
+/* Combine (a >= 0) | (b >= 0) into (a & b) >= 0 */
+(simplify
+ (bit_ior:c (ge @0 integer_zerop@2) (ge @1 integer_zerop))
+ (if (INTEGRAL_TYPES_P (@0, @1)
+      && INTEGRAL_SAME_SIGN_P (@0, @1)
+      && TYPE_PRECISION (TREE_TYPE (@0)) >= TYPE_PRECISION (TREE_TYPE (@1)))
+  (ge (bit_and @0 (convert @1)) @2)))
+
+/* Fold ((a | b) < 0) | (b == 0) into (a < 0) | (b <= 0) */
+(simplify
+ (bit_ior:c (lt (bit_ior:c @0 @1) integer_zerop@2) (eq @0 integer_zerop))
+ (bit_ior (le @0 @2) (lt @1 @2)))
+
+/* Fold ((a | b) >= 0) & (a == 0) into (a == 0) & (b >= 0),
+   ((a | b) >= 0 & (a > 0) into (a > 0) & (b >= 0),
+   and ((a | b) >= 0) & (a >= 0) into (a >= 0) & (b >= 0). */
+(for cmp (eq gt ge)
+ (simplify
+  (bit_and:c (ge (bit_ior:c @0 @1) integer_zerop@2) (cmp @0 integer_zerop))
+  (bit_and (cmp @0 @2) (ge @1 @2))))
+
+/* Fold ((a & b) < 0) & (a < 0) into (a & b) < 0 */
+(simplify
+ (bit_and:c (lt (bit_and:c @0 @1) integer_zerop@2) (lt @0 integer_zerop))
+ (lt (bit_and @0 @1) @2))
+
+/* Fold ((a & b) >= 0) | (a == 0) into (a & b) >= 0,
+   ((a & b) >= 0) | (a > 0) into (a & b) >= 0,
+   and ((a & b) >= 0) | (a >= 0) into (a & b) >= 0. */
+(for cmp (eq gt ge)
+ (simplify
+  (bit_ior:c (ge (bit_and:c @0 @1) integer_zerop@2) (cmp @0 integer_zerop))
+  (ge (bit_and @0 @1) @2)))
+#endif
+
 /* PR71636: Transform x & ((1U << b) - 1) -> x & ~(~0U << b);  */
 (simplify
   (bit_and:c @0 (plus:s (lshift:s integer_onep @1) integer_minus_onep))
@@ -697,8 +810,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (x != 0 | y != 0) -> (x | typeof(x)(y)) != 0.  */
  (simplify
   (bitop (cmp @0 integer_zerop@2) (cmp @1 integer_zerop))
-   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-	&& INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   (if (INTEGRAL_TYPES_P (@0, @1)
 	&& TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1)))
     (cmp (bit_ior @0 (convert @1)) @2)))
  /* Transform:
Index: trunk/gcc/testsuite/gcc.dg/fold-compare-10.c
===================================================================
--- /dev/null
+++ trunk/gcc/testsuite/gcc.dg/fold-compare-10.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+int f1(int a, int b)
+{
+  if (a < 0 && b < 0)
+   return 2;
+  return 0;
+}
+
+int f2(char a, int b)
+{
+  if (a < 0 && b < 0)
+   return 2;
+  return 0;
+}
+
+int f3(int a, char b)
+{
+  if (a < 0 && b < 0)
+   return 2;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "return" 3 "optimized" } } */
+
Index: trunk/gcc/testsuite/gcc.dg/fold-compare-11.c
===================================================================
--- /dev/null
+++ trunk/gcc/testsuite/gcc.dg/fold-compare-11.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+int f1(int a, int b)
+{
+  if (a >= 0 || b >= 0)
+   return 2;
+  return 0;
+}
+
+int f2(char a, int b)
+{
+  if (a >= 0 || b >= 0)
+   return 2;
+  return 0;
+}
+
+int f3(int a, char b)
+{
+  if (a >= 0 || b >= 0)
+   return 2;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "return" 3 "optimized" } } */
+
Index: trunk/gcc/testsuite/gcc.dg/fold-compare-12.c
===================================================================
--- /dev/null
+++ trunk/gcc/testsuite/gcc.dg/fold-compare-12.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+int f1(int a, int b)
+{
+  if (a >= 0 && b >= 0)
+   return 2;
+  return 0;
+}
+
+int f2(char a, int b)
+{
+  if (a >= 0 && b >= 0)
+   return 2;
+  return 0;
+}
+
+int f3(int a, char b)
+{
+  if (a >= 0 && b >= 0)
+   return 2;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "return" 3 "optimized" } } */
+
Index: trunk/gcc/testsuite/gcc.dg/fold-compare-13.c
===================================================================
--- /dev/null
+++ trunk/gcc/testsuite/gcc.dg/fold-compare-13.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+int f1(int a, int b)
+{
+ /* fold into (a<=0 || b<0) */
+ if ((a|b)<0 || a==0)
+   return 2;
+  return 0;
+}
+
+int f2(int a, int b)
+{
+  /* fold into (a|b)>=0 */
+  if ((a|b)>=0 && a==0)
+   return 2;
+  return 0;
+}
+
+int f3(int a, int b)
+{
+  /* fold into (a&b)<0 */
+  if ((a&b)<0 && a<0)
+   return 2;
+  return 0;
+}
+
+int f4(int a, int b)
+{
+  /* Fold into (a&b)>=0 */
+  if ((a&b)>=0 || a>0)
+    return 2;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "return" 4 "optimized" } } */
+
Index: trunk/gcc/testsuite/gcc.dg/fold-compare-14.c
===================================================================
--- /dev/null
+++ trunk/gcc/testsuite/gcc.dg/fold-compare-14.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+int f1(int a, int b)
+{
+ if (a == -1 && b == -1)
+   return 2;
+  return 0;
+}
+
+int f2(int a, char b)
+{
+  if (a == -1 && b == -1)
+   return 2;
+  return 0;
+}
+
+int f3(int a, int b)
+{
+  if (a!=-1 || b!=-1)
+   return 2;
+  return 0;
+}
+
+int f4(int a, short b)
+{
+  if (a != -1 || b != -1)
+    return 2;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times " & " 4 "optimized" } } */
+
Index: trunk/gcc/testsuite/gcc.dg/fold-compare-9.c
===================================================================
--- /dev/null
+++ trunk/gcc/testsuite/gcc.dg/fold-compare-9.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+int f1(int a, int b)
+{
+  if (a < 0 || b < 0)
+   return 2;
+  return 0;
+}
+
+int f2(char a, int b)
+{
+  if (a < 0 || b < 0)
+   return 2;
+  return 0;
+}
+
+int f3(int a, char b)
+{
+  if (a < 0 || b < 0)
+   return 2;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "return" 3 "optimized" } } */
+

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]