This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Simple bitop reassoc in match.pd (was: Canonicalize X u< X to UNORDERED_EXPR)


On Tue, 3 May 2016, Richard Biener wrote:

On Tue, May 3, 2016 at 3:26 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
On Tue, 3 May 2016, Richard Biener wrote:

On Tue, May 3, 2016 at 8:36 AM, Marc Glisse <marc.glisse@inria.fr> wrote:

This removes the duplication. I also removed the case (A&B)&(A&C) which
is
handled by reassoc. And I need 2 NOP checks, for the case where @0 is a
constant (that couldn't happen before my patch because canonicalization
would put the constant as second operand).


Nicely spotted.  Not sure we want to delay (A&B)&(A&C) until re-assoc.  We
have
many patterns that reassoc would also catch, like (A + CST) + CST or (A +
B)- A,
albeit reassoc only handles the unsigned cases.


(A&B)&A seems simple enough for match.pd, I thought (A&B)&(A&C) was starting
to be a bit specialized. I could easily add it back (making it work with |
at the same time), but then I am not convinced A&(B&C) is the best output.
If A&B or A&C have several uses, then (A&B)&C or B&(A&C) seem preferable
(and we would still have a transformation for (A&cst1)&cst2 so we wouldn't
lose in the case where B and C are constants). We may still end up having to
add some :s to the transformation I just touched.

Yeah, these are always tricky questions.  Note that re-assoc won't
handle the case
with multi-use A&C or A&B.  The only reason to care for the single-use case is
when we change operations for the mixed operand cases.  For the all-&| case
there is only the (usual) consideration about SSA lifetime extension.

So maybe it makes sense to split out the all-&| cases.

Here they are. I did (X&Y)&X and (X&Y)&(X&Z). The next one would be ((X&Y)&Z)&X, but at some point we have to defer to reassoc.

I didn't add the convert?+tree_nop_conversion_p to the existing transform I modified. I guess at some point we should make a pass and add them to all the transformations on bit operations...

For (X & Y) & Y, I believe that any conversion is fine. For the others, tree_nop_conversion_p is probably too strict (narrowing should be fine for all), but I was too lazy to look for tighter conditions.

(X ^ Y) ^ Y -> X should probably have (non_lvalue ...) on its output, but in a simple test it didn't seem to matter. Is non_lvalue still needed?


Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

2016-05-06  Marc Glisse  <marc.glisse@inria.fr>

gcc/
	* fold-const.c (fold_binary_loc) [(X ^ Y) & Y]: Remove and merge with...
	* match.pd ((X & Y) ^ Y): ... this.
	((X & Y) & Y, (X | Y) | Y, (X ^ Y) ^ Y, (X & Y) & (X & Z), (X | Y)
	| (X | Z), (X ^ Y) ^ (X ^ Z)): New transformations.

gcc/testsuite/
	* gcc.dg/tree-ssa/bit-assoc.c: New testcase.
	* gcc.dg/tree-ssa/pr69270.c: Adjust.
	* gcc.dg/tree-ssa/vrp59.c: Disable forwprop.

--
Marc Glisse
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 235933)
+++ gcc/fold-const.c	(working copy)
@@ -10063,59 +10063,20 @@ fold_binary_loc (location_t loc,
 	}
       /* Fold !X & 1 as X == 0.  */
       if (TREE_CODE (arg0) == TRUTH_NOT_EXPR
 	  && integer_onep (arg1))
 	{
 	  tem = TREE_OPERAND (arg0, 0);
 	  return fold_build2_loc (loc, EQ_EXPR, type, tem,
 				  build_zero_cst (TREE_TYPE (tem)));
 	}
 
-      /* Fold (X ^ Y) & Y as ~X & Y.  */
-      if (TREE_CODE (arg0) == BIT_XOR_EXPR
-	  && operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0))
-	{
-	  tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
-	  return fold_build2_loc (loc, BIT_AND_EXPR, type,
-			      fold_build1_loc (loc, BIT_NOT_EXPR, type, tem),
-			      fold_convert_loc (loc, type, arg1));
-	}
-      /* Fold (X ^ Y) & X as ~Y & X.  */
-      if (TREE_CODE (arg0) == BIT_XOR_EXPR
-	  && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0)
-	  && reorder_operands_p (TREE_OPERAND (arg0, 1), arg1))
-	{
-	  tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1));
-	  return fold_build2_loc (loc, BIT_AND_EXPR, type,
-			      fold_build1_loc (loc, BIT_NOT_EXPR, type, tem),
-			      fold_convert_loc (loc, type, arg1));
-	}
-      /* Fold X & (X ^ Y) as X & ~Y.  */
-      if (TREE_CODE (arg1) == BIT_XOR_EXPR
-	  && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
-	{
-	  tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
-	  return fold_build2_loc (loc, BIT_AND_EXPR, type,
-			      fold_convert_loc (loc, type, arg0),
-			      fold_build1_loc (loc, BIT_NOT_EXPR, type, tem));
-	}
-      /* Fold X & (Y ^ X) as ~Y & X.  */
-      if (TREE_CODE (arg1) == BIT_XOR_EXPR
-	  && operand_equal_p (arg0, TREE_OPERAND (arg1, 1), 0)
-	  && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0)))
-	{
-	  tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
-	  return fold_build2_loc (loc, BIT_AND_EXPR, type,
-			      fold_build1_loc (loc, BIT_NOT_EXPR, type, tem),
-			      fold_convert_loc (loc, type, arg0));
-	}
-
       /* Fold (X * Y) & -(1 << CST) to X * Y if Y is a constant
          multiple of 1 << CST.  */
       if (TREE_CODE (arg1) == INTEGER_CST)
 	{
 	  wide_int cst1 = arg1;
 	  wide_int ncst1 = -cst1;
 	  if ((cst1 & ncst1) == ncst1
 	      && multiple_of_p (type, arg0,
 				wide_int_to_tree (TREE_TYPE (arg1), ncst1)))
 	    return fold_convert_loc (loc, type, arg0);
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 235933)
+++ gcc/match.pd	(working copy)
@@ -667,39 +667,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (tree_nop_conversion_p (type, TREE_TYPE (@0))
       && tree_nop_conversion_p (type, TREE_TYPE (@1)))
   (bit_xor (convert @0) (convert @1))))
 
 /* Convert ~X ^ C to X ^ ~C.  */
 (simplify
  (bit_xor (convert? (bit_not @0)) INTEGER_CST@1)
  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
   (bit_xor (convert @0) (bit_not @1))))
 
-/* Fold (X & Y) ^ Y as ~X & Y.  */
-(simplify
- (bit_xor:c (bit_and:c @0 @1) @1)
- (bit_and (bit_not @0) @1))
+/* Fold (X & Y) ^ Y and (X ^ Y) & Y as ~X & Y.  */
+(for opo (bit_and bit_xor)
+     opi (bit_xor bit_and)
+ (simplify
+  (opo:c (opi:c @0 @1) @1) 
+  (bit_and (bit_not @0) @1)))
 
 /* Given a bit-wise operation CODE applied to ARG0 and ARG1, see if both
    operands are another bit-wise operation with a common input.  If so,
    distribute the bit operations to save an operation and possibly two if
    constants are involved.  For example, convert
      (A | B) & (A | C) into A | (B & C)
    Further simplification will occur if B and C are constants.  */
 (for op (bit_and bit_ior bit_xor)
      rop (bit_ior bit_and bit_and)
  (simplify
   (op (convert? (rop:c @0 @1)) (convert? (rop:c @0 @2)))
   (if (tree_nop_conversion_p (type, TREE_TYPE (@1))
        && tree_nop_conversion_p (type, TREE_TYPE (@2)))
    (rop (convert @0) (op (convert @1) (convert @2))))))
 
+/* Some simple reassociation for bit operations, also handled in reassoc.  */
+/* (X & Y) & Y -> X & Y
+   (X | Y) | Y -> X | Y  */
+(for op (bit_and bit_ior)
+ (simplify
+  (op:c (convert?@2 (op:c @0 @1)) (convert? @1))
+  @2))
+/* (X ^ Y) ^ Y -> X  */
+(simplify
+ (bit_xor:c (convert? (bit_xor:c @0 @1)) (convert? @1))
+ (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+  (convert @0)))
+/* (X & Y) & (X & Z) -> (X & Y) & Z
+   (X | Y) | (X | Z) -> (X | Y) | Z  */
+(for op (bit_and bit_ior)
+ (simplify
+  (op:c (convert?@3 (op:c@4 @0 @1)) (convert?@5 (op:c@6 @0 @2)))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@1))
+       && tree_nop_conversion_p (type, TREE_TYPE (@2)))
+   (if (single_use (@5) && single_use (@6))
+    (op @3 (convert @2))
+    (if (single_use (@3) && single_use (@4))
+     (op (convert @1) @5))))))
+/* (X ^ Y) ^ (X ^ Z) -> Y ^ Z  */
+(simplify
+ (bit_xor (convert? (bit_xor:c @0 @1)) (convert? (bit_xor:c @0 @2)))
+ (if (tree_nop_conversion_p (type, TREE_TYPE (@1))
+      && tree_nop_conversion_p (type, TREE_TYPE (@2)))
+  (convert (bit_xor @1 @2))))
 
 (simplify
  (abs (abs@1 @0))
  @1)
 (simplify
  (abs (negate @0))
  (abs @0))
 (simplify
  (abs tree_expr_nonnegative_p@0)
  @0)
Index: gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c	(working copy)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-details -fdump-tree-ccp1-details" } */
+
+int f1(int a, int b){
+  int c = a & b;
+  return c & b;
+}
+int f2(int a, int b){
+  int c = a | b;
+  return b | c;
+}
+int g1(int a, int b, int c){
+  int d = a & b;
+  int e = b & c;
+  return d & e;
+}
+int g2(int a, int b, int c){
+  int d = a | b;
+  int e = c | b;
+  return d | e;
+}
+int g3(int a, int b, int c){
+  int d = a ^ b;
+  int e = b ^ c;
+  return e ^ d;
+}
+
+/* { dg-final { scan-tree-dump-times "Match-and-simplified" 2 "ccp1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "forwprop1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/pr69270.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr69270.c	(revision 235933)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr69270.c	(working copy)
@@ -1,21 +1,23 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fsplit-paths -fdump-tree-dom3-details" } */
 
 /* There should be two references to bufferstep that turn into
    constants.  */
 /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .0." 1 "dom3"} } */
 /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .1." 1 "dom3"} } */
 
 /* And some assignments ought to fold down to constants.  */
-/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 2 "dom3"} } */
-/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 2 "dom3"} } */
+/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = -1;" 1 "dom3"} } */
+/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = -2;" 1 "dom3"} } */
+/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 1 "dom3"} } */
+/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 1 "dom3"} } */
 
 /* The XOR operations should have been optimized to constants.  */
 /* { dg-final { scan-tree-dump-not "bit_xor" "dom3"} } */
 
 
 extern int *stepsizeTable;
 
 void
 adpcm_coder (signed char *outdata, int len)
 {
Index: gcc/testsuite/gcc.dg/tree-ssa/vrp59.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/vrp59.c	(revision 235933)
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp59.c	(working copy)
@@ -1,12 +1,12 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-ccp -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fno-tree-ccp -fno-tree-forwprop -fdump-tree-vrp1" } */
 
 int f(int x)
 {
   if (x >= 0 && x <= 3)
     {
       x = x ^ 3;
       x = x & 3;
     }
   return x;
 }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]