[patch middle-end]: Missed optimization for (x & ~y) | (~x & y)

Kai Tietz ktietz70@googlemail.com
Wed Apr 20 16:10:00 GMT 2011


2011/4/20 Richard Henderson <rth@redhat.com>:
> On 04/20/2011 08:22 AM, Kai Tietz wrote:
>> +      if (TREE_CODE (arg0) == BIT_AND_EXPR
>> +       && TREE_CODE (arg1) == BIT_AND_EXPR)
>> +        {
>> +       tree a0, a1, l0, l1, n0, n1;
>> +
>> +       a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
>> +       a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
>> +
>> +       l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
>> +       l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1));
>> +
>> +       n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0);
>> +       n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1);
>> +
>> +       if ((operand_equal_p (n0, a0, 0)
>> +            && operand_equal_p (n1, a1, 0))
>> +           || (operand_equal_p (n0, a1, 0)
>> +               && operand_equal_p (n1, a0, 0)))
>> +         return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1);
>
> First, you typoed BIT_XOR_EXPR in this first block.

Duh, corrected.

> Second, I don't see how you're arbitrarily choosing L0 and N1 in the
> expansion.  If you write the expression the other way around,
>
>  (~x & y) | (x & ~y)
>
> don't you wind up with
>
>  (~x ^ ~y)
>
> ?  Or do the extra NOT expressions get folded away anyway?

No, I didn't wind up there. First, ~X ^ ~Y gives the same result as
X ^ Y, and for this I used the explicit folding here. Well, it might be
a bit slower, but it has the advantage of comparing equal
transformations in case of doubt.

>> +      if (TREE_CODE (arg0) == TREE_CODE (arg1)
>> +       && (TREE_CODE (arg1) == TRUTH_AND_EXPR
>> +           || TREE_CODE (arg1) == TRUTH_ANDIF_EXPR))
>
> I don't believe you want to apply this transformation with ANDIF.

Yes, it is superfluous. I removed it.

>
> r~
>

Adjusted patch attached.

Kai
-------------- next part --------------
Index: gcc/gcc/fold-const.c
===================================================================
--- gcc.orig/gcc/fold-const.c	2011-04-20 17:10:39.478091900 +0200
+++ gcc/gcc/fold-const.c	2011-04-20 17:41:23.427677200 +0200
@@ -10660,6 +10660,28 @@ fold_binary_loc (location_t loc,
 	  && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0)))
 	return omit_one_operand_loc (loc, type, arg0, TREE_OPERAND (arg1, 0));
 
+      /* (X & ~Y) | (~X & Y) is X ^ Y */
+      if (TREE_CODE (arg0) == BIT_AND_EXPR
+	  && TREE_CODE (arg1) == BIT_AND_EXPR)
+        {
+	  tree a0, a1, l0, l1, n0, n1;
+
+	  a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
+	  a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
+
+	  l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
+	  l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1));
+	  
+	  n0 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l0);
+	  n1 = fold_build1_loc (loc, BIT_NOT_EXPR, type, l1);
+	  
+	  if ((operand_equal_p (n0, a0, 0)
+	       && operand_equal_p (n1, a1, 0))
+	      || (operand_equal_p (n0, a1, 0)
+		  && operand_equal_p (n1, a0, 0)))
+	    return fold_build2_loc (loc, BIT_XOR_EXPR, type, l0, n1);
+	}
+
       t1 = distribute_bit_expr (loc, code, type, arg0, arg1);
       if (t1 != NULL_TREE)
 	return t1;
@@ -12039,6 +12061,27 @@ fold_binary_loc (location_t loc,
 	  && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
 	return omit_one_operand_loc (loc, type, integer_one_node, arg0);
 
+      /* (X && !Y) || (!X && Y) is X ^ Y */
+      if (TREE_CODE (arg0) == TREE_CODE (arg1)
+	  && TREE_CODE (arg1) == TRUTH_AND_EXPR)
+        {
+	  tree a0, a1, l0, l1, n0, n1;
+
+	  a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
+	  a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
+
+	  l0 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
+	  l1 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1));
+	  
+	  n0 = fold_build1_loc (loc, TRUTH_NOT_EXPR, type, l0);
+	  n1 = fold_build1_loc (loc, TRUTH_NOT_EXPR, type, l1);
+	  
+	  if ((operand_equal_p (n0, a0, 0)
+	       && operand_equal_p (n1, a1, 0))
+	      || (operand_equal_p (n0, a1, 0)
+		  && operand_equal_p (n1, a0, 0)))
+	    return fold_build2_loc (loc, TRUTH_XOR_EXPR, type, l0, n1);
+	}
       goto truth_andor;
 
     case TRUTH_XOR_EXPR:
Index: gcc/gcc/testsuite/gcc.dg/binop-xor1.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/gcc/testsuite/gcc.dg/binop-xor1.c	2011-04-20 17:11:22.905039900 +0200
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int a, int b, int c)
+{
+  return ((a && !b && c) || (!a && b && c));
+}
+
+/* We expect to see "<bb N>"; confirm that, so that we know to count
+   it in the real test.  */
+/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 5 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/gcc/testsuite/gcc.dg/binop-xor2.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/gcc/testsuite/gcc.dg/binop-xor2.c	2011-04-20 17:11:22.908540300 +0200
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int a, int b)
+{
+  return ((a & ~b) | (~a & b));
+}
+
+/* We expect to see "<bb N>"; confirm that, so that we know to count
+   it in the real test.  */
+/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/gcc/testsuite/gcc.dg/binop-xor3.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/gcc/testsuite/gcc.dg/binop-xor3.c	2011-04-20 17:11:22.911040600 +0200
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int a, int b)
+{
+  return ((a && !b) || (!a && b));
+}
+
+/* We expect to see "<bb N>"; confirm that, so that we know to count
+   it in the real test.  */
+/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/gcc/testsuite/gcc.dg/binop-xor4.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/gcc/testsuite/gcc.dg/binop-xor4.c	2011-04-20 17:11:22.913541000 +0200
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int a, int b, int c)
+{
+  return ((a & ~b) | (~a & b)) & c;
+}
+
+/* We expect to see "<bb N>"; confirm that, so that we know to count
+   it in the real test.  */
+/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/gcc/testsuite/gcc.dg/binop-xor5.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/gcc/testsuite/gcc.dg/binop-xor5.c	2011-04-20 17:11:22.916541300 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int a, int b, int c)
+{
+  return ((a & ~b & c) | (~a & b & c));
+}
+
+/* We expect to see "<bb N>"; confirm that, so that we know to count
+   it in the real test.  */
+/* { dg-final { scan-tree-dump-times "<bb\[^>\]*>" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\^" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\&" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */


More information about the Gcc-patches mailing list