This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Compile (X & C) == 0 to (X >> C) & 1 == 0 only at RTL level
- From: Paolo Bonzini <bonzini at gnu dot org>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Richard Guenther <rguenther at suse dot de>, Jeffrey A Law <law at redhat dot com>
- Date: Thu, 28 Aug 2008 11:55:08 +0200
- Subject: [PATCH] Compile (X & C) == 0 to (X >> C) & 1 == 0 only at RTL level
This patch mostly reverts this patch from Jeff Law:
2003-07-02 Jeff Law <law@redhat.com>
* expr.c (do_store_flag): Remove special case folding for
single bit tests. Instead call back into the commonized folder
routine.
* fold-const.c (fold_single_bit_test): New function, mostly
extracted from do_store_flag, with an additional case extracted
from fold.
(fold): Call fold_single_bit_test appropriately.
* tree.h (fold_single_bit_test): Prototype.
The reason is that nowadays the canonical form of single-bit tests is
back to being (X & C) == 0, and under bad conditions that canonical form
will be reestablished within fold_single_bit_test after carefully
crafting a (X >> C) & 1 expression. This causes an infinite loop
because do_store_flag is called again to expand (X & C) == 0.
In my case, the problem is that I added a NOP_EXPR folding. Then the
last fold_convert in fold_single_bit_test rebuilds the BIT_AND_EXPR for
(X >> C) & 1, and fold_binary causes the canonicalization to happen. At
least this means that my NOP_EXPR folding is very effective in making
canonical forms. :-)
I decided to revert (almost all of) Jeff's patch because fold-const.c no
longer calls fold_single_bit_test; only expr.c does. Instead,
fold_single_bit_test_into_sign_test is being called from fold-const.c,
and that path is left in place.
Bootstrapped i686-pc-linux-gnu, regtest in progress. Ok?
Paolo
2008-08-28 Paolo Bonzini <bonzini@gnu.org>
* expr.c (do_store_flag): Try fold_single_bit_test_into_sign_test.
Do not shadow the `type' variable from function scope.
* fold-const.c (fold_single_bit_test_into_sign_test): Export.
* tree.h (fold_single_bit_test_into_sign_test): Export.
Revert:
2003-07-02 Jeff Law <law@redhat.com>
* expr.c (do_store_flag): Remove special case folding for
single bit tests. Instead call back into the commonized folder
routine.
* fold-const.c (fold_single_bit_test): New function, mostly
extracted from do_store_flag, with an additional case extracted
from fold.
(fold): Call fold_single_bit_test appropriately.
* tree.h (fold_single_bit_test): Prototype.
Index: expr.c
===================================================================
--- expr.c (revision 139423)
+++ expr.c (working copy)
@@ -9793,19 +9793,70 @@ do_store_flag (tree exp, rtx target, enu
do this by shifting the bit being tested to the low-order bit and
masking the result with the constant 1. If the condition was EQ,
we xor it with 1. This does not require an scc insn and is faster
- than an scc insn even if we have it.
-
- The code to make this transformation was moved into fold_single_bit_test,
- so we just call into the folder and expand its result. */
+ than an scc insn even if we have it. */
if ((code == NE || code == EQ)
&& TREE_CODE (arg0) == BIT_AND_EXPR && integer_zerop (arg1)
&& integer_pow2p (TREE_OPERAND (arg0, 1)))
{
- tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
- return expand_expr (fold_single_bit_test (code == NE ? NE_EXPR : EQ_EXPR,
- arg0, arg1, type),
- target, VOIDmode, EXPAND_NORMAL);
+ enum tree_code code = (code == NE ? NE_EXPR : EQ_EXPR);
+ tree tem, inner;
+ int bitnum, ops_unsignedp;
+
+ tem = fold_single_bit_test_into_sign_test
+ (code, arg0, arg1,
+ lang_hooks.types.type_for_mode (mode, unsignedp));
+ if (tem)
+ return expand_expr (tem, target, VOIDmode, EXPAND_NORMAL);
+
+ /* If INNER is a right shift of a constant and it plus BITNUM does
+ not overflow, adjust BITNUM and INNER. */
+
+ inner = TREE_OPERAND (arg0, 0);
+ bitnum = tree_log2 (TREE_OPERAND (arg0, 1));
+ if (TREE_CODE (inner) == RSHIFT_EXPR
+ && TREE_CODE (TREE_OPERAND (inner, 1)) == INTEGER_CST
+ && TREE_INT_CST_HIGH (TREE_OPERAND (inner, 1)) == 0
+ && bitnum < TYPE_PRECISION (type)
+ && 0 > compare_tree_int (TREE_OPERAND (inner, 1),
+ bitnum - TYPE_PRECISION (type)))
+ {
+ bitnum += TREE_INT_CST_LOW (TREE_OPERAND (inner, 1));
+ inner = TREE_OPERAND (inner, 0);
+ }
+
+ /* If we are going to be able to omit the AND below, we must do our
+ operations as unsigned. If we must use the AND, we have a choice.
+ Normally unsigned is faster, but for some machines signed is. */
+#ifdef LOAD_EXTEND_OP
+ ops_unsignedp = (LOAD_EXTEND_OP (operand_mode) != SIGN_EXTEND);
+#else
+ ops_unsignedp = 1;
+#endif
+ ops_unsignedp = ops_unsignedp || (bitnum == TYPE_PRECISION (type) - 1);
+
+ if (! get_subtarget (subtarget)
+ || GET_MODE (subtarget) != operand_mode
+ || ! safe_from_p (subtarget, inner, 1))
+ subtarget = 0;
+
+ op0 = expand_expr (inner, subtarget, VOIDmode, 0);
+ if (bitnum != 0)
+ op0 = expand_shift (RSHIFT_EXPR, operand_mode, op0,
+ size_int (bitnum), subtarget, ops_unsignedp);
+
+ if (GET_MODE (op0) != mode)
+ op0 = convert_to_mode (mode, op0, ops_unsignedp);
+
+ if ((code == EQ && ! invert) || (code == NE && invert))
+ op0 = expand_binop (mode, xor_optab, op0, const1_rtx, subtarget,
+ ops_unsignedp, OPTAB_LIB_WIDEN);
+
+ /* Put the AND last so it can combine with more things. */
+ if (bitnum != TYPE_PRECISION (type) - 1)
+ op0 = expand_and (mode, op0, const1_rtx, subtarget);
+
+ return op0;
}
/* Now see if we are likely to be able to do this. Return if not. */
Index: tree.h
===================================================================
--- tree.h (revision 139423)
+++ tree.h (working copy)
@@ -4735,7 +4735,7 @@ extern tree fold_build_call_array (tree,
extern tree fold_build_call_array_initializer (tree, tree, int, tree *);
extern bool fold_convertible_p (const_tree, const_tree);
extern tree fold_convert (tree, tree);
-extern tree fold_single_bit_test (enum tree_code, tree, tree, tree);
+extern tree fold_single_bit_test_into_sign_test (enum tree_code, tree, tree, tree);
extern tree fold_ignored_result (tree);
extern tree fold_abs_const (tree, tree);
extern tree fold_indirect_ref_1 (tree, tree);
Index: fold-const.c
===================================================================
--- fold-const.c (revision 139423)
+++ fold-const.c (working copy)
@@ -6508,7 +6508,7 @@ fold_div_compare (enum tree_code code, t
using a sign testing. Otherwise return NULL. TYPE is the desired
result type. */
-static tree
+tree
fold_single_bit_test_into_sign_test (enum tree_code code, tree arg0, tree arg1,
tree result_type)
{
@@ -6537,87 +6537,6 @@ fold_single_bit_test_into_sign_test (enu
return NULL_TREE;
}
-/* If CODE with arguments ARG0 and ARG1 represents a single bit
- equality/inequality test, then return a simplified form of
- the test using shifts and logical operations. Otherwise return
- NULL. TYPE is the desired result type. */
-
-tree
-fold_single_bit_test (enum tree_code code, tree arg0, tree arg1,
- tree result_type)
-{
- /* If this is testing a single bit, we can optimize the test. */
- if ((code == NE_EXPR || code == EQ_EXPR)
- && TREE_CODE (arg0) == BIT_AND_EXPR && integer_zerop (arg1)
- && integer_pow2p (TREE_OPERAND (arg0, 1)))
- {
- tree inner = TREE_OPERAND (arg0, 0);
- tree type = TREE_TYPE (arg0);
- int bitnum = tree_log2 (TREE_OPERAND (arg0, 1));
- enum machine_mode operand_mode = TYPE_MODE (type);
- int ops_unsigned;
- tree signed_type, unsigned_type, intermediate_type;
- tree tem, one;
-
- /* First, see if we can fold the single bit test into a sign-bit
- test. */
- tem = fold_single_bit_test_into_sign_test (code, arg0, arg1,
- result_type);
- if (tem)
- return tem;
-
- /* Otherwise we have (A & C) != 0 where C is a single bit,
- convert that into ((A >> C2) & 1). Where C2 = log2(C).
- Similarly for (A & C) == 0. */
-
- /* If INNER is a right shift of a constant and it plus BITNUM does
- not overflow, adjust BITNUM and INNER. */
- if (TREE_CODE (inner) == RSHIFT_EXPR
- && TREE_CODE (TREE_OPERAND (inner, 1)) == INTEGER_CST
- && TREE_INT_CST_HIGH (TREE_OPERAND (inner, 1)) == 0
- && bitnum < TYPE_PRECISION (type)
- && 0 > compare_tree_int (TREE_OPERAND (inner, 1),
- bitnum - TYPE_PRECISION (type)))
- {
- bitnum += TREE_INT_CST_LOW (TREE_OPERAND (inner, 1));
- inner = TREE_OPERAND (inner, 0);
- }
-
- /* If we are going to be able to omit the AND below, we must do our
- operations as unsigned. If we must use the AND, we have a choice.
- Normally unsigned is faster, but for some machines signed is. */
-#ifdef LOAD_EXTEND_OP
- ops_unsigned = (LOAD_EXTEND_OP (operand_mode) == SIGN_EXTEND
- && !flag_syntax_only) ? 0 : 1;
-#else
- ops_unsigned = 1;
-#endif
-
- signed_type = lang_hooks.types.type_for_mode (operand_mode, 0);
- unsigned_type = lang_hooks.types.type_for_mode (operand_mode, 1);
- intermediate_type = ops_unsigned ? unsigned_type : signed_type;
- inner = fold_convert (intermediate_type, inner);
-
- if (bitnum != 0)
- inner = build2 (RSHIFT_EXPR, intermediate_type,
- inner, size_int (bitnum));
-
- one = build_int_cst (intermediate_type, 1);
-
- if (code == EQ_EXPR)
- inner = fold_build2 (BIT_XOR_EXPR, intermediate_type, inner, one);
-
- /* Put the AND last so it can combine with more things. */
- inner = build2 (BIT_AND_EXPR, intermediate_type, inner, one);
-
- /* Make sure to return the proper type. */
- inner = fold_convert (result_type, inner);
-
- return inner;
- }
- return NULL_TREE;
-}
-
/* Check whether we are allowed to reorder operands arg0 and arg1,
such that the evaluation of arg1 occurs before arg0. */
@@ -7861,53 +7780,47 @@ fold_unary (enum tree_code code, tree ty
return tem;
}
- /* Convert (T)(x & c) into (T)x & (T)c, if c is an integer
+ /* Convert (T)(x OP c) into (T)x OP (T)c, if c is an integer
constants (if x has signed type, the sign bit cannot be set
- in c). This folds extension into the BIT_AND_EXPR.
+ in c). For bit values, this folds the extension into the operation.
??? We don't do it for BOOLEAN_TYPE or ENUMERAL_TYPE because they
very likely don't have maximal range for their precision and this
transformation effectively doesn't preserve non-maximal ranges. */
if (TREE_CODE (type) == INTEGER_TYPE
- && TREE_CODE (op0) == BIT_AND_EXPR
+ && TREE_CODE_CLASS (TREE_CODE (op0)) == tcc_binary
+ && TREE_CODE (op0) != POINTER_PLUS_EXPR
+
+ /* Do not extend except for bitwise operations to avoid multi-word
+ operations. */
+ && (TREE_CODE (op0) == BIT_AND_EXPR
+ || TREE_CODE (op0) == BIT_IOR_EXPR
+ || TREE_CODE (op0) == BIT_XOR_EXPR
+ || TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (op0)))
&& TREE_CODE (TREE_OPERAND (op0, 1)) == INTEGER_CST)
{
- tree and = op0;
- tree and0 = TREE_OPERAND (and, 0), and1 = TREE_OPERAND (and, 1);
+ tree op00 = TREE_OPERAND (op0, 0), cst = TREE_OPERAND (op0, 1);
int change = 0;
- if (TYPE_UNSIGNED (TREE_TYPE (and))
- || (TYPE_PRECISION (type)
- <= TYPE_PRECISION (TREE_TYPE (and))))
+ if (TYPE_UNSIGNED (TREE_TYPE (op0))
+ || TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (op0)))
change = 1;
- else if (TYPE_PRECISION (TREE_TYPE (and1))
- <= HOST_BITS_PER_WIDE_INT
- && host_integerp (and1, 1))
- {
- unsigned HOST_WIDE_INT cst;
-
- cst = tree_low_cst (and1, 1);
- cst &= (HOST_WIDE_INT) -1
- << (TYPE_PRECISION (TREE_TYPE (and1)) - 1);
- change = (cst == 0);
-#ifdef LOAD_EXTEND_OP
- if (change
- && !flag_syntax_only
- && (LOAD_EXTEND_OP (TYPE_MODE (TREE_TYPE (and0)))
- == ZERO_EXTEND))
- {
- tree uns = unsigned_type_for (TREE_TYPE (and0));
- and0 = fold_convert (uns, and0);
- and1 = fold_convert (uns, and1);
- }
-#endif
+ else if (TYPE_PRECISION (TREE_TYPE (cst)) <= HOST_BITS_PER_WIDE_INT
+ && host_integerp (cst, 1))
+ {
+ unsigned HOST_WIDE_INT val;
+
+ val = tree_low_cst (cst, 1);
+ val &= (HOST_WIDE_INT) -1
+ << (TYPE_PRECISION (TREE_TYPE (cst)) - 1);
+ change = (val == 0);
}
if (change)
{
- tem = force_fit_type_double (type, TREE_INT_CST_LOW (and1),
- TREE_INT_CST_HIGH (and1), 0,
- TREE_OVERFLOW (and1));
- return fold_build2 (BIT_AND_EXPR, type,
- fold_convert (type, and0), tem);
+ cst = force_fit_type_double (type, TREE_INT_CST_LOW (cst),
+ TREE_INT_CST_HIGH (cst), 0,
+ TREE_OVERFLOW (cst));
+ return fold_build2 (TREE_CODE (op0), type,
+ fold_convert (type, op00), cst);
}
}