This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 29 Sep 2011 23:15:52 +0200
- Subject: [PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch implements a fold_range_test like optimization on GIMPLE, inside
tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be
shared in between the two.
The advantage of the reassoc optimization is that it doesn't attempt to
merge just 2 ranges at a time, instead it sorts the ranges for the same
SSA_NAME and thus can optimize even cases where source code doesn't have
the numbers in a range test in increasing or decreasing order (and also
can optimize things that were in multiple statements in the source).
Additionally, it optimizes cases like
x == 1 || x == 3
into (x & ~2) == 1.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2011-09-27 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/46309
* fold-const.c (make_range, merge_ranges): Remove prototypes.
(range_binop): Likewise, no longer static.
(make_range_step): New function.
(make_range): Use it.
* tree.h (make_range_step, range_binop): New prototypes.
* Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
* tree-ssa-reassoc.c: Include diagnostic-core.h.
(struct range_entry): New type.
(init_range_entry, range_entry_cmp, update_range_test,
optimize_range_tests): New functions.
(reassociate_bb): Call optimize_range_tests.
* gcc.dg/pr46309.c: New test.
--- gcc/fold-const.c.jj 2011-09-05 12:28:53.000000000 +0200
+++ gcc/fold-const.c 2011-09-27 16:39:09.000000000 +0200
@@ -112,12 +112,8 @@ static tree decode_field_reference (loca
static int all_ones_mask_p (const_tree, int);
static tree sign_bit_p (tree, const_tree);
static int simple_operand_p (const_tree);
-static tree range_binop (enum tree_code, tree, tree, int, tree, int);
static tree range_predecessor (tree);
static tree range_successor (tree);
-extern tree make_range (tree, int *, tree *, tree *, bool *);
-extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
- tree, tree);
static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, tree);
static tree unextend (tree, int, int, tree);
@@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp)
must be specified for a comparison. ARG1 will be converted to ARG0's
type if both are specified. */
-static tree
+tree
range_binop (enum tree_code code, tree type, tree arg0, int upper0_p,
tree arg1, int upper1_p)
{
@@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t
return constant_boolean_node (result, type);
}
+/* Helper routine for make_range. Perform one step for it, return
+ new expression if the loop should continue or NULL_TREE if it should
+ stop. */
+
+tree
+make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
+ tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
+ bool *strict_overflow_p)
+{
+ tree arg0_type = TREE_TYPE (arg0);
+ tree n_low, n_high, low = *p_low, high = *p_high;
+ int in_p = *p_in_p, n_in_p;
+
+ switch (code)
+ {
+ case TRUTH_NOT_EXPR:
+ *p_in_p = ! in_p;
+ return arg0;
+
+ case EQ_EXPR: case NE_EXPR:
+ case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
+ /* We can only do something if the range is testing for zero
+ and if the second operand is an integer constant. Note that
+ saying something is "in" the range we make is done by
+ complementing IN_P since it will set in the initial case of
+ being not equal to zero; "out" is leaving it alone. */
+ if (low == NULL_TREE || high == NULL_TREE
+ || ! integer_zerop (low) || ! integer_zerop (high)
+ || TREE_CODE (arg1) != INTEGER_CST)
+ return NULL_TREE;
+
+ switch (code)
+ {
+ case NE_EXPR: /* - [c, c] */
+ low = high = arg1;
+ break;
+ case EQ_EXPR: /* + [c, c] */
+ in_p = ! in_p, low = high = arg1;
+ break;
+ case GT_EXPR: /* - [-, c] */
+ low = 0, high = arg1;
+ break;
+ case GE_EXPR: /* + [c, -] */
+ in_p = ! in_p, low = arg1, high = 0;
+ break;
+ case LT_EXPR: /* - [c, -] */
+ low = arg1, high = 0;
+ break;
+ case LE_EXPR: /* + [-, c] */
+ in_p = ! in_p, low = 0, high = arg1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If this is an unsigned comparison, we also know that EXP is
+ greater than or equal to zero. We base the range tests we make
+ on that fact, so we record it here so we can parse existing
+ range tests. We test arg0_type since often the return type
+ of, e.g. EQ_EXPR, is boolean. */
+ if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
+ {
+ if (! merge_ranges (&n_in_p, &n_low, &n_high,
+ in_p, low, high, 1,
+ build_int_cst (arg0_type, 0),
+ NULL_TREE))
+ return NULL_TREE;
+
+ in_p = n_in_p, low = n_low, high = n_high;
+
+ /* If the high bound is missing, but we have a nonzero low
+ bound, reverse the range so it goes from zero to the low bound
+ minus 1. */
+ if (high == 0 && low && ! integer_zerop (low))
+ {
+ in_p = ! in_p;
+ high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
+ integer_one_node, 0);
+ low = build_int_cst (arg0_type, 0);
+ }
+ }
+
+ *p_low = low;
+ *p_high = high;
+ *p_in_p = in_p;
+ return arg0;
+
+ case NEGATE_EXPR:
+ /* (-x) IN [a,b] -> x in [-b, -a] */
+ n_low = range_binop (MINUS_EXPR, exp_type,
+ build_int_cst (exp_type, 0),
+ 0, high, 1);
+ n_high = range_binop (MINUS_EXPR, exp_type,
+ build_int_cst (exp_type, 0),
+ 0, low, 0);
+ if (n_high != 0 && TREE_OVERFLOW (n_high))
+ return NULL_TREE;
+ goto normalize;
+
+ case BIT_NOT_EXPR:
+ /* ~ X -> -X - 1 */
+ return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
+ build_int_cst (exp_type, 1));
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ if (TREE_CODE (arg1) != INTEGER_CST)
+ return NULL_TREE;
+
+ /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
+ move a constant to the other side. */
+ if (!TYPE_UNSIGNED (arg0_type)
+ && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
+ return NULL_TREE;
+
+ /* If EXP is signed, any overflow in the computation is undefined,
+ so we don't worry about it so long as our computations on
+ the bounds don't overflow. For unsigned, overflow is defined
+ and this is exactly the right thing. */
+ n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
+ arg0_type, low, 0, arg1, 0);
+ n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
+ arg0_type, high, 1, arg1, 0);
+ if ((n_low != 0 && TREE_OVERFLOW (n_low))
+ || (n_high != 0 && TREE_OVERFLOW (n_high)))
+ return NULL_TREE;
+
+ if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
+ *strict_overflow_p = true;
+
+ normalize:
+ /* Check for an unsigned range which has wrapped around the maximum
+ value thus making n_high < n_low, and normalize it. */
+ if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
+ {
+ low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
+ integer_one_node, 0);
+ high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
+ integer_one_node, 0);
+
+ /* If the range is of the form +/- [ x+1, x ], we won't
+ be able to normalize it. But then, it represents the
+ whole range or the empty set, so make it
+ +/- [ -, - ]. */
+ if (tree_int_cst_equal (n_low, low)
+ && tree_int_cst_equal (n_high, high))
+ low = high = 0;
+ else
+ in_p = ! in_p;
+ }
+ else
+ low = n_low, high = n_high;
+
+ *p_low = low;
+ *p_high = high;
+ *p_in_p = in_p;
+ return arg0;
+
+ CASE_CONVERT:
+ case NON_LVALUE_EXPR:
+ if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
+ return NULL_TREE;
+
+ if (! INTEGRAL_TYPE_P (arg0_type)
+ || (low != 0 && ! int_fits_type_p (low, arg0_type))
+ || (high != 0 && ! int_fits_type_p (high, arg0_type)))
+ return NULL_TREE;
+
+ n_low = low, n_high = high;
+
+ if (n_low != 0)
+ n_low = fold_convert_loc (loc, arg0_type, n_low);
+
+ if (n_high != 0)
+ n_high = fold_convert_loc (loc, arg0_type, n_high);
+
+ /* If we're converting arg0 from an unsigned type, to exp,
+ a signed type, we will be doing the comparison as unsigned.
+ The tests above have already verified that LOW and HIGH
+ are both positive.
+
+ So we have to ensure that we will handle large unsigned
+ values the same way that the current signed bounds treat
+ negative values. */
+
+ if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
+ {
+ tree high_positive;
+ tree equiv_type;
+ /* For fixed-point modes, we need to pass the saturating flag
+ as the 2nd parameter. */
+ if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
+ equiv_type
+ = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type),
+ TYPE_SATURATING (arg0_type));
+ else
+ equiv_type
+ = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1);
+
+ /* A range without an upper bound is, naturally, unbounded.
+ Since convert would have cropped a very large value, use
+ the max value for the destination type. */
+ high_positive
+ = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
+ : TYPE_MAX_VALUE (arg0_type);
+
+ if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
+ high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
+ fold_convert_loc (loc, arg0_type,
+ high_positive),
+ build_int_cst (arg0_type, 1));
+
+ /* If the low bound is specified, "and" the range with the
+ range for which the original unsigned value will be
+ positive. */
+ if (low != 0)
+ {
+ if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high,
+ 1, fold_convert_loc (loc, arg0_type,
+ integer_zero_node),
+ high_positive))
+ return NULL_TREE;
+
+ in_p = (n_in_p == in_p);
+ }
+ else
+ {
+ /* Otherwise, "or" the range with the range of the input
+ that will be interpreted as negative. */
+ if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high,
+ 1, fold_convert_loc (loc, arg0_type,
+ integer_zero_node),
+ high_positive))
+ return NULL_TREE;
+
+ in_p = (in_p != n_in_p);
+ }
+ }
+
+ *p_low = n_low;
+ *p_high = n_high;
+ *p_in_p = in_p;
+ return arg0;
+
+ default:
+ return NULL_TREE;
+ }
+}
+
/* Given EXP, a logical expression, set the range it is testing into
variables denoted by PIN_P, PLOW, and PHIGH. Return the expression
actually being tested. *PLOW and *PHIGH will be made of the same
@@ -3804,10 +4049,10 @@ make_range (tree exp, int *pin_p, tree *
bool *strict_overflow_p)
{
enum tree_code code;
- tree arg0 = NULL_TREE, arg1 = NULL_TREE;
- tree exp_type = NULL_TREE, arg0_type = NULL_TREE;
- int in_p, n_in_p;
- tree low, high, n_low, n_high;
+ tree arg0, arg1 = NULL_TREE;
+ tree exp_type, nexp;
+ int in_p;
+ tree low, high;
location_t loc = EXPR_LOCATION (exp);
/* Start with simply saying "EXP != 0" and then look at the code of EXP
@@ -3823,255 +4068,26 @@ make_range (tree exp, int *pin_p, tree *
{
code = TREE_CODE (exp);
exp_type = TREE_TYPE (exp);
+ arg0 = NULL_TREE;
if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
{
if (TREE_OPERAND_LENGTH (exp) > 0)
arg0 = TREE_OPERAND (exp, 0);
- if (TREE_CODE_CLASS (code) == tcc_comparison
- || TREE_CODE_CLASS (code) == tcc_unary
- || TREE_CODE_CLASS (code) == tcc_binary)
- arg0_type = TREE_TYPE (arg0);
if (TREE_CODE_CLASS (code) == tcc_binary
|| TREE_CODE_CLASS (code) == tcc_comparison
|| (TREE_CODE_CLASS (code) == tcc_expression
&& TREE_OPERAND_LENGTH (exp) > 1))
arg1 = TREE_OPERAND (exp, 1);
}
+ if (arg0 == NULL_TREE)
+ break;
- switch (code)
- {
- case TRUTH_NOT_EXPR:
- in_p = ! in_p, exp = arg0;
- continue;
-
- case EQ_EXPR: case NE_EXPR:
- case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
- /* We can only do something if the range is testing for zero
- and if the second operand is an integer constant. Note that
- saying something is "in" the range we make is done by
- complementing IN_P since it will set in the initial case of
- being not equal to zero; "out" is leaving it alone. */
- if (low == 0 || high == 0
- || ! integer_zerop (low) || ! integer_zerop (high)
- || TREE_CODE (arg1) != INTEGER_CST)
- break;
-
- switch (code)
- {
- case NE_EXPR: /* - [c, c] */
- low = high = arg1;
- break;
- case EQ_EXPR: /* + [c, c] */
- in_p = ! in_p, low = high = arg1;
- break;
- case GT_EXPR: /* - [-, c] */
- low = 0, high = arg1;
- break;
- case GE_EXPR: /* + [c, -] */
- in_p = ! in_p, low = arg1, high = 0;
- break;
- case LT_EXPR: /* - [c, -] */
- low = arg1, high = 0;
- break;
- case LE_EXPR: /* + [-, c] */
- in_p = ! in_p, low = 0, high = arg1;
- break;
- default:
- gcc_unreachable ();
- }
-
- /* If this is an unsigned comparison, we also know that EXP is
- greater than or equal to zero. We base the range tests we make
- on that fact, so we record it here so we can parse existing
- range tests. We test arg0_type since often the return type
- of, e.g. EQ_EXPR, is boolean. */
- if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
- {
- if (! merge_ranges (&n_in_p, &n_low, &n_high,
- in_p, low, high, 1,
- build_int_cst (arg0_type, 0),
- NULL_TREE))
- break;
-
- in_p = n_in_p, low = n_low, high = n_high;
-
- /* If the high bound is missing, but we have a nonzero low
- bound, reverse the range so it goes from zero to the low bound
- minus 1. */
- if (high == 0 && low && ! integer_zerop (low))
- {
- in_p = ! in_p;
- high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
- integer_one_node, 0);
- low = build_int_cst (arg0_type, 0);
- }
- }
-
- exp = arg0;
- continue;
-
- case NEGATE_EXPR:
- /* (-x) IN [a,b] -> x in [-b, -a] */
- n_low = range_binop (MINUS_EXPR, exp_type,
- build_int_cst (exp_type, 0),
- 0, high, 1);
- n_high = range_binop (MINUS_EXPR, exp_type,
- build_int_cst (exp_type, 0),
- 0, low, 0);
- if (n_high != 0 && TREE_OVERFLOW (n_high))
- break;
- goto normalize;
-
- case BIT_NOT_EXPR:
- /* ~ X -> -X - 1 */
- exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
- build_int_cst (exp_type, 1));
- continue;
-
- case PLUS_EXPR: case MINUS_EXPR:
- if (TREE_CODE (arg1) != INTEGER_CST)
- break;
-
- /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
- move a constant to the other side. */
- if (!TYPE_UNSIGNED (arg0_type)
- && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
- break;
-
- /* If EXP is signed, any overflow in the computation is undefined,
- so we don't worry about it so long as our computations on
- the bounds don't overflow. For unsigned, overflow is defined
- and this is exactly the right thing. */
- n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
- arg0_type, low, 0, arg1, 0);
- n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
- arg0_type, high, 1, arg1, 0);
- if ((n_low != 0 && TREE_OVERFLOW (n_low))
- || (n_high != 0 && TREE_OVERFLOW (n_high)))
- break;
-
- if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
- *strict_overflow_p = true;
-
- normalize:
- /* Check for an unsigned range which has wrapped around the maximum
- value thus making n_high < n_low, and normalize it. */
- if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
- {
- low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
- integer_one_node, 0);
- high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
- integer_one_node, 0);
-
- /* If the range is of the form +/- [ x+1, x ], we won't
- be able to normalize it. But then, it represents the
- whole range or the empty set, so make it
- +/- [ -, - ]. */
- if (tree_int_cst_equal (n_low, low)
- && tree_int_cst_equal (n_high, high))
- low = high = 0;
- else
- in_p = ! in_p;
- }
- else
- low = n_low, high = n_high;
-
- exp = arg0;
- continue;
-
- CASE_CONVERT: case NON_LVALUE_EXPR:
- if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
- break;
-
- if (! INTEGRAL_TYPE_P (arg0_type)
- || (low != 0 && ! int_fits_type_p (low, arg0_type))
- || (high != 0 && ! int_fits_type_p (high, arg0_type)))
- break;
-
- n_low = low, n_high = high;
-
- if (n_low != 0)
- n_low = fold_convert_loc (loc, arg0_type, n_low);
-
- if (n_high != 0)
- n_high = fold_convert_loc (loc, arg0_type, n_high);
-
-
- /* If we're converting arg0 from an unsigned type, to exp,
- a signed type, we will be doing the comparison as unsigned.
- The tests above have already verified that LOW and HIGH
- are both positive.
-
- So we have to ensure that we will handle large unsigned
- values the same way that the current signed bounds treat
- negative values. */
-
- if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
- {
- tree high_positive;
- tree equiv_type;
- /* For fixed-point modes, we need to pass the saturating flag
- as the 2nd parameter. */
- if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
- equiv_type = lang_hooks.types.type_for_mode
- (TYPE_MODE (arg0_type),
- TYPE_SATURATING (arg0_type));
- else
- equiv_type = lang_hooks.types.type_for_mode
- (TYPE_MODE (arg0_type), 1);
-
- /* A range without an upper bound is, naturally, unbounded.
- Since convert would have cropped a very large value, use
- the max value for the destination type. */
- high_positive
- = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
- : TYPE_MAX_VALUE (arg0_type);
-
- if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
- high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
- fold_convert_loc (loc, arg0_type,
- high_positive),
- build_int_cst (arg0_type, 1));
-
- /* If the low bound is specified, "and" the range with the
- range for which the original unsigned value will be
- positive. */
- if (low != 0)
- {
- if (! merge_ranges (&n_in_p, &n_low, &n_high,
- 1, n_low, n_high, 1,
- fold_convert_loc (loc, arg0_type,
- integer_zero_node),
- high_positive))
- break;
-
- in_p = (n_in_p == in_p);
- }
- else
- {
- /* Otherwise, "or" the range with the range of the input
- that will be interpreted as negative. */
- if (! merge_ranges (&n_in_p, &n_low, &n_high,
- 0, n_low, n_high, 1,
- fold_convert_loc (loc, arg0_type,
- integer_zero_node),
- high_positive))
- break;
-
- in_p = (in_p != n_in_p);
- }
- }
-
- exp = arg0;
- low = n_low, high = n_high;
- continue;
-
- default:
- break;
- }
-
- break;
+ nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low,
+ &high, &in_p, strict_overflow_p);
+ if (nexp == NULL_TREE)
+ break;
+ exp = nexp;
}
/* If EXP is a constant, we can evaluate whether this is true or false. */
--- gcc/tree.h.jj 2011-09-20 21:43:07.000000000 +0200
+++ gcc/tree.h 2011-09-27 16:38:58.000000000 +0200
@@ -5384,7 +5384,10 @@ extern unsigned int get_pointer_alignmen
extern tree fold_call_stmt (gimple, bool);
extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function);
extern tree make_range (tree, int *, tree *, tree *, bool *);
+extern tree make_range_step (location_t, enum tree_code, tree, tree, tree,
+ tree *, tree *, int *, bool *);
extern tree build_range_check (location_t, tree, tree, int, tree, tree);
+extern tree range_binop (enum tree_code, tree, tree, int, tree, int);
extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
tree, tree);
extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
--- gcc/Makefile.in.jj 2011-09-18 21:20:04.000000000 +0200
+++ gcc/Makefile.in 2011-09-29 14:09:42.000000000 +0200
@@ -2641,7 +2641,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c
$(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \
tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \
$(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \
- tree-pretty-print.h gimple-pretty-print.h
+ tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H)
tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(TM_P_H) $(GGC_H) output.h \
$(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \
--- gcc/tree-ssa-reassoc.c.jj 2011-09-08 11:21:10.000000000 +0200
+++ gcc/tree-ssa-reassoc.c 2011-09-29 13:33:00.000000000 +0200
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.
#include "flags.h"
#include "target.h"
#include "params.h"
+#include "diagnostic-core.h"
/* This is a simple global reassociation pass. It is, in part, based
on the LLVM pass of the same name (They do some things more/less
@@ -1568,6 +1569,422 @@ optimize_ops_list (enum tree_code opcode
optimize_ops_list (opcode, ops);
}
+/* The following functions are subroutines to optimize_range_tests and allow
+ it to try to change a logical combination of comparisons into a range
+ test.
+
+ For example, both
+ X == 2 || X == 5 || X == 3 || X == 4
+ and
+ X >= 2 && X <= 5
+ are converted to
+ (unsigned) (X - 2) <= 3
+
+ For more information see comments above fold_range_test in fold-const.c,
+ this implementation is for GIMPLE. */
+
+struct range_entry
+{
+ tree exp;
+ tree low;
+ tree high;
+ bool in_p;
+ bool strict_overflow_p;
+ unsigned int idx, next;
+};
+
+/* This is similar to make_range in fold-const.c, but on top of
+ GIMPLE instead of trees. */
+
+static void
+init_range_entry (struct range_entry *r, tree exp)
+{
+ int in_p;
+ tree low, high;
+ bool is_bool, strict_overflow_p;
+
+ r->exp = NULL_TREE;
+ r->in_p = false;
+ r->strict_overflow_p = false;
+ r->low = NULL_TREE;
+ r->high = NULL_TREE;
+ if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
+ return;
+
+ /* Start with simply saying "EXP != 0" and then look at the code of EXP
+ and see if we can refine the range. Some of the cases below may not
+ happen, but it doesn't seem worth worrying about this. We "continue"
+ the outer loop when we've changed something; otherwise we "break"
+ the switch, which will "break" the while. */
+ low = build_int_cst (TREE_TYPE (exp), 0);
+ high = low;
+ in_p = 0;
+ strict_overflow_p = false;
+ is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
+
+ while (1)
+ {
+ gimple stmt;
+ enum tree_code code;
+ tree arg0, arg1, exp_type;
+ tree nexp;
+ location_t loc;
+
+ if (TREE_CODE (exp) != SSA_NAME)
+ break;
+
+ stmt = SSA_NAME_DEF_STMT (exp);
+ if (!is_gimple_assign (stmt))
+ break;
+
+ code = gimple_assign_rhs_code (stmt);
+ arg0 = gimple_assign_rhs1 (stmt);
+ arg1 = gimple_assign_rhs2 (stmt);
+ exp_type = TREE_TYPE (exp);
+ loc = gimple_location (stmt);
+ switch (code)
+ {
+ case BIT_NOT_EXPR:
+ if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
+ {
+ in_p = !in_p;
+ exp = arg0;
+ continue;
+ }
+ break;
+ case SSA_NAME:
+ exp = arg0;
+ continue;
+ CASE_CONVERT:
+ is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
+ goto do_default;
+ case EQ_EXPR:
+ case NE_EXPR:
+ case LT_EXPR:
+ case LE_EXPR:
+ case GE_EXPR:
+ case GT_EXPR:
+ is_bool = true;
+ /* FALLTHRU */
+ do_default:
+ default:
+ if (!is_bool)
+ return;
+ nexp = make_range_step (loc, code, arg0, arg1, exp_type,
+ &low, &high, &in_p,
+ &strict_overflow_p);
+ if (nexp != NULL_TREE)
+ {
+ exp = nexp;
+ gcc_assert (TREE_CODE (exp) == SSA_NAME);
+ continue;
+ }
+ break;
+ }
+ break;
+ }
+ if (is_bool)
+ {
+ r->exp = exp;
+ r->in_p = in_p;
+ r->low = low;
+ r->high = high;
+ r->strict_overflow_p = strict_overflow_p;
+ }
+}
+
+/* Comparison function for qsort. Sort entries
+ without SSA_NAME exp first, then with SSA_NAMEs sorted
+ by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
+ by increasing ->low and if ->low is the same, by increasing
+ ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
+ maximum. */
+
+static int
+range_entry_cmp (const void *a, const void *b)
+{
+ const struct range_entry *p = (const struct range_entry *) a;
+ const struct range_entry *q = (const struct range_entry *) b;
+
+ if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
+ {
+ if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
+ {
+ /* Group range_entries for the same SSA_NAME together. */
+ if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
+ return -1;
+ else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
+ return 1;
+ /* If ->low is different, NULL low goes first, then by
+ ascending low. */
+ if (p->low != NULL_TREE)
+ {
+ if (q->low != NULL_TREE)
+ {
+ if (integer_onep (range_binop (LT_EXPR, integer_type_node,
+ p->low, 0, q->low, 0)))
+ return -1;
+ if (integer_onep (range_binop (GT_EXPR, integer_type_node,
+ p->low, 0, q->low, 0)))
+ return 1;
+ }
+ else
+ return 1;
+ }
+ else if (q->low != NULL_TREE)
+ return -1;
+ /* If ->high is different, NULL high goes last, before that by
+ ascending high. */
+ if (p->high != NULL_TREE)
+ {
+ if (q->high != NULL_TREE)
+ {
+ if (integer_onep (range_binop (LT_EXPR, integer_type_node,
+ p->high, 0, q->high, 0)))
+ return -1;
+ if (integer_onep (range_binop (GT_EXPR, integer_type_node,
+ p->high, 0, q->high, 0)))
+ return 1;
+ }
+ else
+ return -1;
+ }
+ else if (p->high != NULL_TREE)
+ return 1;
+ /* If both ranges are the same, sort below by ascending idx. */
+ }
+ else
+ return 1;
+ }
+ else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
+ return -1;
+
+ if (p->idx < q->idx)
+ return -1;
+ else
+ {
+ gcc_checking_assert (p->idx > q->idx);
+ return 1;
+ }
+}
+
+/* Helper routine of optimize_range_tests.
+ [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
+ RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
+ OPCODE and OPS are arguments of optimize_range_tests. Return
+ true if the range merge has been successful. */
+
+static bool
+update_range_test (struct range_entry *range, struct range_entry *otherrange,
+ unsigned int count, enum tree_code opcode,
+ VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
+ tree low, tree high, bool strict_overflow_p)
+{
+ tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
+ location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
+ tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
+ enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
+ gimple_stmt_iterator gsi;
+
+ if (tem == NULL_TREE)
+ return false;
+
+ if (strict_overflow_p && issue_strict_overflow_warning (wc))
+ warning_at (loc, OPT_Wstrict_overflow,
+ "assuming signed overflow does not occur "
+ "when simplifying range test");
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ struct range_entry *r;
+ fprintf (dump_file, "Optimizing range tests ");
+ print_generic_expr (dump_file, range->exp, 0);
+ fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
+ print_generic_expr (dump_file, range->low, 0);
+ fprintf (dump_file, ", ");
+ print_generic_expr (dump_file, range->high, 0);
+ fprintf (dump_file, "]");
+ for (r = otherrange; r < otherrange + count; r++)
+ {
+ fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
+ print_generic_expr (dump_file, r->low, 0);
+ fprintf (dump_file, ", ");
+ print_generic_expr (dump_file, r->high, 0);
+ fprintf (dump_file, "]");
+ }
+ fprintf (dump_file, "\n into ");
+ print_generic_expr (dump_file, tem, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ if (opcode == BIT_IOR_EXPR)
+ tem = invert_truthvalue_loc (loc, tem);
+
+ tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
+ gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
+ tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
+ GSI_SAME_STMT);
+
+ VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
+ range->exp = exp;
+ range->low = low;
+ range->high = high;
+ range->in_p = in_p;
+ range->strict_overflow_p = false;
+
+ for (range = otherrange; range < otherrange + count; range++)
+ {
+ VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
+ range->exp = NULL_TREE;
+ }
+ return true;
+}
+
+/* Optimize range tests, similarly to how fold_range_test optimizes
+ it on trees. The tree code for the binary
+ operation between all the operands is OPCODE. */
+
+static void
+optimize_range_tests (enum tree_code opcode,
+ VEC (operand_entry_t, heap) **ops)
+{
+ unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
+ operand_entry_t oe;
+ struct range_entry *ranges;
+ bool any_changes = false;
+
+ if (length == 1)
+ return;
+
+ ranges = XNEWVEC (struct range_entry, length);
+ for (i = 0; i < length; i++)
+ {
+ ranges[i].idx = i;
+ init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
+ /* For | invert it now, we will invert it again before emitting
+ the optimized expression. */
+ if (opcode == BIT_IOR_EXPR)
+ ranges[i].in_p = !ranges[i].in_p;
+ }
+
+ qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
+ for (i = 0; i < length; i++)
+ if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
+ break;
+
+ /* Try to merge ranges. */
+ for (first = i; i < length; i++)
+ {
+ tree low = ranges[i].low;
+ tree high = ranges[i].high;
+ int in_p = ranges[i].in_p;
+ bool strict_overflow_p = ranges[i].strict_overflow_p;
+
+ for (j = i + 1; j < length; j++)
+ {
+ if (ranges[i].exp != ranges[j].exp)
+ break;
+ if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
+ ranges[j].in_p, ranges[j].low, ranges[j].high))
+ break;
+ strict_overflow_p |= ranges[j].strict_overflow_p;
+ }
+
+ if (j > i + 1
+ && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+ ops, ranges[i].exp, in_p, low, high,
+ strict_overflow_p))
+ {
+ i = j - 1;
+ any_changes = true;
+ }
+ }
+
+ /* Optimize X == CST1 || X == CST2
+ if popcount (CST1 ^ CST2) == 1 into
+ (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
+ Similarly for ranges. E.g.
+ X != 2 && X != 3 && X != 10 && X != 11
+ will be transformed by the above loop into
+ (X - 2U) <= 1U && (X - 10U) <= 1U
+ and this loop can transform that into
+ ((X & ~8) - 2U) <= 1U. */
+ for (i = first; i < length; i++)
+ {
+ tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
+
+ if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
+ continue;
+ type = TREE_TYPE (ranges[i].exp);
+ if (!INTEGRAL_TYPE_P (type))
+ continue;
+ lowi = ranges[i].low;
+ if (lowi == NULL_TREE)
+ lowi = TYPE_MIN_VALUE (type);
+ highi = ranges[i].high;
+ if (highi == NULL_TREE)
+ continue;
+ for (j = i + 1; j < length && j < i + 64; j++)
+ {
+ if (ranges[j].exp == NULL_TREE)
+ continue;
+ if (ranges[i].exp != ranges[j].exp)
+ break;
+ if (ranges[j].in_p)
+ continue;
+ lowj = ranges[j].low;
+ if (lowj == NULL_TREE)
+ continue;
+ highj = ranges[j].high;
+ if (highj == NULL_TREE)
+ highj = TYPE_MAX_VALUE (type);
+ if (!integer_onep (range_binop (GT_EXPR, integer_type_node,
+ lowj, 0, highi, 0)))
+ continue;
+ lowxor = range_binop (BIT_XOR_EXPR, NULL_TREE, lowi, 0, lowj, 0);
+ if (lowxor == NULL_TREE)
+ continue;
+ gcc_checking_assert (!integer_zerop (lowxor));
+ tem = range_binop (MINUS_EXPR, NULL_TREE, lowxor, 0,
+ build_int_cst (type, 1), 0);
+ tem = range_binop (BIT_AND_EXPR, NULL_TREE, lowxor, 0, tem, 0);
+ if (!integer_zerop (tem))
+ continue;
+ highxor = range_binop (BIT_XOR_EXPR, NULL_TREE, highi, 0, highj, 0);
+ if (!tree_int_cst_equal (lowxor, highxor))
+ continue;
+ tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
+ exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
+ lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
+ highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
+ if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
+ ranges[i].in_p, lowj, highj,
+ ranges[i].strict_overflow_p
+ || ranges[j].strict_overflow_p))
+ {
+ any_changes = true;
+ break;
+ }
+ }
+ }
+
+ if (any_changes)
+ {
+ j = 0;
+ FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
+ {
+ if (oe->op == error_mark_node)
+ continue;
+ else if (i != j)
+ VEC_replace (operand_entry_t, *ops, j, oe);
+ j++;
+ }
+ VEC_truncate (operand_entry_t, *ops, j);
+ }
+
+ XDELETEVEC (ranges);
+}
+
/* Return true if OPERAND is defined by a PHI node which uses the LHS
of STMT in it's operands. This is also known as a "destructive
update" operation. */
@@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb)
optimize_ops_list (rhs_code, &ops);
}
+ if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
+ optimize_range_tests (rhs_code, &ops);
+
if (VEC_length (operand_entry_t, ops) == 1)
{
if (dump_file && (dump_flags & TDF_DETAILS))
--- gcc/testsuite/gcc.dg/pr46309.c.jj 2011-09-29 14:03:28.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr46309.c 2011-09-29 14:08:32.000000000 +0200
@@ -0,0 +1,66 @@
+/* PR tree-optimization/46309 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc-details" } */
+
+int
+f1 (int a)
+{
+ int v1 = (a == 3);
+ int v2 = (a == 1);
+ int v3 = (a == 4);
+ int v4 = (a == 2);
+ return v1 || v2 || v3 || v4;
+}
+
+int
+f2 (int a)
+{
+ int v1 = (a == 1);
+ int v2 = (a == 2);
+ int v3 = (a == 3);
+ int v4 = (a == 4);
+ return v1 || v2 || v3 || v4;
+}
+
+int
+f3 (int a)
+{
+ int v1 = (a == 3);
+ int v2 = (a == 1);
+ return v1 || v2;
+}
+
+int
+f4 (int a)
+{
+ int v1 = (a == 1);
+ int v2 = (a == 2);
+ return v1 || v2;
+}
+
+int
+f5 (unsigned int a)
+{
+ int v1 = (a <= 31);
+ int v2 = (a >= 64 && a <= 95);
+ return v1 || v2;
+}
+
+int
+f6 (unsigned int a)
+{
+ int v1 = (a <= 31);
+ int v2 = (a >= 64 && a <= 95);
+ int v3 = (a >= 128 && a <= 159);
+ int v4 = (a >= 192 && a <= 223);
+ return v1 || v2 || v3 || v4;
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.128, 159. and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc2" } } */
Jakub