This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Inner-loop optimization regression from 3.3 to 3.4
- From: Jan Hubicka <jh at suse dot cz>
- To: Richard Henderson <rth at redhat dot com>, Jan Hubicka <jh at suse dot cz>,Zack Weinberg <zack at codesourcery dot com>, gcc at gcc dot gnu dot org,Jan Hubicka <hubicka at ucw dot cz>
- Date: Sun, 23 Nov 2003 17:08:53 +0100
- Subject: Re: Inner-loop optimization regression from 3.3 to 3.4
- References: <87vfqt66ge.fsf@codesourcery.com> <20031013130323.GY14005@kam.mff.cuni.cz> <87fzhvdxf3.fsf@codesourcery.com> <20031017141002.GF6212@kam.mff.cuni.cz> <20031018201817.GB29612@redhat.com> <20031101113548.GC14974@kam.mff.cuni.cz> <20031101201629.GC11339@redhat.com>
> On Sat, Nov 01, 2003 at 12:35:48PM +0100, Jan Hubicka wrote:
> > Does this look OK?
>
> I don't know. Testing on Alpha, with the following modified patch,
>
> void f(int a, int b, int c, int d)
> {
> if (a >= 0 && b <= 0)
> f1();
> if (c >= 6 && d < 10)
> f2();
> }
>
> This test is still converted at rtl expansion time; ifcvt has nothing
> to work with. Weren't you trying to remove that code? Trying again,
>
> void g(int a, int b, int c, int d)
> {
> if (a < 0) goto L1;
> if (b > 0) goto L1;
> f1();
> L1:
> if (c < 6) goto L2;
> if (d >= 10) goto L2;
> f2();
> L2:;
> }
>
> Does ok with the first test but gives up with the second.
> There doesn't seem to be any reason for that.
>
> The bar is set fairly high here for your ifcvt routine, since making
> the transformation during fold-const is sure to be a win most of the
> time, for most targets.
Hi,
the problem is that for proving side effects of the branch, we need liveness
information. On the rtlopt/cfg branch we do ifcvt before combining to solve
this. I've tested the idea on mainline and it seems to work well. Any
slowdowns caused by the pass appear to be in the noise at least for make
bootstrap (I tried it 4 times and got mixed results) and c-frontend components
ocmpiling tests.
On c-frontend this saved 0.5% of instructions with branch combining code.
Without branch combining code this has little effect (0.03%) because we prodice
conditional branch sequences pretty uncombinable, but it may differ on other
architectures.
I've also found the simplifier in fold-const that converted the test in your
testcase and disabled it.
The patch has been bootstrapped & regtested i686-linux. Does it look as good
sollution?
Honza
Sat Jun 1 20:10:19 CEST 2002 Jan Hubicka <jh@suse.cz>
* ifcvt.c (num_possible_double_test_blocks,
num_updated_double_test_blocks, num_removed_blocks_double_test): New
static variables.
(double_test_info): New structure.
(nonzero_test, double_test_ior_and, m1_test, find_double_test_block,
process_double_test_block, block_side_effects_p): New static functions.
(find_if_header): Call find_double_test_block.
(dead_or_predicatble): Properly update LABEL_NUSES.
(if_convert): Output statistics; update DU chains.
* fold-const.c (RANGE_TEST_NON_SHORT_CIRCUIT): Kill.
(fold_range_test): Kill branch combining.
(fold_truthop): Likewise
* toplev.c (dump_file_index, dump_file): Add new ifcvt pass before combine.
(rest_of_handle_ifcvt_before_combine): New.
(rest_of_handle_ifcvt_after_combine): Use DFI_ce3.
(rest_of_compilation): Do ifcvt before combine; add support for DFI_ce4
Index: fold-const.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fold-const.c,v
retrieving revision 1.316
diff -c -3 -p -r1.316 fold-const.c
*** fold-const.c 1 Nov 2003 00:59:53 -0000 1.316
--- fold-const.c 23 Nov 2003 15:57:55 -0000
*************** merge_ranges (int *pin_p, tree *plow, tr
*** 3577,3586 ****
return 1;
}
- #ifndef RANGE_TEST_NON_SHORT_CIRCUIT
- #define RANGE_TEST_NON_SHORT_CIRCUIT (BRANCH_COST >= 2)
- #endif
-
/* EXP is some logical combination of boolean tests. See if we can
merge it into some range test. Return the new tree if so. */
--- 3577,3582 ----
*************** fold_range_test (tree exp)
*** 3613,3653 ****
in_p, low, high))))
return or_op ? invert_truthvalue (tem) : tem;
- /* On machines where the branch cost is expensive, if this is a
- short-circuited branch and the underlying object on both sides
- is the same, make a non-short-circuit operation. */
- else if (RANGE_TEST_NON_SHORT_CIRCUIT
- && lhs != 0 && rhs != 0
- && (TREE_CODE (exp) == TRUTH_ANDIF_EXPR
- || TREE_CODE (exp) == TRUTH_ORIF_EXPR)
- && operand_equal_p (lhs, rhs, 0))
- {
- /* If simple enough, just rewrite. Otherwise, make a SAVE_EXPR
- unless we are at top level or LHS contains a PLACEHOLDER_EXPR, in
- which cases we can't do this. */
- if (simple_operand_p (lhs))
- return build (TREE_CODE (exp) == TRUTH_ANDIF_EXPR
- ? TRUTH_AND_EXPR : TRUTH_OR_EXPR,
- TREE_TYPE (exp), TREE_OPERAND (exp, 0),
- TREE_OPERAND (exp, 1));
-
- else if ((*lang_hooks.decls.global_bindings_p) () == 0
- && ! CONTAINS_PLACEHOLDER_P (lhs))
- {
- tree common = save_expr (lhs);
-
- if (0 != (lhs = build_range_check (TREE_TYPE (exp), common,
- or_op ? ! in0_p : in0_p,
- low0, high0))
- && (0 != (rhs = build_range_check (TREE_TYPE (exp), common,
- or_op ? ! in1_p : in1_p,
- low1, high1))))
- return build (TREE_CODE (exp) == TRUTH_ANDIF_EXPR
- ? TRUTH_AND_EXPR : TRUTH_OR_EXPR,
- TREE_TYPE (exp), lhs, rhs);
- }
- }
-
return 0;
}
--- 3609,3614 ----
*************** fold_truthop (enum tree_code code, tree
*** 3813,3852 ****
else if (compcode != -1)
return build (compcode_to_comparison (compcode),
truth_type, ll_arg, lr_arg);
- }
-
- /* If the RHS can be evaluated unconditionally and its operands are
- simple, it wins to evaluate the RHS unconditionally on machines
- with expensive branches. In this case, this isn't a comparison
- that can be merged. Avoid doing this if the RHS is a floating-point
- comparison since those can trap. */
-
- if (BRANCH_COST >= 2
- && ! FLOAT_TYPE_P (TREE_TYPE (rl_arg))
- && simple_operand_p (rl_arg)
- && simple_operand_p (rr_arg))
- {
- /* Convert (a != 0) || (b != 0) into (a | b) != 0. */
- if (code == TRUTH_OR_EXPR
- && lcode == NE_EXPR && integer_zerop (lr_arg)
- && rcode == NE_EXPR && integer_zerop (rr_arg)
- && TREE_TYPE (ll_arg) == TREE_TYPE (rl_arg))
- return build (NE_EXPR, truth_type,
- build (BIT_IOR_EXPR, TREE_TYPE (ll_arg),
- ll_arg, rl_arg),
- integer_zero_node);
-
- /* Convert (a == 0) && (b == 0) into (a | b) == 0. */
- if (code == TRUTH_AND_EXPR
- && lcode == EQ_EXPR && integer_zerop (lr_arg)
- && rcode == EQ_EXPR && integer_zerop (rr_arg)
- && TREE_TYPE (ll_arg) == TREE_TYPE (rl_arg))
- return build (EQ_EXPR, truth_type,
- build (BIT_IOR_EXPR, TREE_TYPE (ll_arg),
- ll_arg, rl_arg),
- integer_zero_node);
-
- return build (code, truth_type, lhs, rhs);
}
/* See if the comparisons can be merged. Then get all the parameters for
--- 3774,3779 ----
Index: ifcvt.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ifcvt.c,v
retrieving revision 1.130
diff -c -3 -p -r1.130 ifcvt.c
*** ifcvt.c 21 Nov 2003 06:52:23 -0000 1.130
--- ifcvt.c 23 Nov 2003 15:58:05 -0000
***************
*** 40,45 ****
--- 40,46 ----
#include "tm_p.h"
#include "cfgloop.h"
#include "target.h"
+ #include "optabs.h"
#ifndef HAVE_conditional_execution
***************
*** 68,73 ****
--- 69,81 ----
#define NULL_EDGE ((struct edge_def *)NULL)
#define NULL_BLOCK ((struct basic_block_def *)NULL)
+ /* # of (test || test2) blocks we looked at */
+ static int num_possible_double_test_blocks;
+
+ /* # of (test || test2) blocks were converted to conditional
+ execution. */
+ static int num_updated_double_test_blocks;
+
/* # of IF-THEN or IF-THEN-ELSE blocks we looked at */
static int num_possible_if_blocks;
*************** static bool life_data_ok;
*** 87,92 ****
--- 95,126 ----
/* The post-dominator relation on the original block numbers. */
static dominance_info post_dominators;
+ struct double_test_info
+ {
+ /* Basic blocks containins first and second test. */
+ basic_block test1_bb, test2_bb;
+
+ /* Then_bb is basic block reached when one of tests above is true.
+ Else_bb is reached otherwise. */
+ basic_block then_bb, else_bb;
+
+ /* If reverse is true, jump info else_bb instead of then_bb after
+ combining an condition. */
+ bool reverse;
+
+ /* The conditions. */
+ enum rtx_code cond1_code, cond2_code;
+ rtx cond1_op0, cond1_op1, cond2_op0, cond2_op1;
+
+ /* Probability of the first and second condition to be true. */
+ int probability1, probability2;
+ gcov_type count1, count2;
+
+ /* Probability of the overall condition to be true. */
+ int probability;
+ };
+
+
/* Forward references. */
static int count_bb_insns (basic_block);
static rtx first_active_insn (basic_block);
*************** static int dead_or_predicable (basic_blo
*** 113,118 ****
--- 147,153 ----
static void noce_emit_move_insn (rtx, rtx);
static rtx block_has_only_trap (basic_block);
static void mark_loop_exit_edges (void);
+ static int find_double_test_block (basic_block, edge, edge);
/* Sets EDGE_LOOP_EXIT flag for all loop exits. */
static void
*************** noce_operand_ok (rtx op)
*** 1764,1769 ****
--- 1799,2301 ----
return ! may_trap_p (op);
}
+ /* Return true if block has side effects. */
+
+ static bool
+ block_side_effects_p (basic_block bb)
+ {
+ rtx insn;
+ for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn))
+ {
+ rtx set;
+ rtx dest;
+
+ if (!active_insn_p (insn))
+ continue;
+ set = single_set (insn);
+ if (!set)
+ return true;
+ dest = SET_DEST (set);
+ if (GET_CODE (dest) == STRICT_LOW_PART)
+ dest = XEXP (dest, 0);
+ if (GET_CODE (dest) == SUBREG)
+ dest = SUBREG_REG (dest);
+ if (dest == pc_rtx || dest == cc0_rtx)
+ continue;
+ if (!REG_P (dest))
+ return true;
+ if (!life_data_ok || (bb->flags & BB_DIRTY))
+ return true;
+ if (REGNO_REG_SET_P (bb->global_live_at_end, REGNO (dest)))
+ return true;
+ if (!noce_operand_ok (PATTERN (insn)))
+ return true;
+ }
+ return false;
+ }
+
+ /* Emit code to set register -1 iff condition is false.
+ Handle only special cases we can do cheaply, as otherwise
+ the setcc instruction and | will be used instead. */
+
+ static rtx
+ m1_test (enum rtx_code cond, rtx op0, rtx op1)
+ {
+ /* Using AND allways adds extra instruction to compare with -1, so be
+ more strict about BRANCH_COST than nonzero_test. */
+ if (!BRANCH_COST)
+ return NULL;
+ /* First special cases first. */
+ if (INTEGRAL_MODE_P (GET_MODE (op0)))
+ switch (cond)
+ {
+ case NE:
+ if (op1 == constm1_rtx)
+ {
+ rtx reg = gen_reg_rtx (GET_MODE (op0));
+
+ /* We must move the operand to new register, as operand of
+ first test may be clobbered while computing operand for
+ the second. */
+ emit_move_insn (reg, op0);
+ return reg;
+ }
+ break;
+ case GE:
+ /* Convert (a >= 0) into (a >> (bits_per_mode - 1)) != -1. */
+ if (op1 == const0_rtx && BRANCH_COST > 1)
+ {
+ rtx alt1, alt2;
+ nonnegative:
+
+ /* Some machines may have expensive shifting. */
+ alt1 = gen_rtx_ASHIFTRT (GET_MODE (op0), op0,
+ GEN_INT (GET_MODE_BITSIZE
+ (GET_MODE (op0)) - 1));
+ alt2 = gen_rtx_GE (SImode, op0, const0_rtx);
+
+ if (rtx_cost (alt1, SET) <= rtx_cost (alt2, SET))
+ return expand_simple_binop (GET_MODE (op0), ASHIFTRT, op0,
+ GEN_INT (GET_MODE_BITSIZE
+ (GET_MODE (op0)) - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ break;
+ case GT:
+ /* (a > -1) is equivalent to (a >= 0). */
+ if (op1 == constm1_rtx && BRANCH_COST > 1)
+ goto nonnegative;
+ break;
+ default:
+ break;
+ }
+ return NULL;
+ }
+
+ /* Emit code to set register nonzero iff condition is true. Return the
+ register. */
+
+ static rtx
+ nonzero_test (enum rtx_code cond, rtx op0, rtx op1)
+ {
+ rtx x;
+ enum machine_mode mode;
+ enum insn_code icode;
+
+ /* First special cases first. */
+ if (INTEGRAL_MODE_P (GET_MODE (op0)))
+ switch (cond)
+ {
+ case NE:
+ /* Convert (a != 0) into a, (a != b) into (a ^ b). */
+ if (op1 == const0_rtx)
+ {
+ rtx reg = gen_reg_rtx (GET_MODE (op0));
+
+ /* We must move the operand to new register, as operand of
+ first test may be clobbered while computing operand for
+ the second. */
+ emit_move_insn (reg, op0);
+ return reg;
+ }
+ if (BRANCH_COST > 0)
+ return expand_simple_binop (GET_MODE (op0), XOR, op0, op1, NULL_RTX, 1,
+ OPTAB_DIRECT);
+ break;
+ case LT:
+ /* Convert (a < 0) into (a >> (bits_per_mode - 1)). */
+ if (op1 == const0_rtx && BRANCH_COST > 0)
+ {
+ rtx alt1, alt2;
+
+ negative:
+
+ /* Some machines may have expensive shifting. */
+ alt1 = gen_rtx_ASHIFTRT (GET_MODE (op0), op0,
+ GEN_INT (GET_MODE_BITSIZE
+ (GET_MODE (op0)) - 1));
+ alt2 = gen_rtx_GE (SImode, op0, const0_rtx);
+
+ if (rtx_cost (alt1, SET) <= rtx_cost (alt2, SET))
+ return expand_simple_binop (GET_MODE (op0), LSHIFTRT, op0,
+ GEN_INT (GET_MODE_BITSIZE
+ (GET_MODE (op0)) - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ break;
+ case LE:
+ /* (a <= -1) is equivalent to (a < 0). */
+ if (op1 == constm1_rtx && BRANCH_COST > 0)
+ goto negative;
+ break;
+ case GT:
+ /* Convert (a > -1) into (~a < 0). */
+ if (op1 == constm1_rtx && BRANCH_COST > 1)
+ {
+ positive:
+ op0 = expand_simple_unop (GET_MODE (op0), NOT, op0, NULL_RTX, 1);
+ if (!op0)
+ return NULL;
+ goto negative;
+ }
+ break;
+ case GE:
+ /* Convert (a >= 0) into (~a < 0). */
+ if (op1 == const0_rtx && BRANCH_COST > 1)
+ goto positive;
+ break;
+ default:
+ break;
+ }
+ /* Try m1_test and add 1 to the result. */
+ x = m1_test (cond, op0, op1);
+ if (x)
+ return expand_simple_binop (GET_MODE (x), PLUS, x, const1_rtx, NULL_RTX,
+ 1, OPTAB_DIRECT);
+
+
+ if (BRANCH_COST < 2)
+ return NULL;
+
+ /* In case target do have setcc instruction, choose mode used by the pattern.
+ Otherwise use Pmode as it will probably combine well with the other
+ test. */
+
+ mode = Pmode;
+ icode = setcc_gen_code[(int) cond];
+ if (icode != CODE_FOR_nothing)
+ mode = insn_data[(int) icode].operand[0].mode;
+
+ /* Use store_flag as default. */
+ return emit_store_flag (gen_reg_rtx (mode),
+ cond, op0, op1, VOIDmode,
+ (cond == LTU || cond == LEU
+ || cond == GEU || cond == GTU), 0);
+ }
+
+ /* Attempt to convert double test of form (cond1 || cond2)
+ as (cond1 | cond2) or ((cond1 ? -1 : 0) & (cond2 ? -1 : 0)).
+
+ Return sequence containing the conditional jump. Set TEST
+ to the code to process operand0 before it is possibly clobbered
+ by the original code copied from TEST_BB. */
+
+ static rtx
+ double_test_ior_and (struct double_test_info *info, rtx *test)
+ {
+ rtx op0, op1 = NULL, res;
+ rtx insn;
+ enum machine_mode mode;
+ bool and_code = true;
+
+ if (! general_operand (info->cond1_op0, GET_MODE (info->cond1_op0))
+ || ! general_operand (info->cond1_op1, GET_MODE (info->cond1_op1))
+ || ! general_operand (info->cond2_op0, GET_MODE (info->cond2_op0))
+ || ! general_operand (info->cond2_op1, GET_MODE (info->cond2_op1)))
+ return NULL;
+
+ /* Attempt to prepare operands for & or | operation. First try &,
+ as we make it suceed only when both tests are chaper than alternative
+ doable by |.
+
+ | has the advantage of comparing result with 0, so for instance i386 can
+ do it by one instruction, while for & we compare with -1. */
+
+ start_sequence ();
+ op0 = m1_test (info->cond1_code, info->cond1_op0, info->cond1_op1);
+ *test = get_insns ();
+ end_sequence ();
+
+ if (op0)
+ {
+ start_sequence ();
+ op1 = m1_test (info->cond2_code, info->cond2_op0, info->cond2_op1);
+ if (!op1)
+ end_sequence ();
+ }
+
+ if (!op0 || !op1)
+ {
+ start_sequence ();
+ op0 = nonzero_test (info->cond1_code, info->cond1_op0, info->cond1_op1);
+ if (!op0)
+ goto fail;
+ *test = get_insns ();
+ end_sequence ();
+
+ start_sequence ();
+ op1 = nonzero_test (info->cond2_code, info->cond2_op0, info->cond2_op1);
+ if (!op1)
+ goto fail;
+ and_code = false;
+ }
+
+ if (GET_MODE_BITSIZE (GET_MODE (op0)) > GET_MODE_BITSIZE (GET_MODE (op1)))
+ mode = GET_MODE (op0);
+ else
+ mode = GET_MODE (op1);
+
+ res = expand_simple_binop (mode, and_code ? AND : IOR,
+ op0, op1, NULL_RTX, !and_code, OPTAB_WIDEN);
+
+ if (!res)
+ goto fail;
+
+ emit_cmp_and_jump_insns (res, and_code ? constm1_rtx : const0_rtx,
+ info->reverse ? EQ : NE,
+ NULL_RTX, GET_MODE (res), and_code ? 1 : 0,
+ block_label (info->reverse
+ ? info->else_bb : info->then_bb));
+ insn = get_last_insn ();
+ mark_jump_label (PATTERN (insn), insn, 0);
+
+ insn = get_insns ();
+ end_sequence ();
+ return insn;
+
+ fail:
+ end_sequence ();
+ return NULL;
+ }
+
+ /* Attempt to reduce amount of exits from superblocks by converting
+ constructions of type (const1 || cond2) into (cond1 | cond2) when
+ profitable.
+
+ TEST_BB is the block with first condition, TEST2_BB second condition,
+ THEN_BB is block to jump into if eighter condition is true, ELSE_BB is
+ the other block. REVERSE1 and REVERSE2 specifies if the condition
+ in TEST_BB or TEST2_BB needs to be reversed. */
+
+ static bool
+ process_double_test_block (basic_block test_bb,
+ /* Basic block test is in */
+ basic_block test2_bb,
+ /* Basic block second test is in */
+ basic_block then_bb,
+ /* Basic block for case both conditions are met */
+ basic_block else_bb,
+ /* Basic block other cases */
+ int reverse1, int reverse2)
+ {
+ struct double_test_info info;
+ rtx cond1, cond2;
+ rtx jump1, jump2;
+ rtx earliest1, earliest2;
+ rtx insn;
+ rtx first, last;
+ rtx test_insn;
+ edge fallthru_edge = FALLTHRU_EDGE (test_bb);
+ edge branch_edge = BRANCH_EDGE (test_bb);
+ edge fallthru2_edge = FALLTHRU_EDGE (test2_bb);
+ edge branch2_edge = BRANCH_EDGE (test2_bb);
+ const char *purpose;
+
+ /* Currently we support no conversions that require no new pseudos. */
+ if (no_new_pseudos)
+ return false;
+
+ info.test1_bb = test_bb;
+ info.test2_bb = test2_bb;
+ info.then_bb = then_bb;
+ info.else_bb = else_bb;
+ info.probability1 =
+ reverse1 ? fallthru_edge->probability : branch_edge->probability;
+ info.probability2 =
+ reverse2 ? fallthru2_edge->probability : branch2_edge->probability;
+ info.probability =
+ (info.probability1 +
+ (REG_BR_PROB_BASE -
+ info.probability1) * info.probability2 / REG_BR_PROB_BASE);
+ info.count1 = reverse1 ? fallthru_edge->count : branch_edge->count;
+ info.count2 = reverse2 ? fallthru2_edge->count : branch2_edge->count;
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, " probability1: %.2f probability2: %.2f overall (%.2f)\n",
+ info.probability1 * 100.0 / REG_BR_PROB_BASE,
+ info.probability2 * 100.0 / REG_BR_PROB_BASE,
+ info.probability * 100.0 / REG_BR_PROB_BASE);
+ }
+
+ /* Ensure that the TEST2 block is inside same superblock. */
+ if (info.probability1 > 2 * REG_BR_PROB_BASE / 3)
+ {
+ purpose = "too high probability of first test.";
+ goto failed;
+ }
+
+ /* If the conditional jump is more than just a conditional jump,
+ then we can not do conversion on this block. */
+ jump1 = test_bb->end;
+ jump2 = test2_bb->end;
+ if (!onlyjump_p (jump1) || !onlyjump_p (jump2)
+ || !any_condjump_p (jump1) || !any_condjump_p (jump2))
+ {
+ purpose = "Not onlyjump.";
+ goto failed;
+ }
+
+ /* If this is not a standard conditional jump, we can't parse it. */
+ cond1 = noce_get_condition (jump1, &earliest1);
+ if (!cond1)
+ {
+ purpose = "Can not get first condition.";
+ goto failed;
+ }
+ cond2 = noce_get_condition (jump2, &earliest2);
+ if (!cond2)
+ {
+ purpose = "Can not get second condition.";
+ goto failed;
+ }
+
+ /* We require test2_bb to contain only loads of registers temporary
+ for test2_bb, so we can be sure that moving these on common path
+ won't cause side effects.
+
+ Limit number of instructions to avoid too expensive computations. */
+
+ if (count_bb_insns (test2_bb) >= BRANCH_COST + 2)
+ {
+ purpose = "Too many instructions in bb2.";
+ goto failed;
+ }
+ if (block_side_effects_p (test2_bb))
+ {
+ purpose = "Side effects in bb2.";
+ goto failed;
+ }
+
+ info.cond1_code = GET_CODE (cond1);
+ info.cond2_code = GET_CODE (cond2);
+ if (reverse1)
+ info.cond1_code = reversed_comparison_code (cond1, jump1);
+ if (reverse2)
+ info.cond2_code = reversed_comparison_code (cond2, jump2);
+ if (info.cond1_code == UNKNOWN || info.cond2_code == UNKNOWN)
+ return false;
+ info.cond1_op0 = XEXP (cond1, 0);
+ info.cond1_op1 = XEXP (cond1, 1);
+ info.cond2_op0 = XEXP (cond2, 0);
+ info.cond2_op1 = XEXP (cond2, 1);
+
+ info.reverse = (fallthru_edge->dest == then_bb
+ || (fallthru_edge->dest == test2_bb
+ && fallthru2_edge->dest == then_bb));
+
+ insn = double_test_ior_and (&info, &test_insn);
+ if (!insn)
+ {
+ purpose = "Failed to match.";
+ goto failed;
+ }
+
+ /* Fix up the flowgraph. */
+ delete_insn (jump1);
+
+ /* Relink insns from test2 blocks, as they may compute temporaries used
+ by test. */
+ first = (GET_CODE (test2_bb->head) == CODE_LABEL
+ ? NEXT_INSN (NEXT_INSN (test2_bb->head))
+ : NEXT_INSN (test2_bb->head));
+ last = PREV_INSN (test2_bb->end);
+
+ emit_insn_after (test_insn, test_bb->end);
+ if (first != NEXT_INSN (last))
+ {
+ squeeze_notes (&first, &last);
+ reorder_insns (first, last, test_bb->end);
+ }
+ emit_insn_after (insn, test_bb->end);
+
+ if (fallthru_edge->dest == then_bb)
+ redirect_edge_succ (branch_edge, else_bb);
+ else if (fallthru_edge->dest == else_bb)
+ redirect_edge_succ (branch_edge, then_bb);
+ else if (fallthru_edge->dest == test2_bb)
+ {
+ if (fallthru2_edge->dest == else_bb)
+ {
+ redirect_edge_succ (fallthru_edge, else_bb);
+ redirect_edge_succ (branch_edge, then_bb);
+ }
+ else if (fallthru2_edge->dest == then_bb)
+ {
+ redirect_edge_succ (fallthru_edge, then_bb);
+ redirect_edge_succ (branch_edge, else_bb);
+ }
+ else
+ abort ();
+ }
+ else
+ abort ();
+
+ if (fallthru_edge->dest == then_bb)
+ {
+ branch_edge->probability = REG_BR_PROB_BASE - info.probability;
+ fallthru_edge->probability = info.probability;
+ branch_edge->count = test_bb->count - info.count1 - info.count2;
+ if (branch_edge->count < 0)
+ branch_edge->count = 0;
+ fallthru_edge->count = info.count1 + info.count2;
+ if (fallthru_edge->count < 0)
+ fallthru_edge->count = 0;
+ }
+ else
+ {
+ fallthru_edge->probability = REG_BR_PROB_BASE - info.probability;
+ branch_edge->probability = info.probability;
+ fallthru_edge->count = test_bb->count - info.count1 - info.count2;
+ if (fallthru_edge->count < 0)
+ fallthru_edge->count = 0;
+ branch_edge->count = info.count1 + info.count2;
+ if (branch_edge->count < 0)
+ branch_edge->count = 0;
+ }
+ update_br_prob_note (test_bb);
+
+ /* The test basic block may define some pseudos used by test2. We do not
+ want to make them appear alive anymore. */
+ test_bb->global_live_at_end = test2_bb->global_live_at_end;
+
+ /* Delete the second test basic block. */
+ if (test2_bb->pred != NULL_EDGE)
+ abort ();
+ if (post_dominators)
+ delete_from_dominance_info (post_dominators, test2_bb);
+ delete_block (test2_bb);
+ num_true_changes++;
+ num_updated_double_test_blocks++;
+ return true;
+
+ failed:
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, " %s", purpose);
+ return false;
+ }
+
+
/* Given a simple IF-THEN or IF-THEN-ELSE block, attempt to convert it
without using conditional execution. Return TRUE if we were
successful at converting the block. */
*************** find_if_header (basic_block test_bb, int
*** 2217,2222 ****
--- 2749,2761 ----
#ifdef IFCVT_INIT_EXTRA_FIELDS
IFCVT_INIT_EXTRA_FIELDS (&ce_info);
#endif
+ /* A loop as we may cascadely update the CFG. */
+ while (find_double_test_block (test_bb, then_edge, else_edge))
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "Conversion succeeded.\n");
+ }
+
if (find_if_block (&ce_info))
goto success;
*************** block_jumps_and_fallthru_p (basic_block
*** 2312,2317 ****
--- 2853,2947 ----
return n_insns;
}
+ /* Look for basic block constelation that result from
+ test1 || test2 type of conditions. */
+ static int
+ find_double_test_block (basic_block test_bb, edge then_edge, edge test2_edge)
+ {
+ int reverse_condition1 = 0;
+
+ /* Try both arms of the condition. */
+ for (reverse_condition1 = 0; reverse_condition1 <= 1; reverse_condition1++)
+ {
+ edge tmp;
+ basic_block then_bb;
+ basic_block test2_bb;
+ basic_block else_bb;
+ edge then2_edge, else_edge;
+ int reverse_condition2 = 0;
+
+ /* The then_edge is falltrought edge on entry. In case we don't
+ reverse condition, we require it to be false. */
+ tmp = then_edge;
+ then_edge = test2_edge;
+ test2_edge = tmp;
+
+ then_bb = then_edge->dest;
+ test2_bb = test2_edge->dest;
+
+ /* The test2 must be single basic block with the conditional jump. */
+ if (!test2_bb->succ || !test2_bb->succ->succ_next
+ || test2_bb->succ->succ_next->succ_next)
+ continue;
+ else_edge = test2_bb->succ;
+ then2_edge = test2_bb->succ->succ_next;
+
+ /* Avoid abnormal edges. */
+ if (then2_edge->flags & EDGE_COMPLEX
+ || else_edge->flags & EDGE_COMPLEX)
+ continue;
+
+ /* Get the fallthru edge to be else one. */
+ if (else_edge->flags & EDGE_FALLTHRU)
+ ;
+ else if (then2_edge->flags & EDGE_FALLTHRU)
+ {
+ tmp = then2_edge;
+ then2_edge = else_edge;
+ else_edge = tmp;
+ }
+ else
+ continue;
+
+ /* Find if some of edges points to the then_bb.
+ Otherwise give up. */
+ if (else_edge->dest == then_bb)
+ {
+ tmp = then2_edge;
+ then2_edge = else_edge;
+ else_edge = tmp;
+ reverse_condition2 = 1;
+ }
+ if (then2_edge->dest != then_bb)
+ continue;
+
+ else_bb = else_edge->dest;
+
+ /* Avoid multiple predecesors in the test2 block, as we are going
+ to remove it. */
+ if (test2_bb->pred->pred_next)
+ continue;
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file,
+ "\nDOUBLE-TEST block found, test1 %d, test2 %d, then %d, else %d\n",
+ test_bb->index, test2_bb->index, then_bb->index,
+ else_bb->index);
+ fprintf (rtl_dump_file,
+ " reverse_condition1:%i, reverse_condition2:%i\n",
+ reverse_condition1, reverse_condition2);
+ }
+ num_possible_double_test_blocks ++;
+ if (process_double_test_block (test_bb, test2_bb, then_bb, else_bb,
+ reverse_condition1, reverse_condition2))
+ return TRUE;
+
+ }
+ return FALSE;
+ }
+
+
/* Determine if a given basic block heads a simple IF-THEN or IF-THEN-ELSE
block. If so, we'll try to convert the insns to not require the branch.
Return TRUE if we were successful at converting the block. */
*************** dead_or_predicable (basic_block test_bb,
*** 3096,3101 ****
--- 3726,3737 ----
if (! apply_change_group ())
return FALSE;
+ if (old_dest)
+ LABEL_NUSES (old_dest) -= 1;
+ if (new_label)
+ LABEL_NUSES (new_label) += 1;
+ JUMP_LABEL (jump) = new_label;
+
if (other_bb != new_dest)
{
if (old_dest)
*************** dead_or_predicable (basic_block test_bb,
*** 3149,3155 ****
return FALSE;
}
! /* Main entry point for all if-conversion. */
void
if_convert (int x_life_data_ok)
--- 3785,3793 ----
return FALSE;
}
! /* Main entry point for all if-conversion.
! When x_life_data_ok is nonzero, update liveness.
! In addition update log links when x_life_data_ok==2. */
void
if_convert (int x_life_data_ok)
*************** if_convert (int x_life_data_ok)
*** 3160,3165 ****
--- 3798,3805 ----
num_possible_if_blocks = 0;
num_updated_if_blocks = 0;
num_true_changes = 0;
+ num_possible_double_test_blocks = 0;
+ num_updated_double_test_blocks = 0;
life_data_ok = (x_life_data_ok != 0);
if (! (* targetm.cannot_modify_jumps_p) ())
*************** if_convert (int x_life_data_ok)
*** 3230,3236 ****
}
update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
PROP_DEATH_NOTES | PROP_SCAN_DEAD_CODE
! | PROP_KILL_DEAD_CODE);
}
/* Write the final stats. */
--- 3870,3877 ----
}
update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
PROP_DEATH_NOTES | PROP_SCAN_DEAD_CODE
! | PROP_KILL_DEAD_CODE
! | (life_data_ok ? PROP_LOG_LINKS : 0));
}
/* Write the final stats. */
*************** if_convert (int x_life_data_ok)
*** 3245,3250 ****
--- 3886,3900 ----
fprintf (rtl_dump_file,
"%d true changes made.\n\n\n",
num_true_changes);
+ }
+ if (rtl_dump_file && num_possible_double_test_blocks > 0)
+ {
+ fprintf (rtl_dump_file,
+ "\n%d possible double test blocks searched.\n",
+ num_possible_double_test_blocks);
+ fprintf (rtl_dump_file,
+ "%d double test blocks converted.\n",
+ num_updated_double_test_blocks);
}
#ifdef ENABLE_CHECKING
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.845
diff -c -3 -p -r1.845 toplev.c
*** toplev.c 21 Nov 2003 04:05:05 -0000 1.845
--- toplev.c 23 Nov 2003 15:58:17 -0000
*************** enum dump_file_index
*** 267,274 ****
DFI_web,
DFI_cse2,
DFI_life,
- DFI_combine,
DFI_ce2,
DFI_regmove,
DFI_sched,
DFI_lreg,
--- 267,275 ----
DFI_web,
DFI_cse2,
DFI_life,
DFI_ce2,
+ DFI_combine,
+ DFI_ce3,
DFI_regmove,
DFI_sched,
DFI_lreg,
*************** enum dump_file_index
*** 278,284 ****
DFI_peephole2,
DFI_rnreg,
DFI_bbro,
! DFI_ce3,
DFI_branch_target_load,
DFI_sched2,
DFI_stack,
--- 279,285 ----
DFI_peephole2,
DFI_rnreg,
DFI_bbro,
! DFI_ce4,
DFI_branch_target_load,
DFI_sched2,
DFI_stack,
*************** static struct dump_file_info dump_file[D
*** 318,325 ****
{ "web", 'Z', 0, 0, 0 },
{ "cse2", 't', 1, 0, 0 },
{ "life", 'f', 1, 0, 0 }, /* Yes, duplicate enable switch. */
- { "combine", 'c', 1, 0, 0 },
{ "ce2", 'C', 1, 0, 0 },
{ "regmove", 'N', 1, 0, 0 },
{ "sched", 'S', 1, 0, 0 },
{ "lreg", 'l', 1, 0, 0 },
--- 319,327 ----
{ "web", 'Z', 0, 0, 0 },
{ "cse2", 't', 1, 0, 0 },
{ "life", 'f', 1, 0, 0 }, /* Yes, duplicate enable switch. */
{ "ce2", 'C', 1, 0, 0 },
+ { "combine", 'c', 1, 0, 0 },
+ { "ce3", 'C', 1, 0, 0 }, /* Yes, duplicate enable switch. */
{ "regmove", 'N', 1, 0, 0 },
{ "sched", 'S', 1, 0, 0 },
{ "lreg", 'l', 1, 0, 0 },
*************** static struct dump_file_info dump_file[D
*** 329,335 ****
{ "peephole2", 'z', 1, 0, 0 },
{ "rnreg", 'n', 1, 0, 0 },
{ "bbro", 'B', 1, 0, 0 },
! { "ce3", 'E', 1, 0, 0 },
{ "btl", 'd', 1, 0, 0 }, /* Yes, duplicate enable switch. */
{ "sched2", 'R', 1, 0, 0 },
{ "stack", 'k', 1, 0, 0 },
--- 331,337 ----
{ "peephole2", 'z', 1, 0, 0 },
{ "rnreg", 'n', 1, 0, 0 },
{ "bbro", 'B', 1, 0, 0 },
! { "ce4", 'E', 1, 0, 0 },
{ "btl", 'd', 1, 0, 0 }, /* Yes, duplicate enable switch. */
{ "sched2", 'R', 1, 0, 0 },
{ "stack", 'k', 1, 0, 0 },
*************** rest_of_handle_if_conversion (tree decl,
*** 2424,2442 ****
/* Rerun if-conversion, as combine may have simplified things enough
to now meet sequence length restrictions. */
static void
! rest_of_handle_if_after_combine (tree decl, rtx insns)
{
timevar_push (TV_IFCVT);
open_dump_file (DFI_ce2, decl);
no_new_pseudos = 0;
! if_convert (1);
no_new_pseudos = 1;
close_dump_file (DFI_ce2, print_rtl_with_bb, insns);
timevar_pop (TV_IFCVT);
}
static void
rest_of_handle_web (tree decl, rtx insns)
{
--- 2426,2460 ----
/* Rerun if-conversion, as combine may have simplified things enough
to now meet sequence length restrictions. */
static void
! rest_of_handle_if_before_combine (tree decl, rtx insns)
{
timevar_push (TV_IFCVT);
open_dump_file (DFI_ce2, decl);
no_new_pseudos = 0;
! if_convert (2);
no_new_pseudos = 1;
close_dump_file (DFI_ce2, print_rtl_with_bb, insns);
timevar_pop (TV_IFCVT);
}
+ /* Rerun if-conversion, as combine may have simplified things enough
+ to now meet sequence length restrictions. */
+ static void
+ rest_of_handle_if_after_combine (tree decl, rtx insns)
+ {
+ timevar_push (TV_IFCVT);
+ open_dump_file (DFI_ce3, decl);
+
+ no_new_pseudos = 0;
+ if_convert (1);
+ no_new_pseudos = 1;
+
+ close_dump_file (DFI_ce3, print_rtl_with_bb, insns);
+ timevar_pop (TV_IFCVT);
+ }
+
static void
rest_of_handle_web (tree decl, rtx insns)
{
*************** rest_of_compilation (tree decl)
*** 3318,3323 ****
--- 3336,3344 ----
rest_of_handle_life (decl, insns);
+ if (flag_if_conversion)
+ rest_of_handle_if_before_combine (decl, insns);
+
if (optimize > 0)
rest_of_handle_combine (decl, insns);
*************** rest_of_compilation (tree decl)
*** 3480,3490 ****
if (flag_if_conversion2)
{
timevar_push (TV_IFCVT2);
! open_dump_file (DFI_ce3, decl);
if_convert (1);
! close_dump_file (DFI_ce3, print_rtl_with_bb, insns);
timevar_pop (TV_IFCVT2);
}
--- 3501,3511 ----
if (flag_if_conversion2)
{
timevar_push (TV_IFCVT2);
! open_dump_file (DFI_ce4, decl);
if_convert (1);
! close_dump_file (DFI_ce4, print_rtl_with_bb, insns);
timevar_pop (TV_IFCVT2);
}