Add a new combine pass
Richard Sandiford
richard.sandiford@arm.com
Thu Dec 5 10:17:00 GMT 2019
Here's a revised version based on the feedback so far. Changes in v2:
- Don't move instructions that set or use allocatable hard registers.
- Check legitimate_combined_insn
- Check cannot_copy_insn_p when keeping the original insn in parallel
- Disable the pass if HAVE_cc0
I compared v1 and v2 in the same way as before and the new restrictions
didn't make much difference (as hoped). Also bootstrapped & regression-
tested on aarch64-linux-gnu and x86_64-linux-gnu with run-combine
defaulting to 6 (unlike in the patch, where the new pass is disabled
by default).
Thanks,
Richard
2019-12-05 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* Makefile.in (OBJS): Add combine2.o
* params.opt (--param=run-combine): New option.
* doc/invoke.texi: Document it.
* tree-pass.h (make_pass_combine2_before): Declare.
(make_pass_combine2_after): Likewise.
* passes.def: Add them.
* timevar.def (TV_COMBINE2): New timevar.
* cfgrtl.h (update_cfg_for_uncondjump): Declare.
* combine.c (update_cfg_for_uncondjump): Move to...
* cfgrtl.c (update_cfg_for_uncondjump): ...here.
* simplify-rtx.c (simplify_truncation): Handle comparisons.
* recog.h (validate_simplify_replace_rtx): Declare.
* recog.c (validate_simplify_replace_rtx_1): New function.
(validate_simplify_replace_rtx_uses): Likewise.
(validate_simplify_replace_rtx): Likewise.
* combine2.c: New file.
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in 2019-12-03 18:06:09.885650522 +0000
+++ gcc/Makefile.in 2019-12-05 10:11:50.637631870 +0000
@@ -1261,6 +1261,7 @@ OBJS = \
cgraphunit.o \
cgraphclones.o \
combine.o \
+ combine2.o \
combine-stack-adj.o \
compare-elim.o \
context.o \
Index: gcc/params.opt
===================================================================
--- gcc/params.opt 2019-12-02 17:38:20.072423250 +0000
+++ gcc/params.opt 2019-12-05 10:11:50.653631761 +0000
@@ -760,6 +760,10 @@ Use internal function id in profile look
Common Joined UInteger Var(param_rpo_vn_max_loop_depth) Init(7) IntegerRange(2, 65536) Param
Maximum depth of a loop nest to fully value-number optimistically.
+-param=run-combine=
+Target Joined UInteger Var(param_run_combine) Init(2) IntegerRange(0, 7) Param
+Choose which of the 3 available combine passes to run: bit 1 for the main combine pass, bit 0 for an earlier variant of the combine pass, and bit 2 for a later variant of the combine pass.
+
-param=sccvn-max-alias-queries-per-access=
Common Joined UInteger Var(param_sccvn_max_alias_queries_per_access) Init(1000) Param
Maximum number of disambiguations to perform per memory access.
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi 2019-12-02 17:38:18.364434903 +0000
+++ gcc/doc/invoke.texi 2019-12-05 10:11:50.653631761 +0000
@@ -11797,6 +11797,11 @@ in combiner for a pseudo register as las
@item max-combine-insns
The maximum number of instructions the RTL combiner tries to combine.
+@item run-combine
+Choose which of the 3 available combine passes to run: bit 1 for the main
+combine pass, bit 0 for an earlier variant of the combine pass, and bit 2
+for a later variant of the combine pass.
+
@item integer-share-limit
Small integer constants can use a shared data structure, reducing the
compiler's memory usage and increasing its speed. This sets the maximum
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h 2019-11-19 16:25:28.000000000 +0000
+++ gcc/tree-pass.h 2019-12-05 10:11:50.657631731 +0000
@@ -562,7 +562,9 @@ extern rtl_opt_pass *make_pass_reginfo_i
extern rtl_opt_pass *make_pass_inc_dec (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_ptr_mod (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_initialize_regs (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_combine2_before (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_combine (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_combine2_after (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_if_after_combine (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_jump_after_combine (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_ree (gcc::context *ctxt);
Index: gcc/passes.def
===================================================================
--- gcc/passes.def 2019-11-19 16:25:28.000000000 +0000
+++ gcc/passes.def 2019-12-05 10:11:50.653631761 +0000
@@ -437,7 +437,9 @@ along with GCC; see the file COPYING3.
NEXT_PASS (pass_inc_dec);
NEXT_PASS (pass_initialize_regs);
NEXT_PASS (pass_ud_rtl_dce);
+ NEXT_PASS (pass_combine2_before);
NEXT_PASS (pass_combine);
+ NEXT_PASS (pass_combine2_after);
NEXT_PASS (pass_if_after_combine);
NEXT_PASS (pass_jump_after_combine);
NEXT_PASS (pass_partition_blocks);
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def 2019-11-19 16:25:28.000000000 +0000
+++ gcc/timevar.def 2019-12-05 10:11:50.657631731 +0000
@@ -251,6 +251,7 @@ DEFTIMEVAR (TV_AUTO_INC_DEC , "
DEFTIMEVAR (TV_CSE2 , "CSE 2")
DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction")
DEFTIMEVAR (TV_COMBINE , "combiner")
+DEFTIMEVAR (TV_COMBINE2 , "second combiner")
DEFTIMEVAR (TV_IFCVT , "if-conversion")
DEFTIMEVAR (TV_MODE_SWITCH , "mode switching")
DEFTIMEVAR (TV_SMS , "sms modulo scheduling")
Index: gcc/cfgrtl.h
===================================================================
--- gcc/cfgrtl.h 2019-11-19 16:25:28.000000000 +0000
+++ gcc/cfgrtl.h 2019-12-05 10:11:50.641631840 +0000
@@ -47,6 +47,7 @@ extern void fixup_partitions (void);
extern bool purge_dead_edges (basic_block);
extern bool purge_all_dead_edges (void);
extern bool fixup_abnormal_edges (void);
+extern void update_cfg_for_uncondjump (rtx_insn *);
extern rtx_insn *unlink_insn_chain (rtx_insn *, rtx_insn *);
extern void relink_block_chain (bool);
extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *);
Index: gcc/combine.c
===================================================================
--- gcc/combine.c 2019-11-29 13:04:14.458669072 +0000
+++ gcc/combine.c 2019-12-05 10:11:50.645631815 +0000
@@ -2530,42 +2530,6 @@ reg_subword_p (rtx x, rtx reg)
&& GET_MODE_CLASS (GET_MODE (x)) == MODE_INT;
}
-/* Delete the unconditional jump INSN and adjust the CFG correspondingly.
- Note that the INSN should be deleted *after* removing dead edges, so
- that the kept edge is the fallthrough edge for a (set (pc) (pc))
- but not for a (set (pc) (label_ref FOO)). */
-
-static void
-update_cfg_for_uncondjump (rtx_insn *insn)
-{
- basic_block bb = BLOCK_FOR_INSN (insn);
- gcc_assert (BB_END (bb) == insn);
-
- purge_dead_edges (bb);
-
- delete_insn (insn);
- if (EDGE_COUNT (bb->succs) == 1)
- {
- rtx_insn *insn;
-
- single_succ_edge (bb)->flags |= EDGE_FALLTHRU;
-
- /* Remove barriers from the footer if there are any. */
- for (insn = BB_FOOTER (bb); insn; insn = NEXT_INSN (insn))
- if (BARRIER_P (insn))
- {
- if (PREV_INSN (insn))
- SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
- else
- BB_FOOTER (bb) = NEXT_INSN (insn);
- if (NEXT_INSN (insn))
- SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
- }
- else if (LABEL_P (insn))
- break;
- }
-}
-
/* Return whether PAT is a PARALLEL of exactly N register SETs followed
by an arbitrary number of CLOBBERs. */
static bool
@@ -15098,7 +15062,10 @@ const pass_data pass_data_combine =
{}
/* opt_pass methods: */
- virtual bool gate (function *) { return (optimize > 0); }
+ virtual bool gate (function *)
+ {
+ return optimize > 0 && (param_run_combine & 2) != 0;
+ }
virtual unsigned int execute (function *)
{
return rest_of_handle_combine ();
Index: gcc/cfgrtl.c
===================================================================
--- gcc/cfgrtl.c 2019-11-19 16:25:28.000000000 +0000
+++ gcc/cfgrtl.c 2019-12-05 10:11:50.641631840 +0000
@@ -3409,6 +3409,42 @@ fixup_abnormal_edges (void)
return inserted;
}
+/* Delete the unconditional jump INSN and adjust the CFG correspondingly.
+ Note that the INSN should be deleted *after* removing dead edges, so
+ that the kept edge is the fallthrough edge for a (set (pc) (pc))
+ but not for a (set (pc) (label_ref FOO)). */
+
+void
+update_cfg_for_uncondjump (rtx_insn *insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ gcc_assert (BB_END (bb) == insn);
+
+ purge_dead_edges (bb);
+
+ delete_insn (insn);
+ if (EDGE_COUNT (bb->succs) == 1)
+ {
+ rtx_insn *insn;
+
+ single_succ_edge (bb)->flags |= EDGE_FALLTHRU;
+
+ /* Remove barriers from the footer if there are any. */
+ for (insn = BB_FOOTER (bb); insn; insn = NEXT_INSN (insn))
+ if (BARRIER_P (insn))
+ {
+ if (PREV_INSN (insn))
+ SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
+ else
+ BB_FOOTER (bb) = NEXT_INSN (insn);
+ if (NEXT_INSN (insn))
+ SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
+ }
+ else if (LABEL_P (insn))
+ break;
+ }
+}
+
/* Cut the insns from FIRST to LAST out of the insns stream. */
rtx_insn *
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c 2019-11-19 16:31:13.504240251 +0000
+++ gcc/simplify-rtx.c 2019-12-05 10:11:50.657631731 +0000
@@ -851,6 +851,12 @@ simplify_truncation (machine_mode mode,
&& trunc_int_for_mode (INTVAL (XEXP (op, 1)), mode) == -1)
return constm1_rtx;
+ /* (truncate:A (cmp X Y)) is (cmp:A X Y): we can compute the result
+ in a narrower mode if useful. */
+ if (COMPARISON_P (op))
+ return simplify_gen_relational (GET_CODE (op), mode, VOIDmode,
+ XEXP (op, 0), XEXP (op, 1));
+
return NULL_RTX;
}
Index: gcc/recog.h
===================================================================
--- gcc/recog.h 2019-11-26 22:04:57.419370912 +0000
+++ gcc/recog.h 2019-12-05 10:11:50.657631731 +0000
@@ -111,6 +111,7 @@ extern int validate_replace_rtx_part_nos
extern void validate_replace_rtx_group (rtx, rtx, rtx_insn *);
extern void validate_replace_src_group (rtx, rtx, rtx_insn *);
extern bool validate_simplify_insn (rtx_insn *insn);
+extern bool validate_simplify_replace_rtx (rtx_insn *, rtx *, rtx, rtx);
extern int num_changes_pending (void);
extern bool reg_fits_class_p (const_rtx, reg_class_t, int, machine_mode);
Index: gcc/recog.c
===================================================================
--- gcc/recog.c 2019-11-29 13:04:13.978672241 +0000
+++ gcc/recog.c 2019-12-05 10:11:50.657631731 +0000
@@ -922,6 +922,226 @@ validate_simplify_insn (rtx_insn *insn)
}
return ((num_changes_pending () > 0) && (apply_change_group () > 0));
}
+
+/* A subroutine of validate_simplify_replace_rtx. Apply the replacement
+ described by R to LOC. Return true on success; leave the caller
+ to clean up on failure. */
+
+static bool
+validate_simplify_replace_rtx_1 (validate_replace_src_data &r, rtx *loc)
+{
+ rtx x = *loc;
+ enum rtx_code code = GET_CODE (x);
+ machine_mode mode = GET_MODE (x);
+
+ if (rtx_equal_p (x, r.from))
+ {
+ validate_unshare_change (r.insn, loc, r.to, 1);
+ return true;
+ }
+
+ /* Recursively apply the substitution and see if we can simplify
+ the result. This specifically shouldn't use simplify_gen_*,
+ since we want to avoid generating new expressions where possible. */
+ int old_num_changes = num_validated_changes ();
+ rtx newx = NULL_RTX;
+ bool recurse_p = false;
+ switch (GET_RTX_CLASS (code))
+ {
+ case RTX_UNARY:
+ {
+ machine_mode op0_mode = GET_MODE (XEXP (x, 0));
+ if (!validate_simplify_replace_rtx_1 (r, &XEXP (x, 0)))
+ return false;
+
+ newx = simplify_unary_operation (code, mode, XEXP (x, 0), op0_mode);
+ break;
+ }
+
+ case RTX_BIN_ARITH:
+ case RTX_COMM_ARITH:
+ {
+ if (!validate_simplify_replace_rtx_1 (r, &XEXP (x, 0))
+ || !validate_simplify_replace_rtx_1 (r, &XEXP (x, 1)))
+ return false;
+
+ newx = simplify_binary_operation (code, mode,
+ XEXP (x, 0), XEXP (x, 1));
+ break;
+ }
+
+ case RTX_COMPARE:
+ case RTX_COMM_COMPARE:
+ {
+ machine_mode op_mode = (GET_MODE (XEXP (x, 0)) != VOIDmode
+ ? GET_MODE (XEXP (x, 0))
+ : GET_MODE (XEXP (x, 1)));
+ if (!validate_simplify_replace_rtx_1 (r, &XEXP (x, 0))
+ || !validate_simplify_replace_rtx_1 (r, &XEXP (x, 1)))
+ return false;
+
+ newx = simplify_relational_operation (code, mode, op_mode,
+ XEXP (x, 0), XEXP (x, 1));
+ break;
+ }
+
+ case RTX_TERNARY:
+ case RTX_BITFIELD_OPS:
+ {
+ machine_mode op0_mode = GET_MODE (XEXP (x, 0));
+ if (!validate_simplify_replace_rtx_1 (r, &XEXP (x, 0))
+ || !validate_simplify_replace_rtx_1 (r, &XEXP (x, 1))
+ || !validate_simplify_replace_rtx_1 (r, &XEXP (x, 2)))
+ return false;
+
+ newx = simplify_ternary_operation (code, mode, op0_mode,
+ XEXP (x, 0), XEXP (x, 1),
+ XEXP (x, 2));
+ break;
+ }
+
+ case RTX_EXTRA:
+ if (code == SUBREG)
+ {
+ machine_mode inner_mode = GET_MODE (SUBREG_REG (x));
+ if (!validate_simplify_replace_rtx_1 (r, &SUBREG_REG (x)))
+ return false;
+
+ rtx inner = SUBREG_REG (x);
+ newx = simplify_subreg (mode, inner, inner_mode, SUBREG_BYTE (x));
+ /* Reject the same cases that simplify_gen_subreg would. */
+ if (!newx
+ && (GET_CODE (inner) == SUBREG
+ || GET_CODE (inner) == CONCAT
+ || GET_MODE (inner) == VOIDmode
+ || !validate_subreg (mode, inner_mode,
+ inner, SUBREG_BYTE (x))))
+ return false;
+ break;
+ }
+ else
+ recurse_p = true;
+ break;
+
+ case RTX_OBJ:
+ if (code == LO_SUM)
+ {
+ if (!validate_simplify_replace_rtx_1 (r, &XEXP (x, 0))
+ || !validate_simplify_replace_rtx_1 (r, &XEXP (x, 1)))
+ return false;
+
+ /* (lo_sum (high x) y) -> y where x and y have the same base. */
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ if (GET_CODE (op0) == HIGH)
+ {
+ rtx base0, base1, offset0, offset1;
+ split_const (XEXP (op0, 0), &base0, &offset0);
+ split_const (op1, &base1, &offset1);
+ if (rtx_equal_p (base0, base1))
+ newx = op1;
+ }
+ }
+ else if (code == REG)
+ {
+ if (REG_P (r.from) && reg_overlap_mentioned_p (x, r.from))
+ return false;
+ }
+ else
+ recurse_p = true;
+ break;
+
+ case RTX_CONST_OBJ:
+ break;
+
+ case RTX_AUTOINC:
+ if (reg_overlap_mentioned_p (XEXP (x, 0), r.from))
+ return false;
+ recurse_p = true;
+ break;
+
+ case RTX_MATCH:
+ case RTX_INSN:
+ gcc_unreachable ();
+ }
+
+ if (recurse_p)
+ {
+ const char *fmt = GET_RTX_FORMAT (code);
+ for (int i = 0; fmt[i]; i++)
+ switch (fmt[i])
+ {
+ case 'E':
+ for (int j = 0; j < XVECLEN (x, i); j++)
+ if (!validate_simplify_replace_rtx_1 (r, &XVECEXP (x, i, j)))
+ return false;
+ break;
+
+ case 'e':
+ if (XEXP (x, i)
+ && !validate_simplify_replace_rtx_1 (r, &XEXP (x, i)))
+ return false;
+ break;
+ }
+ }
+
+ if (newx && !rtx_equal_p (x, newx))
+ {
+ /* There's no longer any point unsharing the substitutions made
+ for subexpressions, since we'll just copy this one instead. */
+ for (int i = old_num_changes; i < num_changes; ++i)
+ changes[i].unshare = false;
+ validate_unshare_change (r.insn, loc, newx, 1);
+ }
+
+ return true;
+}
+
+/* A note_uses callback for validate_simplify_replace_rtx.
+ DATA points to a validate_replace_src_data object. */
+
+static void
+validate_simplify_replace_rtx_uses (rtx *loc, void *data)
+{
+ validate_replace_src_data &r = *(validate_replace_src_data *) data;
+ if (r.insn && !validate_simplify_replace_rtx_1 (r, loc))
+ r.insn = NULL;
+}
+
+/* Try to perform the equivalent of:
+
+ newx = simplify_replace_rtx (*loc, OLD_RTX, NEW_RTX);
+ validate_change (INSN, LOC, newx, 1);
+
+ but without generating as much garbage rtl when the resulting
+ pattern doesn't match.
+
+ Return true if we were able to replace all uses of OLD_RTX in *LOC
+ and if the result conforms to general rtx rules (e.g. for whether
+ subregs are meaningful).
+
+ When returning true, add all replacements to the current validation group,
+ leaving the caller to test it in the normal way. Leave both *LOC and the
+ validation group unchanged on failure. */
+
+bool
+validate_simplify_replace_rtx (rtx_insn *insn, rtx *loc,
+ rtx old_rtx, rtx new_rtx)
+{
+ validate_replace_src_data r;
+ r.from = old_rtx;
+ r.to = new_rtx;
+ r.insn = insn;
+
+ unsigned int num_changes = num_validated_changes ();
+ note_uses (loc, validate_simplify_replace_rtx_uses, &r);
+ if (!r.insn)
+ {
+ cancel_changes (num_changes);
+ return false;
+ }
+ return true;
+}
/* Return 1 if OP is a valid general operand for machine mode MODE.
This is either a register reference, a memory reference,
Index: gcc/combine2.c
===================================================================
--- /dev/null 2019-09-17 11:41:18.176664108 +0100
+++ gcc/combine2.c 2019-12-05 10:11:50.645631815 +0000
@@ -0,0 +1,1658 @@
+/* Combine instructions
+ Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "df.h"
+#include "tree-pass.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "print-rtl.h"
+#include "rtl-iter.h"
+#include "predict.h"
+#include "cfgcleanup.h"
+#include "cfghooks.h"
+#include "cfgrtl.h"
+#include "alias.h"
+#include "valtrack.h"
+#include "target.h"
+
+/* This pass tries to combine instructions in the following ways:
+
+ (1) If we have two dependent instructions:
+
+ I1: (set DEST1 SRC1)
+ I2: (...DEST1...)
+
+ and I2 is the only user of DEST1, the pass tries to combine them into:
+
+ I2: (...SRC1...)
+
+ (2) If we have two dependent instructions:
+
+ I1: (set DEST1 SRC1)
+ I2: (...DEST1...)
+
+ the pass tries to combine them into:
+
+ I2: (parallel [(set DEST1 SRC1) (...SRC1...)])
+
+ or:
+
+ I2: (parallel [(...SRC1...) (set DEST1 SRC1)])
+
+ (3) If we have two independent instructions:
+
+ I1: (set DEST1 SRC1)
+ I2: (set DEST2 SRC2)
+
+ that read from memory or from the same register, the pass tries to
+ combine them into:
+
+ I2: (parallel [(set DEST1 SRC1) (set DEST2 SRC2)])
+
+ or:
+
+ I2: (parallel [(set DEST2 SRC2) (set DEST1 SRC1)])
+
+ If the combined form is a valid instruction, the pass tries to find a
+ place between I1 and I2 inclusive for the new instruction. If there
+ are multiple valid locations, it tries to pick the best one by taking
+ the effect on register pressure into account.
+
+ If a combination succeeds and produces a single set, the pass tries to
+ combine the new form with earlier or later instructions.
+
+ The pass currently optimizes each basic block separately. It walks
+ the instructions in reverse order, building up live ranges for registers
+ and memory. It then uses these live ranges to look for possible
+ combination opportunities and to decide where the combined instructions
+ could be placed.
+
+ The pass represents positions in the block using point numbers,
+ with higher numbers indicating earlier instructions. The numbering
+ scheme is that:
+
+ - the end of the current instruction sequence has an even base point B.
+
+ - instructions initially have odd-numbered points B + 1, B + 3, etc.
+ with B + 1 being the final instruction in the sequence.
+
+ - even points after B represent gaps between instructions where combined
+ instructions could be placed.
+
+ Thus even points initially represent no instructions and odd points
+ initially represent single instructions. However, when picking a
+ place for a combined instruction, the pass may choose somewhere
+ inbetween the original two instructions, so that over time a point
+ may come to represent several instructions. When this happens,
+ the pass maintains the invariant that all instructions with the same
+ point number are independent of each other and thus can be treated as
+ acting in parallel (or as acting in any arbitrary sequence).
+
+ TODOs:
+
+ - Handle 3-instruction combinations, and possibly more.
+
+ - Handle existing clobbers more efficiently. At the moment we can't
+ move an instruction that clobbers R across another instruction that
+ clobbers R.
+
+ - Allow hard register clobbers to be added, like combine does.
+
+ - Perhaps work on EBBs, or SESE regions. */
+
+namespace {
+
+/* The number of explicit uses to record in a live range. */
+const unsigned int NUM_RANGE_USERS = 4;
+
+/* The maximum number of instructions that we can combine at once. */
+const unsigned int MAX_COMBINE_INSNS = 2;
+
+/* A fake cost for instructions that we haven't costed yet. */
+const unsigned int UNKNOWN_COST = ~0U;
+
+class combine2
+{
+public:
+ combine2 (function *);
+ ~combine2 ();
+
+ void execute ();
+
+private:
+ struct insn_info_rec;
+
+ /* Describes the live range of a register or of memory. For simplicity,
+ we treat memory as a single entity.
+
+ If we had a fully-accurate live range, updating it to account for a
+ moved instruction would be a linear-time operation. Doing this for
+ each combination would then make the pass quadratic. We therefore
+ just maintain a list of NUM_RANGE_USERS use insns and use simple,
+ conservatively-correct behavior for the rest. */
+ struct live_range_rec
+ {
+ /* Which instruction provides the dominating definition, or null if
+ we don't know yet. */
+ insn_info_rec *producer;
+
+ /* A selection of instructions that use the resource, in program order. */
+ insn_info_rec *users[NUM_RANGE_USERS];
+
+ /* An inclusive range of points that covers instructions not mentioned
+ in USERS. Both values are zero if there are no such instructions.
+
+ Once we've included a use U at point P in this range, we continue
+ to assume that some kind of use exists at P whatever happens to U
+ afterwards. */
+ unsigned int first_extra_use;
+ unsigned int last_extra_use;
+
+ /* The register number this range describes, or INVALID_REGNUM
+ for memory. */
+ unsigned int regno;
+
+ /* Forms a linked list of ranges for the same resource, in program
+ order. */
+ live_range_rec *prev_range;
+ live_range_rec *next_range;
+ };
+
+ /* Pass-specific information about an instruction. */
+ struct insn_info_rec
+ {
+ /* The instruction itself. */
+ rtx_insn *insn;
+
+ /* A null-terminated list of live ranges for the things that this
+ instruction defines. */
+ live_range_rec **defs;
+
+ /* A null-terminated list of live ranges for the things that this
+ instruction uses. */
+ live_range_rec **uses;
+
+ /* The point at which the instruction appears. */
+ unsigned int point;
+
+ /* The cost of the instruction, or UNKNOWN_COST if we haven't
+ measured it yet. */
+ unsigned int cost;
+ };
+
+ /* Describes one attempt to combine instructions. */
+ struct combination_attempt_rec
+ {
+ /* The instruction that we're currently trying to optimize.
+ If the combination succeeds, we'll use this insn_info_rec
+ to describe the new instruction. */
+ insn_info_rec *new_home;
+
+ /* The instructions we're combining, in program order. */
+ insn_info_rec *sequence[MAX_COMBINE_INSNS];
+
+ /* If we're substituting SEQUENCE[0] into SEQUENCE[1], this is the
+ live range that describes the substituted register. */
+ live_range_rec *def_use_range;
+
+ /* The earliest and latest points at which we could insert the
+ combined instruction. */
+ unsigned int earliest_point;
+ unsigned int latest_point;
+
+ /* The cost of the new instruction, once we have a successful match. */
+ unsigned int new_cost;
+ };
+
+ /* Pass-specific information about a register. */
+ struct reg_info_rec
+ {
+ /* The live range associated with the last reference to the register. */
+ live_range_rec *range;
+
+ /* The point at which the last reference occurred. */
+ unsigned int next_ref;
+
+ /* True if the register is currently live. We record this here rather
+ than in a separate bitmap because (a) there's a natural hole for
+ it on LP64 hosts and (b) we only refer to it when updating the
+ other fields, and so recording it here should give better locality. */
+ unsigned int live_p : 1;
+ };
+
+ live_range_rec *new_live_range (unsigned int, live_range_rec *);
+ live_range_rec *reg_live_range (unsigned int);
+ live_range_rec *mem_live_range ();
+ bool add_range_use (live_range_rec *, insn_info_rec *);
+ void remove_range_use (live_range_rec *, insn_info_rec *);
+ bool has_single_use_p (live_range_rec *);
+ bool known_last_use_p (live_range_rec *, insn_info_rec *);
+ unsigned int find_earliest_point (insn_info_rec *, insn_info_rec *);
+ unsigned int find_latest_point (insn_info_rec *, insn_info_rec *);
+ bool start_combination (combination_attempt_rec &, insn_info_rec *,
+ insn_info_rec *, live_range_rec * = NULL);
+ bool verify_combination (combination_attempt_rec &);
+ int estimate_reg_pressure_delta (insn_info_rec *);
+ void commit_combination (combination_attempt_rec &, bool);
+ bool try_parallel_sets (combination_attempt_rec &, rtx, rtx);
+ bool try_parallelize_insns (combination_attempt_rec &);
+ bool try_combine_def_use_1 (combination_attempt_rec &, rtx, rtx, bool);
+ bool try_combine_def_use (combination_attempt_rec &, rtx, rtx);
+ bool try_combine_two_uses (combination_attempt_rec &);
+ bool try_combine (insn_info_rec *, rtx, unsigned int);
+ bool optimize_insn (insn_info_rec *);
+ void record_defs (insn_info_rec *);
+ void record_reg_use (insn_info_rec *, df_ref);
+ void record_uses (insn_info_rec *);
+ void process_insn (insn_info_rec *);
+ void start_sequence ();
+
+ /* The function we're optimizing. */
+ function *m_fn;
+
+ /* The highest pseudo register number plus one. */
+ unsigned int m_num_regs;
+
+ /* The current basic block. */
+ basic_block m_bb;
+
+ /* True if we should optimize the current basic block for speed. */
+ bool m_optimize_for_speed_p;
+
+ /* The point number to allocate to the next instruction we visit
+ in the backward traversal. */
+ unsigned int m_point;
+
+ /* The point number corresponding to the end of the current
+ instruction sequence, i.e. the lowest point number about which
+ we still have valid information. */
+ unsigned int m_end_of_sequence;
+
+ /* The point number corresponding to the end of the current basic block.
+ This is the same as M_END_OF_SEQUENCE when processing the last
+ instruction sequence in a basic block. */
+ unsigned int m_end_of_bb;
+
+ /* The memory live range, or null if we haven't yet found a memory
+ reference in the current instruction sequence. */
+ live_range_rec *m_mem_range;
+
+ /* Gives information about each register. We track both hard and
+ pseudo registers. */
+ auto_vec<reg_info_rec> m_reg_info;
+
+ /* A bitmap of registers whose entry in m_reg_info is valid. */
+ auto_sbitmap m_valid_regs;
+
+ /* If nonnull, an unused 2-element PARALLEL that we can use to test
+ instruction combinations. */
+ rtx m_spare_parallel;
+
+ /* A bitmap of instructions that we've already tried to combine with. */
+ auto_bitmap m_tried_insns;
+
+ /* A temporary bitmap used to hold register numbers. */
+ auto_bitmap m_true_deps;
+
+ /* An obstack used for allocating insn_info_recs and for building
+ up their lists of definitions and uses. */
+ obstack m_insn_obstack;
+
+ /* An obstack used for allocating live_range_recs. */
+ obstack m_range_obstack;
+
+ /* Start-of-object pointers for the two obstacks. */
+ char *m_insn_obstack_start;
+ char *m_range_obstack_start;
+
+ /* A list of instructions that we've optimized and whose new forms
+ change the cfg. */
+ auto_vec<rtx_insn *> m_cfg_altering_insns;
+
+ /* The INSN_UIDs of all instructions in M_CFG_ALTERING_INSNS. */
+ auto_bitmap m_cfg_altering_insn_ids;
+
+ /* We can insert new instructions at point P * 2 by inserting them
+ after M_POINTS[P - M_END_OF_SEQUENCE / 2]. We can insert new
+ instructions at point P * 2 + 1 by inserting them before
+ M_POINTS[P - M_END_OF_SEQUENCE / 2]. */
+ auto_vec<rtx_insn *, 256> m_points;
+};
+
+combine2::combine2 (function *fn)
+ : m_fn (fn),
+ m_num_regs (max_reg_num ()),
+ m_bb (NULL),
+ m_optimize_for_speed_p (false),
+ m_point (2),
+ m_end_of_sequence (m_point),
+ m_end_of_bb (m_point),
+ m_mem_range (NULL),
+ m_reg_info (m_num_regs),
+ m_valid_regs (m_num_regs),
+ m_spare_parallel (NULL_RTX)
+{
+ gcc_obstack_init (&m_insn_obstack);
+ gcc_obstack_init (&m_range_obstack);
+ m_reg_info.quick_grow (m_num_regs);
+ bitmap_clear (m_valid_regs);
+ m_insn_obstack_start = XOBNEWVAR (&m_insn_obstack, char, 0);
+ m_range_obstack_start = XOBNEWVAR (&m_range_obstack, char, 0);
+}
+
+combine2::~combine2 ()
+{
+ obstack_free (&m_insn_obstack, NULL);
+ obstack_free (&m_range_obstack, NULL);
+}
+
+/* Return true if extending the live range of REGNO might introduce a
+ spill failure during register allocation. We deliberately don't check
+ targetm.class_likely_spilled_p since:
+
+ (a) in the right circumstances, any allocatable hard register could
+ trigger a spill failure;
+
+ (b) using REGNO_REG_CLASS to get the class would on many targets lead
+ to an artificial distinction between general registers that happen
+ to be in a small class for a rarely-used constraint and those
+ whose class is GENERAL_REGS itself.
+
+ (c) there should be few cases in which moving references to allocatable
+ hard registers is important before RA. */
+
+static bool
+move_could_cause_spill_failure_p (unsigned int regno)
+{
+ return (regno != INVALID_REGNUM
+ && HARD_REGISTER_NUM_P (regno)
+ && !fixed_regs[regno]);
+}
+
+/* Return true if it's possible in principle to combine INSN with
+ other instructions. ALLOW_ASMS_P is true if the caller can cope
+ with asm statements. */
+
+static bool
+combinable_insn_p (rtx_insn *insn, bool allow_asms_p)
+{
+ rtx pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == USE || GET_CODE (pattern) == CLOBBER)
+ return false;
+
+ if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
+ return false;
+
+ if (!allow_asms_p && asm_noperands (PATTERN (insn)) >= 0)
+ return false;
+
+ return true;
+}
+
+/* Return true if it's possible in principle to move INSN somewhere else,
+ as long as all dependencies are satisfied. */
+
+static bool
+movable_insn_p (rtx_insn *insn)
+{
+ if (JUMP_P (insn))
+ return false;
+
+ if (volatile_refs_p (PATTERN (insn)))
+ return false;
+
+ return true;
+}
+
+/* A note_stores callback. Set the bool at *DATA to true if DEST is in
+ memory. */
+
+static void
+find_mem_def (rtx dest, const_rtx, void *data)
+{
+ /* note_stores has stripped things like subregs and zero_extracts,
+ so we don't need to worry about them here. */
+ if (MEM_P (dest))
+ *(bool *) data = true;
+}
+
+/* Return true if instruction INSN writes to memory. */
+
+static bool
+insn_writes_mem_p (rtx_insn *insn)
+{
+ bool saw_mem_p = false;
+ note_stores (insn, find_mem_def, &saw_mem_p);
+ return saw_mem_p;
+}
+
+/* A note_uses callback. Set the bool at DATA to true if *LOC reads
+ from variable memory. */
+
+static void
+find_mem_use (rtx *loc, void *data)
+{
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
+ if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
+ {
+ *(bool *) data = true;
+ break;
+ }
+}
+
+/* Return true if instruction INSN reads memory, including via notes. */
+
+static bool
+insn_reads_mem_p (rtx_insn *insn)
+{
+ bool saw_mem_p = false;
+ note_uses (&PATTERN (insn), find_mem_use, &saw_mem_p);
+ for (rtx note = REG_NOTES (insn); !saw_mem_p && note; note = XEXP (note, 1))
+ if (REG_NOTE_KIND (note) == REG_EQUAL
+ || REG_NOTE_KIND (note) == REG_EQUIV)
+ note_uses (&XEXP (note, 0), find_mem_use, &saw_mem_p);
+ return saw_mem_p;
+}
+
+/* Create and return a new live range for REGNO. NEXT is the next range
+ in program order, or null if this is the first live range in the
+ sequence. */
+
+combine2::live_range_rec *
+combine2::new_live_range (unsigned int regno, live_range_rec *next)
+{
+ /* Zero-initialize so producer, users[] and the extra-use fields all
+ start out empty (0 acts as the "unset" sentinel). */
+ live_range_rec *range = XOBNEW (&m_range_obstack, live_range_rec);
+ memset (range, 0, sizeof (*range));
+
+ range->regno = regno;
+ range->next_range = next;
+ if (next)
+ next->prev_range = range;
+ return range;
+}
+
+/* Return the current live range for register REGNO, creating a new
+ one if necessary. */
+
+combine2::live_range_rec *
+combine2::reg_live_range (unsigned int regno)
+{
+ /* Initialize the liveness flag, if it isn't already valid for this BB. */
+ bool first_ref_p = !bitmap_bit_p (m_valid_regs, regno);
+ /* next_ref < m_end_of_bb means the last recorded reference predates this
+ block, so the cached liveness flag is stale and must be recomputed. */
+ if (first_ref_p || m_reg_info[regno].next_ref < m_end_of_bb)
+ m_reg_info[regno].live_p = bitmap_bit_p (df_get_live_out (m_bb), regno);
+
+ /* See if we already have a live range associated with the current
+ instruction sequence. */
+ live_range_rec *range = NULL;
+ if (!first_ref_p && m_reg_info[regno].next_ref >= m_end_of_sequence)
+ range = m_reg_info[regno].range;
+
+ /* Create a new range if this is the first reference to REGNO in the
+ current instruction sequence or if the current range has been closed
+ off by a definition. */
+ if (!range || range->producer)
+ {
+ range = new_live_range (regno, range);
+
+ /* If the register is live after the current sequence, treat that
+ as a fake use at the end of the sequence. */
+ if (!range->next_range && m_reg_info[regno].live_p)
+ range->first_extra_use = range->last_extra_use = m_end_of_sequence;
+
+ /* Record that this is now the current range for REGNO. */
+ if (first_ref_p)
+ bitmap_set_bit (m_valid_regs, regno);
+ m_reg_info[regno].range = range;
+ m_reg_info[regno].next_ref = m_point;
+ }
+ return range;
+}
+
+/* Return the current live range for memory, treating memory as a single
+ entity. Create a new live range if necessary. */
+
+combine2::live_range_rec *
+combine2::mem_live_range ()
+{
+ /* As for registers, a range that already has a producer has been
+ closed off by a store, so open a fresh one. */
+ if (!m_mem_range || m_mem_range->producer)
+ m_mem_range = new_live_range (INVALID_REGNUM, m_mem_range);
+ return m_mem_range;
+}
+
+/* Record that instruction USER uses the resource described by RANGE.
+ Return true if this is new information. */
+
+bool
+combine2::add_range_use (live_range_rec *range, insn_info_rec *user)
+{
+ /* See if we've already recorded the instruction, or if there's a
+ spare use slot we can use. */
+ unsigned int i = 0;
+ for (; i < NUM_RANGE_USERS && range->users[i]; ++i)
+ if (range->users[i] == user)
+ return false;
+
+ if (i == NUM_RANGE_USERS)
+ {
+ /* Since we've processed USER recently, assume that it's more
+ interesting to record explicitly than the last user in the
+ current list. Evict that last user and describe it in the
+ overflow "extra use" range instead. */
+ insn_info_rec *ousted_user = range->users[--i];
+ if (range->first_extra_use < ousted_user->point)
+ range->first_extra_use = ousted_user->point;
+ /* NOTE(review): last_extra_use starts at 0, so this min-style update
+ never fires from the unset state -- confirm that leaving it 0
+ (i.e. "unknown") after an eviction is the intended behavior. */
+ if (range->last_extra_use > ousted_user->point)
+ range->last_extra_use = ousted_user->point;
+ }
+
+ /* Insert USER while keeping the list sorted. */
+ for (; i > 0 && range->users[i - 1]->point < user->point; --i)
+ range->users[i] = range->users[i - 1];
+ range->users[i] = user;
+ return true;
+}
+
+/* Remove USER from the uses recorded for RANGE, if we can.
+ There's nothing we can do if USER was described in the
+ overflow "extra use" range. */
+
+void
+combine2::remove_range_use (live_range_rec *range, insn_info_rec *user)
+{
+ for (unsigned int i = 0; i < NUM_RANGE_USERS; ++i)
+ if (range->users[i] == user)
+ {
+ /* Shift the tail of the list down to keep it dense and sorted. */
+ for (unsigned int j = i; j < NUM_RANGE_USERS - 1; ++j)
+ range->users[j] = range->users[j + 1];
+ range->users[NUM_RANGE_USERS - 1] = NULL;
+ break;
+ }
+}
+
+/* Return true if RANGE has a single known user. */
+
+bool
+combine2::has_single_use_p (live_range_rec *range)
+{
+ /* first_extra_use == 0 means no uses overflowed into the extra range. */
+ return range->users[0] && !range->users[1] && !range->first_extra_use;
+}
+
+/* Return true if we know that USER is the last user of RANGE. */
+
+bool
+combine2::known_last_use_p (live_range_rec *range, insn_info_rec *user)
+{
+ /* NOTE(review): when there are no extra uses, last_extra_use is 0 and
+ this test is always true, so the function answers false even when
+ USER is the only recorded user -- confirm this conservatism is
+ intended rather than a missing "range->last_extra_use &&" guard. */
+ if (range->last_extra_use <= user->point)
+ return false;
+
+ for (unsigned int i = 0; i < NUM_RANGE_USERS && range->users[i]; ++i)
+ if (range->users[i] == user)
+ return i == NUM_RANGE_USERS - 1 || !range->users[i + 1];
+ else if (range->users[i]->point == user->point)
+ return false;
+
+ /* USER must appear in the list; it cannot have been evicted, because
+ evicted uses are covered by the extra-use test above. */
+ gcc_unreachable ();
+}
+
+/* Find the earliest point that we could move I2 up in order to combine
+ it with I1. Ignore any dependencies between I1 and I2; leave the
+ caller to deal with those instead. Note that points are numbered in
+ reverse program order (see execute), so numerically larger points are
+ earlier positions. */
+
+unsigned int
+combine2::find_earliest_point (insn_info_rec *i2, insn_info_rec *i1)
+{
+ if (!movable_insn_p (i2->insn))
+ return i2->point;
+
+ /* Don't allow sets to be moved earlier if doing so could introduce
+ a spill failure. */
+ if (prev_real_insn (i2->insn) != i1->insn)
+ for (live_range_rec **def = i2->defs; *def; ++def)
+ if (move_could_cause_spill_failure_p ((*def)->regno))
+ return i2->point;
+
+ /* Start by optimistically assuming that we can move the instruction
+ all the way up to I1. */
+ unsigned int point = i1->point;
+
+ /* Make sure that the new position preserves all necessary true dependencies
+ on earlier instructions. */
+ for (live_range_rec **use = i2->uses; *use; ++use)
+ {
+ live_range_rec *range = *use;
+ if (range->producer
+ && range->producer != i1
+ && point >= range->producer->point)
+ point = range->producer->point - 1;
+ }
+
+ /* Make sure that the new position preserves all necessary output and
+ anti dependencies on earlier instructions. */
+ for (live_range_rec **def = i2->defs; *def; ++def)
+ if (live_range_rec *range = (*def)->prev_range)
+ {
+ if (range->producer
+ && range->producer != i1
+ && point >= range->producer->point)
+ point = range->producer->point - 1;
+
+ /* NOTE(review): this loop visits slots NUM_RANGE_USERS - 2 down
+ to 0, never users[NUM_RANGE_USERS - 1] -- confirm skipping the
+ final slot is intended. */
+ for (unsigned int i = NUM_RANGE_USERS - 1; i-- > 0;)
+ if (range->users[i] && range->users[i] != i1)
+ {
+ if (point >= range->users[i]->point)
+ point = range->users[i]->point - 1;
+ break;
+ }
+
+ if (range->last_extra_use && point >= range->last_extra_use)
+ point = range->last_extra_use - 1;
+ }
+
+ return point;
+}
+
+/* Find the latest point that we could move I1 down in order to combine
+ it with I2. Ignore any dependencies between I1 and I2; leave the
+ caller to deal with those instead. Points are numbered in reverse
+ program order, so numerically smaller points are later positions. */
+
+unsigned int
+combine2::find_latest_point (insn_info_rec *i1, insn_info_rec *i2)
+{
+ if (!movable_insn_p (i1->insn))
+ return i1->point;
+
+ /* Start by optimistically assuming that we can move the instruction
+ all the way down to I2. */
+ unsigned int point = i2->point;
+
+ /* Make sure that the new position preserves all necessary anti dependencies
+ on later instructions. */
+ for (live_range_rec **use = i1->uses; *use; ++use)
+ if (live_range_rec *range = (*use)->next_range)
+ if (range->producer != i2 && point <= range->producer->point)
+ point = range->producer->point + 1;
+
+ /* Make sure that the new position preserves all necessary output and
+ true dependencies on later instructions. */
+ for (live_range_rec **def = i1->defs; *def; ++def)
+ {
+ live_range_rec *range = *def;
+
+ for (unsigned int i = 0; i < NUM_RANGE_USERS; ++i)
+ if (range->users[i] != i2)
+ {
+ if (range->users[i] && point <= range->users[i]->point)
+ point = range->users[i]->point + 1;
+ break;
+ }
+
+ if (range->first_extra_use && point <= range->first_extra_use)
+ point = range->first_extra_use + 1;
+
+ live_range_rec *next_range = range->next_range;
+ if (next_range
+ && next_range->producer != i2
+ && point <= next_range->producer->point)
+ point = next_range->producer->point + 1;
+ }
+
+ /* Don't allow the live range of a register to be extended if doing
+ so could introduce a spill failure. */
+ if (prev_real_insn (i2->insn) != i1->insn)
+ for (live_range_rec **use = i1->uses; *use; ++use)
+ {
+ live_range_rec *range = *use;
+ if (move_could_cause_spill_failure_p (range->regno))
+ {
+ /* NOTE(review): as in find_earliest_point, this scan starts at
+ slot NUM_RANGE_USERS - 2 and never examines the final slot --
+ confirm that is intended. */
+ for (unsigned int i = NUM_RANGE_USERS - 1; i-- > 0;)
+ if (range->users[i])
+ {
+ if (point < range->users[i]->point)
+ point = range->users[i]->point;
+ break;
+ }
+
+ if (range->last_extra_use && point < range->last_extra_use)
+ point = range->last_extra_use;
+ }
+ }
+
+ return point;
+}
+
+/* Initialize ATTEMPT for an attempt to combine instructions I1 and I2,
+ where I1 is the instruction that we're currently trying to optimize.
+ If DEF_USE_RANGE is nonnull, I1 defines the value described by
+ DEF_USE_RANGE and I2 uses it. Return true if the combination is
+ worth trying. */
+
+bool
+combine2::start_combination (combination_attempt_rec &attempt,
+ insn_info_rec *i1, insn_info_rec *i2,
+ live_range_rec *def_use_range)
+{
+ attempt.new_home = i1;
+ attempt.sequence[0] = i1;
+ attempt.sequence[1] = i2;
+ /* Points run in reverse program order, so after this swap sequence[0]
+ is the program-earlier instruction (the larger point). */
+ if (attempt.sequence[0]->point < attempt.sequence[1]->point)
+ std::swap (attempt.sequence[0], attempt.sequence[1]);
+ attempt.def_use_range = def_use_range;
+
+ /* Check that the instructions have no true dependencies other than
+ DEF_USE_RANGE. */
+ bitmap_clear (m_true_deps);
+ for (live_range_rec **def = attempt.sequence[0]->defs; *def; ++def)
+ if (*def != def_use_range)
+ bitmap_set_bit (m_true_deps, (*def)->regno);
+ for (live_range_rec **use = attempt.sequence[1]->uses; *use; ++use)
+ if (*use != def_use_range && bitmap_bit_p (m_true_deps, (*use)->regno))
+ return false;
+
+ /* Calculate the range of points at which the combined instruction
+ could live. The window is empty if the earliest position is
+ strictly later (numerically smaller) than the latest one. */
+ attempt.earliest_point = find_earliest_point (attempt.sequence[1],
+ attempt.sequence[0]);
+ attempt.latest_point = find_latest_point (attempt.sequence[0],
+ attempt.sequence[1]);
+ if (attempt.earliest_point < attempt.latest_point)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "cannot combine %d and %d: no suitable"
+ " location for combined insn\n",
+ INSN_UID (attempt.sequence[0]->insn),
+ INSN_UID (attempt.sequence[1]->insn));
+ return false;
+ }
+
+ /* Make sure we have valid costs for the original instructions before
+ we start changing their patterns. */
+ for (unsigned int i = 0; i < MAX_COMBINE_INSNS; ++i)
+ if (attempt.sequence[i]->cost == UNKNOWN_COST)
+ attempt.sequence[i]->cost = insn_cost (attempt.sequence[i]->insn,
+ m_optimize_for_speed_p);
+ return true;
+}
+
+/* Check whether the combination attempt described by ATTEMPT matches
+ an .md instruction (or matches its constraints, in the case of an
+ asm statement). If so, calculate the cost of the new instruction
+ and check whether it's cheap enough. On success, confirm the open
+ validate_change group; on failure, leave it for the caller to cancel. */
+
+bool
+combine2::verify_combination (combination_attempt_rec &attempt)
+{
+ /* sequence[1] is the instruction that holds the tentative combined
+ pattern. */
+ rtx_insn *insn = attempt.sequence[1]->insn;
+
+ bool ok_p = verify_changes (0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ if (!ok_p)
+ fprintf (dump_file, "failed to match this instruction:\n");
+ else if (const char *name = get_insn_name (INSN_CODE (insn)))
+ fprintf (dump_file, "successfully matched this instruction to %s:\n",
+ name);
+ else
+ fprintf (dump_file, "successfully matched this instruction:\n");
+ print_rtl_single (dump_file, PATTERN (insn));
+ }
+ if (!ok_p)
+ return false;
+
+ /* Give the target a veto, mirroring what combine does. */
+ if (INSN_CODE (insn) >= 0 && !targetm.legitimate_combined_insn (insn))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "instruction rejected by target\n");
+ return false;
+ }
+
+ unsigned int cost1 = attempt.sequence[0]->cost;
+ unsigned int cost2 = attempt.sequence[1]->cost;
+ attempt.new_cost = insn_cost (insn, m_optimize_for_speed_p);
+ ok_p = (attempt.new_cost <= cost1 + cost2);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "original cost = %d + %d, replacement cost = %d; %s\n",
+ cost1, cost2, attempt.new_cost,
+ ok_p ? "keeping replacement" : "rejecting replacement");
+ if (!ok_p)
+ return false;
+
+ confirm_change_group ();
+ return true;
+}
+
+/* Return true if we should consider register REGNO when calculating
+ register pressure estimates. */
+
+static bool
+count_reg_pressure_p (unsigned int regno)
+{
+ /* INVALID_REGNUM is the pseudo-regno used for the memory range. */
+ if (regno == INVALID_REGNUM)
+ return false;
+
+ /* Unallocatable registers aren't interesting. */
+ if (HARD_REGISTER_NUM_P (regno) && fixed_regs[regno])
+ return false;
+
+ return true;
+}
+
+/* Try to estimate the effect that the original form of INSN_INFO
+ had on register pressure, in the form "born - dying". */
+
+int
+combine2::estimate_reg_pressure_delta (insn_info_rec *insn_info)
+{
+ int delta = 0;
+
+ /* Each register defined here is "born" at this instruction. */
+ for (live_range_rec **def = insn_info->defs; *def; ++def)
+ if (count_reg_pressure_p ((*def)->regno))
+ delta += 1;
+
+ /* A use only reduces pressure if we can prove it is the last one. */
+ for (live_range_rec **use = insn_info->uses; *use; ++use)
+ if (count_reg_pressure_p ((*use)->regno)
+ && known_last_use_p (*use, insn_info))
+ delta -= 1;
+
+ return delta;
+}
+
+/* We've moved FROM_INSN's pattern to TO_INSN and are about to delete
+ FROM_INSN. Copy any useful information to TO_INSN before doing that. */
+
+static void
+transfer_insn (rtx_insn *to_insn, rtx_insn *from_insn)
+{
+ /* Preserve the location, recognized insn code and notes so that the
+ new home behaves exactly like the pattern's previous home. */
+ INSN_LOCATION (to_insn) = INSN_LOCATION (from_insn);
+ INSN_CODE (to_insn) = INSN_CODE (from_insn);
+ REG_NOTES (to_insn) = REG_NOTES (from_insn);
+}
+
+/* The combination attempt in ATTEMPT has succeeded and is currently
+ part of an open validate_change group. Commit to making the change
+ and decide where the new instruction should go.
+
+ KEPT_DEF_P is true if the new instruction continues to perform
+ the definition described by ATTEMPT.def_use_range. */
+
+void
+combine2::commit_combination (combination_attempt_rec &attempt,
+ bool kept_def_p)
+{
+ insn_info_rec *new_home = attempt.new_home;
+ rtx_insn *old_insn = attempt.sequence[0]->insn;
+ rtx_insn *new_insn = attempt.sequence[1]->insn;
+
+ /* Remove any notes that are no longer relevant. */
+ bool single_set_p = single_set (new_insn);
+ for (rtx *note_ptr = &REG_NOTES (new_insn); *note_ptr; )
+ {
+ rtx note = *note_ptr;
+ bool keep_p = true;
+ switch (REG_NOTE_KIND (note))
+ {
+ case REG_EQUAL:
+ case REG_EQUIV:
+ case REG_NOALIAS:
+ /* These notes describe a single SET result, so they only
+ survive if the combined pattern is still a single set. */
+ keep_p = single_set_p;
+ break;
+
+ case REG_UNUSED:
+ keep_p = false;
+ break;
+
+ default:
+ break;
+ }
+ if (keep_p)
+ note_ptr = &XEXP (*note_ptr, 1);
+ else
+ {
+ *note_ptr = XEXP (*note_ptr, 1);
+ free_EXPR_LIST_node (note);
+ }
+ }
+
+ /* Complete the open validate_change group. */
+ confirm_change_group ();
+
+ /* Decide where the new instruction should go. */
+ unsigned int new_point = attempt.latest_point;
+ if (new_point != attempt.earliest_point
+ && prev_real_insn (new_insn) != old_insn)
+ {
+ /* Prefer the earlier point if the combined instruction reduces
+ register pressure and the latest point if it increases register
+ pressure.
+
+ The choice isn't obvious in the event of a tie, but picking
+ the earliest point should reduce the number of times that
+ we need to invalidate debug insns. */
+ int delta1 = estimate_reg_pressure_delta (attempt.sequence[0]);
+ int delta2 = estimate_reg_pressure_delta (attempt.sequence[1]);
+ bool move_up_p = (delta1 + delta2 <= 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "register pressure delta = %d + %d; using %s position\n",
+ delta1, delta2, move_up_p ? "earliest" : "latest");
+ if (move_up_p)
+ new_point = attempt.earliest_point;
+ }
+
+ /* Translate inserting at NEW_POINT into inserting before or after
+ a particular insn. Odd points denote instructions themselves
+ (insert before); even points denote the gaps after them. */
+ rtx_insn *anchor = NULL;
+ bool before_p = (new_point & 1);
+ if (new_point != attempt.sequence[1]->point
+ && new_point != attempt.sequence[0]->point)
+ {
+ anchor = m_points[(new_point - m_end_of_sequence) / 2];
+ rtx_insn *other_side = (before_p
+ ? prev_real_insn (anchor)
+ : next_real_insn (anchor));
+ /* Inserting next to an insn X and then deleting X is just a
+ roundabout way of using X as the insertion point. */
+ if (anchor == new_insn || other_side == new_insn)
+ new_point = attempt.sequence[1]->point;
+ else if (anchor == old_insn || other_side == old_insn)
+ new_point = attempt.sequence[0]->point;
+ }
+
+ /* Actually perform the move. */
+ if (new_point == attempt.sequence[1]->point)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "using insn %d to hold the combined pattern\n",
+ INSN_UID (new_insn));
+ set_insn_deleted (old_insn);
+ }
+ else if (new_point == attempt.sequence[0]->point)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "using insn %d to hold the combined pattern\n",
+ INSN_UID (old_insn));
+ PATTERN (old_insn) = PATTERN (new_insn);
+ transfer_insn (old_insn, new_insn);
+ std::swap (old_insn, new_insn);
+ set_insn_deleted (old_insn);
+ }
+ else
+ {
+ /* We need to insert a new instruction. We can't simply move
+ NEW_INSN because it acts as an insertion anchor in m_points. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "inserting combined insn %s insn %d\n",
+ before_p ? "before" : "after", INSN_UID (anchor));
+
+ rtx_insn *added_insn = (before_p
+ ? emit_insn_before (PATTERN (new_insn), anchor)
+ : emit_insn_after (PATTERN (new_insn), anchor));
+ transfer_insn (added_insn, new_insn);
+ set_insn_deleted (old_insn);
+ set_insn_deleted (new_insn);
+ new_insn = added_insn;
+ }
+ df_insn_rescan (new_insn);
+
+ /* Unlink the old uses. */
+ for (unsigned int i = 0; i < MAX_COMBINE_INSNS; ++i)
+ for (live_range_rec **use = attempt.sequence[i]->uses; *use; ++use)
+ remove_range_use (*use, attempt.sequence[i]);
+
+ /* Work out which registers the new pattern uses. */
+ bitmap_clear (m_true_deps);
+ df_ref use;
+ FOR_EACH_INSN_USE (use, new_insn)
+ {
+ rtx reg = DF_REF_REAL_REG (use);
+ bitmap_set_range (m_true_deps, REGNO (reg), REG_NREGS (reg));
+ }
+ FOR_EACH_INSN_EQ_USE (use, new_insn)
+ {
+ rtx reg = DF_REF_REAL_REG (use);
+ bitmap_set_range (m_true_deps, REGNO (reg), REG_NREGS (reg));
+ }
+
+ /* Describe the combined instruction in NEW_HOME. */
+ new_home->insn = new_insn;
+ new_home->point = new_point;
+ new_home->cost = attempt.new_cost;
+
+ /* Build up a list of definitions for the combined instructions
+ and update all the ranges accordingly. It shouldn't matter
+ which order we do this in. */
+ for (unsigned int i = 0; i < MAX_COMBINE_INSNS; ++i)
+ for (live_range_rec **def = attempt.sequence[i]->defs; *def; ++def)
+ if (kept_def_p || *def != attempt.def_use_range)
+ {
+ obstack_ptr_grow (&m_insn_obstack, *def);
+ (*def)->producer = new_home;
+ }
+ obstack_ptr_grow (&m_insn_obstack, NULL);
+ new_home->defs = (live_range_rec **) obstack_finish (&m_insn_obstack);
+
+ /* Build up a list of uses for the combined instructions and update
+ all the ranges accordingly. Again, it shouldn't matter which
+ order we do this in. */
+ for (unsigned int i = 0; i < MAX_COMBINE_INSNS; ++i)
+ for (live_range_rec **use = attempt.sequence[i]->uses; *use; ++use)
+ {
+ live_range_rec *range = *use;
+ if (range != attempt.def_use_range
+ && (range->regno == INVALID_REGNUM
+ ? insn_reads_mem_p (new_insn)
+ : bitmap_bit_p (m_true_deps, range->regno))
+ && add_range_use (range, new_home))
+ obstack_ptr_grow (&m_insn_obstack, range);
+ }
+ obstack_ptr_grow (&m_insn_obstack, NULL);
+ new_home->uses = (live_range_rec **) obstack_finish (&m_insn_obstack);
+
+ /* There shouldn't be any remaining references to other instructions
+ in the combination. Invalidate their contents to make lingering
+ references a noisy failure. */
+ for (unsigned int i = 0; i < MAX_COMBINE_INSNS; ++i)
+ if (attempt.sequence[i] != new_home)
+ {
+ attempt.sequence[i]->insn = NULL;
+ attempt.sequence[i]->point = ~0U;
+ }
+
+ /* Unlink the def-use range. */
+ if (!kept_def_p && attempt.def_use_range)
+ {
+ live_range_rec *range = attempt.def_use_range;
+ if (range->prev_range)
+ range->prev_range->next_range = range->next_range;
+ else
+ m_reg_info[range->regno].range = range->next_range;
+ if (range->next_range)
+ range->next_range->prev_range = range->prev_range;
+ }
+
+ /* Record instructions whose new form alters the cfg. */
+ rtx pattern = PATTERN (new_insn);
+ if ((returnjump_p (new_insn)
+ || any_uncondjump_p (new_insn)
+ || (GET_CODE (pattern) == TRAP_IF && XEXP (pattern, 0) == const1_rtx))
+ && bitmap_set_bit (m_cfg_altering_insn_ids, INSN_UID (new_insn)))
+ m_cfg_altering_insns.safe_push (new_insn);
+}
+
+/* Return true if X1 and X2 are memories and if X1 does not have
+ a higher alignment than X2. */
+
+static bool
+dubious_mem_pair_p (rtx x1, rtx x2)
+{
+ /* See try_parallel_sets: we only want pairs whose first access has
+ the higher alignment. */
+ return MEM_P (x1) && MEM_P (x2) && MEM_ALIGN (x1) <= MEM_ALIGN (x2);
+}
+
+/* Try to implement ATTEMPT using (parallel [SET1 SET2]). */
+
+bool
+combine2::try_parallel_sets (combination_attempt_rec &attempt,
+ rtx set1, rtx set2)
+{
+ rtx_insn *insn = attempt.sequence[1]->insn;
+
+ /* Combining two loads or two stores can be useful on targets that
+ allow them to be treated as a single access. However, we use a
+ very peephole approach to picking the pairs, so we need to be
+ relatively confident that we're making a good choice.
+
+ For now just aim for cases in which the memory references are
+ consecutive and the first reference has a higher alignment.
+ We can leave the target to test the consecutive part; whatever test
+ we added here might be different from the target's, and in any case
+ it's fine if the target accepts other well-aligned cases too. */
+ if (dubious_mem_pair_p (SET_DEST (set1), SET_DEST (set2))
+ || dubious_mem_pair_p (SET_SRC (set1), SET_SRC (set2)))
+ return false;
+
+ /* Cache the PARALLEL rtx between attempts so that we don't generate
+ too much garbage rtl. */
+ if (!m_spare_parallel)
+ {
+ rtvec vec = gen_rtvec (2, set1, set2);
+ m_spare_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
+ }
+ else
+ {
+ XVECEXP (m_spare_parallel, 0, 0) = set1;
+ XVECEXP (m_spare_parallel, 0, 1) = set2;
+ }
+
+ /* Tentatively install the PARALLEL; keep it only if it matches and
+ is cheap enough, otherwise roll back to the previous state. */
+ unsigned int num_changes = num_validated_changes ();
+ validate_change (insn, &PATTERN (insn), m_spare_parallel, true);
+ if (verify_combination (attempt))
+ {
+ /* The cached PARALLEL is now part of INSN, so stop reusing it. */
+ m_spare_parallel = NULL_RTX;
+ return true;
+ }
+ cancel_changes (num_changes);
+ return false;
+}
+
+/* Try to parallelize the two instructions in ATTEMPT. */
+
+bool
+combine2::try_parallelize_insns (combination_attempt_rec &attempt)
+{
+ rtx_insn *i1_insn = attempt.sequence[0]->insn;
+ rtx_insn *i2_insn = attempt.sequence[1]->insn;
+
+ /* Can't parallelize asm statements. */
+ if (asm_noperands (PATTERN (i1_insn)) >= 0
+ || asm_noperands (PATTERN (i2_insn)) >= 0)
+ return false;
+
+ /* For now, just handle the case in which both instructions are
+ single sets. We could handle more than 2 sets as well, but few
+ targets support that anyway. */
+ rtx set1 = single_set (i1_insn);
+ if (!set1)
+ return false;
+ rtx set2 = single_set (i2_insn);
+ if (!set2)
+ return false;
+
+ /* Make sure that we have structural proof that the destinations
+ are independent. Things like alias analysis rely on semantic
+ information and assume no undefined behavior, which is rarely a
+ good enough guarantee to allow a useful instruction combination. */
+ rtx dest1 = SET_DEST (set1);
+ rtx dest2 = SET_DEST (set2);
+ if (MEM_P (dest1)
+ ? MEM_P (dest2) && !nonoverlapping_memrefs_p (dest1, dest2, false)
+ : !MEM_P (dest2) && reg_overlap_mentioned_p (dest1, dest2))
+ return false;
+
+ /* Try the sets in both orders. */
+ if (try_parallel_sets (attempt, set1, set2)
+ || try_parallel_sets (attempt, set2, set1))
+ {
+ commit_combination (attempt, true);
+ /* If the earlier insn moved, fix up any debug insns that referred
+ to its old position. */
+ if (MAY_HAVE_DEBUG_BIND_INSNS
+ && attempt.new_home->insn != i1_insn)
+ propagate_for_debug (i1_insn, attempt.new_home->insn,
+ SET_DEST (set1), SET_SRC (set1), m_bb);
+ return true;
+ }
+ return false;
+}
+
+/* Replace DEST with SRC in the register notes for INSN. Notes in
+ which the replacement cannot be validated are dropped. */
+
+static void
+substitute_into_note (rtx_insn *insn, rtx dest, rtx src)
+{
+ for (rtx *note_ptr = &REG_NOTES (insn); *note_ptr; )
+ {
+ rtx note = *note_ptr;
+ bool keep_p = true;
+ switch (REG_NOTE_KIND (note))
+ {
+ case REG_EQUAL:
+ case REG_EQUIV:
+ /* Keep the note only if the substitution succeeds; a note
+ that still mentions DEST would be wrong after combination. */
+ keep_p = validate_simplify_replace_rtx (insn, &XEXP (note, 0),
+ dest, src);
+ break;
+
+ default:
+ break;
+ }
+ if (keep_p)
+ note_ptr = &XEXP (*note_ptr, 1);
+ else
+ {
+ *note_ptr = XEXP (*note_ptr, 1);
+ free_EXPR_LIST_node (note);
+ }
+ }
+}
+
+/* A subroutine of try_combine_def_use. Try replacing DEST with SRC
+ in ATTEMPT. SRC might be either the original SET_SRC passed to the
+ parent routine or a value pulled from a note; SRC_IS_NOTE_P is true
+ in the latter case. */
+
+bool
+combine2::try_combine_def_use_1 (combination_attempt_rec &attempt,
+ rtx dest, rtx src, bool src_is_note_p)
+{
+ rtx_insn *def_insn = attempt.sequence[0]->insn;
+ rtx_insn *use_insn = attempt.sequence[1]->insn;
+
+ /* Mimic combine's behavior by not combining moves from allocatable hard
+ registers (e.g. when copying parameters or function return values). */
+ if (REG_P (src) && HARD_REGISTER_P (src) && !fixed_regs[REGNO (src)])
+ return false;
+
+ /* Don't mess with volatile references. For one thing, we don't yet
+ know how many copies of SRC we'll need. */
+ if (volatile_refs_p (src))
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "trying to combine %d and %d%s:\n",
+ INSN_UID (def_insn), INSN_UID (use_insn),
+ src_is_note_p ? " using equal/equiv note" : "");
+ dump_insn_slim (dump_file, def_insn);
+ dump_insn_slim (dump_file, use_insn);
+ }
+
+ /* Substitute SRC for DEST throughout USE_INSN's pattern, simplifying
+ as we go; all changes stay pending in the validate_change group. */
+ unsigned int num_changes = num_validated_changes ();
+ if (!validate_simplify_replace_rtx (use_insn, &PATTERN (use_insn),
+ dest, src))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "combination failed -- unable to substitute"
+ " all uses\n");
+ return false;
+ }
+
+ /* Try matching the instruction on its own if DEST isn't used elsewhere. */
+ if (has_single_use_p (attempt.def_use_range)
+ && verify_combination (attempt))
+ {
+ live_range_rec *next_range = attempt.def_use_range->next_range;
+ substitute_into_note (use_insn, dest, src);
+ commit_combination (attempt, false);
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ {
+ /* DEF_INSN is gone, so rewrite debug insns that referenced its
+ result, up to the next definition of the register (or the end
+ of the block). */
+ rtx_insn *end_of_range = (next_range
+ ? next_range->producer->insn
+ : BB_END (m_bb));
+ propagate_for_debug (def_insn, end_of_range, dest, src, m_bb);
+ }
+ return true;
+ }
+
+ /* Try doing the new USE_INSN pattern in parallel with the DEF_INSN
+ pattern. */
+ if ((!targetm.cannot_copy_insn_p || !targetm.cannot_copy_insn_p (def_insn))
+ && try_parallelize_insns (attempt))
+ return true;
+
+ cancel_changes (num_changes);
+ return false;
+}
+
+/* ATTEMPT describes an attempt to substitute the result of the first
+ instruction into the second instruction. Try to implement it,
+ given that the first instruction sets DEST to SRC. */
+
+bool
+combine2::try_combine_def_use (combination_attempt_rec &attempt,
+ rtx dest, rtx src)
+{
+ rtx_insn *def_insn = attempt.sequence[0]->insn;
+ rtx_insn *use_insn = attempt.sequence[1]->insn;
+ rtx def_note = find_reg_equal_equiv_note (def_insn);
+
+ /* First try combining the instructions in their original form. */
+ if (try_combine_def_use_1 (attempt, dest, src, false))
+ return true;
+
+ /* Try to replace DEST with a REG_EQUAL/EQUIV value instead. */
+ if (def_note
+ && try_combine_def_use_1 (attempt, dest, XEXP (def_note, 0), true))
+ return true;
+
+ /* If USE_INSN has a REG_EQUAL/EQUIV note that refers to DEST, try
+ using that instead of the main pattern. */
+ for (rtx *link_ptr = &REG_NOTES (use_insn); *link_ptr;
+ link_ptr = &XEXP (*link_ptr, 1))
+ {
+ rtx use_note = *link_ptr;
+ if (REG_NOTE_KIND (use_note) != REG_EQUAL
+ && REG_NOTE_KIND (use_note) != REG_EQUIV)
+ continue;
+
+ rtx use_set = single_set (use_insn);
+ if (!use_set)
+ break;
+
+ /* The note is only useful here if it actually mentions DEST. */
+ if (!reg_overlap_mentioned_p (dest, XEXP (use_note, 0)))
+ continue;
+
+ /* Try snipping out the note and putting it in the SET instead. */
+ validate_change (use_insn, link_ptr, XEXP (use_note, 1), 1);
+ validate_change (use_insn, &SET_SRC (use_set), XEXP (use_note, 0), 1);
+
+ if (try_combine_def_use_1 (attempt, dest, src, false))
+ return true;
+
+ if (def_note
+ && try_combine_def_use_1 (attempt, dest, XEXP (def_note, 0), true))
+ return true;
+
+ /* Roll back the note manipulation before trying the next note. */
+ cancel_changes (0);
+ }
+
+ return false;
+}
+
+/* ATTEMPT describes an attempt to combine two instructions that use
+ the same resource. Try to implement it, returning true on success. */
+
+bool
+combine2::try_combine_two_uses (combination_attempt_rec &attempt)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "trying to parallelize %d and %d:\n",
+ INSN_UID (attempt.sequence[0]->insn),
+ INSN_UID (attempt.sequence[1]->insn));
+ dump_insn_slim (dump_file, attempt.sequence[0]->insn);
+ dump_insn_slim (dump_file, attempt.sequence[1]->insn);
+ }
+
+ /* Two uses of the same value have no def-use dependency, so the only
+ option is a PARALLEL of the two patterns. */
+ return try_parallelize_insns (attempt);
+}
+
+/* Try to optimize instruction INSN_INFO. Return true on success. */
+
+bool
+combine2::optimize_insn (insn_info_rec *i1)
+{
+ combination_attempt_rec attempt;
+
+ if (!combinable_insn_p (i1->insn, false))
+ return false;
+
+ rtx set = single_set (i1->insn);
+ if (!set)
+ return false;
+
+ /* First try combining INSN with a user of its result. */
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+ if (REG_P (dest) && REG_NREGS (dest) == 1)
+ for (live_range_rec **def = i1->defs; *def; ++def)
+ if ((*def)->regno == REGNO (dest))
+ {
+ for (unsigned int i = 0; i < NUM_RANGE_USERS; ++i)
+ {
+ insn_info_rec *use = (*def)->users[i];
+ if (use
+ && combinable_insn_p (use->insn, has_single_use_p (*def))
+ && start_combination (attempt, i1, use, *def)
+ && try_combine_def_use (attempt, dest, src))
+ return true;
+ }
+ /* Only one range in defs can match REGNO (dest). */
+ break;
+ }
+
+ /* Try parallelizing INSN and another instruction that uses the same
+ resource. m_tried_insns avoids retrying an insn that shares
+ several ranges with I1. */
+ bitmap_clear (m_tried_insns);
+ for (live_range_rec **use = i1->uses; *use; ++use)
+ for (unsigned int i = 0; i < NUM_RANGE_USERS; ++i)
+ {
+ insn_info_rec *i2 = (*use)->users[i];
+ if (i2
+ && i2 != i1
+ && combinable_insn_p (i2->insn, false)
+ && bitmap_set_bit (m_tried_insns, INSN_UID (i2->insn))
+ && start_combination (attempt, i1, i2)
+ && try_combine_two_uses (attempt))
+ return true;
+ }
+
+ return false;
+}
+
+/* Record all register and memory definitions in INSN_INFO and fill in its
+ "defs" list. */
+
+void
+combine2::record_defs (insn_info_rec *insn_info)
+{
+ rtx_insn *insn = insn_info->insn;
+
+ /* Record register definitions. */
+ df_ref def;
+ FOR_EACH_INSN_DEF (def, insn)
+ {
+ rtx reg = DF_REF_REAL_REG (def);
+ unsigned int end_regno = END_REGNO (reg);
+ for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+ {
+ /* Closing the range here: a def ends the current range for REGNO.
+ Remember we walk the block in reverse, so the register is dead
+ above this point until we see a use. */
+ live_range_rec *range = reg_live_range (regno);
+ range->producer = insn_info;
+ m_reg_info[regno].live_p = false;
+ obstack_ptr_grow (&m_insn_obstack, range);
+ }
+ }
+
+ /* If the instruction writes to memory, record that too. */
+ if (insn_writes_mem_p (insn))
+ {
+ live_range_rec *range = mem_live_range ();
+ range->producer = insn_info;
+ obstack_ptr_grow (&m_insn_obstack, range);
+ }
+
+ /* Complete the list of definitions. */
+ obstack_ptr_grow (&m_insn_obstack, NULL);
+ insn_info->defs = (live_range_rec **) obstack_finish (&m_insn_obstack);
+}
+
+/* Record that INSN_INFO contains register use USE. If this requires
+ new entries to be added to INSN_INFO->uses, add those entries to the
+ list we're building in m_insn_obstack. */
+
+void
+combine2::record_reg_use (insn_info_rec *insn_info, df_ref use)
+{
+ rtx reg = DF_REF_REAL_REG (use);
+ unsigned int end_regno = END_REGNO (reg);
+ /* Multi-register values get one range entry per hard register. */
+ for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+ {
+ live_range_rec *range = reg_live_range (regno);
+ if (add_range_use (range, insn_info))
+ obstack_ptr_grow (&m_insn_obstack, range);
+ m_reg_info[regno].live_p = true;
+ }
+}
+
+/* Record all register and memory uses in INSN_INFO and fill in its
+ "uses" list. */
+
+void
+combine2::record_uses (insn_info_rec *insn_info)
+{
+ rtx_insn *insn = insn_info->insn;
+
+ /* Record register uses in the main pattern. */
+ df_ref use;
+ FOR_EACH_INSN_USE (use, insn)
+ record_reg_use (insn_info, use);
+
+ /* Treat REG_EQUAL uses as first-class uses. We don't lose much
+ by doing that, since it's rare for a REG_EQUAL note to mention
+ registers that the main pattern doesn't. It also gives us the
+ maximum freedom to use REG_EQUAL notes in place of the main pattern. */
+ FOR_EACH_INSN_EQ_USE (use, insn)
+ record_reg_use (insn_info, use);
+
+ /* Record a memory use if either the pattern or the notes read from
+ memory. */
+ if (insn_reads_mem_p (insn))
+ {
+ live_range_rec *range = mem_live_range ();
+ if (add_range_use (range, insn_info))
+ obstack_ptr_grow (&m_insn_obstack, range);
+ }
+
+ /* Complete the list of uses. */
+ obstack_ptr_grow (&m_insn_obstack, NULL);
+ insn_info->uses = (live_range_rec **) obstack_finish (&m_insn_obstack);
+}
+
+/* Begin a new, empty instruction sequence, throwing away all state
+   that was accumulated for the previous one.  */
+
+void
+combine2::start_sequence (void)
+{
+  /* Release the per-insn and per-range data for the old sequence.  */
+  obstack_free (&m_insn_obstack, m_insn_obstack_start);
+  obstack_free (&m_range_obstack, m_range_obstack_start);
+
+  /* Reset the sequence-local tracking state.  */
+  m_points.truncate (0);
+  m_mem_range = NULL;
+  m_end_of_sequence = m_point;
+}
+
+/* Run the pass on the current function.  */
+
+void
+combine2::execute (void)
+{
+  /* Make sure the df use/def information is up to date before
+     scanning any instructions.  */
+  df_analyze ();
+  FOR_EACH_BB_FN (m_bb, cfun)
+    {
+      m_optimize_for_speed_p = optimize_bb_for_speed_p (m_bb);
+      m_end_of_bb = m_point;
+      start_sequence ();
+
+      /* Walk the block backwards, so m_point grows as we move towards
+	 the start of the block.  */
+      rtx_insn *insn, *prev;
+      FOR_BB_INSNS_REVERSE_SAFE (m_bb, insn, prev)
+	{
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  /* The current m_point represents the end of the sequence if
+	     INSN is the last instruction in the sequence, otherwise it
+	     represents the gap between INSN and the next instruction.
+	     m_point + 1 represents INSN itself.
+
+	     Instructions can be added to m_point by inserting them
+	     after INSN.  They can be added to m_point + 1 by inserting
+	     them before INSN.  */
+	  m_points.safe_push (insn);
+	  m_point += 1;
+
+	  /* Allocate and initialize the per-insn record for INSN.
+	     The cost is computed lazily, hence UNKNOWN_COST here.  */
+	  insn_info_rec *insn_info = XOBNEW (&m_insn_obstack, insn_info_rec);
+	  insn_info->insn = insn;
+	  insn_info->point = m_point;
+	  insn_info->cost = UNKNOWN_COST;
+
+	  record_defs (insn_info);
+	  record_uses (insn_info);
+
+	  /* Set up m_point for the next instruction.  */
+	  m_point += 1;
+
+	  /* Don't combine across calls: throw away the current
+	     sequence and start a new one.  Otherwise keep optimizing
+	     INSN until no further opportunities are found;
+	     optimize_insn must leave insn_info->insn nonnull whenever
+	     it succeeds.  */
+	  if (CALL_P (insn))
+	    start_sequence ();
+	  else
+	    while (optimize_insn (insn_info))
+	      gcc_assert (insn_info->insn);
+	}
+    }
+
+  /* If an instruction changes the cfg, update the containing block
+     accordingly.  For jumps, rediscover label uses and handle the
+     case in which the jump has become unconditional.  Otherwise the
+     instruction now ends its block (NOTE(review): presumably a trap
+     or noreturn-style insn — confirm), so split after it, remove the
+     fallthrough edge and terminate the block with a barrier.  */
+  rtx_insn *insn;
+  unsigned int i;
+  FOR_EACH_VEC_ELT (m_cfg_altering_insns, i, insn)
+    if (JUMP_P (insn))
+      {
+	mark_jump_label (PATTERN (insn), insn, 0);
+	update_cfg_for_uncondjump (insn);
+      }
+    else
+      {
+	remove_edge (split_block (BLOCK_FOR_INSN (insn), insn));
+	emit_barrier_after_bb (BLOCK_FOR_INSN (insn));
+      }
+
+  /* Propagate the above block-local cfg changes to the rest of the cfg.  */
+  if (!m_cfg_altering_insns.is_empty ())
+    {
+      /* Dominance information is no longer reliable after cfg edits.  */
+      if (dom_info_available_p (CDI_DOMINATORS))
+	free_dominance_info (CDI_DOMINATORS);
+      timevar_push (TV_JUMP);
+      rebuild_jump_labels (get_insns ());
+      cleanup_cfg (0);
+      timevar_pop (TV_JUMP);
+    }
+}
+
+/* Pass metadata for combine2, shared by both instances of the pass
+   (see make_pass_combine2_before/make_pass_combine2_after below).  */
+
+const pass_data pass_data_combine2 =
+{
+  RTL_PASS, /* type */
+  "combine2", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_COMBINE2, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_df_finish, /* todo_flags_finish */
+};
+
+/* The combine2 pass.  Two instances exist, distinguished by FLAG,
+   which selects the bit of --param=run-combine that enables the
+   instance.  */
+
+class pass_combine2 : public rtl_opt_pass
+{
+public:
+  pass_combine2 (gcc::context *ctxt, int flag)
+    : rtl_opt_pass (pass_data_combine2, ctxt), m_flag (flag)
+  {}
+
+  /* Run only when optimizing, when this instance's bit is set in
+     --param=run-combine, and when the target doesn't use cc0 (the
+     pass is disabled for cc0 targets).  */
+  bool
+  gate (function *) OVERRIDE
+  {
+    return optimize && (param_run_combine & m_flag) != 0 && !HAVE_cc0;
+  }
+
+  unsigned int
+  execute (function *f) OVERRIDE
+  {
+    /* Construct a fresh pass object for F and run it.  */
+    combine2 (f).execute ();
+    return 0;
+  }
+
+private:
+  /* Bit of --param=run-combine that enables this instance.  */
+  unsigned int m_flag;
+}; // class pass_combine2
+
+} // anon namespace
+
+/* Create the instance of the pass that is enabled by bit 1 of
+   --param=run-combine.  */
+
+rtl_opt_pass *
+make_pass_combine2_before (gcc::context *ctxt)
+{
+  const int enable_bit = 1;
+  return new pass_combine2 (ctxt, enable_bit);
+}
+
+/* Create the instance of the pass that is enabled by bit 4 of
+   --param=run-combine.  */
+
+rtl_opt_pass *
+make_pass_combine2_after (gcc::context *ctxt)
+{
+  const int enable_bit = 4;
+  return new pass_combine2 (ctxt, enable_bit);
+}
More information about the Gcc-patches mailing list is available on the list's information page.