This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Use MOVE_BARRIER instead of TRUE_BARRIER for jumps.
- From: "Alexander Monakov" <monoid at ispras dot ru>
- To: "gcc-patches.gcc.gnu.org" <gcc-patches at gcc dot gnu dot org>
- Cc: "Maxim Kuvyrkov" <maxim at codesourcery dot com>, "Jim Wilson" <wilson at specifix dot com>, "Vladimir N. Makarov" <vmakarov at redhat dot com>
- Date: Tue, 30 Oct 2007 18:32:07 +0300
- Subject: [PATCH] Use MOVE_BARRIER instead of TRUE_BARRIER for jumps.
Hi.
This patch "demotes" reg_pending_barrier from TRUE_BARRIER to MOVE_BARRIER
for JUMP_P insns followed by a BARRIER, so that later dependencies between
insns and such a jump will be marked as anti instead of true. This allows
some insns to be scheduled on the same processor cycle as the jump, possibly
improving performance.
The current behaviour was introduced by this patch:
http://gcc.gnu.org/ml/gcc-patches/2003-04/msg00410.html , fixing a
reported wrong-code bug on an architecture where scheduling a set of a
register and a return using that register was prohibited. In the original
report and on ia64 function return is a (parallel (return; use REG)), so
we need to make sure that true dependencies are created between setters of
REG and this return insn. To achieve that, we can make "else" part of "if
(reg_pending_barrier)" unconditional. Finally, ia64 back-end might be
taught that latency between REG setter and return is 0 (Intel's Itanium 2
docs explicitly state that branches that (implicitly) use registers see
values written on the same cycle), but I decided not to pursue this, as
this is not likely to improve anything.
Bootstrapped and regtested on ia64-linux. OK for trunk?
I would like to note that this patch sometimes increases code size on
ia64. On the testcases I was looking into, the frequent cause of this was
linked with insn priorities during the first scheduling pass: some
instructions, most notably GOT address computations, would get lower
priority (because true dependence between this insn and jump ending its
basic block was replaced with anti dependence), and not moved closer to
the prologue. As far as I understand, blocks with these insns could be
duplicated during later RTL optimizations passes, and we could also get
more nops because of ld8 bundling restrictions.
This makes me wonder whether GOT address computations (addl rXX = r1
+ <offset>; ld8 rXX = [rXX]) can be generated closer to the prologue for
ia64. I imagine a situation when loop invariant motion refuses to move
GOT address computation out of inner loop (for example, if loop body has
branches, and this operation is estimated to be executed rarely), and we
end up with relatively costly pair of insns in the loop. Itanium is
somewhat exceptional here because of huge register file (so that we can
speak about GOT loads in the prologue without fear of exceeding register
file limitations). Is there anything that can be done about this in gcc?
:ADDPATCH scheduler:
Thanks.
Alexander Monakov
* gcc/sched-deps.c (sched_analyze_insn): Use MOVE_BARRIER
instead of TRUE_BARRIER for jumps. Add register dependencies
even when reg_pending_barrier is set.
--- gcc/sched-deps.c (revision 32194)
+++ gcc/sched-deps.c (local)
@@ -1920,7 +1920,7 @@ sched_analyze_insn (struct deps *deps, r
rtx next;
next = next_nonnote_insn (insn);
if (next && BARRIER_P (next))
- reg_pending_barrier = TRUE_BARRIER;
+ reg_pending_barrier = MOVE_BARRIER;
else
{
rtx pending, pending_mem;
@@ -1984,6 +1984,90 @@ sched_analyze_insn (struct deps *deps, r
|| (NONJUMP_INSN_P (insn) && control_flow_insn_p (insn)))
reg_pending_barrier = MOVE_BARRIER;
+ /* If the current insn is conditional, we can't free any
+ of the lists. */
+ if (sched_get_condition (insn))
+ {
+ EXECUTE_IF_SET_IN_REG_SET (reg_pending_uses, 0, i, rsi)
+ {
+ struct deps_reg *reg_last = &deps->reg_last[i];
+ add_dependence_list (insn, reg_last->sets, 0, REG_DEP_TRUE);
+ add_dependence_list (insn, reg_last->clobbers, 0, REG_DEP_TRUE);
+ reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
+ reg_last->uses_length++;
+ }
+ EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i, rsi)
+ {
+ struct deps_reg *reg_last = &deps->reg_last[i];
+ add_dependence_list (insn, reg_last->sets, 0, REG_DEP_OUTPUT);
+ add_dependence_list (insn, reg_last->uses, 0, REG_DEP_ANTI);
+ reg_last->clobbers = alloc_INSN_LIST (insn, reg_last->clobbers);
+ reg_last->clobbers_length++;
+ }
+ EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i, rsi)
+ {
+ struct deps_reg *reg_last = &deps->reg_last[i];
+ add_dependence_list (insn, reg_last->sets, 0, REG_DEP_OUTPUT);
+ add_dependence_list (insn, reg_last->clobbers, 0, REG_DEP_OUTPUT);
+ add_dependence_list (insn, reg_last->uses, 0, REG_DEP_ANTI);
+ reg_last->sets = alloc_INSN_LIST (insn, reg_last->sets);
+ SET_REGNO_REG_SET (&deps->reg_conditional_sets, i);
+ }
+ }
+ else
+ {
+ EXECUTE_IF_SET_IN_REG_SET (reg_pending_uses, 0, i, rsi)
+ {
+ struct deps_reg *reg_last = &deps->reg_last[i];
+ add_dependence_list (insn, reg_last->sets, 0, REG_DEP_TRUE);
+ add_dependence_list (insn, reg_last->clobbers, 0, REG_DEP_TRUE);
+ reg_last->uses_length++;
+ reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
+ }
+ EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i, rsi)
+ {
+ struct deps_reg *reg_last = &deps->reg_last[i];
+ if (reg_last->uses_length > MAX_PENDING_LIST_LENGTH
+ || reg_last->clobbers_length > MAX_PENDING_LIST_LENGTH)
+ {
+ add_dependence_list_and_free (insn, &reg_last->sets, 0,
+ REG_DEP_OUTPUT);
+ add_dependence_list_and_free (insn, &reg_last->uses, 0,
+ REG_DEP_ANTI);
+ add_dependence_list_and_free (insn, &reg_last->clobbers, 0,
+ REG_DEP_OUTPUT);
+ reg_last->sets = alloc_INSN_LIST (insn, reg_last->sets);
+ reg_last->clobbers_length = 0;
+ reg_last->uses_length = 0;
+ }
+ else
+ {
+ add_dependence_list (insn, reg_last->sets, 0, REG_DEP_OUTPUT);
+ add_dependence_list (insn, reg_last->uses, 0, REG_DEP_ANTI);
+ }
+ reg_last->clobbers_length++;
+ reg_last->clobbers = alloc_INSN_LIST (insn, reg_last->clobbers);
+ }
+ EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i, rsi)
+ {
+ struct deps_reg *reg_last = &deps->reg_last[i];
+ add_dependence_list_and_free (insn, &reg_last->sets, 0,
+ REG_DEP_OUTPUT);
+ add_dependence_list_and_free (insn, &reg_last->clobbers, 0,
+ REG_DEP_OUTPUT);
+ add_dependence_list_and_free (insn, &reg_last->uses, 0,
+ REG_DEP_ANTI);
+ reg_last->sets = alloc_INSN_LIST (insn, reg_last->sets);
+ reg_last->uses_length = 0;
+ reg_last->clobbers_length = 0;
+ CLEAR_REGNO_REG_SET (&deps->reg_conditional_sets, i);
+ }
+ }
+
+ IOR_REG_SET (&deps->reg_last_in_use, reg_pending_uses);
+ IOR_REG_SET (&deps->reg_last_in_use, reg_pending_clobbers);
+ IOR_REG_SET (&deps->reg_last_in_use, reg_pending_sets);
+
/* Add dependencies if a scheduling barrier was found. */
if (reg_pending_barrier)
{
@@ -2032,92 +2116,7 @@ sched_analyze_insn (struct deps *deps, r
CLEAR_REG_SET (&deps->reg_conditional_sets);
reg_pending_barrier = NOT_A_BARRIER;
}
- else
- {
- /* If the current insn is conditional, we can't free any
- of the lists. */
- if (sched_get_condition (insn))
- {
- EXECUTE_IF_SET_IN_REG_SET (reg_pending_uses, 0, i, rsi)
- {
- struct deps_reg *reg_last = &deps->reg_last[i];
- add_dependence_list (insn, reg_last->sets, 0, REG_DEP_TRUE);
- add_dependence_list (insn, reg_last->clobbers, 0, REG_DEP_TRUE);
- reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
- reg_last->uses_length++;
- }
- EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i, rsi)
- {
- struct deps_reg *reg_last = &deps->reg_last[i];
- add_dependence_list (insn, reg_last->sets, 0, REG_DEP_OUTPUT);
- add_dependence_list (insn, reg_last->uses, 0, REG_DEP_ANTI);
- reg_last->clobbers = alloc_INSN_LIST (insn, reg_last->clobbers);
- reg_last->clobbers_length++;
- }
- EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i, rsi)
- {
- struct deps_reg *reg_last = &deps->reg_last[i];
- add_dependence_list (insn, reg_last->sets, 0, REG_DEP_OUTPUT);
- add_dependence_list (insn, reg_last->clobbers, 0, REG_DEP_OUTPUT);
- add_dependence_list (insn, reg_last->uses, 0, REG_DEP_ANTI);
- reg_last->sets = alloc_INSN_LIST (insn, reg_last->sets);
- SET_REGNO_REG_SET (&deps->reg_conditional_sets, i);
- }
- }
- else
- {
- EXECUTE_IF_SET_IN_REG_SET (reg_pending_uses, 0, i, rsi)
- {
- struct deps_reg *reg_last = &deps->reg_last[i];
- add_dependence_list (insn, reg_last->sets, 0, REG_DEP_TRUE);
- add_dependence_list (insn, reg_last->clobbers, 0, REG_DEP_TRUE);
- reg_last->uses_length++;
- reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
- }
- EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i, rsi)
- {
- struct deps_reg *reg_last = &deps->reg_last[i];
- if (reg_last->uses_length > MAX_PENDING_LIST_LENGTH
- || reg_last->clobbers_length > MAX_PENDING_LIST_LENGTH)
- {
- add_dependence_list_and_free (insn, &reg_last->sets, 0,
- REG_DEP_OUTPUT);
- add_dependence_list_and_free (insn, &reg_last->uses, 0,
- REG_DEP_ANTI);
- add_dependence_list_and_free (insn, &reg_last->clobbers, 0,
- REG_DEP_OUTPUT);
- reg_last->sets = alloc_INSN_LIST (insn, reg_last->sets);
- reg_last->clobbers_length = 0;
- reg_last->uses_length = 0;
- }
- else
- {
- add_dependence_list (insn, reg_last->sets, 0, REG_DEP_OUTPUT);
- add_dependence_list (insn, reg_last->uses, 0, REG_DEP_ANTI);
- }
- reg_last->clobbers_length++;
- reg_last->clobbers = alloc_INSN_LIST (insn, reg_last->clobbers);
- }
- EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i, rsi)
- {
- struct deps_reg *reg_last = &deps->reg_last[i];
- add_dependence_list_and_free (insn, &reg_last->sets, 0,
- REG_DEP_OUTPUT);
- add_dependence_list_and_free (insn, &reg_last->clobbers, 0,
- REG_DEP_OUTPUT);
- add_dependence_list_and_free (insn, &reg_last->uses, 0,
- REG_DEP_ANTI);
- reg_last->sets = alloc_INSN_LIST (insn, reg_last->sets);
- reg_last->uses_length = 0;
- reg_last->clobbers_length = 0;
- CLEAR_REGNO_REG_SET (&deps->reg_conditional_sets, i);
- }
- }
- IOR_REG_SET (&deps->reg_last_in_use, reg_pending_uses);
- IOR_REG_SET (&deps->reg_last_in_use, reg_pending_clobbers);
- IOR_REG_SET (&deps->reg_last_in_use, reg_pending_sets);
- }
CLEAR_REG_SET (reg_pending_uses);
CLEAR_REG_SET (reg_pending_clobbers);
CLEAR_REG_SET (reg_pending_sets);