This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
stack adjustment pass
- To: gcc-patches at gcc dot gnu dot org
- Subject: stack adjustment pass
- From: Richard Henderson <rth at twiddle dot net>
- Date: Tue, 14 Mar 2000 06:53:00 -0800
This is something I've been threatening to do for ages. It's a
pass that knows about stack adjustments and how stores, pushes,
and calls relate to them.
It does clean up some of the ugliness we see generated by the
preferred_stack_boundary code, but it was originally envisioned
to aid x86 floating point argument passing. One cannot do pushes
from the floating point registers, which forces us to implement
push via a subtract and store. Not implementing push for the
floating point modes is not an option because expand_call will
fall over.
Consider the test case
double x[4];
void foo()
{
f1 (x[0]+x[1], x[1]+x[2]);
f2 (1, x[0]+x[1], x[1]+x[2]);
f3 (x[0]+x[1], x[1]+x[2], 1);
}
OLD CODE NEW_CODE
---------------- ----------------
fldl x+8 fldl x+8
pushl %ebp pushl %ebp
movl %esp, %ebp movl %esp, %ebp
fld %st(0) fld %st(0)
faddl x+16 faddl x+16
subl $8, %esp | subl $24, %esp
subl $8, %esp | fstpl 8(%esp)
fstpl (%esp) <
subl $8, %esp <
fldl x fldl x
faddp %st,%st(1) faddp %st,%st(1)
fstpl (%esp) fstpl (%esp)
call f1 call f1
fldl x+8 fldl x+8
addl $4, %esp | subl $12, %esp
subl $8, %esp <
fld %st(0) fld %st(0)
faddl x+16 faddl x+16
fstpl (%esp) | fstpl 8(%esp)
subl $8, %esp <
fldl x fldl x
faddp %st,%st(1) faddp %st,%st(1)
fstpl (%esp) fstpl (%esp)
pushl $1 pushl $1
call f2 call f2
fldl x+8 fldl x+8
addl $20, %esp addl $20, %esp
pushl $1 pushl $1
fld %st(0) fld %st(0)
subl $8, %esp | subl $16, %esp
faddl x+16 faddl x+16
fstpl (%esp) | fstpl 8(%esp)
subl $8, %esp <
fldl x fldl x
faddp %st,%st(1) faddp %st,%st(1)
fstpl (%esp) fstpl (%esp)
call f3 call f3
movl %ebp, %esp movl %ebp, %esp
popl %ebp popl %ebp
ret ret
x86 bootstrap succeeds. The resulting cc1 binary is about 4k
smaller with the change.
r~
* regmove.c (combine_stack_adjustments): New.
(stack_memref_p, single_set_for_csa): New.
(free_csa_memlist, record_one_stack_memref): New.
(try_apply_stack_adjustment): New.
(combine_stack_adjustments_for_block): New.
* rtl.h (combine_stack_adjustments): Declare.
* toplev.c (rest_of_compilation): Call it.
* i386.md: Revert 2000-01-16 change.
Index: regmove.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/regmove.c,v
retrieving revision 1.81
diff -u -p -d -r1.81 regmove.c
--- regmove.c 2000/03/05 02:02:09 1.81
+++ regmove.c 2000/03/14 13:58:24
@@ -2060,3 +2060,336 @@ stable_and_no_regs_but_for_p (x, src, ds
return ! rtx_unstable_p (x);
}
}
+
+/* Track stack adjustments and stack memory references. Attempt to
+ reduce the number of stack adjustments by back-propagating across
+ the memory references.
+
+ This is intended primarily for use with targets that do not define
+ ACCUMULATE_OUTGOING_ARGS. It is of significantly more value to
+ targets that define PREFERRED_STACK_BOUNDARY more aligned than
+ STACK_BOUNDARY (e.g. x86), or if not all registers can be pushed
+ (e.g. x86 fp regs) which would ordinarily have to be implemented
+ as a sub/mov pair due to restrictions in calls.c.
+
+ Propagation stops when any of the insns that need adjusting are
+ (a) no longer valid because we've exceeded their range, (b) a
+ non-trivial push instruction, or (c) a call instruction.
+
+ Restriction B is based on the assumption that push instructions
+ are smaller or faster. If a port really wants to remove all
+ pushes, it should have defined ACCUMULATE_OUTGOING_ARGS. The
+ one exception that is made is for an add immediately followed
+ by a push. */
+
+/* This structure records stack memory references between stack adjusting
+ instructions. */
+
+struct csa_memlist
+{
+  /* Constant offset of the reference from the stack pointer;
+     a plain (mem (sp)) is recorded as offset 0.  */
+  HOST_WIDE_INT sp_offset;
+  /* The insn containing the reference, and the MEM rtx itself.  */
+  rtx insn, mem;
+  /* Previously recorded reference; the list is built LIFO by
+     record_one_stack_memref.  */
+  struct csa_memlist *next;
+};
+
+static int stack_memref_p PARAMS ((rtx));
+static rtx single_set_for_csa PARAMS ((rtx));
+static void free_csa_memlist PARAMS ((struct csa_memlist *));
+static struct csa_memlist *record_one_stack_memref
+ PARAMS ((rtx, rtx, struct csa_memlist *));
+static int try_apply_stack_adjustment
+ PARAMS ((rtx, struct csa_memlist *, HOST_WIDE_INT, HOST_WIDE_INT));
+static void combine_stack_adjustments_for_block PARAMS ((basic_block));
+
+
+/* Main entry point for stack adjustment combination. */
+
+void
+combine_stack_adjustments ()
+{
+  int b;
+
+  /* Process each basic block independently; no information is
+     carried across block boundaries.  */
+  b = 0;
+  while (b < n_basic_blocks)
+    {
+      combine_stack_adjustments_for_block (BASIC_BLOCK (b));
+      ++b;
+    }
+}
+
+/* Recognize a MEM of the form (sp) or (plus sp const). */
+
+static int
+stack_memref_p (mem)
+     rtx mem;
+{
+  /* Accept (mem (sp)) or (mem (plus sp (const_int N))).  Note the
+     constant is operand 1 of the PLUS; the earlier version of this
+     test re-examined operand 0 (just required to be the stack
+     pointer), so the PLUS form could never match.  */
+  return (GET_CODE (mem) == MEM
+	  && (XEXP (mem, 0) == stack_pointer_rtx
+	      || (GET_CODE (XEXP (mem, 0)) == PLUS
+		  && XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx
+		  && GET_CODE (XEXP (XEXP (mem, 0), 1)) == CONST_INT)));
+}
+
+/* Recognize either normal single_set or the hack in i386.md for
+ tying fp and sp adjustments. */
+
+static rtx
+single_set_for_csa (insn)
+     rtx insn;
+{
+  rtx set, pat;
+  int i, len;
+
+  /* The common case: a genuine single_set.  */
+  set = single_set (insn);
+  if (set != NULL_RTX)
+    return set;
+
+  /* Otherwise look for the i386 hack: a PARALLEL whose first element
+     is a SET and whose remaining elements are CLOBBERs, USEs, or
+     no-op SETs.  */
+  if (GET_CODE (insn) != INSN)
+    return NULL_RTX;
+  pat = PATTERN (insn);
+  if (GET_CODE (pat) != PARALLEL)
+    return NULL_RTX;
+  if (GET_CODE (XVECEXP (pat, 0, 0)) != SET)
+    return NULL_RTX;
+
+  len = XVECLEN (pat, 0);
+  for (i = 1; i < len; ++i)
+    {
+      rtx elt = XVECEXP (pat, 0, i);
+
+      /* The special case is allowing a no-op set.  */
+      if (GET_CODE (elt) == SET
+	  && SET_SRC (elt) == SET_DEST (elt))
+	continue;
+
+      if (GET_CODE (elt) != CLOBBER && GET_CODE (elt) != USE)
+	return NULL_RTX;
+    }
+
+  return XVECEXP (pat, 0, 0);
+}
+
+/* Free the list of csa_memlist nodes. */
+
+static void
+free_csa_memlist (memlist)
+     struct csa_memlist *memlist;
+{
+  /* Walk the chain, capturing each successor before its node dies.  */
+  while (memlist != NULL)
+    {
+      struct csa_memlist *rest = memlist->next;
+      free (memlist);
+      memlist = rest;
+    }
+}
+
+/* Create a new csa_memlist node from the given memory reference.
+ It is already known that the memory is stack_memref_p. */
+
+static struct csa_memlist *
+record_one_stack_memref (insn, mem, next_memlist)
+     rtx insn, mem;
+     struct csa_memlist *next_memlist;
+{
+  struct csa_memlist *entry;
+
+  entry = (struct csa_memlist *) xmalloc (sizeof (*entry));
+
+  /* The address is either (sp) itself, offset zero, or
+     (plus sp (const_int N)); stack_memref_p guaranteed one of the
+     two forms, so operand 1 of the PLUS is the constant.  */
+  entry->sp_offset = (XEXP (mem, 0) == stack_pointer_rtx
+		      ? 0
+		      : INTVAL (XEXP (XEXP (mem, 0), 1)));
+
+  entry->insn = insn;
+  entry->mem = mem;
+  entry->next = next_memlist;
+
+  return entry;
+}
+
+/* Attempt to apply ADJUST to the stack adjusting insn INSN, as well
+ as each of the memories in MEMLIST. Return true on success. */
+
+static int
+try_apply_stack_adjustment (insn, memlist, new_adjust, delta)
+     rtx insn;
+     struct csa_memlist *memlist;
+     HOST_WIDE_INT new_adjust, delta;
+{
+  struct csa_memlist *ml;
+  rtx set;
+
+  /* We know INSN matches single_set_for_csa, because that's what we
+     recognized earlier.  However, if INSN is not single_set, it is
+     doing double duty as a barrier for frame pointer memory accesses,
+     which we are not recording.  Therefore, an adjust insn that is not
+     single_set may not have a positive delta applied.  */
+
+  if (delta > 0 && ! single_set (insn))
+    return 0;
+  set = single_set_for_csa (insn);
+  /* Queue the new adjustment constant.  The final argument 1 means
+     the change is only queued; nothing is committed until
+     apply_change_group below, so all changes succeed or fail
+     together.  */
+  validate_change (insn, &XEXP (SET_SRC (set), 1), GEN_INT (new_adjust), 1);
+
+  for (ml = memlist; ml ; ml = ml->next)
+    {
+      HOST_WIDE_INT c = ml->sp_offset - delta;
+
+      /* Don't reference memory below the stack pointer.  */
+      if (c < 0)
+	{
+	  /* Drop all queued (uncommitted) changes and fail.  */
+	  cancel_changes (0);
+	  return 0;
+	}
+
+      /* Queue the rewritten sp-relative address for this memref.  */
+      validate_change (ml->insn, &XEXP (ml->mem, 0),
+		       plus_constant (stack_pointer_rtx, c), 1);
+    }
+
+  if (apply_change_group ())
+    {
+      /* Succeeded.  Update our knowledge of the memory references.  */
+      for (ml = memlist; ml ; ml = ml->next)
+	ml->sp_offset -= delta;
+
+      return 1;
+    }
+  else
+    return 0;
+}
+
+/* Subroutine of combine_stack_adjustments, called for each basic block. */
+
+static void
+combine_stack_adjustments_for_block (bb)
+     basic_block bb;
+{
+  /* Net size of the adjustment made by LAST_SP_SET, if any.  */
+  HOST_WIDE_INT last_sp_adjust = 0;
+  /* Most recent stack-adjusting insn not yet merged away.  */
+  rtx last_sp_set = NULL_RTX;
+  /* Stack memory references seen since LAST_SP_SET.  */
+  struct csa_memlist *memlist = NULL;
+  /* Insn whose deletion is deferred until after the bb->end test, so
+     that deleting the block's last insn can repair bb->end instead of
+     walking off the end of the block.  */
+  rtx pending_delete;
+  rtx insn, next;
+
+  for (insn = bb->head; ; insn = next)
+    {
+      rtx set;
+
+      pending_delete = NULL_RTX;
+      next = NEXT_INSN (insn);
+
+      /* Only real instructions carry patterns we can examine.  */
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	goto processed;
+
+      set = single_set_for_csa (insn);
+      if (set)
+	{
+	  rtx dest = SET_DEST (set);
+	  rtx src = SET_SRC (set);
+
+	  /* Find constant additions to the stack pointer.  */
+	  if (dest == stack_pointer_rtx
+	      && GET_CODE (src) == PLUS
+	      && XEXP (src, 0) == stack_pointer_rtx
+	      && GET_CODE (XEXP (src, 1)) == CONST_INT)
+	    {
+	      HOST_WIDE_INT this_adjust = INTVAL (XEXP (src, 1));
+
+	      /* If we've not seen an adjustment previously, record
+		 it now and continue.  */
+	      if (! last_sp_set)
+		{
+		  last_sp_set = insn;
+		  last_sp_adjust = this_adjust;
+		  goto processed;
+		}
+
+	      /* If not all recorded memrefs can be adjusted, or the
+		 adjustment is now too large for a constant addition,
+		 we cannot merge the two stack adjustments.  Restart
+		 the collection from this insn.  */
+	      if (! try_apply_stack_adjustment (last_sp_set, memlist,
+						last_sp_adjust + this_adjust,
+						this_adjust))
+		{
+		  free_csa_memlist (memlist);
+		  memlist = NULL;
+		  last_sp_set = insn;
+		  last_sp_adjust = this_adjust;
+		  goto processed;
+		}
+
+	      /* It worked!  The old adjustment now carries the
+		 combined total; this insn is redundant.  */
+	      pending_delete = insn;
+	      last_sp_adjust += this_adjust;
+
+	      /* If, by some accident, the adjustments cancel out,
+		 delete both insns and start from scratch.  */
+	      if (last_sp_adjust == 0)
+		{
+		  if (last_sp_set == bb->head)
+		    bb->head = NEXT_INSN (last_sp_set);
+		  flow_delete_insn (last_sp_set);
+
+		  free_csa_memlist (memlist);
+		  memlist = NULL;
+		  last_sp_set = NULL_RTX;
+		}
+
+	      goto processed;
+	    }
+
+	  /* Find loads from stack memory and record them.  */
+	  if (last_sp_set && stack_memref_p (src))
+	    {
+	      memlist = record_one_stack_memref (insn, src, memlist);
+	      goto processed;
+	    }
+
+	  /* Find stores to stack memory and record them.  */
+	  if (last_sp_set && stack_memref_p (dest))
+	    {
+	      memlist = record_one_stack_memref (insn, dest, memlist);
+	      goto processed;
+	    }
+
+	  /* Find a predecrement of exactly the previous adjustment and
+	     turn it into a direct store.  Obviously we can't do this if
+	     there were any intervening uses of the stack pointer.  */
+	  if (memlist == NULL
+	      && last_sp_adjust == GET_MODE_SIZE (GET_MODE (dest))
+	      && GET_CODE (dest) == MEM
+	      && GET_CODE (XEXP (dest, 0)) == PRE_DEC
+	      && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx
+	      && validate_change (insn, &SET_DEST (set),
+				  change_address (dest, VOIDmode,
+						  stack_pointer_rtx), 0))
+	    {
+	      /* The push now performs the whole adjustment, so the
+		 explicit subtract is dead.  */
+	      if (last_sp_set == bb->head)
+		bb->head = NEXT_INSN (last_sp_set);
+	      flow_delete_insn (last_sp_set);
+
+	      free_csa_memlist (memlist);
+	      memlist = NULL;
+	      last_sp_set = NULL_RTX;
+	      last_sp_adjust = 0;
+	      goto processed;
+	    }
+	}
+
+      /* Otherwise, we were not able to process the instruction.
+	 Do not continue collecting data across such a one.  */
+      if (last_sp_set
+	  && (GET_CODE (insn) == CALL_INSN
+	      || reg_mentioned_p (stack_pointer_rtx, PATTERN (insn))))
+	{
+	  free_csa_memlist (memlist);
+	  memlist = NULL;
+	  last_sp_set = NULL_RTX;
+	  last_sp_adjust = 0;
+	}
+
+    processed:
+      if (insn == bb->end)
+	break;
+
+      if (pending_delete)
+	flow_delete_insn (pending_delete);
+    }
+
+  /* Release any memrefs still recorded when the block ended; the
+     original version leaked these.  free_csa_memlist handles NULL.  */
+  free_csa_memlist (memlist);
+
+  if (pending_delete)
+    {
+      /* The deleted insn was bb->end itself; repair the block end.  */
+      bb->end = PREV_INSN (pending_delete);
+      flow_delete_insn (pending_delete);
+    }
+}
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/rtl.h,v
retrieving revision 1.176
diff -u -p -d -r1.176 rtl.h
--- rtl.h 2000/03/10 08:16:55 1.176
+++ rtl.h 2000/03/14 13:58:24
@@ -1580,6 +1580,7 @@ extern void delete_null_pointer_checks P
#ifdef BUFSIZ
extern void regmove_optimize PARAMS ((rtx, int, FILE *));
#endif
+extern void combine_stack_adjustments PARAMS ((void));
/* In reorg.c */
#ifdef BUFSIZ
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/toplev.c,v
retrieving revision 1.304
diff -u -p -d -r1.304 toplev.c
--- toplev.c 2000/03/10 19:50:09 1.304
+++ toplev.c 2000/03/14 13:58:25
@@ -3547,13 +3547,17 @@ rest_of_compilation (decl)
if (optimize)
{
- TIMEVAR
- (flow2_time,
- {
- cleanup_cfg (insns);
- life_analysis (insns, max_reg_num (), rtl_dump_file, 1);
- });
+ TIMEVAR (flow2_time, { cleanup_cfg (insns); });
+#ifndef ACCUMULATE_OUTGOING_ARGS
+ TIMEVAR (flow2_time, { combine_stack_adjustments (); });
+#endif
+
+ TIMEVAR (flow2_time,
+ {
+ life_analysis (insns, max_reg_num (), rtl_dump_file, 1);
+ });
+
if (ggc_p)
ggc_collect ();
}
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.141
diff -u -p -d -r1.141 i386.md
--- i386.md 2000/03/08 19:20:22 1.141
+++ i386.md 2000/03/14 13:58:27
@@ -9386,25 +9386,6 @@
[(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC 17))])]
"operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
-
-;; Merge two successive stack adjusts. The combiner doesn't know how
-;; to do this, and doesn't see all of them.
-;; (reg:SI 7) is %esp.
-(define_peephole2
- [(parallel[
- (set (reg:SI 7)
- (plus:SI (reg:SI 7) (match_operand:SI 0 "const_int_operand" "")))
- (clobber (reg:CC 17))])
- (parallel[
- (set (reg:SI 7)
- (plus:SI (reg:SI 7) (match_operand:SI 1 "const_int_operand" "")))
- (clobber (reg:CC 17))])]
- ""
- [(parallel[
- (set (reg:SI 7)
- (plus:SI (reg:SI 7) (match_dup 2)))
- (clobber (reg:CC 17))])]
- "operands[2] = GEN_INT (INTVAL (operands[0]) + INTVAL (operands[1]));")
;; Call-value patterns last so that the wildcard operand does not
;; disrupt insn-recog's switch tables.