This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH [mainline] use of save_world/rest_world calls in apple-ppc-darwin prologue/epilogue
- From: Fariborz Jahanian <fjahanian at apple dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Geoffrey Keating <geoffk at apple dot com>
- Date: Fri, 30 Jul 2004 17:02:08 -0700
- Subject: PATCH [mainline] use of save_world/rest_world calls in apple-ppc-darwin prologue/epilogue
This patch introduces calls to save_world/rest_world in a few
Darwin-ABI prologues/epilogues
to avoid instruction-by-instruction stores and loads of all PPC/AltiVec
non-volatile GPRs/FPRs/VRs
as well as special registers. save_world/rest_world will do all the
saves and restores, thus
saving code size. This mechanism has been in use for a few years in
Apple's version of GCC.
We would like to include this in FSF mainline to avoid future merges.
The darwin-world.asm file,
where save_world/rest_world are defined, has been in FSF mainline for
some time now.
This patch has been bootstrapped on apple-ppc-darwin. It has also been
tested with the
DejaGnu testsuite, both with -mcpu=G5 and without this option.
OK for FSF mainline?
- Thanks, Fariborz (fjahanian@apple.com)
ChangeLog:
2004-07-30 Fariborz Jahanian <fjahanian@apple.com>
* config/rs6000/altivec.md: Add new patterns for calls to
save_world/rest_world functions.
* config/rs6000/rs6000.c (struct rs6000_stack): Add new world_save_p
field.
(rs6000_stack_info): Set world_save_p field.
(rs6000_emit_prologue): Check for world_save_p and generate
pattern to call save_world for saving all non-volatile and
special registers.
(rs6000_emit_epilogue): Check for world_save_p and generate
pattern to call rest_world to restore saved registers.
* config/rs6000/rs6000.h (FIRST_SAVED_ALTIVEC_REGNO,
FIRST_SAVED_FP_REGNO, FIRST_SAVED_GP_REGNO): New macros.
Index: altivec.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/altivec.md,v
retrieving revision 1.20
diff -c -p -r1.20 altivec.md
*** altivec.md 11 May 2004 04:22:05 -0000 1.20
--- altivec.md 30 Jul 2004 17:32:37 -0000
***************
*** 295,300 ****
--- 295,354 ----
}"
[(set_attr "type" "*")])
+ (define_insn "*save_world"
+ [(match_parallel 0 "any_operand"
+ [(clobber (match_operand:SI 1 "register_operand"
"=l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (set (match_operand:V4SI 3 "any_operand" "=m")
+ (match_operand:V4SI 4 "register_operand"
"v"))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ {
+ return "bl %z2";
+ }
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+ (define_insn "*save_world_with_label_si"
+ [(match_parallel 0 "any_operand"
+ [(clobber (match_operand:SI 1 "register_operand"
"=l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (use (match_operand:SI 3 "" ""))
+ (set (match_operand:V4SI 4 "any_operand" "=m")
+ (match_operand:V4SI 5 "register_operand"
"v"))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ {
+ #if TARGET_MACHO
+ const char *picbase = machopic_function_base_name ();
+ operands[3] = gen_rtx_SYMBOL_REF (Pmode, ggc_alloc_string (picbase,
-1));
+ return \"bl %z2\\n%3:\";
+ #endif
+ }
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+ (define_insn "*return_and_restore_world"
+ [(match_parallel 0 "any_operand"
+ [(return)
+ (use (match_operand:SI 1 "register_operand" "l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (clobber (match_operand:SI 3 "gpc_reg_operand"
"=r"))
+ (set (match_operand:V4SI 4 "register_operand" "=v")
+ (match_operand:V4SI 5 "any_operand" ""))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ {
+ return "b %z2";
+ })
+
+ (define_insn "*restore_world"
+ [(match_parallel 0 "any_operand"
+ [(clobber (match_operand:SI 1 "register_operand"
"=l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (clobber (match_operand:SI 3 "gpc_reg_operand"
"=r"))
+ (set (match_operand:V4SI 4 "register_operand" "=v")
+ (match_operand:V4SI 5 "any_operand" ""))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ "bl %z2")
+
;; Simple binary operations.
(define_insn "addv16qi3"
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.673
diff -c -p -r1.673 rs6000.c
*** rs6000.c 28 Jul 2004 12:13:13 -0000 1.673
--- rs6000.c 30 Jul 2004 17:32:55 -0000
*************** typedef struct rs6000_stack {
*** 79,84 ****
--- 79,86 ----
int toc_save_p; /* true if the TOC needs to be saved */
int push_p; /* true if we need to allocate stack space */
int calls_p; /* true if the function makes any calls */
+ int world_save_p; /* true if we're saving *everything*:
+ r13-r31, cr, f14-f31, vrsave, v20-v31 */
enum rs6000_abi abi; /* which ABI to use */
int gp_save_offset; /* offset to save GP regs from initial SP */
int fp_save_offset; /* offset to save FP regs from initial SP */
*************** rs6000_stack_info (void)
*** 11478,11487 ****
else
info_ptr->vrsave_mask = 0;
! if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
! info_ptr->vrsave_size = 4;
else
! info_ptr->vrsave_size = 0;
/* Calculate the offsets. */
switch (DEFAULT_ABI)
--- 11483,11497 ----
else
info_ptr->vrsave_mask = 0;
! if ((TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
! /* Even if we're not touching VRsave, make sure there's room on the
! stack for it, if it looks like we're calling SAVE_WORLD, which
! will attempt to save it. */
! || ((DEFAULT_ABI == ABI_DARWIN)
! && (info_ptr->first_altivec_reg_save ==
FIRST_SAVED_ALTIVEC_REGNO)))
! info_ptr->vrsave_size = 4;
else
! info_ptr->vrsave_size = 0;
/* Calculate the offsets. */
switch (DEFAULT_ABI)
*************** rs6000_stack_info (void)
*** 11622,11627 ****
--- 11632,11674 ----
else
info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
+ /* For a very restricted set of circumstances, we can cut down the
+ size of prologs/epilogs by calling our own save/restore-the-world
+ routines. */
+ info_ptr->world_save_p =
+ (DEFAULT_ABI == ABI_DARWIN)
+ && ! (current_function_calls_setjmp && flag_exceptions)
+ && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
+ && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
+ && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
+ && info_ptr->cr_save_p;
+
+ /* This will not work in conjunction with sibcalls. Make sure there
+ are none. (This check is expensive, but seldom executed.) */
+ if ( info_ptr->world_save_p )
+ {
+ rtx insn;
+ for ( insn = get_last_insn_anywhere (); insn; insn = PREV_INSN
(insn))
+ if ( GET_CODE (insn) == CALL_INSN
+ && SIBLING_CALL_P (insn))
+ {
+ info_ptr->world_save_p = 0;
+ break;
+ }
+ }
+
+ /* "Save" the VRsave register too if we're saving the world. */
+ if (info_ptr->world_save_p && info_ptr->vrsave_mask == 0)
+ info_ptr->vrsave_mask = compute_vrsave_mask ();
+
+ /* Because the Darwin register save/restore routines only handle
+ F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
+ check and abort if there's something wrong. */
+ if ((DEFAULT_ABI == ABI_DARWIN)
+ && (info_ptr->first_fp_reg_save < FIRST_SAVED_FP_REGNO
+ || info_ptr->first_altivec_reg_save <
FIRST_SAVED_ALTIVEC_REGNO))
+ abort ();
+
/* Zero offsets if we're not saving those registers. */
if (info_ptr->fp_size == 0)
info_ptr->fp_save_offset = 0;
*************** rs6000_emit_prologue (void)
*** 12597,12602 ****
--- 12644,12771 ----
rs6000_emit_stack_tie ();
}
+ /* Handle world saves specially here. */
+ if (info->world_save_p)
+ {
+ int i, j, sz;
+ rtx treg;
+ rtvec p;
+
+ /* save_world expects lr in r0. */
+ if (info->lr_save_p)
+ {
+ insn = emit_move_insn (gen_rtx_REG (Pmode, 0),
+ gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
+ assumptions about the offsets of various bits of the stack
+ frame. Abort if things aren't what they should be. */
+ if (info->gp_save_offset != -220
+ || info->fp_save_offset != -144
+ || info->lr_save_offset != 8
+ || info->cr_save_offset != 4
+ || !info->push_p
+ || !info->lr_save_p
+ || (current_function_calls_eh_return && info->ehrd_offset
!= -432)
+ || (info->vrsave_save_offset != -224
+ || info->altivec_save_offset != (-224 -16 -192)))
+ abort ();
+
+ treg = gen_rtx_REG (SImode, 11);
+ emit_move_insn (treg, GEN_INT (-info->total_size));
+
+ /* SAVE_WORLD takes the caller's LR in R0 and the frame size
+ in R11. It also clobbers R12, so beware! */
+
+ /* Preserve CR2 for save_world prologues */
+ sz = 6;
+ sz += 32 - info->first_gp_reg_save;
+ sz += 64 - info->first_fp_reg_save;
+ sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
+ p = rtvec_alloc (sz);
+ j = 0;
+ RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (Pmode,
+
LINK_REGISTER_REGNUM));
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+
"*save_world"));
+ /* We do floats first so that the instruction pattern matches
+ properly. */
+ for (i = 0; i < 64 - info->first_fp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + sp_offset + 8 * i));
+ rtx mem = gen_rtx_MEM (DFmode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ for (i = 0; info->first_altivec_reg_save + i <=
LAST_ALTIVEC_REGNO; i++)
+ {
+ rtx reg = gen_rtx_REG (V4SImode,
info->first_altivec_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->altivec_save_offset
+ + sp_offset + 16 * i));
+ rtx mem = gen_rtx_MEM (V4SImode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save +
i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset + reg_size *
i));
+ rtx mem = gen_rtx_MEM (reg_mode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+
+ {
+ /* CR register traditionally saved as CR2. */
+ rtx reg = gen_rtx_REG (reg_mode, CR2_REGNO);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->cr_save_offset
+ + sp_offset));
+ rtx mem = gen_rtx_MEM (reg_mode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ /* Prevent any attempt to delete the setting of r0 and treg! */
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode,
0));
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, treg);
+ RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, sp_reg_rtx);
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+
+ /* The goto below skips over this, so replicate here. */
+ if (current_function_calls_eh_return)
+ {
+ unsigned int i;
+ for (i = 0; ; ++i)
+ {
+ unsigned int regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+ emit_frame_save (frame_reg_rtx, frame_ptr_rtx,
reg_mode, regno,
+ info->ehrd_offset + sp_offset
+ + reg_size * (int) i,
+ info->total_size);
+ }
+ }
+ goto world_saved;
+ }
+
/* Save AltiVec registers if needed. */
if (TARGET_ALTIVEC_ABI && info->altivec_size != 0)
{
*************** rs6000_emit_prologue (void)
*** 12912,12917 ****
--- 13081,13087 ----
&& !(DEFAULT_ABI == ABI_V4 || current_function_calls_eh_return))
rs6000_emit_allocate_stack (info->total_size, FALSE);
+ world_saved:
/* Set frame pointer, if needed. */
if (frame_pointer_needed)
{
*************** rs6000_emit_epilogue (int sibcall)
*** 13077,13082 ****
--- 13247,13323 ----
|| rs6000_cpu == PROCESSOR_PPC750
|| optimize_size);
+ if (info->world_save_p)
+ {
+ int i, j;
+ char rname[30];
+ const char *alloc_rname;
+ rtvec p;
+
+ /* eh_rest_world_r10 will return to the location saved in the LR
+ stack slot (which is not likely to be our caller.)
+ Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7,
R8.
+ rest_world is similar, except any R10 parameter is ignored.
+ The exception-handling stuff that was here in 2.95 is no
+ longer necessary. */
+
+ p = rtvec_alloc (9
+ + LAST_ALTIVEC_REGNO + 1 -
info->first_altivec_reg_save
+ + 63 + 1 - info->first_fp_reg_save);
+
+ strcpy (rname, (current_function_calls_eh_return) ?
+ "*eh_rest_world_r10" : "*rest_world");
+ alloc_rname = ggc_strdup (rname);
+
+ j = 0;
+ RTVEC_ELT (p, j++) = gen_rtx_RETURN (VOIDmode);
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode,
+
LINK_REGISTER_REGNUM));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode,
alloc_rname));
+ /* The instruction pattern requires a clobber here;
+ it is shared with the restVEC helper. */
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
+
+ for (i = 0; info->first_altivec_reg_save + i <=
LAST_ALTIVEC_REGNO; i++)
+ {
+ rtx reg = gen_rtx_REG (V4SImode,
info->first_altivec_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->altivec_save_offset
+ + sp_offset + 16 * i));
+ rtx mem = gen_rtx_MEM (V4SImode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ for (i = 0; info->first_fp_reg_save + i <= 63; i++)
+ {
+ rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + sp_offset + 8 * i));
+ rtx mem = gen_rtx_MEM (DFmode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ return;
+ }
+
/* If we have a frame pointer, a call to alloca, or a large stack
frame, restore the old stack pointer using the backchain.
Otherwise,
we know what size to update it with. */
Index: rs6000.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.h,v
retrieving revision 1.329
diff -c -p -r1.329 rs6000.h
*** rs6000.h 16 Jul 2004 23:25:47 -0000 1.329
--- rs6000.h 30 Jul 2004 17:32:57 -0000
*************** extern const char *rs6000_warn_altivec_l
*** 916,921 ****
--- 916,925 ----
#define SPE_ACC_REGNO 111
#define SPEFSCR_REGNO 112
+ #define FIRST_SAVED_ALTIVEC_REGNO 97
+ #define FIRST_SAVED_FP_REGNO (14+32)
+ #define FIRST_SAVED_GP_REGNO 13
+
/* List the order in which to allocate registers. Each register must
be
listed once, even those in FIXED_REGISTERS.