This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH [mainline] use of save_world/rest_world calls in apple-ppc-darwin prologue/epilogue


This patch introduces calls to save_world/rest_world in a few Darwin-ABI prologues/epilogues
to avoid instruction-by-instruction stores and loads of all PPC/AltiVec non-volatile GPRs/FPRs/VRs
as well as special registers. save_world/rest_world do all the saves and restores, thus
saving code size. This mechanism has been in use for a few years in Apple's version of GCC.
We would like to include it in FSF mainline to avoid future merges. The darwin-world.asm file,
where save_world/rest_world are defined, has been in FSF mainline for some time now.


This patch has been bootstrapped on apple-ppc-darwin. It has also been tested with the
DejaGnu testsuite, both with -mcpu=G5 and without this option.


OK for FSF mainline?

- Thanks, Fariborz (fjahanian@apple.com)


ChangeLog:


2004-07-30 Fariborz Jahanian <fjahanian@apple.com>

        * config/rs6000/altivec.md: Add new patterns for calls to
        save_world/rest_world functions.

        * config/rs6000/rs6000.c (struct rs6000_stack): New world_save_p
        field added.
        (rs6000_stack_info): Set world_save_p field.
        (rs6000_emit_prologue): Check for world_save_p and generate
        pattern to call save_world for saving all non-volatile and
        special registers.
        (rs6000_emit_epilogue): Check for world_save_p and generate
        pattern to call rest_world to restore saved registers.

        * config/rs6000/rs6000.h: Macros FIRST_SAVED_ALTIVEC_REGNO,
        FIRST_SAVED_FP_REGNO, FIRST_SAVED_GP_REGNO defined.

Index: altivec.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/altivec.md,v
retrieving revision 1.20
diff -c -p -r1.20 altivec.md
*** altivec.md	11 May 2004 04:22:05 -0000	1.20
--- altivec.md	30 Jul 2004 17:32:37 -0000
***************
*** 295,300 ****
--- 295,354 ----
  }"
    [(set_attr "type" "*")])

+ (define_insn "*save_world"
+ [(match_parallel 0 "any_operand"
+ [(clobber (match_operand:SI 1 "register_operand" "=l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (set (match_operand:V4SI 3 "any_operand" "=m")
+ (match_operand:V4SI 4 "register_operand" "v"))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ {
+ return "bl %z2";
+ }
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+ (define_insn "*save_world_with_label_si"
+ [(match_parallel 0 "any_operand"
+ [(clobber (match_operand:SI 1 "register_operand" "=l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (use (match_operand:SI 3 "" ""))
+ (set (match_operand:V4SI 4 "any_operand" "=m")
+ (match_operand:V4SI 5 "register_operand" "v"))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ {
+ #if TARGET_MACHO
+ const char *picbase = machopic_function_base_name ();
+ operands[3] = gen_rtx_SYMBOL_REF (Pmode, ggc_alloc_string (picbase, -1));
+ return \"bl %z2\\n%3:\";
+ #endif
+ }
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+ (define_insn "*return_and_restore_world"
+ [(match_parallel 0 "any_operand"
+ [(return)
+ (use (match_operand:SI 1 "register_operand" "l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (clobber (match_operand:SI 3 "gpc_reg_operand" "=r"))
+ (set (match_operand:V4SI 4 "register_operand" "=v")
+ (match_operand:V4SI 5 "any_operand" ""))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ {
+ return "b %z2";
+ })
+
+ (define_insn "*restore_world"
+ [(match_parallel 0 "any_operand"
+ [(clobber (match_operand:SI 1 "register_operand" "=l"))
+ (use (match_operand:SI 2 "call_operand" "s"))
+ (clobber (match_operand:SI 3 "gpc_reg_operand" "=r"))
+ (set (match_operand:V4SI 4 "register_operand" "=v")
+ (match_operand:V4SI 5 "any_operand" ""))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ "bl %z2")
+
;; Simple binary operations.


  (define_insn "addv16qi3"
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.673
diff -c -p -r1.673 rs6000.c
*** rs6000.c	28 Jul 2004 12:13:13 -0000	1.673
--- rs6000.c	30 Jul 2004 17:32:55 -0000
*************** typedef struct rs6000_stack {
*** 79,84 ****
--- 79,86 ----
    int toc_save_p;		/* true if the TOC needs to be saved */
    int push_p;			/* true if we need to allocate stack space */
    int calls_p;			/* true if the function makes any calls */
+   int world_save_p;             /* true if we're saving *everything*:
+ 				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
    enum rs6000_abi abi;		/* which ABI to use */
    int gp_save_offset;		/* offset to save GP regs from initial SP */
    int fp_save_offset;		/* offset to save FP regs from initial SP */
*************** rs6000_stack_info (void)
*** 11478,11487 ****
    else
      info_ptr->vrsave_mask = 0;

!   if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
!     info_ptr->vrsave_size  = 4;
    else
!     info_ptr->vrsave_size  = 0;

    /* Calculate the offsets.  */
    switch (DEFAULT_ABI)
--- 11483,11497 ----
    else
      info_ptr->vrsave_mask = 0;

! if ((TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
! /* Even if we're not touching VRsave, make sure there's room on the
! stack for it, if it looks like we're calling SAVE_WORLD, which
! will attempt to save it. */
! || ((DEFAULT_ABI == ABI_DARWIN)
! && (info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO)))
! info_ptr->vrsave_size = 4;
else
! info_ptr->vrsave_size = 0;


    /* Calculate the offsets.  */
    switch (DEFAULT_ABI)
*************** rs6000_stack_info (void)
*** 11622,11627 ****
--- 11632,11674 ----
    else
      info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);

+ /* For a very restricted set of circumstances, we can cut down the
+ size of prologs/epilogs by calling our own save/restore-the-world
+ routines. */
+ info_ptr->world_save_p =
+ (DEFAULT_ABI == ABI_DARWIN)
+ && ! (current_function_calls_setjmp && flag_exceptions)
+ && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
+ && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
+ && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
+ && info_ptr->cr_save_p;
+
+ /* This will not work in conjunction with sibcalls. Make sure there
+ are none. (This check is expensive, but seldom executed.) */
+ if ( info_ptr->world_save_p )
+ {
+ rtx insn;
+ for ( insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
+ if ( GET_CODE (insn) == CALL_INSN
+ && SIBLING_CALL_P (insn))
+ {
+ info_ptr->world_save_p = 0;
+ break;
+ }
+ }
+
+ /* "Save" the VRsave register too if we're saving the world. */
+ if (info_ptr->world_save_p && info_ptr->vrsave_mask == 0)
+ info_ptr->vrsave_mask = compute_vrsave_mask ();
+
+ /* Because the Darwin register save/restore routines only handle
+ F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
+ check and abort if there's something wrong. */
+ if ((DEFAULT_ABI == ABI_DARWIN)
+ && (info_ptr->first_fp_reg_save < FIRST_SAVED_FP_REGNO
+ || info_ptr->first_altivec_reg_save < FIRST_SAVED_ALTIVEC_REGNO))
+ abort ();
+
/* Zero offsets if we're not saving those registers. */
if (info_ptr->fp_size == 0)
info_ptr->fp_save_offset = 0;
*************** rs6000_emit_prologue (void)
*** 12597,12602 ****
--- 12644,12771 ----
rs6000_emit_stack_tie ();
}


+ /* Handle world saves specially here. */
+ if (info->world_save_p)
+ {
+ int i, j, sz;
+ rtx treg;
+ rtvec p;
+
+ /* save_world expects lr in r0. */
+ if (info->lr_save_p)
+ {
+ insn = emit_move_insn (gen_rtx_REG (Pmode, 0),
+ gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
+ assumptions about the offsets of various bits of the stack
+ frame. Abort if things aren't what they should be. */
+ if (info->gp_save_offset != -220
+ || info->fp_save_offset != -144
+ || info->lr_save_offset != 8
+ || info->cr_save_offset != 4
+ || !info->push_p
+ || !info->lr_save_p
+ || (current_function_calls_eh_return && info->ehrd_offset != -432)
+ || (info->vrsave_save_offset != -224
+ || info->altivec_save_offset != (-224 -16 -192)))
+ abort ();
+
+ treg = gen_rtx_REG (SImode, 11);
+ emit_move_insn (treg, GEN_INT (-info->total_size));
+
+ /* SAVE_WORLD takes the caller's LR in R0 and the frame size
+ in R11. It also clobbers R12, so beware! */
+
+ /* Preserve CR2 for save_world prologues */
+ sz = 6;
+ sz += 32 - info->first_gp_reg_save;
+ sz += 64 - info->first_fp_reg_save;
+ sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
+ p = rtvec_alloc (sz);
+ j = 0;
+ RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (Pmode,
+ LINK_REGISTER_REGNUM));
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ "*save_world"));
+ /* We do floats first so that the instruction pattern matches
+ properly. */
+ for (i = 0; i < 64 - info->first_fp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + sp_offset + 8 * i));
+ rtx mem = gen_rtx_MEM (DFmode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
+ {
+ rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->altivec_save_offset
+ + sp_offset + 16 * i));
+ rtx mem = gen_rtx_MEM (V4SImode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset + reg_size * i));
+ rtx mem = gen_rtx_MEM (reg_mode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+
+ {
+ /* CR register traditionally saved as CR2. */
+ rtx reg = gen_rtx_REG (reg_mode, CR2_REGNO);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->cr_save_offset
+ + sp_offset));
+ rtx mem = gen_rtx_MEM (reg_mode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ /* Prevent any attempt to delete the setting of r0 and treg! */
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, treg);
+ RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, sp_reg_rtx);
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+
+ /* The goto below skips over this, so replicate here. */
+ if (current_function_calls_eh_return)
+ {
+ unsigned int i;
+ for (i = 0; ; ++i)
+ {
+ unsigned int regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+ emit_frame_save (frame_reg_rtx, frame_ptr_rtx, reg_mode, regno,
+ info->ehrd_offset + sp_offset
+ + reg_size * (int) i,
+ info->total_size);
+ }
+ }
+ goto world_saved;
+ }
+
/* Save AltiVec registers if needed. */
if (TARGET_ALTIVEC_ABI && info->altivec_size != 0)
{
*************** rs6000_emit_prologue (void)
*** 12912,12917 ****
--- 13081,13087 ----
&& !(DEFAULT_ABI == ABI_V4 || current_function_calls_eh_return))
rs6000_emit_allocate_stack (info->total_size, FALSE);


+ world_saved:
    /* Set frame pointer, if needed.  */
    if (frame_pointer_needed)
      {
*************** rs6000_emit_epilogue (int sibcall)
*** 13077,13082 ****
--- 13247,13323 ----
  			 || rs6000_cpu == PROCESSOR_PPC750
  			 || optimize_size);

+ if (info->world_save_p)
+ {
+ int i, j;
+ char rname[30];
+ const char *alloc_rname;
+ rtvec p;
+
+ /* eh_rest_world_r10 will return to the location saved in the LR
+ stack slot (which is not likely to be our caller.)
+ Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
+ rest_world is similar, except any R10 parameter is ignored.
+ The exception-handling stuff that was here in 2.95 is no
+ longer necessary. */
+
+ p = rtvec_alloc (9
+ + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
+ + 63 + 1 - info->first_fp_reg_save);
+
+ strcpy (rname, (current_function_calls_eh_return) ?
+ "*eh_rest_world_r10" : "*rest_world");
+ alloc_rname = ggc_strdup (rname);
+
+ j = 0;
+ RTVEC_ELT (p, j++) = gen_rtx_RETURN (VOIDmode);
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode,
+ LINK_REGISTER_REGNUM));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
+ /* The instruction pattern requires a clobber here;
+ it is shared with the restVEC helper. */
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
+
+ for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
+ {
+ rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->altivec_save_offset
+ + sp_offset + 16 * i));
+ rtx mem = gen_rtx_MEM (V4SImode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ for (i = 0; info->first_fp_reg_save + i <= 63; i++)
+ {
+ rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + sp_offset + 8 * i));
+ rtx mem = gen_rtx_MEM (DFmode, addr);
+ set_mem_alias_set (mem, rs6000_sr_alias_set);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ return;
+ }
+
/* If we have a frame pointer, a call to alloca, or a large stack
frame, restore the old stack pointer using the backchain. Otherwise,
we know what size to update it with. */
Index: rs6000.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.h,v
retrieving revision 1.329
diff -c -p -r1.329 rs6000.h
*** rs6000.h 16 Jul 2004 23:25:47 -0000 1.329
--- rs6000.h 30 Jul 2004 17:32:57 -0000
*************** extern const char *rs6000_warn_altivec_l
*** 916,921 ****
--- 916,925 ----
#define SPE_ACC_REGNO 111
#define SPEFSCR_REGNO 112


+ #define FIRST_SAVED_ALTIVEC_REGNO 97
+ #define FIRST_SAVED_FP_REGNO (14+32)
+ #define FIRST_SAVED_GP_REGNO 13
+
/* List the order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS.



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]