This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Committed] S/390: Use FPRs as GPR save slots


Hi,

With the attached patch, call-clobbered floating point registers are
used as save slots for general purpose registers in leaf functions.

Bootstrapped and regtested with various options and -march levels.

Committed to mainline.

Bye,

-Andreas-

2013-10-09  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>

	* config/s390/s390.c (struct s390_frame_layout): New field
	gpr_save_slots.
	(cfun_save_arg_fprs_p, cfun_gpr_save_slot): New macros.
	(s390_reg_clobbered_rtx, s390_regs_ever_clobbered): Change type of
	regs_ever_clobbered to char*.
	(s390_regs_ever_clobbered): Check crtl->saves_all_registers
	instead of cfun->has_nonlocal_label.  Ignore frame related restore
	INSNs.
	(s390_register_info): Enable FPR save slots.  Move/Copy some
	functionality into ...
	(s390_register_info_gprtofpr, s390_register_info_stdarg_fpr)
	(s390_register_info_stdarg_gpr, s390_optimize_register_info): New
	functions.
	(s390_frame_info): Do gpr slot allocation here now.  stdarg does
	not imply a stack frame.
	(s390_init_frame_layout): Remove variable clobbered_regs.
	(s390_update_frame_layout): Remove function.
	(s390_hard_regno_rename_ok): Call-saved regs without a save slot
	cannot be used for register renaming.
	(s390_hard_regno_scratch_ok): New function.
	(TARGET_HARD_REGNO_SCRATCH_OK): Define target hook.
	(s390_initial_elimination_offset): Change offset calculation of
	the return address pointer.
	(save_gprs): Deal with only r6 being saved from the call-saved
	regs.
	(restore_gprs): Set frame related flag.
	(s390_save_gprs_to_fprs, s390_restore_gprs_from_fprs): New
	functions.
	(s390_emit_prologue): Call s390_register_info instead of
	s390_update_frame_layout.  Call s390_save_gprs_to_fprs.
	(s390_emit_epilogue): Call s390_restore_gprs_from_fprs.
	(s390_optimize_prologue): Call s390_optimize_register_info.
	Also try to remove FPR slot save/restore INSNs.  Remove frame
	related flags from restore INSNs.

---

---
 gcc/config/s390/s390.c |  663 ++++++++++--!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 1 file changed, 141 insertions(+), 24 deletions(-), 498 modifications(!)

Index: gcc/config/s390/s390.c
===================================================================
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*************** struct GTY (()) s390_frame_layout
*** 324,329 ****
--- 324,335 ----
    int first_save_gpr_slot;
    int last_save_gpr_slot;
  
+   /* Location (FP register number) where GPRs (r0-r15) should
+      be saved to.
+       0 - does not need to be saved at all
+      -1 - stack slot  */
+   signed char gpr_save_slots[16];
+ 
    /* Number of first and last gpr to be saved, restored.  */
    int first_save_gpr;
    int first_restore_gpr;
*************** struct GTY(()) machine_function
*** 377,388 ****
  
  #define cfun_frame_layout (cfun->machine->frame_layout)
  #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
! #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot -           \
    cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
  #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
    (1 << (REGNO - FPR0_REGNUM)))
  #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
    (1 << (REGNO - FPR0_REGNUM))))
  
  /* Number of GPRs and FPRs used for argument passing.  */
  #define GP_ARG_NUM_REG 5
--- 383,399 ----
  
  #define cfun_frame_layout (cfun->machine->frame_layout)
  #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
! #define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
! 				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
! 				 : cfun_frame_layout.fpr_bitmap & 0x03))
! #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
    cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
  #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
    (1 << (REGNO - FPR0_REGNUM)))
  #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
    (1 << (REGNO - FPR0_REGNUM))))
+ #define cfun_gpr_save_slot(REGNO) \
+   cfun->machine->frame_layout.gpr_save_slots[REGNO]
  
  /* Number of GPRs and FPRs used for argument passing.  */
  #define GP_ARG_NUM_REG 5
*************** find_unused_clobbered_reg (void)
*** 7364,7370 ****
  static void
  s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
  {
!   int *regs_ever_clobbered = (int *)data;
    unsigned int i, regno;
    enum machine_mode mode = GET_MODE (setreg);
  
--- 7375,7381 ----
  static void
  s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
  {
!   char *regs_ever_clobbered = (char *)data;
    unsigned int i, regno;
    enum machine_mode mode = GET_MODE (setreg);
  
*************** s390_reg_clobbered_rtx (rtx setreg, cons
*** 7392,7404 ****
     each of those regs.  */
  
  static void
! s390_regs_ever_clobbered (int *regs_ever_clobbered)
  {
    basic_block cur_bb;
    rtx cur_insn;
    unsigned int i;
  
!   memset (regs_ever_clobbered, 0, 32 * sizeof (int));
  
    /* For non-leaf functions we have to consider all call clobbered regs to be
       clobbered.  */
--- 7403,7415 ----
     each of those regs.  */
  
  static void
! s390_regs_ever_clobbered (char regs_ever_clobbered[])
  {
    basic_block cur_bb;
    rtx cur_insn;
    unsigned int i;
  
!   memset (regs_ever_clobbered, 0, 32);
  
    /* For non-leaf functions we have to consider all call clobbered regs to be
       clobbered.  */
*************** s390_regs_ever_clobbered (int *regs_ever
*** 7425,7431 ****
       This flag is also set for the unwinding code in libgcc.
       See expand_builtin_unwind_init.  For regs_ever_live this is done by
       reload.  */
!   if (cfun->has_nonlocal_label)
      for (i = 0; i < 32; i++)
        if (!call_really_used_regs[i])
  	regs_ever_clobbered[i] = 1;
--- 7436,7442 ----
       This flag is also set for the unwinding code in libgcc.
       See expand_builtin_unwind_init.  For regs_ever_live this is done by
       reload.  */
!   if (crtl->saves_all_registers)
      for (i = 0; i < 32; i++)
        if (!call_really_used_regs[i])
  	regs_ever_clobbered[i] = 1;
*************** s390_regs_ever_clobbered (int *regs_ever
*** 7434,7443 ****
      {
        FOR_BB_INSNS (cur_bb, cur_insn)
  	{
! 	  if (INSN_P (cur_insn))
! 	    note_stores (PATTERN (cur_insn),
! 			 s390_reg_clobbered_rtx,
! 			 regs_ever_clobbered);
  	}
      }
  }
--- 7445,7482 ----
      {
        FOR_BB_INSNS (cur_bb, cur_insn)
  	{
! 	  rtx pat;
! 
! 	  if (!INSN_P (cur_insn))
! 	    continue;
! 
! 	  pat = PATTERN (cur_insn);
! 
! 	  /* Ignore GPR restore insns.  */
! 	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
! 	    {
! 	      if (GET_CODE (pat) == SET
! 		  && GENERAL_REG_P (SET_DEST (pat)))
! 		{
! 		  /* lgdr  */
! 		  if (GET_MODE (SET_SRC (pat)) == DImode
! 		      && FP_REG_P (SET_SRC (pat)))
! 		    continue;
! 
! 		  /* l / lg  */
! 		  if (GET_CODE (SET_SRC (pat)) == MEM)
! 		    continue;
! 		}
! 
! 	      /* lm / lmg */
! 	      if (GET_CODE (pat) == PARALLEL
! 		  && load_multiple_operation (pat, VOIDmode))
! 		continue;
! 	    }
! 
! 	  note_stores (pat,
! 		       s390_reg_clobbered_rtx,
! 		       regs_ever_clobbered);
  	}
      }
  }
*************** s390_frame_area (int *area_bottom, int *
*** 7487,7559 ****
    *area_bottom = b;
    *area_top = t;
  }
! 
! /* Fill cfun->machine with info about register usage of current function.
!    Return in CLOBBERED_REGS which GPRs are currently considered set.  */
  
  static void
! s390_register_info (int clobbered_regs[])
  {
    int i, j;
  
!   /* Find first and last gpr to be saved.  We trust regs_ever_live
!      data, except that we don't save and restore global registers.
! 
!      Also, all registers with special meaning to the compiler need
!      to be handled extra.  */
! 
!   s390_regs_ever_clobbered (clobbered_regs);
  
!   /* fprs 8 - 15 are call saved for 64 Bit ABI.  */
!   if (!epilogue_completed)
      {
!       cfun_frame_layout.fpr_bitmap = 0;
!       cfun_frame_layout.high_fprs = 0;
! 
!       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
! 	{
! 	  if (call_really_used_regs[i])
! 	    continue;
! 	  /* During reload we have to use the df_regs_ever_live infos
! 	     since reload is marking FPRs used as spill slots there as
! 	     live before actually making the code changes.  Without
! 	     this we fail during elimination offset verification.  */
! 	  if ((clobbered_regs[i]
! 	       || (df_regs_ever_live_p (i)
! 		   && (lra_in_progress
! 		       || reload_in_progress
! 		       || crtl->saves_all_registers)))
! 	      && !global_regs[i])
! 	    {
! 	      cfun_set_fpr_save (i);
  
! 	      if (i >= FPR8_REGNUM)
! 		cfun_frame_layout.high_fprs++;
! 	    }
  	}
      }
  
!   for (i = 0; i < 16; i++)
!     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
  
!   if (frame_pointer_needed)
!     clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
  
    if (flag_pic)
      clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
!       |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
  
    clobbered_regs[BASE_REGNUM]
      |= (cfun->machine->base_reg
!         && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
  
    clobbered_regs[RETURN_REGNUM]
      |= (!crtl->is_leaf
  	|| TARGET_TPF_PROFILING
  	|| cfun->machine->split_branches_pending_p
  	|| cfun_frame_layout.save_return_addr_p
! 	|| crtl->calls_eh_return
! 	|| cfun->stdarg);
  
    clobbered_regs[STACK_POINTER_REGNUM]
      |= (!crtl->is_leaf
--- 7526,7693 ----
    *area_bottom = b;
    *area_top = t;
  }
! /* Update gpr_save_slots in the frame layout trying to make use of
!    FPRs as GPR save slots.
!    This is a helper routine of s390_register_info.  */
  
  static void
! s390_register_info_gprtofpr ()
  {
+   int save_reg_slot = FPR0_REGNUM;
    int i, j;
  
!   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
!     return;
  
!   for (i = 15; i >= 6; i--)
      {
!       if (cfun_gpr_save_slot (i) == 0)
! 	continue;
  
!       /* Advance to the next FP register which can be used as a
! 	 GPR save slot.  */
!       while ((!call_really_used_regs[save_reg_slot]
! 	      || df_regs_ever_live_p (save_reg_slot)
! 	      || cfun_fpr_save_p (save_reg_slot))
! 	     && FP_REGNO_P (save_reg_slot))
! 	save_reg_slot++;
!       if (!FP_REGNO_P (save_reg_slot))
! 	{
! 	  /* We only want to use ldgr/lgdr if we can get rid of
! 	     stm/lm entirely.  So undo the gpr slot allocation in
! 	     case we ran out of FPR save slots.  */
! 	  for (j = 6; j <= 15; j++)
! 	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
! 	      cfun_gpr_save_slot (j) = -1;
! 	  break;
  	}
+       cfun_gpr_save_slot (i) = save_reg_slot++;
      }
+ }
  
! /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
!    stdarg.
!    This is a helper routine for s390_register_info.  */
  
! static void
! s390_register_info_stdarg_fpr ()
! {
!   int i;
!   int min_fpr;
!   int max_fpr;
! 
!   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
!      f0-f4 for 64 bit.  */
!   if (!cfun->stdarg
!       || !TARGET_HARD_FLOAT
!       || !cfun->va_list_fpr_size
!       || crtl->args.info.fprs >= FP_ARG_NUM_REG)
!     return;
! 
!   min_fpr = crtl->args.info.fprs;
!   max_fpr = min_fpr + cfun->va_list_fpr_size;
!   if (max_fpr > FP_ARG_NUM_REG)
!     max_fpr = FP_ARG_NUM_REG;
! 
!   /* The va_arg algorithm accesses the FPRs in the reg save area using
!      a constant offset from r0.  With the packed stack layout omitting
!      FPRs from the beginning would change the offset for the
!      subsequent FPRs.  */
!   if (TARGET_PACKED_STACK)
!     min_fpr = 0;
! 
!   for (i = min_fpr; i < max_fpr; i++)
!     cfun_set_fpr_save (i + FPR0_REGNUM);
! }
! 
! /* Reserve the GPR save slots for GPRs which need to be saved due to
!    stdarg.
!    This is a helper routine for s390_register_info.  */
! 
! static void
! s390_register_info_stdarg_gpr ()
! {
!   int i;
!   int min_gpr;
!   int max_gpr;
! 
!   if (!cfun->stdarg
!       || !cfun->va_list_gpr_size
!       || crtl->args.info.gprs >= GP_ARG_NUM_REG)
!     return;
! 
!   min_gpr = crtl->args.info.gprs;
!   max_gpr = min_gpr + cfun->va_list_gpr_size;
!   if (max_gpr > GP_ARG_NUM_REG)
!     max_gpr = GP_ARG_NUM_REG;
! 
!   for (i = min_gpr; i < max_gpr; i++)
!     cfun_gpr_save_slot (2 + i) = -1;
! }
! 
! /* The GPR and FPR save slots in cfun->machine->frame_layout are set
!    for registers which need to be saved in function prologue.
!    This function can be used until the insns emitted for save/restore
!    of the regs are visible in the RTL stream.  */
! 
! static void
! s390_register_info ()
! {
!   int i, j;
!   char clobbered_regs[32];
! 
!   gcc_assert (!epilogue_completed);
! 
!   if (reload_completed)
!     /* After reload we rely on our own routine to determine which
!        registers need saving.  */
!     s390_regs_ever_clobbered (clobbered_regs);
!   else
!     /* During reload we use regs_ever_live as a base since reload
!        does changes in there which we otherwise would not be aware
!        of.  */
!     for (i = 0; i < 32; i++)
!       clobbered_regs[i] = df_regs_ever_live_p (i);
! 
!   for (i = 0; i < 32; i++)
!     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
! 
!   /* Mark the call-saved FPRs which need to be saved.
!      This needs to be done before checking the special GPRs since the
!      stack pointer usage depends on whether high FPRs have to be saved
!      or not.  */
!   cfun_frame_layout.fpr_bitmap = 0;
!   cfun_frame_layout.high_fprs = 0;
!   for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
!     if (clobbered_regs[i] && !call_really_used_regs[i])
!       {
! 	cfun_set_fpr_save (i);
! 	if (i >= FPR8_REGNUM)
! 	  cfun_frame_layout.high_fprs++;
!       }
  
    if (flag_pic)
      clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
!       |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
  
    clobbered_regs[BASE_REGNUM]
      |= (cfun->machine->base_reg
! 	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
! 
!   clobbered_regs[HARD_FRAME_POINTER_REGNUM]
!     |= !!frame_pointer_needed;
  
+   /* On pre z900 machines this might take until machine dependent
+      reorg to decide.
+      save_return_addr_p will only be set on non-zarch machines so
+      there is no risk that r14 goes into an FPR instead of a stack
+      slot.  */
    clobbered_regs[RETURN_REGNUM]
      |= (!crtl->is_leaf
  	|| TARGET_TPF_PROFILING
  	|| cfun->machine->split_branches_pending_p
  	|| cfun_frame_layout.save_return_addr_p
! 	|| crtl->calls_eh_return);
  
    clobbered_regs[STACK_POINTER_REGNUM]
      |= (!crtl->is_leaf
*************** s390_register_info (int clobbered_regs[]
*** 7561,7664 ****
  	|| cfun_save_high_fprs_p
  	|| get_frame_size () > 0
  	|| (reload_completed && cfun_frame_layout.frame_size > 0)
! 	|| cfun->calls_alloca
! 	|| cfun->stdarg);
  
    for (i = 6; i < 16; i++)
!     if (df_regs_ever_live_p (i) || clobbered_regs[i])
!       break;
!   for (j = 15; j > i; j--)
!     if (df_regs_ever_live_p (j) || clobbered_regs[j])
!       break;
  
!   if (i == 16)
!     {
!       /* Nothing to save/restore.  */
!       cfun_frame_layout.first_save_gpr_slot = -1;
!       cfun_frame_layout.last_save_gpr_slot = -1;
!       cfun_frame_layout.first_save_gpr = -1;
!       cfun_frame_layout.first_restore_gpr = -1;
!       cfun_frame_layout.last_save_gpr = -1;
!       cfun_frame_layout.last_restore_gpr = -1;
!     }
!   else
!     {
!       /* Save slots for gprs from i to j.  */
!       cfun_frame_layout.first_save_gpr_slot = i;
!       cfun_frame_layout.last_save_gpr_slot = j;
  
!       for (i = cfun_frame_layout.first_save_gpr_slot;
! 	   i < cfun_frame_layout.last_save_gpr_slot + 1;
! 	   i++)
! 	if (clobbered_regs[i])
! 	  break;
  
!       for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
! 	if (clobbered_regs[j])
! 	  break;
  
!       if (i == cfun_frame_layout.last_save_gpr_slot + 1)
! 	{
! 	  /* Nothing to save/restore.  */
! 	  cfun_frame_layout.first_save_gpr = -1;
! 	  cfun_frame_layout.first_restore_gpr = -1;
! 	  cfun_frame_layout.last_save_gpr = -1;
! 	  cfun_frame_layout.last_restore_gpr = -1;
! 	}
!       else
! 	{
! 	  /* Save / Restore from gpr i to j.  */
! 	  cfun_frame_layout.first_save_gpr = i;
! 	  cfun_frame_layout.first_restore_gpr = i;
! 	  cfun_frame_layout.last_save_gpr = j;
! 	  cfun_frame_layout.last_restore_gpr = j;
! 	}
!     }
  
!   if (cfun->stdarg)
!     {
!       /* Varargs functions need to save gprs 2 to 6.  */
!       if (cfun->va_list_gpr_size
! 	  && crtl->args.info.gprs < GP_ARG_NUM_REG)
! 	{
! 	  int min_gpr = crtl->args.info.gprs;
! 	  int max_gpr = min_gpr + cfun->va_list_gpr_size;
! 	  if (max_gpr > GP_ARG_NUM_REG)
! 	    max_gpr = GP_ARG_NUM_REG;
  
! 	  if (cfun_frame_layout.first_save_gpr == -1
! 	      || cfun_frame_layout.first_save_gpr > 2 + min_gpr)
! 	    {
! 	      cfun_frame_layout.first_save_gpr = 2 + min_gpr;
! 	      cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr;
! 	    }
  
! 	  if (cfun_frame_layout.last_save_gpr == -1
! 	      || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1)
! 	    {
! 	      cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1;
! 	      cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1;
! 	    }
! 	}
  
!       /* Mark f0, f2 for 31 bit and f0-f4 for 64 bit to be saved.  */
!       if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size
! 	  && crtl->args.info.fprs < FP_ARG_NUM_REG)
! 	{
! 	  int min_fpr = crtl->args.info.fprs;
! 	  int max_fpr = min_fpr + cfun->va_list_fpr_size;
! 	  if (max_fpr > FP_ARG_NUM_REG)
! 	    max_fpr = FP_ARG_NUM_REG;
  
! 	  /* ??? This is currently required to ensure proper location
! 	     of the fpr save slots within the va_list save area.  */
! 	  if (TARGET_PACKED_STACK)
! 	    min_fpr = 0;
  
! 	  for (i = min_fpr; i < max_fpr; i++)
! 	    cfun_set_fpr_save (i + FPR0_REGNUM);
! 	}
!     }
  }
  
  /* Fill cfun->machine with info about frame of current function.  */
--- 7695,7779 ----
  	|| cfun_save_high_fprs_p
  	|| get_frame_size () > 0
  	|| (reload_completed && cfun_frame_layout.frame_size > 0)
! 	|| cfun->calls_alloca);
! 
!   memset (cfun_frame_layout.gpr_save_slots, 0, 16);
  
    for (i = 6; i < 16; i++)
!     if (clobbered_regs[i])
!       cfun_gpr_save_slot (i) = -1;
  
!   s390_register_info_stdarg_fpr ();
!   s390_register_info_gprtofpr ();
  
!   /* First find the range of GPRs to be restored.  Vararg regs don't
!      need to be restored so we do it before assigning slots to the
!      vararg GPRs.  */
!   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
!   for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
!   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
!   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  
!   /* stdarg functions might need to save GPRs 2 to 6.  This might
!      override the GPR->FPR save decision made above for r6 since
!      vararg regs must go to the stack.  */
!   s390_register_info_stdarg_gpr ();
  
!   /* Now the range of GPRs which need saving.  */
!   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
!   for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
!   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
!   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
! }
  
! /* This function is called by s390_optimize_prologue in order to get
!    rid of unnecessary GPR save/restore instructions.  The register info
!    for the GPRs is re-computed and the ranges are re-calculated.  */
  
! static void
! s390_optimize_register_info ()
! {
!   char clobbered_regs[32];
!   int i, j;
  
!   gcc_assert (epilogue_completed);
!   gcc_assert (!cfun->machine->split_branches_pending_p);
  
!   s390_regs_ever_clobbered (clobbered_regs);
  
!   for (i = 0; i < 32; i++)
!     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
  
!   /* There is still special treatment needed for cases invisible to
!      s390_regs_ever_clobbered.  */
!   clobbered_regs[RETURN_REGNUM]
!     |= (TARGET_TPF_PROFILING
! 	/* When expanding builtin_return_addr in ESA mode we do not
! 	   know whether r14 will later be needed as scratch reg when
! 	   doing branch splitting.  So the builtin always accesses the
! 	   r14 save slot and we need to stick to the save/restore
! 	   decision for r14 even if it turns out that it didn't get
! 	   clobbered.  */
! 	|| cfun_frame_layout.save_return_addr_p
! 	|| crtl->calls_eh_return);
! 
!   memset (cfun_frame_layout.gpr_save_slots, 0, 6);
! 
!   for (i = 6; i < 16; i++)
!     if (!clobbered_regs[i])
!       cfun_gpr_save_slot (i) = 0;
! 
!   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
!   for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
!   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
!   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
! 
!   s390_register_info_stdarg_gpr ();
! 
!   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
!   for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
!   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
!   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
  }
  
  /* Fill cfun->machine with info about frame of current function.  */
*************** s390_frame_info (void)
*** 7668,7673 ****
--- 7783,7804 ----
  {
    int i;
  
+   cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
+   cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
+ 
+   /* The va_arg builtin uses a constant distance of 16 *
+      UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
+      pointer.  So even if we are going to save the stack pointer in an
+      FPR we need the stack space in order to keep the offsets
+      correct.  */
+   if (cfun->stdarg && cfun_save_arg_fprs_p)
+     {
+       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
+ 
+       if (cfun_frame_layout.first_save_gpr_slot == -1)
+ 	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
+     }
+ 
    cfun_frame_layout.frame_size = get_frame_size ();
    if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
      fatal_error ("total size of local variables exceeds architecture limit");
*************** s390_frame_info (void)
*** 7738,7745 ****
        && !TARGET_TPF_PROFILING
        && cfun_frame_layout.frame_size == 0
        && !cfun_save_high_fprs_p
!       && !cfun->calls_alloca
!       && !cfun->stdarg)
      return;
  
    if (!TARGET_PACKED_STACK)
--- 7869,7875 ----
        && !TARGET_TPF_PROFILING
        && cfun_frame_layout.frame_size == 0
        && !cfun_save_high_fprs_p
!       && !cfun->calls_alloca)
      return;
  
    if (!TARGET_PACKED_STACK)
*************** s390_init_frame_layout (void)
*** 7785,7791 ****
  {
    HOST_WIDE_INT frame_size;
    int base_used;
!   int clobbered_regs[32];
  
    /* On S/390 machines, we may need to perform branch splitting, which
       will require both base and return address register.  We have no
--- 7915,7922 ----
  {
    HOST_WIDE_INT frame_size;
    int base_used;
! 
!   gcc_assert (!reload_completed);
  
    /* On S/390 machines, we may need to perform branch splitting, which
       will require both base and return address register.  We have no
*************** s390_init_frame_layout (void)
*** 7814,7820 ****
        else
  	cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
  
!       s390_register_info (clobbered_regs);
        s390_frame_info ();
      }
    while (frame_size != cfun_frame_layout.frame_size);
--- 7945,7951 ----
        else
  	cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
  
!       s390_register_info ();
        s390_frame_info ();
      }
    while (frame_size != cfun_frame_layout.frame_size);
*************** s390_optimize_nonescaping_tx (void)
*** 7971,7999 ****
    return;
  }
  
- /* Update frame layout.  Recompute actual register save data based on
-    current info and update regs_ever_live for the special registers.
-    May be called multiple times, but may never cause *more* registers
-    to be saved than s390_init_frame_layout allocated room for.  */
- 
- static void
- s390_update_frame_layout (void)
- {
-   int clobbered_regs[32];
- 
-   s390_register_info (clobbered_regs);
- 
-   df_set_regs_ever_live (BASE_REGNUM,
- 			 clobbered_regs[BASE_REGNUM] ? true : false);
-   df_set_regs_ever_live (RETURN_REGNUM,
- 			 clobbered_regs[RETURN_REGNUM] ? true : false);
-   df_set_regs_ever_live (STACK_POINTER_REGNUM,
- 			 clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
- 
-   if (cfun->machine->base_reg)
-     df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true);
- }
- 
  /* Return true if it is legal to put a value with MODE into REGNO.  */
  
  bool
--- 8102,8107 ----
*************** s390_hard_regno_rename_ok (unsigned int 
*** 8054,8059 ****
--- 8162,8192 ----
  	|| REGNO (cfun->machine->base_reg) == new_reg)
        return false;
  
+   /* Prevent regrename from using call-saved regs which haven't
+      actually been saved.  This is necessary since regrename assumes
+      the backend save/restore decisions are based on
+      df_regs_ever_live.  Since we have our own routine we have to tell
+      regrename manually about it.  */
+   if (GENERAL_REGNO_P (new_reg)
+       && !call_really_used_regs[new_reg]
+       && cfun_gpr_save_slot (new_reg) == 0)
+     return false;
+ 
+   return true;
+ }
+ 
+ /* Return nonzero if register REGNO can be used as a scratch register
+    in peephole2.  */
+ 
+ static bool
+ s390_hard_regno_scratch_ok (unsigned int regno)
+ {
+   /* See s390_hard_regno_rename_ok.  */
+   if (GENERAL_REGNO_P (regno)
+       && !call_really_used_regs[regno]
+       && cfun_gpr_save_slot (regno) == 0)
+     return false;
+ 
    return true;
  }
  
*************** HOST_WIDE_INT
*** 8133,8139 ****
  s390_initial_elimination_offset (int from, int to)
  {
    HOST_WIDE_INT offset;
-   int index;
  
    /* ??? Why are we called for non-eliminable pairs?  */
    if (!s390_can_eliminate (from, to))
--- 8266,8271 ----
*************** s390_initial_elimination_offset (int fro
*** 8154,8163 ****
  
      case RETURN_ADDRESS_POINTER_REGNUM:
        s390_init_frame_layout ();
!       index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot;
!       gcc_assert (index >= 0);
!       offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset;
!       offset += index * UNITS_PER_LONG;
        break;
  
      case BASE_REGNUM:
--- 8286,8311 ----
  
      case RETURN_ADDRESS_POINTER_REGNUM:
        s390_init_frame_layout ();
! 
!       if (cfun_frame_layout.first_save_gpr_slot == -1)
! 	{
! 	  /* If it turns out that for stdarg nothing went into the reg
! 	     save area we also do not need the return address
! 	     pointer.  */
! 	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
! 	    return 0;
! 
! 	  gcc_unreachable ();
! 	}
! 
!       /* In order to make the following work it is not necessary for
! 	 r14 to have a save slot.  It is sufficient if one other GPR
! 	 got one.  Since the GPRs are always stored without gaps we
! 	 are able to calculate where the r14 save slot would
! 	 reside.  */
!       offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
! 		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
! 		UNITS_PER_LONG);
        break;
  
      case BASE_REGNUM:
*************** save_gprs (rtx base, int offset, int fir
*** 8295,8300 ****
--- 8443,8465 ----
  
        addr = plus_constant (Pmode, base,
  			    offset + (start - first) * UNITS_PER_LONG);
+ 
+       if (start == last)
+ 	{
+ 	  if (TARGET_64BIT)
+ 	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
+ 			      gen_rtx_REG (Pmode, start));
+ 	  else
+ 	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
+ 			      gen_rtx_REG (Pmode, start));
+ 	  note = PATTERN (note);
+ 
+ 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
+ 	  RTX_FRAME_RELATED_P (insn) = 1;
+ 
+ 	  return insn;
+ 	}
+ 
        note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
  				 gen_rtx_REG (Pmode, start),
  				 GEN_INT (last - start + 1));
*************** restore_gprs (rtx base, int offset, int 
*** 8335,8346 ****
--- 8500,8513 ----
        else
          insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
  
+       RTX_FRAME_RELATED_P (insn) = 1;
        return insn;
      }
  
    insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
  			    addr,
  			    GEN_INT (last - first + 1));
+   RTX_FRAME_RELATED_P (insn) = 1;
    return insn;
  }
  
*************** s390_emit_stack_tie (void)
*** 8405,8410 ****
--- 8572,8627 ----
    emit_insn (gen_stack_tie (mem));
  }
  
+ /* Copy GPRS into FPR save slots.  */
+ 
+ static void
+ s390_save_gprs_to_fprs (void)
+ {
+   int i;
+ 
+   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
+     return;
+ 
+   for (i = 6; i < 16; i++)
+     {
+       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
+ 	{
+ 	  rtx insn =
+ 	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
+ 			    gen_rtx_REG (DImode, i));
+ 	  RTX_FRAME_RELATED_P (insn) = 1;
+ 	}
+     }
+ }
+ 
+ /* Restore GPRs from FPR save slots.  */
+ 
+ static void
+ s390_restore_gprs_from_fprs (void)
+ {
+   int i;
+ 
+   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
+     return;
+ 
+   for (i = 6; i < 16; i++)
+     {
+       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
+ 	{
+ 	  rtx insn =
+ 	    emit_move_insn (gen_rtx_REG (DImode, i),
+ 			    gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
+ 	  df_set_regs_ever_live (i, true);
+ 	  /* The frame related flag is only required on the save
+ 	     operations.  We nevertheless set it also for the restore
+ 	     in order to recognize these instructions in
+ 	     s390_optimize_prologue.  The flag will then be
+ 	     deleted.  */
+ 	  RTX_FRAME_RELATED_P (insn) = 1;
+ 	}
+     }
+ }
+ 
  /* Expand the prologue into a bunch of separate insns.  */
  
  void
*************** s390_emit_prologue (void)
*** 8419,8426 ****
    /* Try to get rid of the FPR clobbers.  */
    s390_optimize_nonescaping_tx ();
  
!   /* Complete frame layout.  */
!   s390_update_frame_layout ();
  
    /* Annotate all constant pool references to let the scheduler know
       they implicitly use the base register.  */
--- 8636,8643 ----
    /* Try to get rid of the FPR clobbers.  */
    s390_optimize_nonescaping_tx ();
  
!   /* Re-compute register info.  */
!   s390_register_info ();
  
    /* Annotate all constant pool references to let the scheduler know
       they implicitly use the base register.  */
*************** s390_emit_prologue (void)
*** 8446,8451 ****
--- 8663,8670 ----
    else
      temp_reg = gen_rtx_REG (Pmode, 1);
  
+   s390_save_gprs_to_fprs ();
+ 
    /* Save call saved gprs.  */
    if (cfun_frame_layout.first_save_gpr != -1)
      {
*************** s390_emit_epilogue (bool sibcall)
*** 8900,8905 ****
--- 9119,9126 ----
        RTX_FRAME_RELATED_P (insn) = 1;
      }
  
+   s390_restore_gprs_from_fprs ();
+ 
    if (! sibcall)
      {
  
*************** s390_optimize_prologue (void)
*** 10561,10568 ****
    rtx insn, new_insn, next_insn;
  
    /* Do a final recompute of the frame-related data.  */
! 
!   s390_update_frame_layout ();
  
    /* If all special registers are in fact used, there's nothing we
       can do, so no point in walking the insn list.  */
--- 10782,10788 ----
    rtx insn, new_insn, next_insn;
  
    /* Do a final recompute of the frame-related data.  */
!   s390_optimize_register_info ();
  
    /* If all special registers are in fact used, there's nothing we
       can do, so no point in walking the insn list.  */
*************** s390_optimize_prologue (void)
*** 10580,10597 ****
      {
        int first, last, off;
        rtx set, base, offset;
  
        next_insn = NEXT_INSN (insn);
  
!       if (! NONJUMP_INSN_P (insn))
  	continue;
  
!       if (GET_CODE (PATTERN (insn)) == PARALLEL
! 	  && store_multiple_operation (PATTERN (insn), VOIDmode))
  	{
! 	  set = XVECEXP (PATTERN (insn), 0, 0);
  	  first = REGNO (SET_SRC (set));
! 	  last = first + XVECLEN (PATTERN (insn), 0) - 1;
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
  	  off = INTVAL (offset);
--- 10800,10862 ----
      {
        int first, last, off;
        rtx set, base, offset;
+       rtx pat;
  
        next_insn = NEXT_INSN (insn);
  
!       if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
  	continue;
  
!       pat = PATTERN (insn);
! 
!       /* Remove ldgr/lgdr instructions used for saving and restoring
! 	 GPRs if possible.  */
!       if (TARGET_Z10
! 	  && GET_CODE (pat) == SET
! 	  && GET_MODE (SET_SRC (pat)) == DImode
! 	  && REG_P (SET_SRC (pat))
! 	  && REG_P (SET_DEST (pat)))
! 	{
! 	  int src_regno = REGNO (SET_SRC (pat));
! 	  int dest_regno = REGNO (SET_DEST (pat));
! 	  int gpr_regno;
! 	  int fpr_regno;
! 
! 	  if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
! 		|| (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
! 	    continue;
! 
! 	  gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
! 	  fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
! 
! 	  /* GPR must be call-saved, FPR must be call-clobbered.  */
! 	  if (!call_really_used_regs[fpr_regno]
! 	      || call_really_used_regs[gpr_regno])
! 	    continue;
! 
! 	  /* For restores we have to revert the frame related flag
! 	     since no debug info is supposed to be generated for
! 	     these.  */
! 	  if (dest_regno == gpr_regno)
! 	    RTX_FRAME_RELATED_P (insn) = 0;
! 
! 	  /* It must not happen that what we once saved in an FPR now
! 	     needs a stack slot.  */
! 	  gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
! 
! 	  if (cfun_gpr_save_slot (gpr_regno) == 0)
! 	    {
! 	      remove_insn (insn);
! 	      continue;
! 	    }
! 	}
! 
!       if (GET_CODE (pat) == PARALLEL
! 	  && store_multiple_operation (pat, VOIDmode))
  	{
! 	  set = XVECEXP (pat, 0, 0);
  	  first = REGNO (SET_SRC (set));
! 	  last = first + XVECLEN (pat, 0) - 1;
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
  	  off = INTVAL (offset);
*************** s390_optimize_prologue (void)
*** 10624,10637 ****
  	}
  
        if (cfun_frame_layout.first_save_gpr == -1
! 	  && GET_CODE (PATTERN (insn)) == SET
! 	  && GET_CODE (SET_SRC (PATTERN (insn))) == REG
! 	  && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM
! 	      || (!TARGET_CPU_ZARCH
! 		  && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM))
! 	  && GET_CODE (SET_DEST (PATTERN (insn))) == MEM)
  	{
! 	  set = PATTERN (insn);
  	  first = REGNO (SET_SRC (set));
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
--- 10889,10899 ----
  	}
  
        if (cfun_frame_layout.first_save_gpr == -1
! 	  && GET_CODE (pat) == SET
! 	  && GENERAL_REG_P (SET_SRC (pat))
! 	  && GET_CODE (SET_DEST (pat)) == MEM)
  	{
! 	  set = pat;
  	  first = REGNO (SET_SRC (set));
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
*************** s390_optimize_prologue (void)
*** 10647,10664 ****
  	  continue;
  	}
  
!       if (GET_CODE (PATTERN (insn)) == PARALLEL
! 	  && load_multiple_operation (PATTERN (insn), VOIDmode))
  	{
! 	  set = XVECEXP (PATTERN (insn), 0, 0);
  	  first = REGNO (SET_DEST (set));
! 	  last = first + XVECLEN (PATTERN (insn), 0) - 1;
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
  	  off = INTVAL (offset);
  
  	  if (GET_CODE (base) != REG || off < 0)
  	    continue;
  	  if (cfun_frame_layout.first_restore_gpr != -1
  	      && (cfun_frame_layout.first_restore_gpr < first
  		  || cfun_frame_layout.last_restore_gpr > last))
--- 10909,10929 ----
  	  continue;
  	}
  
!       if (GET_CODE (pat) == PARALLEL
! 	  && load_multiple_operation (pat, VOIDmode))
  	{
! 	  set = XVECEXP (pat, 0, 0);
  	  first = REGNO (SET_DEST (set));
! 	  last = first + XVECLEN (pat, 0) - 1;
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
  	  off = INTVAL (offset);
  
  	  if (GET_CODE (base) != REG || off < 0)
  	    continue;
+ 
+ 	  RTX_FRAME_RELATED_P (insn) = 0;
+ 
  	  if (cfun_frame_layout.first_restore_gpr != -1
  	      && (cfun_frame_layout.first_restore_gpr < first
  		  || cfun_frame_layout.last_restore_gpr > last))
*************** s390_optimize_prologue (void)
*** 10676,10681 ****
--- 10941,10947 ----
  					      - first) * UNITS_PER_LONG,
  				       cfun_frame_layout.first_restore_gpr,
  				       cfun_frame_layout.last_restore_gpr);
+ 	      RTX_FRAME_RELATED_P (new_insn) = 0;
  	      new_insn = emit_insn_before (new_insn, insn);
  	      INSN_ADDRESSES_NEW (new_insn, -1);
  	    }
*************** s390_optimize_prologue (void)
*** 10685,10698 ****
  	}
  
        if (cfun_frame_layout.first_restore_gpr == -1
! 	  && GET_CODE (PATTERN (insn)) == SET
! 	  && GET_CODE (SET_DEST (PATTERN (insn))) == REG
! 	  && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM
! 	      || (!TARGET_CPU_ZARCH
! 		  && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM))
! 	  && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
  	{
! 	  set = PATTERN (insn);
  	  first = REGNO (SET_DEST (set));
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
--- 10951,10961 ----
  	}
  
        if (cfun_frame_layout.first_restore_gpr == -1
! 	  && GET_CODE (pat) == SET
! 	  && GENERAL_REG_P (SET_DEST (pat))
! 	  && GET_CODE (SET_SRC (pat)) == MEM)
  	{
! 	  set = pat;
  	  first = REGNO (SET_DEST (set));
  	  offset = const0_rtx;
  	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
*************** s390_optimize_prologue (void)
*** 10700,10705 ****
--- 10963,10971 ----
  
  	  if (GET_CODE (base) != REG || off < 0)
  	    continue;
+ 
+ 	  RTX_FRAME_RELATED_P (insn) = 0;
+ 
  	  if (REGNO (base) != STACK_POINTER_REGNUM
  	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
  	    continue;
*************** s390_loop_unroll_adjust (unsigned nunrol
*** 11638,11643 ****
--- 11904,11912 ----
  #undef TARGET_CANONICALIZE_COMPARISON
  #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
  
+ #undef TARGET_HARD_REGNO_SCRATCH_OK
+ #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
+ 
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include "gt-s390.h"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]