ia64 patch, 2 minor optimizations, 1 minor bug fix

Jim Wilson wilson@cygnus.com
Mon Mar 20 20:07:00 GMT 2000


This patch contains two minor performance optimizations.  First, it lets the
register allocator use the input and output registers.  Second, it stops
saving/restoring the return address in leaf functions.

This patch also contains a minor bug fix.  In non-leaf functions, we now save
the return address even if there is no epilogue to use it, so that EH
unwinding can find it.

This was tested with make all, make check-gcc, and a specint run.
	
Mon Mar 20 19:53:53 2000  Jim Wilson  <wilson@cygnus.com>

	* config/ia64/ia64.c (ia64_expand_prologue): Don't abort if leaf
	function uses output registers.  Don't save RP for leaf functions.
	Do save RP even if no epilogue.
	* config/ia64/ia64.h (FIXED_REGISTERS): Unmark in/out registers.
	(CALL_USED_REGISTERS): Unmark in registers.
	(REG_ALLOC_ORDER): Move out regs up, to near the top.  Move in regs up,
	to near the middle.

Index: ia64.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.c,v
retrieving revision 1.5
diff -p -r1.5 ia64.c
*** ia64.c	2000/03/17 20:06:48	1.5
--- ia64.c	2000/03/21 03:38:17
*************** save_restore_insns (save_p)
*** 775,786 ****
  /* ??? Get inefficient code when the frame size is larger than can fit in an
     adds instruction.  */
  
- /* ??? Add support for allocating temporaries from the output registers if
-    they do not need to live past call instructions.  */
- 
- /* ??? If the function does not return, then we don't need to save the rp
-    and ar.pfs registers.  */
- 
  /* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the
     low 32 regs.  */
  
--- 775,780 ----
*************** ia64_expand_prologue ()
*** 807,813 ****
    leaf_function = leaf_function_p ();
    pop_topmost_sequence ();
  
!   /* ??? If there is no epilogue, then we don't need some prologue insns.  We
       need to avoid emitting the dead prologue insns, because flow will complain
       about them.  */
    if (optimize)
--- 801,807 ----
    leaf_function = leaf_function_p ();
    pop_topmost_sequence ();
  
!   /* If there is no epilogue, then we don't need some prologue insns.  We
       need to avoid emitting the dead prologue insns, because flow will complain
       about them.  */
    if (optimize)
*************** ia64_expand_prologue ()
*** 863,872 ****
    else if (profile_block_flag == 2)
      outputs = MAX (outputs, 2);
  
-   /* Leaf functions should not use any output registers.  */
-   if (leaf_function && outputs != 0)
-     abort ();
- 
    /* No rotating register support as yet.  */
  
    rotates = 0;
--- 857,862 ----
*************** ia64_expand_prologue ()
*** 874,879 ****
--- 864,871 ----
    /* Allocate two extra locals for saving/restoring rp and ar.pfs.  Also
       allocate one local for use as the frame pointer if frame_pointer_needed
       is true.  */
+   /* ??? If this is a leaf function, then we aren't using one of these local
+      registers for the RP anymore.  */
    locals += 2 + frame_pointer_needed;
  
    /* Save these values in global registers for debugging info.  */
*************** ia64_expand_prologue ()
*** 925,930 ****
--- 917,923 ----
    /* We don't need an alloc instruction if this is a leaf function, and the
       locals and outputs are both zero sized.  Since we have already allocated
       two locals for rp and ar.pfs, we check for two locals.  */
+   /* Leaf functions can use output registers as call-clobbered temporaries.  */
    if (locals == 2 && outputs == 0 && leaf_function)
      {
        /* If there is no alloc, but there are input registers used, then we
*************** ia64_expand_prologue ()
*** 940,960 ****
    else
      {
        ia64_need_regstk = 0;
- 
        ia64_arpfs_regno = LOC_REG (locals - 1);
-       ia64_rp_regno = LOC_REG (locals - 2);
-       reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno];
  
        emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno),
  			    GEN_INT (inputs), GEN_INT (locals),
  			    GEN_INT (outputs), GEN_INT (rotates)));
  
!       /* ??? FIXME ??? We don't need to save BR_REG (0) if this is a leaf
! 	 function.  We also don't need to allocate a local reg for it then.  */
!       /* ??? Likewise if there is no epilogue.  */
!       if (epilogue_p)
! 	emit_move_insn (gen_rtx_REG (DImode, ia64_rp_regno),
! 			gen_rtx_REG (DImode, BR_REG (0)));
      }
  
    /* Set up frame pointer and stack pointer.  */
--- 933,968 ----
    else
      {
        ia64_need_regstk = 0;
        ia64_arpfs_regno = LOC_REG (locals - 1);
  
        emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno),
  			    GEN_INT (inputs), GEN_INT (locals),
  			    GEN_INT (outputs), GEN_INT (rotates)));
  
!       /* Emit a save of BR_REG (0) if we call other functions.
! 	 Do this even if this function doesn't return, as EH
!          depends on this to be able to unwind the stack.  */
!       if (! leaf_function)
! 	{
! 	  rtx ia64_rp_reg;
! 
! 	  ia64_rp_regno = LOC_REG (locals - 2);
! 	  reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno];
! 
! 	  ia64_rp_reg = gen_rtx_REG (DImode, ia64_rp_regno);
! 	  insn = emit_move_insn (ia64_rp_reg, gen_rtx_REG (DImode,
! 							   BR_REG (0)));
! 	  RTX_FRAME_RELATED_P (insn) = 1;
! 	  if (! epilogue_p)
! 	    {
! 	      /* If we don't have an epilogue, then the return value
! 		 doesn't appear to be needed and the above store will
! 		 appear dead and will elicit a warning from flow.  */
! 	      emit_insn (gen_rtx_USE (VOIDmode, ia64_rp_reg));
! 	    }
! 	}
!       else
! 	ia64_rp_regno = 0;
      }
  
    /* Set up frame pointer and stack pointer.  */
Index: ia64.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.h,v
retrieving revision 1.5
diff -p -r1.5 ia64.h
*** ia64.h	2000/03/17 20:06:48	1.5
--- ia64.h	2000/03/21 03:38:17
*************** while (0)
*** 577,592 ****
     p0: constant true
     fp: eliminable frame pointer */   
  
! /* The last 16 stacked regs are fixed, because they are reserved for the 8
!    input and 8 output registers.  */
  
  /* ??? Must mark the next 3 stacked regs as fixed, because ia64_expand_prologue
     assumes that three locals are available for fp, b0, and ar.pfs.  */
  
  /* ??? Should mark b0 as fixed?  */
  
- /* ??? input and output registers do not have to be marked as fixed.  */
- 
  #define FIXED_REGISTERS \
  { /* General registers.  */				\
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,	\
--- 577,590 ----
     p0: constant true
     fp: eliminable frame pointer */   
  
! /* The last 16 stacked regs are reserved for the 8 input and 8 output
!    registers.  */
  
  /* ??? Must mark the next 3 stacked regs as fixed, because ia64_expand_prologue
     assumes that three locals are available for fp, b0, and ar.pfs.  */
  
  /* ??? Should mark b0 as fixed?  */
  
  #define FIXED_REGISTERS \
  { /* General registers.  */				\
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,	\
*************** while (0)
*** 596,602 ****
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,	\
!   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	\
    /* Floating-point registers.  */			\
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
--- 594,600 ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,	\
!   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    /* Floating-point registers.  */			\
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
*************** while (0)
*** 622,629 ****
     therefore identifies the registers that are not available for general
     allocation of values that must live across function calls.  */
  
- /* ??? If inputs are not marked as fixed, then they are not call clobbered.  */
- 
  #define CALL_USED_REGISTERS \
  { /* General registers.  */				\
    1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,	\
--- 620,625 ----
*************** while (0)
*** 633,639 ****
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,	\
!   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	\
    /* Floating-point registers.  */			\
    1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
--- 629,635 ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,	\
!   0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,	\
    /* Floating-point registers.  */			\
    1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	\
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	\
*************** while (0)
*** 689,699 ****
  /* ??? Should the GR return value registers come before or after the rest
     of the caller-save GRs?  */
  
- /* ??? Output registers are cheap, because they will be not be saved
-    by the register engine.  They probably should be early in the list.
-    We need to make them not fixed first though.  Similarly, input registers
-    are callee-saved (RSE) like the stacked locals.  */
- 
  #define REG_ALLOC_ORDER \
  {									   \
    /* Caller-saved general registers.  */				   \
--- 685,690 ----
*************** while (0)
*** 701,706 ****
--- 692,700 ----
    R_GR (18), R_GR (19), R_GR (20), R_GR (21), R_GR (22), R_GR (23), 	   \
    R_GR (24), R_GR (25), R_GR (26), R_GR (27), R_GR (28), R_GR (29), 	   \
    R_GR (30), R_GR (31),							   \
+   /* Output registers.  */						   \
+   R_GR (120), R_GR (121), R_GR (122), R_GR (123), R_GR (124), R_GR (125),  \
+   R_GR (126), R_GR (127), 						   \
    /* Caller-saved general registers, also used for return values.  */	   \
    R_GR (8), R_GR (9), R_GR (10), R_GR (11), 				   \
    /* addl caller-saved general registers.  */				   \
*************** while (0)
*** 759,764 ****
--- 753,761 ----
    R_GR (96), R_GR (97), R_GR (98), R_GR (99), R_GR (100), R_GR (101), 	   \
    R_GR (102), R_GR (103), R_GR (104), R_GR (105), R_GR (106), R_GR (107),  \
    R_GR (108),								   \
+   /* Input registers.  */						   \
+   R_GR (112), R_GR (113), R_GR (114), R_GR (115), R_GR (116), R_GR (117),  \
+   R_GR (118), R_GR (119),						   \
    /* Callee-saved general registers.  */				   \
    R_GR (4), R_GR (5), R_GR (6), R_GR (7),				   \
    /* Callee-saved FP registers.  */					   \
*************** while (0)
*** 773,784 ****
  									   \
    /* ??? Stacked registers reserved for fp, rp, and ar.pfs.  */		   \
    R_GR (109), R_GR (110), R_GR (111),					   \
-   /* Input registers.  */						   \
-   R_GR (112), R_GR (113), R_GR (114), R_GR (115), R_GR (116), R_GR (117),  \
-   R_GR (118), R_GR (119),						   \
-   /* Output registers.  */						   \
-   R_GR (120), R_GR (121), R_GR (122), R_GR (123), R_GR (124), R_GR (125),  \
-   R_GR (126), R_GR (127), 						   \
  									   \
    /* Special general registers.  */					   \
    R_GR (0), R_GR (1), R_GR (12), R_GR (13), 				   \
--- 770,775 ----


More information about the Gcc-patches mailing list