i386 prologues/epilogues tweaks

Jan Hubicka <jh@suse.cz>
Wed Jun 13 02:36:00 GMT 2001


> On Tue, Jun 12, 2001 at 08:02:56PM +0200, Jan Hubicka wrote:
> > + ix86_emit_save_regs_using_mov (pointer, offset)
> > + 	rtx pointer;
> > + 	int offset;
> > + {
> > +   int regno;
> > + 
> > +   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> > +     if (ix86_save_reg (regno))
> > +       {
> > + 	emit_move_insn (adj_offsettable_operand (gen_rtx_MEM (Pmode,
> > + 							      pointer),
> > + 						 offset),
> > + 			gen_rtx_REG (Pmode, regno));
> > + 	offset += UNITS_PER_WORD;
> 
> This can't be right.  You're not tagging these with FRAME_RELATED_P etc.
I've noticed that too, together with a few other problems in 64-bit compilation.
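
The helper now captures the insn returned by emit_move_insn and marks it
frame related, so the dwarf2 frame machinery records each register save.
Condensed from the ix86_emit_save_regs_using_mov hunk below, the loop body
now reads roughly:

    insn = emit_move_insn (adj_offsettable_operand (gen_rtx_MEM (Pmode,
								 pointer),
						    offset),
			   gen_rtx_REG (Pmode, regno));
    /* Tag the store so the frame/unwind machinery sees the save.  */
    RTX_FRAME_RELATED_P (insn) = 1;
    offset += UNITS_PER_WORD;
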
Here is the updated patch:
Tue Jun 12 20:01:12 CEST 2001  Jan Hubicka  <jh@suse.cz>

	* i386.c (x86_accumulate_outgoing_args, x86_prologue_using_move,
	x86_epilogue_using_move): New global variables.
	(override_options): Enable ACCUMULATE_OUTGOING_ARGS if preferred.
	(ix86_emit_save_regs_using_mov): New static function.
	(ix86_expand_prologue, ix86_expand_epilogue): Use moves if preferred.
	* i386.h (MASK_MMX, MASK_SSE, MASK_SSE2, MASK_128BIT_LONG_DOUBLE,
	MASK_MIX_SSE_I387): Renumber.
	(MASK_NO_ACCUMULATE_OUTGOING_ARGS): New.
	(x86_accumulate_outgoing_args, x86_prologue_using_move,
	x86_epilogue_using_move): Declare.
	(TARGET_PROLOGUE_USING_MOVE, TARGET_EPILOGUE_USING_MOVE): New.
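
The i386.h renumbering just makes room for the new
MASK_NO_ACCUMULATE_OUTGOING_ARGS bit.  The interesting part is in
override_options: ACCUMULATE_OUTGOING_ARGS is now enabled whenever the CPU
tuning table prefers it, unless the user set that mask or we optimize for
size (condensed from the i386.c hunk below):

    if ((x86_accumulate_outgoing_args & CPUMASK)
	&& !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
	&& !optimize_size)
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;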

Index: i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.265
diff -c -3 -p -r1.265 i386.c
*** i386.c	2001/06/06 12:57:30	1.265
--- i386.c	2001/06/13 09:30:41
*************** const int x86_add_esp_8 = m_ATHLON | m_P
*** 313,318 ****
--- 313,321 ----
  const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
  const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
  const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
+ const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
+ const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
+ const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
  
  #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
  
*************** static int ix86_split_to_parts PARAMS ((
*** 560,565 ****
--- 563,569 ----
  static int ix86_safe_length_prefix PARAMS ((rtx));
  static int ix86_nsaved_regs PARAMS((void));
  static void ix86_emit_save_regs PARAMS((void));
+ static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
  static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
  static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
  static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
*************** override_options ()
*** 836,841 ****
--- 840,850 ----
       on by -msse.  */
    if (TARGET_SSE)
      target_flags |= MASK_MMX;
+ 
+   if ((x86_accumulate_outgoing_args & CPUMASK)
+       && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
+       && !optimize_size)
+     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  }
  
  void
*************** ix86_emit_save_regs ()
*** 2466,2471 ****
--- 2475,2502 ----
        }
  }
  
+ /* Emit code to save registers using MOV insns.  First register
+    is saved at POINTER + OFFSET.  */
+ static void
+ ix86_emit_save_regs_using_mov (pointer, offset)
+ 	rtx pointer;
+ 	HOST_WIDE_INT offset;
+ {
+   int regno;
+   rtx insn;
+ 
+   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+     if (ix86_save_reg (regno, true))
+       {
+ 	insn = emit_move_insn (adj_offsettable_operand (gen_rtx_MEM (Pmode,
+ 								     pointer),
+ 							offset),
+ 			       gen_rtx_REG (Pmode, regno));
+ 	RTX_FRAME_RELATED_P (insn) = 1;
+ 	offset += UNITS_PER_WORD;
+       }
+ }
+ 
  /* Expand the prologue into a bunch of separate insns.  */
  
  void
*************** ix86_expand_prologue ()
*** 2476,2481 ****
--- 2507,2514 ----
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
+   int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
+   HOST_WIDE_INT allocate;
  
    ix86_compute_frame_layout (&frame);
  
*************** ix86_expand_prologue ()
*** 2491,2513 ****
        RTX_FRAME_RELATED_P (insn) = 1;
      }
  
!   ix86_emit_save_regs ();
  
!   if (frame.to_allocate == 0)
      ;
    else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
      {
        if (frame_pointer_needed)
  	insn = emit_insn (gen_pro_epilogue_adjust_stack
  			  (stack_pointer_rtx, stack_pointer_rtx,
! 		           GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
        else
  	if (TARGET_64BIT)
  	  insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
! 					GEN_INT (-frame.to_allocate)));
          else
  	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
! 					GEN_INT (-frame.to_allocate)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
    else
--- 2524,2555 ----
        RTX_FRAME_RELATED_P (insn) = 1;
      }
  
!   allocate = frame.to_allocate;
!   /* In case we are dealing only with a single register and an empty
!      frame, a push is equivalent to the mov+add sequence.  */
!   if (allocate == 0 && frame.nregs <= 1)
!     use_mov = 0;
  
!   if (!use_mov)
!     ix86_emit_save_regs ();
!   else
!     allocate += frame.nregs * UNITS_PER_WORD;
! 
!   if (allocate == 0)
      ;
    else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
      {
        if (frame_pointer_needed)
  	insn = emit_insn (gen_pro_epilogue_adjust_stack
  			  (stack_pointer_rtx, stack_pointer_rtx,
! 		           GEN_INT (-allocate), hard_frame_pointer_rtx));
        else
  	if (TARGET_64BIT)
  	  insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
! 					GEN_INT (-allocate)));
          else
  	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
! 					GEN_INT (-allocate)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
    else
*************** ix86_expand_prologue ()
*** 2520,2526 ****
  	abort();
  
        arg0 = gen_rtx_REG (SImode, 0);
!       emit_move_insn (arg0, GEN_INT (frame.to_allocate));
  
        sym = gen_rtx_MEM (FUNCTION_MODE,
  			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
--- 2562,2568 ----
  	abort();
  
        arg0 = gen_rtx_REG (SImode, 0);
!       emit_move_insn (arg0, GEN_INT (allocate));
  
        sym = gen_rtx_MEM (FUNCTION_MODE,
  			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
*************** ix86_expand_prologue ()
*** 2530,2535 ****
--- 2572,2585 ----
  	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
  			     CALL_INSN_FUNCTION_USAGE (insn));
      }
+   if (use_mov)
+     {
+       if (!frame_pointer_needed || !frame.to_allocate)
+         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+       else
+         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
+ 				       -frame.nregs * UNITS_PER_WORD);
+     }
  
  #ifdef SUBTARGET_PROLOGUE
    SUBTARGET_PROLOGUE;
*************** ix86_expand_epilogue (style)
*** 2629,2634 ****
--- 2679,2686 ----
       and there is exactly one register to pop. This heruistic may need some
       tuning in future.  */
    if ((!sp_valid && frame.nregs <= 1)
+       || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ 	  && (frame.nregs > 1 || frame.to_allocate))
        || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
        || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
  	  && frame.nregs == 1)
Index: i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.187
diff -c -3 -p -r1.187 i386.h
*** i386.h	2001/06/08 19:53:59	1.187
--- i386.h	2001/06/13 09:30:44
*************** extern int target_flags;
*** 114,126 ****
  #define MASK_INLINE_ALL_STROPS	0x00002000	/* Inline stringops in all cases */
  #define MASK_NO_PUSH_ARGS	0x00004000	/* Use push instructions */
  #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00008000/* Accumulate outgoing args */
! #define MASK_MMX		0x00010000	/* Support MMX regs/builtins */
! #define MASK_SSE		0x00020000	/* Support SSE regs/builtins */
! #define MASK_SSE2		0x00040000	/* Support SSE2 regs/builtins */
! #define MASK_128BIT_LONG_DOUBLE 0x00080000	/* long double size is 128bit */
! #define MASK_MIX_SSE_I387	0x00100000	/* Mix SSE and i387 instructions */
! #define MASK_64BIT		0x00200000	/* Produce 64bit code */
! #define MASK_NO_RED_ZONE	0x00400000	/* Do not use red zone */
  
  /* Temporary codegen switches */
  #define MASK_INTEL_SYNTAX	0x00000200
--- 114,127 ----
  #define MASK_INLINE_ALL_STROPS	0x00002000	/* Inline stringops in all cases */
  #define MASK_NO_PUSH_ARGS	0x00004000	/* Use push instructions */
  #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00008000/* Accumulate outgoing args */
! #define MASK_NO_ACCUMULATE_OUTGOING_ARGS 0x00010000
! #define MASK_MMX		0x00020000	/* Support MMX regs/builtins */
! #define MASK_SSE		0x00040000	/* Support SSE regs/builtins */
! #define MASK_SSE2		0x00080000	/* Support SSE2 regs/builtins */
! #define MASK_128BIT_LONG_DOUBLE 0x00100000	/* long double size is 128bit */
! #define MASK_MIX_SSE_I387	0x00200000	/* Mix SSE and i387 instructions */
! #define MASK_64BIT		0x00400000	/* Produce 64bit code */
! #define MASK_NO_RED_ZONE	0x00800000	/* Do not use red zone */
  
  /* Temporary codegen switches */
  #define MASK_INTEL_SYNTAX	0x00000200
*************** extern const int x86_himode_math, x86_qi
*** 212,217 ****
--- 213,220 ----
  extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
  extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
  extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
+ extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
+ extern const int x86_epilogue_using_move;
  
  #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
  #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
*************** extern const int x86_partial_reg_depende
*** 247,252 ****
--- 250,257 ----
  #define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
  #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
  #define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
+ #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
+ #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
  
  #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
  



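To make the offset arithmetic in ix86_expand_prologue concrete, here is a
small standalone sketch (my own illustration, not part of the patch,
assuming a 32-bit target and made-up frame sizes) that mirrors how the
single stack adjustment and the register save slots are computed when the
mov sequence is used:

    #include <stdio.h>

    #define UNITS_PER_WORD 4	/* illustration only: a 32-bit target */

    int
    main (void)
    {
      /* Hypothetical frame: three callee-saved registers, 16 bytes of
	 locals, and a frame pointer.  These numbers are made up.  */
      int nregs = 3;
      int to_allocate = 16;
      int frame_pointer_needed = 1;
      int allocate, offset, i;
      const char *base;

      /* With the mov sequence, one stack adjustment covers the locals plus
	 the register save area (allocate += frame.nregs * UNITS_PER_WORD).  */
      allocate = to_allocate + nregs * UNITS_PER_WORD;
      printf ("adjust the stack pointer by -%d bytes\n", allocate);

      /* The saves are addressed from the frame pointer when one is set up
	 and there are locals, otherwise from the stack pointer.  */
      if (frame_pointer_needed && to_allocate)
	{
	  offset = -nregs * UNITS_PER_WORD;
	  base = "ebp";
	}
      else
	{
	  offset = to_allocate;
	  base = "esp";
	}

      for (i = 0; i < nregs; i++, offset += UNITS_PER_WORD)
	printf ("save register %d at %d(%%%s)\n", i, offset, base);
      return 0;
    }

With these example numbers the sketch prints a 28-byte adjustment and saves
at -12(%ebp), -8(%ebp) and -4(%ebp), which is the layout the
ix86_emit_save_regs_using_mov call with hard_frame_pointer_rtx and
-frame.nregs * UNITS_PER_WORD is aiming for.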