This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[csl-sol210] Patch to support Sun argument saving ABI extension


This patch, for csl-sol210-3_4-branch, adds support for an AMD64 ABI 
extension proposed by Sun by which incoming integer function arguments are 
saved on the stack in a particular layout (for the benefit of debugging 
tools which couldn't handle more general saving arrangements with DWARF2 
information saying what is saved where).  The specification is simple in 
principle if odd in detail: the option forces a frame pointer; INTEGER 
class arguments are saved on the stack immediately after the saved %rbp, 
with one word of padding when an odd number of registers is saved; %rdi 
is not saved if it is used to pass the address where a return value of 
class MEMORY is returned; and the registers saved are those for the 
subset of the first six declared arguments which are of class INTEGER 
and are passed in registers.  
(Rather than strictly following that rule, this patch will sometimes save 
more registers than strictly required if INTEGER class arguments are being 
passed in registers beyond the first six declared arguments.)  However, 
this patch also implements some undocumented requirements beyond the 
specification, such as that a particular code sequence be used 
consistently for argument saving, to simplify the debugging tools which 
process this information.

I do not propose any form of this patch for anywhere other than 
csl-sol210-3_4-branch.

Bootstrapped with no regressions on i386-pc-solaris2.10.1.  Applied to 
csl-sol210-3_4-branch.

-- 
Joseph S. Myers               http://www.srcf.ucam.org/~jsm28/gcc/
    jsm@polyomino.org.uk (personal mail)
    joseph@codesourcery.com (CodeSourcery mail)
    jsm28@gcc.gnu.org (Bugzilla assignments and CCs)

2005-06-29  Joseph S. Myers  <joseph@codesourcery.com>

	* gcc/dwarf2.h (DW_AT_SUN_amd64_parmdump): New.
	* gcc/dwarf2out.c (gen_subprogram_die): Add this attribute.
	* gcc/doc/invoke.texi (-msave-args): New x86-64 option.
	* gcc/config/i386/i386.h (MASK_SAVE_ARGS, TARGET_SAVE_ARGS): New.
	(TARGET_SWITCHES): Add -msave-args.
	* gcc/config/i386/i386.c (struct ix86_frame): Add nmsave_args and
	padding0.
	(pro_epilogue_adjust_stack): Declare.
	(ix86_nsaved_args): New.
	(override_options, ix86_can_use_return_insn_p,
	ix86_frame_pointer_required, ix86_compute_frame_layout,
	ix86_emit_save_regs, ix86_emit_save_regs_using_mov,
	ix86_expand_prologue, ix86_expand_epilogue): Handle -msave-args.

diff -rupN GCC.orig/gcc/config/i386/i386.c GCC/gcc/config/i386/i386.c
--- GCC.orig/gcc/config/i386/i386.c	2005-05-03 21:51:44.000000000 +0000
+++ GCC/gcc/config/i386/i386.c	2005-06-09 11:08:20.000000000 +0000
@@ -691,8 +691,12 @@ struct stack_local_entry GTY(())
 
    saved frame pointer if frame_pointer_needed
 					      <- HARD_FRAME_POINTER
-   [saved regs]
+   [-msave-args]
 
+   [padding0]          \
+			)
+   [saved regs]        /
+		      (
    [padding1]          \
 		        )
    [va_arg registers]  (
@@ -703,6 +707,8 @@ struct stack_local_entry GTY(())
   */
 struct ix86_frame
 {
+  int nmsave_args;
+  int padding0;
   int nregs;
   int padding1;
   int va_arg_size;
@@ -882,6 +888,7 @@ static int extended_reg_mentioned_1 (rtx
 static bool ix86_rtx_costs (rtx, int, int, int *);
 static int min_insn_size (rtx);
 static void k8_avoid_jump_misspredicts (void);
+static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
 
 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
 static void ix86_svr3_asm_out_constructor (rtx, int);
@@ -1444,6 +1451,8 @@ override_options (void)
      }
   else
     {
+      if (TARGET_SAVE_ARGS)
+	error ("-msave-args makes no sense in the 32-bit mode");
       ix86_fpmath = FPMATH_387;
       /* i386 ABI does not specify red zone.  It still makes sense to use it
          when programmer takes care to stack from being destroyed.  */
@@ -4539,7 +4548,7 @@ ix86_can_use_return_insn_p (void)
     return 0;
 
   ix86_compute_frame_layout (&frame);
-  return frame.to_allocate == 0 && frame.nregs == 0;
+  return frame.to_allocate == 0 && frame.nregs == 0 && frame.nmsave_args == 0;
 }
 
 /* Return 1 if VALUE can be stored in the sign extended immediate field.  */
@@ -4749,6 +4758,10 @@ ix86_frame_pointer_required (void)
   if (SUBTARGET_FRAME_POINTER_REQUIRED)
     return 1;
 
+  /* -msave-args requires a frame pointer.  */
+  if (TARGET_SAVE_ARGS)
+    return 1;
+
   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
      the frame pointer by default.  Turn it back on now if we've not
      got a leaf function.  */
@@ -4971,6 +4984,18 @@ ix86_nsaved_regs (void)
   return nregs;
 }
 
+/* Return number of arguments to be saved on the stack with
+   -msave-args.  */
+
+static int
+ix86_nsaved_args (void)
+{
+  if (TARGET_SAVE_ARGS)
+    return current_function_args_info.regno - current_function_returns_struct;
+  else
+    return 0;
+}
+
 /* Return the offset between two registers, one to be eliminated, and the other
    its replacement, at the start of a routine.  */
 
@@ -5010,6 +5035,7 @@ ix86_compute_frame_layout (struct ix86_f
   HOST_WIDE_INT size = get_frame_size ();
 
   frame->nregs = ix86_nsaved_regs ();
+  frame->nmsave_args = ix86_nsaved_args ();
   total_size = size;
 
   /* During reload iteration the amount of registers saved can change.
@@ -5047,6 +5073,11 @@ ix86_compute_frame_layout (struct ix86_f
     frame->save_regs_using_mov = true;
   else
     frame->save_regs_using_mov = false;
+  if (TARGET_SAVE_ARGS)
+    {
+      cfun->machine->use_fast_prologue_epilogue = true;
+      frame->save_regs_using_mov = true;
+    }
 
 
   /* Skip return address and saved base pointer.  */
@@ -5070,6 +5101,16 @@ ix86_compute_frame_layout (struct ix86_f
   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
 
+  /* Argument save area */
+  if (TARGET_SAVE_ARGS)
+    {
+      offset += frame->nmsave_args * UNITS_PER_WORD;
+      frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
+      offset += frame->padding0;
+    }
+  else
+    frame->padding0 = 0;
+
   /* Register save area */
   offset += frame->nregs * UNITS_PER_WORD;
 
@@ -5125,8 +5166,10 @@ ix86_compute_frame_layout (struct ix86_f
     (size + frame->padding1 + frame->padding2
      + frame->outgoing_arguments_size + frame->va_arg_size);
 
-  if ((!frame->to_allocate && frame->nregs <= 1)
-      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+  if (!TARGET_SAVE_ARGS
+      && ((!frame->to_allocate && frame->nregs <= 1)
+	  || (TARGET_64BIT
+	      && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
     frame->save_regs_using_mov = false;
 
   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
@@ -5134,7 +5177,11 @@ ix86_compute_frame_layout (struct ix86_f
     {
       frame->red_zone_size = frame->to_allocate;
       if (frame->save_regs_using_mov)
-	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+	{
+	  frame->red_zone_size
+	    += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
+	  frame->red_zone_size += frame->padding0;
+	}
       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
     }
@@ -5143,6 +5190,8 @@ ix86_compute_frame_layout (struct ix86_f
   frame->to_allocate -= frame->red_zone_size;
   frame->stack_pointer_offset -= frame->red_zone_size;
 #if 0
+  fprintf (stderr, "nmsave_args: %i\n", frame->nmsave_args);
+  fprintf (stderr, "padding0: %i\n", frame->padding0);
   fprintf (stderr, "nregs: %i\n", frame->nregs);
   fprintf (stderr, "size: %i\n", size);
   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
@@ -5166,6 +5215,22 @@ ix86_emit_save_regs (void)
   int regno;
   rtx insn;
 
+  if (TARGET_SAVE_ARGS)
+    {
+      int i;
+      int nsaved = ix86_nsaved_args ();
+      int start = current_function_returns_struct;
+      for (i = start; i < start + nsaved; i++)
+	{
+	  regno = x86_64_int_parameter_registers[i];
+	  insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
+      if (nsaved % 2 != 0)
+	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				   GEN_INT (-UNITS_PER_WORD), -1);
+    }
+
   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
     if (ix86_save_reg (regno, true))
       {
@@ -5191,6 +5256,24 @@ ix86_emit_save_regs_using_mov (rtx point
 	RTX_FRAME_RELATED_P (insn) = 1;
 	offset += UNITS_PER_WORD;
       }
+
+  if (TARGET_SAVE_ARGS)
+    {
+      int i;
+      int nsaved = ix86_nsaved_args ();
+      int start = current_function_returns_struct;
+      if (nsaved % 2 != 0)
+	offset += UNITS_PER_WORD;
+      for (i = start + nsaved - 1; i >= start; i--)
+	{
+	  regno = x86_64_int_parameter_registers[i];
+	  insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
+						 Pmode, offset),
+				 gen_rtx_REG (Pmode, regno));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  offset += UNITS_PER_WORD;
+	}
+    }
 }
 
 /* Expand prologue or epilogue stack adjustment.
@@ -5257,14 +5340,16 @@ ix86_expand_prologue (void)
   if (!frame.save_regs_using_mov)
     ix86_emit_save_regs ();
   else
-    allocate += frame.nregs * UNITS_PER_WORD;
+    allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
+      + frame.padding0;
 
   /* When using red zone we may start register saving before allocating
      the stack frame saving one cycle of the prologue.  */
   if (TARGET_RED_ZONE && frame.save_regs_using_mov)
     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
 				   : stack_pointer_rtx,
-				   -frame.nregs * UNITS_PER_WORD);
+				   -(frame.nregs + frame.nmsave_args)
+				   * UNITS_PER_WORD - frame.padding0);
 
   if (allocate == 0)
     ;
@@ -5301,11 +5386,12 @@ ix86_expand_prologue (void)
 
   if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
     {
-      if (!frame_pointer_needed || !frame.to_allocate)
+      if (!TARGET_SAVE_ARGS && (!frame_pointer_needed || !frame.to_allocate))
         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
       else
         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
-				       -frame.nregs * UNITS_PER_WORD);
+				       -(frame.nregs + frame.nmsave_args)
+				       * UNITS_PER_WORD - frame.padding0);
     }
 
   pic_reg_used = false;
@@ -5386,10 +5472,11 @@ ix86_expand_epilogue (int style)
      must be taken for the normal return case of a function using
      eh_return: the eax and edx registers are marked as saved, but not
      restored along this path.  */
-  offset = frame.nregs;
+  offset = frame.nregs + frame.nmsave_args;
   if (current_function_calls_eh_return && style != 2)
     offset -= 2;
   offset *= -UNITS_PER_WORD;
+  offset -= frame.padding0;
 
   /* If we're only restoring one register and sp is not valid then
      using a move instruction to restore the register since it's
@@ -5445,14 +5532,18 @@ ix86_expand_epilogue (int style)
 	    {
 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
 	      tmp = plus_constant (tmp, (frame.to_allocate
-                                         + frame.nregs * UNITS_PER_WORD));
+                                         + (frame.nregs + frame.nmsave_args)
+					   * UNITS_PER_WORD
+					 + frame.padding0));
 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
 	    }
 	}
       else if (!frame_pointer_needed)
 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (frame.to_allocate
-					    + frame.nregs * UNITS_PER_WORD),
+					    + (frame.nregs + frame.nmsave_args)
+					      * UNITS_PER_WORD
+					    + frame.padding0),
 				   style);
       /* If not an i386, mov & pop is faster than "leave".  */
       else if (TARGET_USE_LEAVE || optimize_size
@@ -5493,6 +5584,9 @@ ix86_expand_epilogue (int style)
 	    else
 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
 	  }
+      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				 GEN_INT (frame.nmsave_args * UNITS_PER_WORD
+					  + frame.padding0), style);
       if (frame_pointer_needed)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
diff -rupN GCC.orig/gcc/config/i386/i386.h GCC/gcc/config/i386/i386.h
--- GCC.orig/gcc/config/i386/i386.h	2004-04-21 15:12:41.000000000 +0000
+++ GCC/gcc/config/i386/i386.h	2005-05-06 01:29:01.000000000 +0000
@@ -128,8 +128,9 @@ extern int target_flags;
 #define MASK_64BIT		0x00100000	/* Produce 64bit code */
 #define MASK_MS_BITFIELD_LAYOUT 0x00200000	/* Use native (MS) bitfield layout */
 #define MASK_TLS_DIRECT_SEG_REFS 0x00400000	/* Avoid adding %gs:0  */
+#define MASK_SAVE_ARGS		0x00800000	/* Save register args on stack */
 
-/* Unused:			0x03e0000	*/
+/* Unused:			0x03000000	*/
 
 /* ... overlap with subtarget options starts by 0x04000000.  */
 #define MASK_NO_RED_ZONE	0x04000000	/* Do not use red zone */
@@ -181,6 +182,9 @@ extern int target_flags;
 #define TARGET_OMIT_LEAF_FRAME_POINTER \
   (target_flags & MASK_OMIT_LEAF_FRAME_POINTER)
 
+/* Save register arguments on stack */
+#define TARGET_SAVE_ARGS (target_flags & MASK_SAVE_ARGS)
+
 /* Debug GO_IF_LEGITIMATE_ADDRESS */
 #define TARGET_DEBUG_ADDR (ix86_debug_addr_string != 0)
 
@@ -425,6 +429,10 @@ extern int x86_prefetch_sse;
     N_("Use direct references against %gs when accessing tls data") },	      \
   { "no-tls-direct-seg-refs",	-MASK_TLS_DIRECT_SEG_REFS,		      \
     N_("Do not use direct references against %gs when accessing tls data") }, \
+  { "save-args",		MASK_SAVE_ARGS,				      \
+    N_("Save register arguments on the stack") },			      \
+  { "no-save-args",		-MASK_SAVE_ARGS,			      \
+    N_("Do not save register arguments on the stack") },		      \
   SUBTARGET_SWITCHES							      \
   { "",									      \
     TARGET_DEFAULT | TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_DEFAULT	      \
diff -rupN GCC.orig/gcc/doc/invoke.texi GCC/gcc/doc/invoke.texi
--- GCC.orig/gcc/doc/invoke.texi	2005-05-20 22:18:51.000000000 +0000
+++ GCC/gcc/doc/invoke.texi	2005-06-29 22:30:20.000000000 +0000
@@ -8586,6 +8586,10 @@ building of shared libraries are not sup
 Generate code for the large model: This model makes no assumptions
 about addresses and sizes of sections.  Currently GCC does not implement
 this model.
+
+@item -msave-args
+@opindex msave-args
+Save integer arguments on the stack at function entry.
 @end table
 
 @node HPPA Options
diff -rupN GCC.orig/gcc/dwarf2.h GCC/gcc/dwarf2.h
--- GCC.orig/gcc/dwarf2.h	2004-01-08 07:50:36.000000000 +0000
+++ GCC/gcc/dwarf2.h	2005-06-01 00:56:50.000000000 +0000
@@ -243,6 +243,8 @@ enum dwarf_attribute
     DW_AT_body_begin = 0x2105,
     DW_AT_body_end   = 0x2106,
     DW_AT_GNU_vector = 0x2107,
+    /* Sun extension.  */
+    DW_AT_SUN_amd64_parmdump = 0x2224,
     /* VMS Extensions.  */
     DW_AT_VMS_rtnbeg_pd_address = 0x2201
   };
diff -rupN GCC.orig/gcc/dwarf2out.c GCC/gcc/dwarf2out.c
--- GCC.orig/gcc/dwarf2out.c	2005-05-04 23:15:26.000000000 +0000
+++ GCC/gcc/dwarf2out.c	2005-06-01 00:59:49.000000000 +0000
@@ -11079,6 +11079,10 @@ gen_subprogram_die (tree decl, dw_die_re
 	}
 #endif
     }
+#ifdef TARGET_SAVE_ARGS
+  if (TARGET_SAVE_ARGS)
+    add_AT_flag (subr_die, DW_AT_SUN_amd64_parmdump, 1);
+#endif
 }
 
 /* Generate a DIE to represent a declared data object.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]