This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch] Adjustments for Windows x64 SEH


Hi,

at AdaCore, we are now using native SEH exceptions for Windows x64 based on Richard's patch

 http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01966.html

I plan to submit a rebased version of this work soon, but we have found a few issues in the port:

* Registers cannot be pushed once the frame pointer is set.  Otherwise the unwinder is not able to read the old values.

* There was no protection against very large frame sizes.  We have a few tests declaring huge local arrays (> 2 GB) for the purpose of testing stack checking.  Without rework, this caused either ICEs or gas errors, because unwind info cannot handle such large values or because an extra register is needed to save or spill a register.  I fixed this issue by first allocating a small area just to save the registers and then allocating the remainder.  No unwind info is emitted for the second (large) area, as if it had been allocated by alloca.

* In order to implement setjmp/longjmp via RtlUnwindEx, we need to pass to setjmp the value of the establisher frame address for the current function.  For this purpose I added a new builtin (__builtin_establisher_frame).  This was manually tested by building and using gdb, which uses setjmp/longjmp internally for its exception mechanism.

Bootstrapped and regtested on x86_64 GNU/Linux.

Comments are welcome.

Tristan.

2012-06-18  Tristan Gingold  <gingold@adacore.com>

	* doc/extend.texi (X86 Built-in Functions): Document
	__builtin_establisher_frame.
	* config/i386/winnt.c (i386_pe_seh_end_prologue): Move code to ...
	(seh_cfa_adjust_cfa): ... that function.
	(seh_emit_stackalloc): Do not emit out of range values.
	* config/i386/i386.md: Delete unused UNSPEC_REG_SAVE,
	UNSPEC_DEF_CFA constants.
	Add UNSPEC_ESTABLISHER_FRAME.
	(establisher_frame): Add insn.
	* config/i386/i386.h (SEH_MAX_FRAME_SIZE): Define.
	* config/i386/i386.c (ix86_frame_pointer_required): Required
	for very large frames on SEH target.
	(ix86_compute_frame_layout): Save area is before frame pointer
	on SEH target.  Handle very large frames.
	(ix86_expand_prologue): Likewise.
	(enum ix86_builtins): Add IX86_BUILTIN_ESTABLISHER_FRAME.
	(ix86_init_builtins): Add __builtin_establisher_frame.
	(ix86_expand_builtin): Handle IX86_BUILTIN_ESTABLISHER_FRAME.
	(ix86_output_establisher_frame): New function.
	* config/i386/i386-protos.h (ix86_output_establisher_frame): Declare.

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f300a56..28aa928 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -79,6 +79,7 @@ extern const char *output_fix_trunc (rtx, rtx*, bool);
 extern const char *output_fp_compare (rtx, rtx*, bool, bool);
 extern const char *output_adjust_stack_and_probe (rtx);
 extern const char *output_probe_stack_range (rtx, rtx);
+extern const char *ix86_output_establisher_frame (rtx);
 
 extern void ix86_expand_clear (rtx);
 extern void ix86_expand_move (enum machine_mode, rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e2f5740..126c0cd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -8558,6 +8558,11 @@ ix86_frame_pointer_required (void)
   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
     return true;
 
+  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
+     allocation is 4GB (add a safety guard for saved registers).  */
+  if (TARGET_64BIT_MS_ABI && get_frame_size () + 4096 > SEH_MAX_FRAME_SIZE)
+    return true;
+
   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
      turns off the frame pointer by default.  Turn it back on now if
      we've not got a leaf function.  */
@@ -9051,6 +9056,11 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
   offset += frame->nregs * UNITS_PER_WORD;
   frame->reg_save_offset = offset;
 
+  /* On SEH target, registers are pushed just before the frame pointer
+     location.  */
+  if (TARGET_SEH)
+    frame->hard_frame_pointer_offset = offset;
+
   /* Align and set SSE register save area.  */
   if (frame->nsseregs)
     {
@@ -9144,7 +9154,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
 
       /* If we can leave the frame pointer where it is, do so.  */
       diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
-      if (diff > 240 || (diff & 15) != 0)
+      if (diff <= SEH_MAX_FRAME_SIZE && (diff > 240 || (diff & 15) != 0))
 	{
 	  /* Ideally we'd determine what portion of the local stack frame
 	     (within the constraint of the lowest 240) is most heavily used.
@@ -10146,6 +10156,7 @@ ix86_expand_prologue (void)
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   bool int_registers_saved;
+  bool sse_registers_saved;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -10298,6 +10309,9 @@ ix86_expand_prologue (void)
       m->fs.realigned = true;
     }
 
+  int_registers_saved = (frame.nregs == 0);
+  sse_registers_saved = (frame.nsseregs == 0);
+
   if (frame_pointer_needed && !m->fs.fp_valid)
     {
       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
@@ -10305,6 +10319,17 @@ ix86_expand_prologue (void)
       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
       RTX_FRAME_RELATED_P (insn) = 1;
 
+      /* Push registers now, before setting the frame pointer
+	 on SEH target.  */
+      if (!int_registers_saved
+	  && TARGET_SEH
+	  && !frame.save_regs_using_mov)
+	{
+	  ix86_emit_save_regs ();
+	  int_registers_saved = true;
+	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+	}
+
       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
 	{
 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
@@ -10317,8 +10342,6 @@ ix86_expand_prologue (void)
 	}
     }
 
-  int_registers_saved = (frame.nregs == 0);
-
   if (!int_registers_saved)
     {
       /* If saving registers via PUSH, do so now.  */
@@ -10395,6 +10418,27 @@ ix86_expand_prologue (void)
       current_function_static_stack_size = stack_size;
     }
 
+  /* On SEH target with very large frame size, allocate an area to save
+     SSE registers (as the very large allocation won't be described).  */
+  if (TARGET_SEH
+      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
+      && !sse_registers_saved)
+    {
+      HOST_WIDE_INT sse_size =
+	frame.sse_reg_save_offset - frame.reg_save_offset;
+
+      gcc_assert (int_registers_saved);
+
+      /* No need to do stack checking as the area will be immediately
+	 written.  */
+      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+			         GEN_INT (-sse_size), -1,
+				 m->fs.cfa_reg == stack_pointer_rtx);
+      allocate -= sse_size;
+      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
+      sse_registers_saved = true;
+    }
+
   /* The stack has already been decremented by the instruction calling us
      so probe if the size is non-negative to preserve the protection area.  */
   if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
@@ -10519,7 +10563,7 @@ ix86_expand_prologue (void)
 
   if (!int_registers_saved)
     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
-  if (frame.nsseregs)
+  if (!sse_registers_saved)
     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
 
   pic_reg_used = false;
@@ -10975,8 +11019,13 @@ ix86_expand_epilogue (int style)
 	}
 
       /* First step is to deallocate the stack frame so that we can
-	 pop the registers.  */
-      if (!m->fs.sp_valid)
+	 pop the registers.  Also do it on SEH target for very large
+	 frame as the emitted instructions aren't allowed by the ABI in
+	 epilogues.  */
+      if (!m->fs.sp_valid
+ 	  || (TARGET_SEH
+	      && (m->fs.sp_offset - frame.reg_save_offset
+		  >= SEH_MAX_FRAME_SIZE)))
 	{
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
 				     GEN_INT (m->fs.fp_offset
@@ -25926,6 +25975,9 @@ enum ix86_builtins
   IX86_BUILTIN_CPU_IS,
   IX86_BUILTIN_CPU_SUPPORTS,
 
+  /* Establisher frame for Windows x64.  */
+  IX86_BUILTIN_ESTABLISHER_FRAME,
+
   IX86_BUILTIN_MAX
 };
 
@@ -28185,6 +28237,10 @@ ix86_init_builtins (void)
   if (TARGET_LP64)
     ix86_init_builtins_va_builtins_abi ();
 
+  if (TARGET_SEH)
+    def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_establisher_frame",
+ 		 PVOID_FTYPE_VOID, IX86_BUILTIN_ESTABLISHER_FRAME);
+ 
 #ifdef SUBTARGET_INIT_BUILTINS
   SUBTARGET_INIT_BUILTINS;
 #endif
@@ -29954,6 +30010,16 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
 	return target;
       }
 
+    case IX86_BUILTIN_ESTABLISHER_FRAME:
+      {
+	if (target == NULL_RTX
+	    || GET_MODE (target) != Pmode)
+	  target = gen_reg_rtx (Pmode);
+
+	emit_insn (gen_establisher_frame (target));
+	return target;
+      }
+
     case IX86_BUILTIN_LLWPCB:
       arg0 = CALL_EXPR_ARG (exp, 0);
       op0 = expand_normal (arg0);
@@ -35286,6 +35352,43 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
 			  gen_rtx_MULT (mode, e2, e3)));
 }
 
+/* Output assembly code to get the establisher frame (Windows x64 only).
+   This corresponds to what will be computed by Windows from Frame Register
+   and Frame Register Offset fields of the UNWIND_INFO structure.  Since
+   these values are computed very late (by ix86_expand_prologue), we cannot
+   express this using only RTL.  */
+
+const char *
+ix86_output_establisher_frame (rtx target)
+{
+  if (!frame_pointer_needed)
+    {
+      /* Note that we have advertised an lea operation.  */
+      output_asm_insn ("lea{q}\t{0(%%rsp), %0|%0, 0[rsp]}", &target);
+    }
+  else
+    {
+      rtx xops[3];
+      struct ix86_frame frame;
+
+      /* Recompute the frame layout here.  */
+      ix86_compute_frame_layout (&frame);
+
+      /* Closely follow how the frame pointer is set in
+	 ix86_expand_prologue.  */
+      xops[0] = target;
+      xops[1] = hard_frame_pointer_rtx;
+      if (frame.hard_frame_pointer_offset == frame.reg_save_offset)
+	xops[2] = GEN_INT (0);
+      else
+	xops[2] = GEN_INT (-(frame.stack_pointer_offset
+			     - frame.hard_frame_pointer_offset));
+      output_asm_insn ("lea{q}\t{%a2(%1), %0|%0, %a2[%1]}", xops);
+    }
+
+  return "";
+}
+
 #ifdef TARGET_SOLARIS
 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index ddb3645..e0fb534 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -729,6 +729,16 @@ enum target_cpu_default
 /* Boundary (in *bits*) on which the incoming stack is aligned.  */
 #define INCOMING_STACK_BOUNDARY ix86_incoming_stack_boundary
 
+/* According to Windows x64 software convention, the maximum stack allocatable
+   in the prologue is 4G - 8 bytes.  Furthermore, there is a limited set of
+   instructions allowed to adjust the stack pointer in the epilog, forcing the
+   use of frame pointer for frames larger than 2 GB.
+   We define only one threshold for both the prolog and the epilog.  When the
+   frame size is larger than this threshold, we allocate the area to save SSE
+   regs, then save them, and then allocate the remainder.  There is no SEH
+   unwind info for this latter allocation.  */
+#define SEH_MAX_FRAME_SIZE (2U << 30)
+
 /* Target OS keeps a vector-aligned (128-bit, 16-byte) stack.  This is
    mandatory for the 64-bit ABI, and may or may not be true for other
    operating systems.  */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 43c9f1d..fd5192c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -84,8 +84,6 @@
   ;; Prologue support
   UNSPEC_STACK_ALLOC
   UNSPEC_SET_GOT
-  UNSPEC_REG_SAVE
-  UNSPEC_DEF_CFA
   UNSPEC_SET_RIP
   UNSPEC_SET_GOT_OFFSET
   UNSPEC_MEMORY_BLOCKAGE
@@ -115,6 +113,7 @@
   UNSPEC_PAUSE
   UNSPEC_LEA_ADDR
   UNSPEC_XBEGIN_ABORT
+  UNSPEC_ESTABLISHER_FRAME
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -12080,6 +12079,15 @@
   "TARGET_64BIT"
   "leave"
   [(set_attr "type" "leave")])
+
+(define_insn "establisher_frame"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec [(const_int 0)] UNSPEC_ESTABLISHER_FRAME))]
+  "TARGET_64BIT"
+  "* return ix86_output_establisher_frame (operands[0]);"
+  [(set_attr "type" "lea")
+   (set_attr "length_immediate" "4")
+   (set_attr "mode" "DI")])
 

 ;; Handle -fsplit-stack.
 
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index c1ed1c0..10cdee8 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -829,22 +829,6 @@ i386_pe_seh_end_prologue (FILE *f)
     return;
   seh = cfun->machine->seh;
 
-  /* Emit an assembler directive to set up the frame pointer.  Always do
-     this last.  The documentation talks about doing this "before" any
-     other code that uses offsets, but (experimentally) that's after we
-     emit the codes in reverse order (handled by the assembler).  */
-  if (seh->cfa_reg != stack_pointer_rtx)
-    {
-      HOST_WIDE_INT offset = seh->sp_offset - seh->cfa_offset;
-
-      gcc_assert ((offset & 15) == 0);
-      gcc_assert (IN_RANGE (offset, 0, 240));
-
-      fputs ("\t.seh_setframe\t", f);
-      print_reg (seh->cfa_reg, 0, f);
-      fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
-    }
-
   XDELETE (seh);
   cfun->machine->seh = NULL;
 
@@ -915,7 +899,10 @@ seh_emit_stackalloc (FILE *f, struct seh_frame_state *seh,
     seh->cfa_offset += offset;
   seh->sp_offset += offset;
 
-  fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
+  /* Do not output the stackalloc for very large frames (it won't work as
+     there is no encoding for such frame sizes).  */
+  if (offset < SEH_MAX_FRAME_SIZE)
+    fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
 }
 
 /* Process REG_CFA_ADJUST_CFA for SEH.  */
@@ -948,8 +935,19 @@ seh_cfa_adjust_cfa (FILE *f, struct seh_frame_state *seh, rtx pat)
     seh_emit_stackalloc (f, seh, reg_offset);
   else if (dest_regno == HARD_FRAME_POINTER_REGNUM)
     {
+      HOST_WIDE_INT offset;
+
       seh->cfa_reg = dest;
       seh->cfa_offset -= reg_offset;
+
+      offset = seh->sp_offset - seh->cfa_offset;
+
+      gcc_assert ((offset & 15) == 0);
+      gcc_assert (IN_RANGE (offset, 0, 240));
+
+      fputs ("\t.seh_setframe\t", f);
+      print_reg (seh->cfa_reg, 0, f);
+      fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
     }
   else
     gcc_unreachable ();
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index a60d6da..e689d51 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10768,6 +10768,14 @@ v2sf __builtin_ia32_pswapdsf (v2sf)
 v2si __builtin_ia32_pswapdsi (v2si)
 @end smallexample
 
+The following built-in function is available on Microsoft Windows x64 targets.
+
+@table @code
+@item void *__builtin_establisher_frame (void)
+Return the establisher frame for the current function.  This is used to
+implement @code{setjmp}.
+@end table
+
 @node MIPS DSP Built-in Functions
 @subsection MIPS DSP Built-in Functions
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]