This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch-v2] Adjustments for Windows x64 SEH
- From: Tristan Gingold <gingold at adacore dot com>
- To: Richard Henderson <rth at redhat dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Kai Tietz <ktietz70 at googlemail dot com>
- Date: Thu, 21 Jun 2012 09:48:30 +0200
- Subject: [Patch-v2] Adjustments for Windows x64 SEH
- References: <79603AC4-6AF9-4057-AADC-1EECD617AAA8@adacore.com> <4FE0AD20.4070201@redhat.com>
Here is the new version. It is now possible to use __builtin_frame_address (0) to get the current establisher frame thanks to a tiny adjustment.
No regressions on x86_64-linux, and an x86_64-windows native gdb can be built and run.
Tristan.
2012-06-18 Tristan Gingold <gingold@adacore.com>
* config/i386/winnt.c (i386_pe_seh_end_prologue): Move code to ...
(seh_cfa_adjust_cfa): ... that function.
(seh_emit_stackalloc): Do not emit out of range values.
* config/i386/i386.md: Delete unused UNSPEC_REG_SAVE,
UNSPEC_DEF_CFA constants.
* config/i386/i386.h (SEH_MAX_FRAME_SIZE): Define.
* config/i386/i386.c (ix86_frame_pointer_required): Required
for very large frames on SEH target.
(ix86_compute_frame_layout): Save area is before frame pointer
on SEH target. Handle very large frames.
(ix86_expand_prologue): Likewise.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e2f5740..5ef7d01 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -8558,6 +8558,11 @@ ix86_frame_pointer_required (void)
if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
return true;
+ /* Win64 SEH, very large frames need a frame-pointer as maximum stack
+ allocation is 4GB. */
+ if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
+ return true;
+
/* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
turns off the frame pointer by default. Turn it back on now if
we've not got a leaf function. */
@@ -9051,6 +9056,11 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
offset += frame->nregs * UNITS_PER_WORD;
frame->reg_save_offset = offset;
+ /* On SEH target, registers are pushed just before the frame pointer
+ location. */
+ if (TARGET_SEH)
+ frame->hard_frame_pointer_offset = offset;
+
/* Align and set SSE register save area. */
if (frame->nsseregs)
{
@@ -9142,9 +9152,12 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
{
HOST_WIDE_INT diff;
- /* If we can leave the frame pointer where it is, do so. */
+ /* If we can leave the frame pointer where it is, do so. Also, returns
+ the establisher frame for __builtin_frame_address (0). */
diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
- if (diff > 240 || (diff & 15) != 0)
+ if (diff <= SEH_MAX_FRAME_SIZE
+ && (diff > 240 || (diff & 15) != 0)
+ && !crtl->accesses_prior_frames)
{
/* Ideally we'd determine what portion of the local stack frame
(within the constraint of the lowest 240) is most heavily used.
@@ -10146,6 +10159,7 @@ ix86_expand_prologue (void)
struct ix86_frame frame;
HOST_WIDE_INT allocate;
bool int_registers_saved;
+ bool sse_registers_saved;
ix86_finalize_stack_realign_flags ();
@@ -10298,6 +10312,9 @@ ix86_expand_prologue (void)
m->fs.realigned = true;
}
+ int_registers_saved = (frame.nregs == 0);
+ sse_registers_saved = (frame.nsseregs == 0);
+
if (frame_pointer_needed && !m->fs.fp_valid)
{
/* Note: AT&T enter does NOT have reversed args. Enter is probably
@@ -10305,6 +10322,17 @@ ix86_expand_prologue (void)
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
+ /* Push registers now, before setting the frame pointer
+ on SEH target. */
+ if (!int_registers_saved
+ && TARGET_SEH
+ && !frame.save_regs_using_mov)
+ {
+ ix86_emit_save_regs ();
+ int_registers_saved = true;
+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+ }
+
if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
{
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
@@ -10317,8 +10345,6 @@ ix86_expand_prologue (void)
}
}
- int_registers_saved = (frame.nregs == 0);
-
if (!int_registers_saved)
{
/* If saving registers via PUSH, do so now. */
@@ -10395,6 +10421,27 @@ ix86_expand_prologue (void)
current_function_static_stack_size = stack_size;
}
+ /* On SEH target with very large frame size, allocate an area to save
+ SSE registers (as the very large allocation won't be described). */
+ if (TARGET_SEH
+ && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
+ && !sse_registers_saved)
+ {
+ HOST_WIDE_INT sse_size =
+ frame.sse_reg_save_offset - frame.reg_save_offset;
+
+ gcc_assert (int_registers_saved);
+
+ /* No need to do stack checking as the area will be immediately
+ written. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-sse_size), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ allocate -= sse_size;
+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
+ sse_registers_saved = true;
+ }
+
/* The stack has already been decremented by the instruction calling us
so probe if the size is non-negative to preserve the protection area. */
if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
@@ -10519,7 +10566,7 @@ ix86_expand_prologue (void)
if (!int_registers_saved)
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- if (frame.nsseregs)
+ if (!sse_registers_saved)
ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
pic_reg_used = false;
@@ -10975,8 +11022,13 @@ ix86_expand_epilogue (int style)
}
/* First step is to deallocate the stack frame so that we can
- pop the registers. */
- if (!m->fs.sp_valid)
+ pop the registers. Also do it on SEH target for very large
+ frame as the emitted instructions aren't allowed by the ABI in
+ epilogues. */
+ if (!m->fs.sp_valid
+ || (TARGET_SEH
+ && (m->fs.sp_offset - frame.reg_save_offset
+ >= SEH_MAX_FRAME_SIZE)))
{
pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
GEN_INT (m->fs.fp_offset
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index ddb3645..44533e0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -729,6 +729,18 @@ enum target_cpu_default
/* Boundary (in *bits*) on which the incoming stack is aligned. */
#define INCOMING_STACK_BOUNDARY ix86_incoming_stack_boundary
+/* According to Windows x64 software convention, the maximum stack allocatable
+ in the prologue is 4G - 8 bytes. Furthermore, there is a limited set of
+ instructions allowed to adjust the stack pointer in the epilog, forcing the
+ use of frame pointer for frames larger than 2 GB. This theorical limit
+ is reduced by 256, an over-estimated upper bound for the stack use by the
+ prologue.
+ We define only one threshold for both the prolog and the epilog. When the
+ frame size is larger than this threshold, we allocate the are to save SSE
+ regs, then save them, and then allocate the remaining. There is no SEH
+ unwind info for this later allocation. */
+#define SEH_MAX_FRAME_SIZE ((2U << 30) - 256)
+
/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
mandatory for the 64-bit ABI, and may or may not be true for other
operating systems. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 43c9f1d..e62a3f7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -84,8 +84,6 @@
;; Prologue support
UNSPEC_STACK_ALLOC
UNSPEC_SET_GOT
- UNSPEC_REG_SAVE
- UNSPEC_DEF_CFA
UNSPEC_SET_RIP
UNSPEC_SET_GOT_OFFSET
UNSPEC_MEMORY_BLOCKAGE
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index c1ed1c0..10cdee8 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -829,22 +829,6 @@ i386_pe_seh_end_prologue (FILE *f)
return;
seh = cfun->machine->seh;
- /* Emit an assembler directive to set up the frame pointer. Always do
- this last. The documentation talks about doing this "before" any
- other code that uses offsets, but (experimentally) that's after we
- emit the codes in reverse order (handled by the assembler). */
- if (seh->cfa_reg != stack_pointer_rtx)
- {
- HOST_WIDE_INT offset = seh->sp_offset - seh->cfa_offset;
-
- gcc_assert ((offset & 15) == 0);
- gcc_assert (IN_RANGE (offset, 0, 240));
-
- fputs ("\t.seh_setframe\t", f);
- print_reg (seh->cfa_reg, 0, f);
- fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
- }
-
XDELETE (seh);
cfun->machine->seh = NULL;
@@ -915,7 +899,10 @@ seh_emit_stackalloc (FILE *f, struct seh_frame_state *seh,
seh->cfa_offset += offset;
seh->sp_offset += offset;
- fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
+ /* Do not output the stackalloc in that case (it won't work as there is no
+ encoding for very large frame size). */
+ if (offset < SEH_MAX_FRAME_SIZE)
+ fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
}
/* Process REG_CFA_ADJUST_CFA for SEH. */
@@ -948,8 +935,19 @@ seh_cfa_adjust_cfa (FILE *f, struct seh_frame_state *seh, rtx pat)
seh_emit_stackalloc (f, seh, reg_offset);
else if (dest_regno == HARD_FRAME_POINTER_REGNUM)
{
+ HOST_WIDE_INT offset;
+
seh->cfa_reg = dest;
seh->cfa_offset -= reg_offset;
+
+ offset = seh->sp_offset - seh->cfa_offset;
+
+ gcc_assert ((offset & 15) == 0);
+ gcc_assert (IN_RANGE (offset, 0, 240));
+
+ fputs ("\t.seh_setframe\t", f);
+ print_reg (seh->cfa_reg, 0, f);
+ fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
}
else
gcc_unreachable ();