This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[split] Add x86 support
- From: Ian Lance Taylor <iant at google dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 25 Sep 2009 17:56:51 -0700
- Subject: [split] Add x86 support
This adds x86-specific support for -fsplit-stack. This uses the stack
protector field in the TCB. I've filed a glibc bug asking for a new
field, but no response as yet.
Bootstrapped on x86_64-unknown-linux-gnu. Committed to split branch.
Ian
2009-09-25 Ian Lance Taylor <iant@google.com>
* config/i386/i386.c (ix86_supports_split_stack): New static
function.
(split_stack_fn): New static variable.
(ix86_expand_split_stack_prologue): New function.
(ix86_expand_call): Return the new call insn.
(TARGET_SUPPORTS_SPLIT_STACK): Define.
* config/i386/i386.md (UNSPEC_STACK_CHECK): Define constant.
(split_stack_prologue): New expander.
(split_stack_check_small): New expander.
(split_stack_compare_small_32): New insn.
(split_stack_compare_small_64): New insn.
(split_stack_check_large): New expander.
(split_stack_compare_large_32): New insn.
(split_stack_compare_large_64): New insn.
* config/i386/linux.h (TARGET_THREAD_SPLIT_STACK_OFFSET): Define.
* config/i386/linux64.h (TARGET_THREAD_SPLIT_STACK_OFFSET):
Define.
* config/i386/i386-protos.h (ix86_expand_split_stack_prologue):
Declare.
(ix86_expand_call): Change return type in declaration.
Index: gcc/config/i386/linux.h
===================================================================
--- gcc/config/i386/linux.h (revision 152193)
+++ gcc/config/i386/linux.h (working copy)
@@ -213,4 +213,7 @@ along with GCC; see the file COPYING3.
#ifdef TARGET_LIBC_PROVIDES_SSP
/* i386 glibc provides __stack_chk_guard in %gs:0x14. */
#define TARGET_THREAD_SSP_OFFSET 0x14
+
+/* For now -fsplit-stack uses the same field. */
+#define TARGET_THREAD_SPLIT_STACK_OFFSET TARGET_THREAD_SSP_OFFSET
#endif
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md (revision 152193)
+++ gcc/config/i386/i386.md (working copy)
@@ -85,6 +85,7 @@
(UNSPEC_SET_RIP 16)
(UNSPEC_SET_GOT_OFFSET 17)
(UNSPEC_MEMORY_BLOCKAGE 18)
+ (UNSPEC_STACK_CHECK 19)
; TLS support
(UNSPEC_TP 20)
@@ -15888,7 +15889,141 @@
DONE;
}
})
+
+;; Handle -fsplit-stack.
+
+(define_expand "split_stack_prologue"
+ [(const_int 0)]
+ ""
+{
+ ix86_expand_split_stack_prologue ();
+ DONE;
+})
+
+;; For -fsplit-stack, check whether we have enough stack space at the
+;; start of a function which allocates 256 bytes or less on the stack.
+;; Branch to the label if we have enough space.
+
+(define_expand "split_stack_check_small"
+ [(set (pc)
+ (if_then_else
+ (le (reg SP_REG) (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+ (label_ref
+ (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ rtx ssp_offset;
+
+#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
+ ssp_offset = GEN_INT (TARGET_THREAD_SPLIT_STACK_OFFSET);
+#else
+ gcc_unreachable ();
+#endif
+
+ if (TARGET_64BIT)
+ emit_insn (gen_split_stack_compare_small_64 (ssp_offset));
+ else
+ emit_insn (gen_split_stack_compare_small_32 (ssp_offset));
+ ix86_compare_op0 = gen_rtx_REG (CCmode, FLAGS_REG);
+ ix86_compare_op1 = GEN_INT (0);
+ ix86_expand_branch (GT, operands[0]);
+ JUMP_LABEL (get_last_insn ()) = operands[0];
+ DONE;
+})
+
+;; Compare the stack pointer with the -fsplit-stack limit.
+(define_insn "split_stack_compare_small_32"
+ [(set (reg:CC FLAGS_REG)
+ (compare (reg:SI SP_REG)
+ (unspec:SI [(match_operand 0 "const_int_operand" "i")]
+ UNSPEC_STACK_CHECK)))]
+ "!TARGET_64BIT"
+ "cmp{l}\t{%%gs:%P0, %%esp|%%esp, %%gs:%P0}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "4")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")])
+
+;; 64-bit version of split_stack_compare_small_32.
+
+(define_insn "split_stack_compare_small_64"
+ [(set (reg:CC FLAGS_REG)
+ (compare (reg:DI SP_REG)
+ (unspec:DI [(match_operand 0 "const_int_operand" "i")]
+ UNSPEC_STACK_CHECK)))]
+ "TARGET_64BIT"
+ "cmp{q}\t{%%fs:%P0, %%rsp|%%rsp, %%fs:%P0}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "4")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")])
+
+;; For -fsplit-stack, check whether we have enough stack space at the
+;; start of a function which allocates more than 256 bytes on the
+;; stack. Branch to the label if we have enough space.
+
+(define_expand "split_stack_check_large"
+ [(set (pc)
+ (if_then_else
+ (le (match_operand 0 "" "")
+ (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+{
+ rtx ssp_offset;
+
+#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
+ ssp_offset = GEN_INT (TARGET_THREAD_SPLIT_STACK_OFFSET);
+#else
+ gcc_unreachable ();
+#endif
+
+ if (TARGET_64BIT)
+ emit_insn (gen_split_stack_compare_large_64 (operands[0], ssp_offset));
+ else
+ emit_insn (gen_split_stack_compare_large_32 (operands[0], ssp_offset));
+ ix86_compare_op0 = gen_rtx_REG (CCmode, FLAGS_REG);
+ ix86_compare_op1 = GEN_INT (0);
+ ix86_expand_branch (GT, operands[1]);
+ JUMP_LABEL (get_last_insn ()) = operands[1];
+ DONE;
+})
+
+;; Compare operand 0 with the -fsplit-stack limit.
+
+(define_insn "split_stack_compare_large_32"
+ [(set (reg:CC FLAGS_REG)
+ (compare (match_operand:SI 0 "register_operand" "r")
+ (unspec:SI [(match_operand 1 "const_int_operand" "i")]
+ UNSPEC_STACK_CHECK)))]
+ "!TARGET_64BIT"
+ "cmp{l}\t{%%gs:%P1, %0|%0, %%gs:%P1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "4")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")])
+
+;; 64-bit version of split_stack_compare_large_32.
+
+(define_insn "split_stack_compare_large_64"
+ [(set (reg:CC FLAGS_REG)
+ (compare (match_operand:DI 0 "register_operand" "r")
+ (unspec:DI [(match_operand 1 "const_int_operand" "i")]
+ UNSPEC_STACK_CHECK)))]
+ "TARGET_64BIT"
+ "cmp{q}\t{%%fs:%P1, %0|%0, %%fs:%P1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "4")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")])
+
(define_expand "ffs_cmove"
[(set (match_dup 2) (const_int -1))
(parallel [(set (reg:CCZ FLAGS_REG)
Index: gcc/config/i386/linux64.h
===================================================================
--- gcc/config/i386/linux64.h (revision 152193)
+++ gcc/config/i386/linux64.h (working copy)
@@ -117,4 +117,7 @@ see the files COPYING3 and COPYING.RUNTI
/* i386 glibc provides __stack_chk_guard in %gs:0x14,
x86_64 glibc provides it in %fs:0x28. */
#define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? 0x28 : 0x14)
+
+/* For now -fsplit-stack uses the same field. */
+#define TARGET_THREAD_SPLIT_STACK_OFFSET TARGET_THREAD_SSP_OFFSET
#endif
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc/config/i386/i386-protos.h (revision 152193)
+++ gcc/config/i386/i386-protos.h (working copy)
@@ -31,6 +31,7 @@ extern void ix86_file_end (void);
extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
extern void ix86_expand_prologue (void);
extern void ix86_expand_epilogue (int);
+extern void ix86_expand_split_stack_prologue (void);
extern void ix86_output_addr_vec_elt (FILE *, int);
extern void ix86_output_addr_diff_elt (FILE *, int, int);
@@ -114,7 +115,7 @@ extern bool ix86_expand_int_vcond (rtx[]
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern int ix86_expand_int_addcc (rtx[]);
-extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
extern rtx ix86_zero_extend_to_Pmode (rtx);
extern void ix86_split_long_move (rtx[]);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 152193)
+++ gcc/config/i386/i386.c (working copy)
@@ -7788,6 +7788,34 @@ ix86_builtin_setjmp_frame_value (void)
return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
+/* On x86, -fsplit-stack and -fstack-protector both use the same
+ field in the TCB, so they cannot be used together. */
+
+static bool
+ix86_supports_split_stack (void)
+{
+ bool ret = true;
+
+#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
+ error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
+ ret = false;
+#else
+ if (flag_stack_protect)
+ {
+ error ("%<-fstack-protector%> is not compatible with %<-fsplit-stack%>");
+ ret = false;
+ }
+#endif
+
+ return ret;
+}
+
+/* When using -fsplit-stack, the allocation routines set a field in
+ the TCB to the bottom of the stack plus this much space, measured
+ in bytes. */
+
+#define SPLIT_STACK_AVAILABLE 256
+
/* Fill structure ix86_frame about frame of currently computed function. */
static void
@@ -9094,6 +9122,138 @@ ix86_output_function_epilogue (FILE *fil
#endif
}
+
+/* A SYMBOL_REF for the function which allocates new stack space for
+ -fsplit-stack. */
+
+static GTY(()) rtx split_stack_fn;
+
+/* Handle -fsplit-stack. These are the first instructions in the
+ function, even before the regular prologue. */
+
+void
+ix86_expand_split_stack_prologue (void)
+{
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+ tree decl;
+ bool is_fastcall;
+ int regparm, args_size;
+ rtx label, jump_insn, allocate_rtx, call_insn;
+
+ gcc_assert (flag_split_stack && reload_completed);
+
+ ix86_finalize_stack_realign_flags ();
+ ix86_compute_frame_layout (&frame);
+ allocate = (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0);
+ decl = cfun->decl;
+ is_fastcall = lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (decl))) != NULL;
+ regparm = ix86_function_regparm (TREE_TYPE (decl), decl);
+
+ /* This is the label we will branch to if we have enough stack
+ space. We expect the basic block reordering pass to reverse this
+ branch if optimizing, so that we branch in the unlikely case. */
+ label = gen_label_rtx ();
+
+ /* We need to compare the stack pointer minus the frame size with
+ the stack boundary in the TCB. The stack boundary always gives
+ us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
+ can compare directly. Otherwise we need to do an addition. */
+ if (allocate <= SPLIT_STACK_AVAILABLE)
+ emit_jump_insn (gen_split_stack_check_small (label));
+ else
+ {
+ rtx offset, scratch_reg;
+
+ /* We need a scratch register to hold the stack pointer minus
+ the required frame size. Since this is the very start of the
+ function, the scratch register can be any caller-saved
+ register which is not used for parameters. */
+ offset = GEN_INT (- allocate);
+ if (TARGET_64BIT)
+ {
+ scratch_reg = gen_rtx_REG (Pmode, R10_REG);
+ if (x86_64_immediate_operand (offset, Pmode))
+ emit_insn (gen_adddi3 (scratch_reg, stack_pointer_rtx, offset));
+ else
+ {
+ emit_move_insn (scratch_reg, offset);
+ emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
+ stack_pointer_rtx));
+ }
+ }
+ else
+ {
+ unsigned int scratch_regno;
+
+ if (is_fastcall)
+ scratch_regno = AX_REG;
+ else if (regparm < 3)
+ scratch_regno = CX_REG;
+ else
+ {
+ /* FIXME: We could make this work by pushing a register
+ around the addition and comparison. */
+ sorry ("-fsplit-stack does not support 3 register parameters");
+ scratch_regno = CX_REG;
+ }
+
+ scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
+ emit_insn (gen_addsi3 (scratch_reg, stack_pointer_rtx, offset));
+ }
+
+ emit_jump_insn (gen_split_stack_check_large (scratch_reg, label));
+ }
+
+ /* Mark the jump as very likely to be taken. */
+ jump_insn = get_last_insn ();
+ gcc_assert (JUMP_P (jump_insn));
+ add_reg_note (jump_insn, REG_BR_PROB,
+ GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
+
+ /* Get more stack space. We pass in the desired stack space and the
+ size of the arguments to copy to the new stack. In 32-bit mode
+ we push the parameters; __morestack will return on a new stack
+ anyhow. In 64-bit mode we pass the parameters in r10 and
+ r11. */
+ allocate_rtx = GEN_INT (allocate);
+ args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
+ if (!TARGET_64BIT)
+ {
+ /* In order to give __morestack a scratch register, we save %ecx
+ if necessary. */
+ if (is_fastcall || regparm > 2)
+ {
+ emit_insn (gen_push (gen_rtx_REG (Pmode, CX_REG)));
+ args_size += UNITS_PER_WORD;
+ }
+ emit_insn (gen_push (GEN_INT (args_size)));
+ emit_insn (gen_push (allocate_rtx));
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, R10_REG), allocate_rtx);
+ emit_move_insn (gen_rtx_REG (Pmode, R11_REG), GEN_INT (args_size));
+ }
+ if (split_stack_fn == NULL_RTX)
+ split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+ call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
+ GEN_INT (UNITS_PER_WORD), constm1_rtx,
+ NULL_RTX, 0);
+
+ if (!TARGET_64BIT && (is_fastcall || regparm > 2))
+ {
+ /* Restore the scratch register we pushed earlier. */
+ emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, CX_REG)));
+ }
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+}
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
@@ -18866,7 +19026,7 @@ construct_plt_address (rtx symbol)
return tmp;
}
-void
+rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
rtx callarg2,
rtx pop, int sibcall)
@@ -18957,6 +19117,8 @@ ix86_expand_call (rtx retval, rtx fnaddr
call = emit_call_insn (call);
if (use)
CALL_INSN_FUNCTION_USAGE (call) = use;
+
+ return call;
}
@@ -29396,6 +29558,9 @@ ix86_enum_va_list (int idx, const char *
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
+#undef TARGET_SUPPORTS_SPLIT_STACK
+#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
+
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value