This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] s390: Add -fsplit-stack support


On 29/01/16 14:33, Andreas Krebbel wrote:
Hi Marcin,

sorry for the late feedback.

A few comments regarding the split stack implementation:

The GNU coding style requires replacing every 8 leading blanks on a
line with a tab.  There are many lines in your patch violating this.
In case you are an Emacs user, `whitespace-cleanup' will fix this for
you.

OK, will do.

Could you please add a testcase checking the different
variants, i.e. with early exit, no-alloc in __morestack, and with an
actual allocation?

The testsuite with -fsplit-stack already hits all of them, and checking them manually is rather tricky (I don't know if it could be done in a target-independent way at all), but I think it'd be reasonable to make assembly testcases calling __morestack for the last two cases, to check whether the registers are being preserved, etc.


There are a few more comments inline.

Bye,

-Andreas-

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c881d52..71f6f38 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,38 @@
  2016-01-16  Marcin Kościelnicki  <koriakin@0x04.net>

+	* common/config/s390/s390-common.c (s390_supports_split_stack):
+	New function.
+	(TARGET_SUPPORTS_SPLIT_STACK): New macro.
+	* config/s390/s390-protos.h: Add s390_expand_split_stack_prologue.
+	* config/s390/s390.c (struct machine_function): New field
+	split_stack_varargs_pointer.
+	(s390_register_info): Mark r12 as clobbered if it'll be used as temp
+	in s390_emit_prologue.
+	(s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack
+	vararg pointer.
+	(morestack_ref): New global.
+	(SPLIT_STACK_AVAILABLE): New macro.
+	(s390_expand_split_stack_prologue): New function.
+	(s390_expand_split_stack_call): New function.
+	(s390_live_on_entry): New function.
+	(s390_va_start): Use split-stack vararg pointer if appropriate.
+	(s390_reorg): Lower the split-stack pseudo-insns.
+	(s390_asm_file_end): Emit the split-stack note sections.
+	(TARGET_EXTRA_LIVE_ON_ENTRY): New macro.
+	* config/s390/s390.md: (UNSPEC_STACK_CHECK): New unspec.
+	(UNSPECV_SPLIT_STACK_CALL): New unspec.
+	(UNSPECV_SPLIT_STACK_SIBCALL): New unspec.
+	(UNSPECV_SPLIT_STACK_MARKER): New unspec.
+	(split_stack_prologue): New expand.
+	(split_stack_call_*): New insn.
+	(split_stack_cond_call_*): New insn.
+	(split_stack_space_check): New expand.
+	(split_stack_sibcall_*): New insn.
+	(split_stack_cond_sibcall_*): New insn.
+	(split_stack_marker): New insn.
+
+2016-01-02  Marcin Kościelnicki  <koriakin@0x04.net>
+
  	* cfgrtl.c (rtl_tidy_fallthru_edge): Bail for unconditional jumps
  	with side effects.

diff --git a/gcc/common/config/s390/s390-common.c b/gcc/common/config/s390/s390-common.c
index 4519c21..1e497e6 100644
--- a/gcc/common/config/s390/s390-common.c
+++ b/gcc/common/config/s390/s390-common.c
@@ -105,6 +105,17 @@ s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
      }
  }

+/* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
+   We don't verify it, since earlier versions just have padding at
+   its place, which works just as well.  */
+
+static bool
+s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
+			   struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+  return true;
+}
+
  #undef TARGET_DEFAULT_TARGET_FLAGS
  #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

@@ -117,4 +128,7 @@ s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
  #undef TARGET_OPTION_INIT_STRUCT
  #define TARGET_OPTION_INIT_STRUCT s390_option_init_struct

+#undef TARGET_SUPPORTS_SPLIT_STACK
+#define TARGET_SUPPORTS_SPLIT_STACK s390_supports_split_stack
+
  struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 633bc1e..09032c9 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -42,6 +42,7 @@ extern bool s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
  extern HOST_WIDE_INT s390_initial_elimination_offset (int, int);
  extern void s390_emit_prologue (void);
  extern void s390_emit_epilogue (bool);
+extern void s390_expand_split_stack_prologue (void);
  extern bool s390_can_use_simple_return_insn (void);
  extern bool s390_can_use_return_insn (void);
  extern void s390_function_profiler (FILE *, int);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 3be64de..6afce7c 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -426,6 +426,13 @@ struct GTY(()) machine_function
    /* True if the current function may contain a tbegin clobbering
       FPRs.  */
    bool tbegin_p;
+
+  /* For -fsplit-stack support: A stack local which holds a pointer to
+     the stack arguments for a function with a variable number of
+     arguments.  This is set at the start of the function and is used
+     to initialize the overflow_arg_area field of the va_list
+     structure.  */
+  rtx split_stack_varargs_pointer;
  };

  /* Few accessor macros for struct cfun->machine->s390_frame_layout.  */
@@ -9316,9 +9323,13 @@ s390_register_info ()
  	  cfun_frame_layout.high_fprs++;
        }

-  if (flag_pic)
-    clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
-      |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
+  /* Register 12 is used for GOT address, but also as temp in prologue
+     for split-stack stdarg functions (unless r14 is available).  */
+  clobbered_regs[12]
+    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+	|| (flag_split_stack && cfun->stdarg
+	    && (crtl->is_leaf || TARGET_TPF_PROFILING
+		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));

    clobbered_regs[BASE_REGNUM]
      |= (cfun->machine->base_reg
@@ -10446,6 +10457,8 @@ s390_emit_prologue (void)
        && !crtl->is_leaf
        && !TARGET_TPF_PROFILING)
      temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+  else if (flag_split_stack && cfun->stdarg)
+    temp_reg = gen_rtx_REG (Pmode, 12);
TPF uses r1 hard coded in tracing prologue/epilogue.  So I think we
need && !TARGET_TPF_PROFILING here as well.

Well, in that case, we'll need to emit a move instruction to some temp register, since __morestack will leave the pointer in %r1. I'll look into that.

    else
      temp_reg = gen_rtx_REG (Pmode, 1);

@@ -10939,6 +10952,284 @@ s300_set_up_by_prologue (hard_reg_set_container *regs)
      SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
  }

+/* -fsplit-stack support.  */
+
+/* A SYMBOL_REF for __morestack.  */
+static GTY(()) rtx morestack_ref;
+
+/* When using -fsplit-stack, the allocation routines set a field in
+   the TCB to the bottom of the stack plus this much space, measured
+   in bytes.  */
+
+#define SPLIT_STACK_AVAILABLE 1024
+
+/* Emit -fsplit-stack prologue, which goes before the regular function
+   prologue.  */
+
+void
+s390_expand_split_stack_prologue (void)
+{
+  rtx r1, guard, cc;
+  rtx_insn *insn;
+  /* Offset from thread pointer to __private_ss.  */
+  int psso = TARGET_64BIT ? 0x38 : 0x20;
+  /* Pointer size in bytes.  */
+  /* Frame size and argument size - the two parameters to __morestack.  */
+  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
+  /* Align argument size to 8 bytes - simplifies __morestack code.  */
+  HOST_WIDE_INT args_size = crtl->args.size >= 0
+			    ? ((crtl->args.size + 7) & ~7)
+			    : 0;
+  /* Label to jump to when no __morestack call is necessary.  */
+  rtx_code_label *enough = NULL;
+  /* Label to be called by __morestack.  */
+  rtx_code_label *call_done = NULL;
+  /* 1 if __morestack called conditionally, 0 if always.  */
+  int conditional = 0;
+
+  gcc_assert (flag_split_stack && reload_completed);
+  if (!TARGET_CPU_ZARCH)
+    {
+      sorry ("CPUs older than z900 are not supported for -fsplit-stack");
+      return;
+    }
+
+  r1 = gen_rtx_REG (Pmode, 1);
+
+  /* If no stack frame will be allocated, don't do anything.  */
+  if (!frame_size)
+    {
+      /* But emit a marker that will let linker and indirect function
+	 calls recognise this function as split-stack aware.  */
+      emit_insn(gen_split_stack_marker());
2x missing blank before (

+      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+        {
+          /* If va_start is used, just use r15.  */
+          emit_move_insn (r1,
+		          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+			                GEN_INT (STACK_POINTER_OFFSET)));
virtual_incoming_args_rtx ?


Alright.

+        }
+      return;
+    }
+
+  if (morestack_ref == NULL_RTX)
+    {
+      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
+					   | SYMBOL_FLAG_FUNCTION);
+    }
+
+  if (frame_size <= 0x7fff || (TARGET_EXTIMM && frame_size <= 0xffffffffu))
The agfi immediate value is a signed 32 bit integer.  So you can only
add up to 2G-1.  I think it would be more readable to write this as:

We're emitting ALGFI here, which accepts an unsigned 32-bit integer.

if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Os (frame_size))

as in s390_emit_prologue. The Os check will check for TARGET_EXTIMM as well.

Alright.

+    {
+      /* If frame_size will fit in an add instruction, do a stack space
+	 check, and only call __morestack if there's not enough space.  */
+      conditional = 1;
+
+      /* Get thread pointer.  r1 is the only register we can always destroy - r0
+         could contain a static chain (and cannot be used to address memory
+         anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
+      emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
+      /* Aim at __private_ss.  */
+      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
+
+      /* If less than 1kiB used, skip addition and compare directly with
+         __private_ss.  */
+      if (frame_size > SPLIT_STACK_AVAILABLE)
+        {
+          emit_move_insn (r1, guard);
+	  if (TARGET_64BIT)
+	    emit_insn (gen_adddi3 (r1, r1, GEN_INT(frame_size)));
+	  else
+	    emit_insn (gen_addsi3 (r1, r1, GEN_INT(frame_size)));
+	  guard = r1;
+        }
+
+      if (TARGET_CPU_ZARCH)
+        {
Looks like the !TARGET_CPU_ZARCH stuff hasn't been completely removed?!

Oops, will remove that.

+	  rtx tmp;
+
+          /* Compare the (maybe adjusted) guard with the stack pointer.  */
+          cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
+
+          call_done = gen_label_rtx ();
+
+	  if (TARGET_64BIT)
+	    tmp = gen_split_stack_cond_call_di (call_done,
+						morestack_ref,
+						GEN_INT (frame_size),
+						GEN_INT (args_size),
+						cc);
+	  else
+	    tmp = gen_split_stack_cond_call_si (call_done,
+						morestack_ref,
+						GEN_INT (frame_size),
+						GEN_INT (args_size),
+						cc);
Perhaps it would be more readable to do the TARGET_64BIT check in a separate
expander.  Please see "movstr" in s390.md. The same applies to all the
other gen_split_stack* invocations.

Alright.

+
+
+          insn = emit_jump_insn (tmp);
+	  JUMP_LABEL (insn) = call_done;
+
+          /* Mark the jump as very unlikely to be taken.  */
+          add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
+	}
+      else
+        {
+          /* Compare the (maybe adjusted) guard with the stack pointer.  */
+          cc = s390_emit_compare (GE, stack_pointer_rtx, guard);
+
+          enough = gen_label_rtx ();
+          insn = s390_emit_jump (enough, cc);
+          JUMP_LABEL (insn) = enough;
+
+          /* Mark the jump as very likely to be taken.  */
+          add_int_reg_note (insn, REG_BR_PROB,
+			    REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
+	}
+    }
+
+  if (call_done == NULL)
With the !TARGET_CPU_ZARCH path removed above this could be the else
path to the frame_size check and call_done can be removed.

Right.

+    {
+      rtx tmp;
+      call_done = gen_label_rtx ();
+
+      /* Now, we need to call __morestack.  It has very special calling
+         conventions: it preserves param/return/static chain registers for
+         calling main function body, and looks for its own parameters
+         at %r1 (after aligning it up to a 4 byte boundary for 31-bit mode). */
+      if (TARGET_64BIT)
+        tmp = gen_split_stack_call_di (call_done,
+					     morestack_ref,
+					     GEN_INT (frame_size),
+					     GEN_INT (args_size));
Indentation.

+      else
+        tmp = gen_split_stack_call_si (call_done,
+					     morestack_ref,
+					     GEN_INT (frame_size),
+					     GEN_INT (args_size));
Indentation.

+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = call_done;
+      emit_barrier ();
+    }
+
+  /* __morestack will call us here.  */
+
+  if (enough != NULL)
+    {
+      emit_label (enough);
+      LABEL_NUSES (enough) = 1;
+    }
This also was only for !TARGET_CPU_ZARCH.

Yes, it'll be removed.

+
+  if (conditional && cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+    {
+      /* If va_start is used, and __morestack was not called, just use r15.  */
+      emit_move_insn (r1,
+		      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+			            GEN_INT (STACK_POINTER_OFFSET)));
virtual_incoming_args_rtx?

+    }
+
+  emit_label (call_done);
+  LABEL_NUSES (call_done) = 1;
+}
+
+/* Generates split-stack call sequence, along with its parameter block.  */
+
+static void
+s390_expand_split_stack_call (rtx_insn *orig_insn,
+			      rtx call_done,
+			      rtx function,
+			      rtx frame_size,
+			      rtx args_size,
+			      rtx cond)
+{
+  int psize = GET_MODE_SIZE (Pmode);
+  rtx_insn *insn = orig_insn;
+  rtx parmbase = gen_label_rtx();
+  rtx r1 = gen_rtx_REG (Pmode, 1);
+  rtx tmp, tmp2;
+
+  /* %r1 = litbase.  */
+  insn = emit_insn_after (gen_main_base_64 (r1, parmbase), insn);
+  add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+  LABEL_NUSES (parmbase)++;
+
+  /* jg<cond> __morestack.  */
+  if (cond == NULL)
+    {
+      if (TARGET_64BIT)
+        tmp = gen_split_stack_sibcall_di (function, call_done);
+      else
+        tmp = gen_split_stack_sibcall_si (function, call_done);
+      insn = emit_jump_insn_after (tmp, insn);
+    }
+  else
+    {
+      if (!s390_comparison (cond, VOIDmode))
+	internal_error ("bad split_stack_call cond");
Perhaps just gcc_assert (s390_comparison (cond, VOIDmode)); ?

OK.

+      if (TARGET_64BIT)
+        tmp = gen_split_stack_cond_sibcall_di (function, cond, call_done);
+      else
+        tmp = gen_split_stack_cond_sibcall_si (function, cond, call_done);
+      insn = emit_jump_insn_after (tmp, insn);
+    }
+  JUMP_LABEL (insn) = call_done;
+  LABEL_NUSES (call_done)++;
+
+  /* Go to .rodata.  */
+  insn = emit_insn_after (gen_pool_section_start (), insn);
+
+  /* Now, we'll emit parameters to __morestack.  First, align to pointer size
+     (this mirrors the alignment done in __morestack - don't touch it).  */
+  insn = emit_insn_after (gen_pool_align (GEN_INT (psize)), insn);
psize -> UNITS_PER_LONG?


OK.
+
+  insn = emit_label_after (parmbase, insn);
+
+  tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+				 gen_rtvec (1, frame_size),
+				 UNSPECV_POOL_ENTRY);
+  insn = emit_insn_after (tmp, insn);
+
+  /* Second parameter is size of the arguments passed on stack that
+     __morestack has to copy to the new stack (does not include varargs).  */
+  tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+				 gen_rtvec (1, args_size),
+				 UNSPECV_POOL_ENTRY);
+  insn = emit_insn_after (tmp, insn);
+
+  /* Third parameter is offset between start of the parameter block
+     and function body to be called by __morestack.  */
+  tmp = gen_rtx_LABEL_REF (Pmode, parmbase);
+  tmp2 = gen_rtx_LABEL_REF (Pmode, call_done);
+  tmp = gen_rtx_CONST (Pmode,
+                       gen_rtx_MINUS (Pmode, tmp2, tmp));
+  tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+				 gen_rtvec (1, tmp),
+				 UNSPECV_POOL_ENTRY);
+  insn = emit_insn_after (tmp, insn);
+  add_reg_note (insn, REG_LABEL_OPERAND, call_done);
+  LABEL_NUSES (call_done)++;
+  add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+  LABEL_NUSES (parmbase)++;
+
+  /* Return from .rodata.  */
+  insn = emit_insn_after (gen_pool_section_end (), insn);
+
+  delete_insn (orig_insn);
+}
+
+/* We may have to tell the dataflow pass that the split stack prologue
+   is initializing a register.  */
+
+static void
+s390_live_on_entry (bitmap regs)
+{
+  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+    {
+      gcc_assert (flag_split_stack);
+      bitmap_set_bit (regs, 1);
+    }
+}
+
  /* Return true if the function can use simple_return to return outside
     of a shrink-wrapped region.  At present shrink-wrapping is supported
     in all cases.  */
@@ -11541,6 +11832,27 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
        expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
      }

+  if (flag_split_stack
+     && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
+         == NULL)
+     && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+    {
+      rtx reg;
+      rtx_insn *seq;
+
+      reg = gen_reg_rtx (Pmode);
+      cfun->machine->split_stack_varargs_pointer = reg;
+
+      start_sequence ();
+      emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
+      seq = get_insns ();
+      end_sequence ();
+
+      push_topmost_sequence ();
+      emit_insn_after (seq, entry_of_function ());
+      pop_topmost_sequence ();
+    }
+
    /* Find the overflow area.
       FIXME: This currently is too pessimistic when the vector ABI is
       enabled.  In that case we *always* set up the overflow area
@@ -11549,7 +11861,10 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
        || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
        || TARGET_VX_ABI)
      {
-      t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+        t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
What is the reason for changing virtual_incoming_args_rtx to
crtl->args.internal_arg_pointer in the non-split-stack case?

Looks like an accident, will change it back.

+      else
+        t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);

        off = INTVAL (crtl->args.arg_offset_rtx);
        off = off < 0 ? 0 : off;
@@ -13158,6 +13473,48 @@ s390_reorg (void)
  	}
      }

+  if (flag_split_stack)
+    {
+      rtx_insn *insn;
+
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+	{
+	  /* Look for the split-stack fake jump instructions.  */
+	  if (!JUMP_P(insn))
+	    continue;
+	  if (GET_CODE (PATTERN (insn)) != PARALLEL
+	      || XVECLEN (PATTERN (insn), 0) != 2)
+	    continue;
+	  rtx set = XVECEXP (PATTERN (insn), 0, 1);
+	  if (GET_CODE (set) != SET)
+	    continue;
+	  rtx unspec = XEXP(set, 1);
+	  if (GET_CODE (unspec) != UNSPEC_VOLATILE)
+	    continue;
+	  if (XINT (unspec, 1) != UNSPECV_SPLIT_STACK_CALL)
+	    continue;
+	  rtx set_pc = XVECEXP (PATTERN (insn), 0, 0);
+	  rtx function = XVECEXP (unspec, 0, 0);
+	  rtx frame_size = XVECEXP (unspec, 0, 1);
+	  rtx args_size = XVECEXP (unspec, 0, 2);
+	  rtx pc_src = XEXP (set_pc, 1);
+	  rtx call_done, cond = NULL_RTX;
+	  if (GET_CODE (pc_src) == IF_THEN_ELSE)
+	    {
+	      cond = XEXP (pc_src, 0);
+	      call_done = XEXP (XEXP (pc_src, 1), 0);
+	    }
+	  else
+	    call_done = XEXP (pc_src, 0);
+	  s390_expand_split_stack_call (insn,
+					call_done,
+					function,
+					frame_size,
+					args_size,
+					cond);
+	}
+    }
+
I'm wondering if it is really necessary to expand the call in that
two-step approach?! We do the general literal pool handling in
s390_reorg because we need all the insn lengths to be finalized before
performing the branch/pool splitting loop.  But this shouldn't be necessary
in this case.  Would it be possible to expand the call already in
emit_prologue phase and get rid of the s390_reorg part?

There's an internal literal pool involved, which needs to be emitted as one chunk. Optimizations are also very likely to destroy the sequence: consider the target address that __morestack will call - the control flow change happens in the __morestack jump instruction, but the address itself is encoded in one of the pool literals. It's just not worth the risk.


    /* Try to optimize prologue and epilogue further.  */
    s390_optimize_prologue ();

@@ -14469,6 +14826,9 @@ s390_asm_file_end (void)
  	     s390_vector_abi);
  #endif
    file_end_indicate_exec_stack ();
+
+  if (flag_split_stack)
+    file_end_indicate_split_stack ();
  }

  /* Return true if TYPE is a vector bool type.  */
@@ -14724,6 +15084,9 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree ty
  #undef TARGET_SET_UP_BY_PROLOGUE
  #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
+
  #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
  #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
    s390_use_by_pieces_infrastructure_p
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 9b869d5..21cd989 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -114,6 +114,9 @@
     UNSPEC_SP_SET
     UNSPEC_SP_TEST

+   ; Split stack support
+   UNSPEC_STACK_CHECK
+
     ; Test Data Class (TDC)
     UNSPEC_TDC_INSN

@@ -276,6 +279,11 @@
     ; Set and get floating point control register
     UNSPECV_SFPC
     UNSPECV_EFPC
+
+   ; Split stack support
+   UNSPECV_SPLIT_STACK_CALL
+   UNSPECV_SPLIT_STACK_SIBCALL
+   UNSPECV_SPLIT_STACK_MARKER
    ])

  ;;
@@ -10907,3 +10915,104 @@
    "TARGET_Z13"
    "lcbb\t%0,%1,%b2"
    [(set_attr "op_type" "VRX")])
+
+; Handle -fsplit-stack.
+
+(define_expand "split_stack_prologue"
+  [(const_int 0)]
+  ""
+{
+  s390_expand_split_stack_prologue ();
+  DONE;
+})
+
+(define_insn "split_stack_call_<mode>"
+  [(set (pc) (label_ref (match_operand 0 "" "")))
+   (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+                                    (match_operand 2 "consttable_operand" "X")
+                                    (match_operand 3 "consttable_operand" "X")]
+                                   UNSPECV_SPLIT_STACK_CALL))]
+  "TARGET_CPU_ZARCH"
+{
+  gcc_unreachable ();
+}
+  [(set_attr "length" "12")])
+
+(define_insn "split_stack_cond_call_<mode>"
+  [(set (pc)
+        (if_then_else
+          (match_operand 4 "" "")
+          (label_ref (match_operand 0 "" ""))
+          (pc)))
+   (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+                                    (match_operand 2 "consttable_operand" "X")
+                                    (match_operand 3 "consttable_operand" "X")]
+                                   UNSPECV_SPLIT_STACK_CALL))]
+  "TARGET_CPU_ZARCH"
+{
+  gcc_unreachable ();
+}
+  [(set_attr "length" "12")])
+
+;; If there are operand 0 bytes available on the stack, jump to
+;; operand 1.
+
+(define_expand "split_stack_space_check"
+  [(set (pc) (if_then_else
+	      (ltu (minus (reg 15)
+			  (match_operand 0 "register_operand"))
+		   (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+	      (label_ref (match_operand 1))
+	      (pc)))]
+  ""
+{
+  /* Offset from thread pointer to __private_ss.  */
+  int psso = TARGET_64BIT ? 0x38 : 0x20;
+  rtx tp = s390_get_thread_pointer ();
+  rtx guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, tp, psso));
+  rtx reg = gen_reg_rtx (Pmode);
+  rtx cc;
+  if (TARGET_64BIT)
+    emit_insn (gen_subdi3 (reg, stack_pointer_rtx, operands[0]));
+  else
+    emit_insn (gen_subsi3 (reg, stack_pointer_rtx, operands[0]));
+  cc = s390_emit_compare (GT, reg, guard);
+  s390_emit_jump (operands[1], cc);
+
+  DONE;
+})
This expander does not seem to get called from anywhere.

It's called from target-independent code for alloca and VLAs.

+
+;; A jg with minimal fuss for use in split stack prologue.
+
+(define_insn "split_stack_sibcall_<mode>"
+  [(set (pc) (label_ref (match_operand 1 "" "")))
+   (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")]
+                                   UNSPECV_SPLIT_STACK_SIBCALL))]
+  "TARGET_CPU_ZARCH"
+  "jg\t%0"
+  [(set_attr "op_type" "RIL")
+   (set_attr "type"  "branch")])
+
+;; Also a conditional one.
+
+(define_insn "split_stack_cond_sibcall_<mode>"
+  [(set (pc)
+        (if_then_else
+          (match_operand 1 "" "")
+          (label_ref (match_operand 2 "" ""))
+          (pc)))
+   (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")]
+                                   UNSPECV_SPLIT_STACK_SIBCALL))]
+  "TARGET_CPU_ZARCH"
+  "jg%C1\t%0"
+  [(set_attr "op_type" "RIL")
+   (set_attr "type"  "branch")])
+
+;; An unusual nop instruction used to mark functions with no stack frames
+;; as split-stack aware.
+
+(define_insn "split_stack_marker"
+  [(unspec_volatile [(const_int 0)] UNSPECV_SPLIT_STACK_MARKER)]
+  ""
+  "nopr\t%%r15"
+  [(set_attr "op_type" "RR")])
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 4cd8f01..604b120 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-01-16  Marcin Kościelnicki  <koriakin@0x04.net>
+
+	* config.host: Use t-stack and t-stack-s390 for s390*-*-linux.
+	* config/s390/morestack.S: New file.
+	* config/s390/t-stack-s390: New file.
+	* generic-morestack.c (__splitstack_find): Add s390-specific code.
+
  2016-01-15  Nick Clifton  <nickc@redhat.com>

  	* config/msp430/t-msp430 (lib2_mul_none.o): Only use the first
diff --git a/libgcc/config.host b/libgcc/config.host
index f58ee45..9793155 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1105,11 +1105,11 @@ rx-*-elf)
  	tm_file="$tm_file rx/rx-abi.h rx/rx-lib.h"
  	;;
  s390-*-linux*)
-	tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux s390/32/t-floattodi"
+	tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux s390/32/t-floattodi t-stack s390/t-stack-s390"
  	md_unwind_header=s390/linux-unwind.h
  	;;
  s390x-*-linux*)
-	tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux"
+	tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux t-stack s390/t-stack-s390"
  	if test "${host_address}" = 32; then
  	   tmake_file="${tmake_file} s390/32/t-floattodi"
  	fi
diff --git a/libgcc/config/s390/morestack.S b/libgcc/config/s390/morestack.S
new file mode 100644
index 0000000..c99f6e4
--- /dev/null
+++ b/libgcc/config/s390/morestack.S
@@ -0,0 +1,609 @@
+# s390 support for -fsplit-stack.
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# Contributed by Marcin Kościelnicki <koriakin@0x04.net>.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+# Excess space needed to call ld.so resolver for lazy plt
+# resolution.  Go uses sigaltstack so this doesn't need to
+# also cover signal frame size.
+#define BACKOFF 0x1000
+
+# The __morestack function.
+
+	.global	__morestack
+	.hidden	__morestack
+
+	.type	__morestack,@function
+
+__morestack:
+.LFB1:
+	.cfi_startproc
+
+
+#ifndef __s390x__
+
+
+# The 31-bit __morestack function.
+
+	# We use a cleanup to restore the stack guard if an exception
+	# is thrown through this code.
+#ifndef __PIC__
+	.cfi_personality 0,__gcc_personality_v0
+	.cfi_lsda 0,.LLSDA1
+#else
+	.cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+	.cfi_lsda 0x1b,.LLSDA1
+#endif
+
+	stm	%r2, %r15, 0x8(%r15)	# Save %r2-%r15.
+	.cfi_offset %r6, -0x48
+	.cfi_offset %r7, -0x44
+	.cfi_offset %r8, -0x40
+	.cfi_offset %r9, -0x3c
+	.cfi_offset %r10, -0x38
+	.cfi_offset %r11, -0x34
+	.cfi_offset %r12, -0x30
+	.cfi_offset %r13, -0x2c
+	.cfi_offset %r14, -0x28
+	.cfi_offset %r15, -0x24
+	lr	%r11, %r15		# Make frame pointer for vararg.
+	.cfi_def_cfa_register %r11
+	ahi	%r15, -0x60		# 0x60 for standard frame.
+	st	%r11, 0(%r15)		# Save back chain.
+	lr	%r8, %r0		# Save %r0 (static chain).
+	lr	%r10, %r1		# Save %r1 (address of parameter block).
+
+	l	%r7, 0(%r10)		# Required frame size to %r7
+	ear	%r1, %a0		# Extract thread pointer.
+	l	%r1, 0x20(%r1)		# Get stack boundary
+	ar	%r1, %r7		# Stack boundary + frame size
+	a	%r1, 4(%r10)		# + stack param size
+	clr	%r1, %r15		# Compare with current stack pointer
+	jle	.Lnoalloc		# boundary + sizes <= sp: enough room
+
+	brasl	%r14, __morestack_block_signals
+
+	# We abuse one of caller's fpr save slots (which we don't use for fprs)
+	# as a local variable.  Not needed here, but done to be consistent with
+	# the below use.
+	ahi	%r7, BACKOFF		# Bump requested size a bit.
+	st	%r7, 0x40(%r11)		# Stuff frame size on stack.
+	la	%r2, 0x40(%r11)		# Pass its address as parameter.
+	la	%r3, 0x60(%r11)		# Caller's stack parameters.
+	l	%r4, 4(%r10)		# Size of stack paremeters.
(Typo: "paremeters" should be "parameters".)

+	brasl	%r14, __generic_morestack
+
+	lr	%r15, %r2		# Switch to the new stack.
+	ahi	%r15, -0x60		# Make a stack frame on it.
+	st	%r11, 0(%r15)		# Save back chain.
+
+	s	%r2, 0x40(%r11)		# The end of stack space.
+	ahi	%r2, BACKOFF		# Back off a bit.
+	ear	%r1, %a0		# Extract thread pointer.
+.LEHB0:
+	st	%r2, 0x20(%r1)	# Save the new stack boundary.
+
+	brasl	%r14, __morestack_unblock_signals
+
+	lr	%r0, %r8		# Static chain.
+	lm	%r2, %r6, 0x8(%r11)	# Parameter registers.
+
+	# The third word of the parameter block is the offset from the block
+	# to the function body; adding it gives the address to call.
+	a	%r10, 0x8(%r10)
+
+	# Leave vararg pointer in %r1, in case function uses it
+	la	%r1, 0x60(%r11)
+
+	# State of registers:
+	# %r0: Static chain from entry.
+	# %r1: Vararg pointer.
+	# %r2-%r6: Parameters from entry.
+	# %r7-%r10: Indeterminate.
+	# %r11: Frame pointer (%r15 from entry).
+	# %r12-%r13: Indeterminate.
+	# %r14: Return address.
+	# %r15: Stack pointer.
+	basr	%r14, %r10		# Call our caller.
+
+	stm	%r2, %r3, 0x8(%r11)	# Save return registers.
+
+	brasl	%r14, __morestack_block_signals
+
+	# We need a stack slot now, but have no good way to get it - the frame
+	# on new stack had to be exactly 0x60 bytes, or stack parameters would
+	# be passed wrong.  Abuse fpr save area in caller's frame (we don't
+	# save actual fprs).
+	la	%r2, 0x40(%r11)		# Pass the slot address as parameter.
+	brasl	%r14, __generic_releasestack
+
+	s	%r2, 0x40(%r11)		# Subtract available space.
+	ahi	%r2, BACKOFF		# Back off a bit.
+	ear	%r1, %a0		# Extract thread pointer.
+.LEHE0:
+	st	%r2, 0x20(%r1)	# Save the new stack boundary.
+
+	# We need to restore the old stack pointer before unblocking signals.
+	# We also need 0x60 bytes for a stack frame.  Since we had a stack
+	# frame at this place before the stack switch, there's no need to
+	# write the back chain again.
+	lr	%r15, %r11		# Back to the old stack.
+	ahi	%r15, -0x60		# 0x60 for standard frame.
+
+	brasl	%r14, __morestack_unblock_signals
+
+	lm	%r2, %r15, 0x8(%r11)	# Restore all registers.
+	.cfi_remember_state		# Reused by .Lnoalloc below.
+	.cfi_restore %r15
+	.cfi_restore %r14
+	.cfi_restore %r13
+	.cfi_restore %r12
+	.cfi_restore %r11
+	.cfi_restore %r10
+	.cfi_restore %r9
+	.cfi_restore %r8
+	.cfi_restore %r7
+	.cfi_restore %r6
+	.cfi_def_cfa_register %r15
+	br	%r14			# Return to caller's caller.
+
+# Executed if no new stack allocation is needed.
+
+.Lnoalloc:
+	.cfi_restore_state
+	# We may need to copy stack parameters.
+	l	%r9, 0x4(%r10)		# Load stack parameter size.
+	ltr	%r9, %r9		# And check if it's 0.
+	je	.Lnostackparm		# Skip the copy if not needed.
+	sr	%r15, %r9		# Make space on the stack.
+	la	%r8, 0x60(%r15)		# Destination.
+	la	%r12, 0x60(%r11)	# Source.
+	lr	%r13, %r9		# Source size.
+.Lcopy:
+	mvcle	%r8, %r12, 0		# Copy (%r8/%r9 dest, %r12/%r13 source).
+	jo	.Lcopy			# Repeat until the copy is complete.
+
+.Lnostackparm:
+	# The third word of the parameter block is the offset from the block
+	# to the function body; adding it gives the address to call.
+	a	%r10, 0x8(%r10)
+
+	# Leave vararg pointer in %r1, in case function uses it
+	la	%r1, 0x60(%r11)
+
+	# OK, no stack allocation needed.  We still follow the protocol and
+	# call our caller - it doesn't cost much and makes sure vararg works.
+	# No need to set any registers here - %r0 and %r2-%r6 weren't modified.
+	basr	%r14, %r10		# Call our caller.
The comment confuses me.  It sounds as if the call weren't really
needed, but in fact this cannot even remotely work without jumping
back to the function body, right?

Certainly. __morestack's task is to call the given function entry point once the necessary stack space is established. In fact, in the no-allocation case, a sibling call would actually be possible, if it weren't for one annoying detail: there are no free GPRs we could use to hold the address of the entry point - %r0 may hold the static chain, %r1 may have to be the argument pointer, %r2-%r5 may hold parameters, and %r6-%r15 are callee-saved.

+
+	lm	%r6, %r15, 0x18(%r11)	# Restore all callee-saved registers.
+	.cfi_remember_state		# Reused by the .L1 cleanup below.
+	.cfi_restore %r15
+	.cfi_restore %r14
+	.cfi_restore %r13
+	.cfi_restore %r12
+	.cfi_restore %r11
+	.cfi_restore %r10
+	.cfi_restore %r9
+	.cfi_restore %r8
+	.cfi_restore %r7
+	.cfi_restore %r6
+	.cfi_def_cfa_register %r15
+	br	%r14			# Return to caller's caller.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+
+.L1:
+	.cfi_restore_state		# CFI state with the frame still set up.
+	lr	%r2, %r11		# Stack pointer after resume.
+	brasl	%r14, __generic_findstack
+	lr	%r3, %r11		# Get the stack pointer.
+	sr	%r3, %r2		# Subtract available space.
+	ahi	%r3, BACKOFF		# Back off a bit.
+	ear	%r1, %a0		# Extract thread pointer.
+	st	%r3, 0x20(%r1)	# Save the new stack boundary.
+
+	lr	%r2, %r6		# Exception header (first argument).
+#ifdef __PIC__
+	brasl	%r14, _Unwind_Resume@PLT
+#else
+	brasl	%r14, _Unwind_Resume
+#endif
+
+#else /* defined(__s390x__) */
+
+
+# The 64-bit __morestack function.
+
+	# We use a cleanup to restore the stack guard if an exception
+	# is thrown through this code.
+#ifndef __PIC__
+	.cfi_personality 0x3,__gcc_personality_v0
+	.cfi_lsda 0x3,.LLSDA1
+#else
+	.cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+	.cfi_lsda 0x1b,.LLSDA1
+#endif
+
+	stmg	%r2, %r15, 0x10(%r15)	# Save %r2-%r15.
+	.cfi_offset %r6, -0x70
+	.cfi_offset %r7, -0x68
+	.cfi_offset %r8, -0x60
+	.cfi_offset %r9, -0x58
+	.cfi_offset %r10, -0x50
+	.cfi_offset %r11, -0x48
+	.cfi_offset %r12, -0x40
+	.cfi_offset %r13, -0x38
+	.cfi_offset %r14, -0x30
+	.cfi_offset %r15, -0x28
+	lgr	%r11, %r15		# Make frame pointer for vararg.
+	.cfi_def_cfa_register %r11
+	aghi	%r15, -0xa0		# 0xa0 for standard frame.
+	stg	%r11, 0(%r15)		# Save back chain.
+	lgr	%r8, %r0		# Save %r0 (static chain).
+	lgr	%r10, %r1		# Save %r1 (address of parameter block).
+
+	lg	%r7, 0(%r10)		# Required frame size to %r7
+	ear	%r1, %a0		# High word of thread pointer.
+	sllg	%r1, %r1, 32		# Move it to the upper half.
+	ear	%r1, %a1		# Extract thread pointer.
+	lg	%r1, 0x38(%r1)		# Get stack boundary
+	agr	%r1, %r7		# Stack boundary + frame size
+	ag	%r1, 8(%r10)		# + stack param size
+	clgr	%r1, %r15		# Compare with current stack pointer
+	jle	.Lnoalloc		# boundary + sizes <= sp: enough room
+
+	brasl	%r14, __morestack_block_signals
+
+	# We abuse one of caller's fpr save slots (which we don't use for fprs)
+	# as a local variable.  Not needed here, but done to be consistent with
+	# the below use.
+	aghi	%r7, BACKOFF		# Bump requested size a bit.
+	stg	%r7, 0x80(%r11)		# Stuff frame size on stack.
+	la	%r2, 0x80(%r11)		# Pass its address as parameter.
+	la	%r3, 0xa0(%r11)		# Caller's stack parameters.
+	lg	%r4, 8(%r10)		# Size of stack parameters.
+	brasl	%r14, __generic_morestack
+
+	lgr	%r15, %r2		# Switch to the new stack.
+	aghi	%r15, -0xa0		# Make a stack frame on it.
+	stg	%r11, 0(%r15)		# Save back chain.
+
+	sg	%r2, 0x80(%r11)		# The end of stack space.
+	aghi	%r2, BACKOFF		# Back off a bit.
+	ear	%r1, %a0
+	sllg	%r1, %r1, 32
+	ear	%r1, %a1		# Extract thread pointer.
+.LEHB0:
+	stg	%r2, 0x38(%r1)	# Save the new stack boundary.
+
+	brasl	%r14, __morestack_unblock_signals
+
+	lgr	%r0, %r8		# Static chain.
+	lmg	%r2, %r6, 0x10(%r11)	# Parameter registers.
+
+	# The third word of the parameter block is the offset from the block
+	# to the function body; adding it gives the address to call.
+	ag	%r10, 0x10(%r10)
+
+	# Leave vararg pointer in %r1, in case function uses it
+	la	%r1, 0xa0(%r11)
+
+	# State of registers:
+	# %r0: Static chain from entry.
+	# %r1: Vararg pointer.
+	# %r2-%r6: Parameters from entry.
+	# %r7-%r10: Indeterminate.
+	# %r11: Frame pointer (%r15 from entry).
+	# %r12-%r13: Indeterminate.
+	# %r14: Return address.
+	# %r15: Stack pointer.
+	basr	%r14, %r10		# Call our caller.
+
+	stg	%r2, 0x10(%r11)		# Save return register.
+
+	brasl	%r14, __morestack_block_signals
+
+	# We need a stack slot now, but have no good way to get it - the frame
+	# on new stack had to be exactly 0xa0 bytes, or stack parameters would
+	# be passed wrong.  Abuse fpr save area in caller's frame (we don't
+	# save actual fprs).
+	la	%r2, 0x80(%r11)		# Pass the slot address as parameter.
+	brasl	%r14, __generic_releasestack
+
+	sg	%r2, 0x80(%r11)		# Subtract available space.
+	aghi	%r2, BACKOFF		# Back off a bit.
+	ear	%r1, %a0
+	sllg	%r1, %r1, 32
+	ear	%r1, %a1		# Extract thread pointer.
+.LEHE0:
+	stg	%r2, 0x38(%r1)	# Save the new stack boundary.
+
+	# We need to restore the old stack pointer before unblocking signals.
+	# We also need 0xa0 bytes for a stack frame.  Since we had a stack
+	# frame at this place before the stack switch, there's no need to
+	# write the back chain again.
+	lgr	%r15, %r11		# Back to the old stack.
+	aghi	%r15, -0xa0		# 0xa0 for standard frame.
+
+	brasl	%r14, __morestack_unblock_signals
+
+	lmg	%r2, %r15, 0x10(%r11)	# Restore all registers.
+	.cfi_remember_state		# Reused by .Lnoalloc below.
+	.cfi_restore %r15
+	.cfi_restore %r14
+	.cfi_restore %r13
+	.cfi_restore %r12
+	.cfi_restore %r11
+	.cfi_restore %r10
+	.cfi_restore %r9
+	.cfi_restore %r8
+	.cfi_restore %r7
+	.cfi_restore %r6
+	.cfi_def_cfa_register %r15
+	br	%r14			# Return to caller's caller.
+
+# Executed if no new stack allocation is needed.
+
+.Lnoalloc:
+	.cfi_restore_state
+	# We may need to copy stack parameters.
+	lg	%r9, 0x8(%r10)		# Load stack parameter size.
+	ltgr	%r9, %r9		# Check if it's 0.
+	je	.Lnostackparm		# Skip the copy if not needed.
+	sgr	%r15, %r9		# Make space on the stack.
+	la	%r8, 0xa0(%r15)		# Destination.
+	la	%r12, 0xa0(%r11)	# Source.
+	lgr	%r13, %r9		# Source size.
+.Lcopy:
+	mvcle	%r8, %r12, 0		# Copy (%r8/%r9 dest, %r12/%r13 source).
+	jo	.Lcopy			# Repeat until the copy is complete.
+
+.Lnostackparm:
+	# The third word of the parameter block is the offset from the block
+	# to the function body; adding it gives the address to call.
+	ag	%r10, 0x10(%r10)
+
+	# Leave vararg pointer in %r1, in case function uses it
+	la	%r1, 0xa0(%r11)
+
+	# No new stack is needed, but we still have to call the function body:
+	# that is the protocol, and it makes sure vararg works.
+	# No need to set any registers here - %r0 and %r2-%r6 weren't modified.
+	basr	%r14, %r10		# Call our caller.
+
+	lmg	%r6, %r15, 0x30(%r11)	# Restore all callee-saved registers.
+	.cfi_remember_state		# Reused by the .L1 cleanup below.
+	.cfi_restore %r15
+	.cfi_restore %r14
+	.cfi_restore %r13
+	.cfi_restore %r12
+	.cfi_restore %r11
+	.cfi_restore %r10
+	.cfi_restore %r9
+	.cfi_restore %r8
+	.cfi_restore %r7
+	.cfi_restore %r6
+	.cfi_def_cfa_register %r15
+	br	%r14			# Return to caller's caller.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+
+.L1:
+	.cfi_restore_state		# CFI state with the frame still set up.
+	lgr	%r2, %r11		# Stack pointer after resume.
+	brasl	%r14, __generic_findstack
+	lgr	%r3, %r11		# Get the stack pointer.
+	sgr	%r3, %r2		# Subtract available space.
+	aghi	%r3, BACKOFF		# Back off a bit.
+	ear	%r1, %a0
+	sllg	%r1, %r1, 32
+	ear	%r1, %a1		# Extract thread pointer.
+	stg	%r3, 0x38(%r1)	# Save the new stack boundary.
+
+	lgr	%r2, %r6		# Exception header (first argument).
+#ifdef __PIC__
+	brasl	%r14, _Unwind_Resume@PLT
+#else
+	brasl	%r14, _Unwind_Resume
+#endif
+
+#endif /* defined(__s390x__) */
+
+	.cfi_endproc
+	.size	__morestack, . - __morestack
+
+
+# The exception table.  This tells the personality routine to execute
+# the exception handler.
+
+	.section	.gcc_except_table,"a",@progbits
+	.align	4
+.LLSDA1:
+	.byte	0xff	# @LPStart format (omit)
+	.byte	0xff	# @TType format (omit)
+	.byte	0x1	# call-site format (uleb128)
+	.uleb128 .LLSDACSE1-.LLSDACSB1	# Call-site table length
+.LLSDACSB1:
+	.uleb128 .LEHB0-.LFB1	# region 0 start (offset from .LFB1)
+	.uleb128 .LEHE0-.LEHB0	# length
+	.uleb128 .L1-.LFB1	# landing pad (cleanup code at .L1)
+	.uleb128 0		# action
+.LLSDACSE1:
+
+
+	.global __gcc_personality_v0
+#ifdef __PIC__
+	# Build a position independent reference to the basic
+        # personality function.
+	.hidden DW.ref.__gcc_personality_v0
+	.weak   DW.ref.__gcc_personality_v0
+	.section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat
+	.type	DW.ref.__gcc_personality_v0, @object
+DW.ref.__gcc_personality_v0:
+#ifndef __LP64__
+	.align 4
+	.size	DW.ref.__gcc_personality_v0, 4
+	.long	__gcc_personality_v0	# 4-byte address of the routine.
+#else
+	.align 8
+	.size	DW.ref.__gcc_personality_v0, 8
+	.quad	__gcc_personality_v0	# 8-byte address of the routine.
+#endif
+#endif
+
+
+
+# Initialize the stack test value when the program starts or when a
+# new thread starts.  We don't know how large the main stack is, so we
+# guess conservatively.  We might be able to use getrlimit here.
+
+	.text
+	.global	__stack_split_initialize
+	.hidden	__stack_split_initialize
+
+	.type	__stack_split_initialize, @function
+
+__stack_split_initialize:
+
+#ifndef __s390x__
+
+	ear	%r1, %a0	# Extract thread pointer.
+	lr	%r0, %r15	# Start from the current stack pointer.
+	ahi	%r0, -0x4000	# We should have at least 16K.
+	st	%r0, 0x20(%r1)	# Save it as the stack boundary.
+
+	lr	%r2, %r15	# First argument: current stack pointer.
+	lhi	%r3, 0x4000	# Second argument: the assumed 16K.
+#ifdef __PIC__
+	jg	__generic_morestack_set_initial_sp@PLT	# Tail call
+#else
+	jg	__generic_morestack_set_initial_sp	# Tail call
+#endif
+
+#else /* defined(__s390x__) */
+
+	ear	%r1, %a0	# High word of thread pointer.
+	sllg	%r1, %r1, 32	# Move it to the upper half.
+	ear	%r1, %a1	# Extract thread pointer.
+	lgr	%r0, %r15	# Start from the current stack pointer.
+	aghi	%r0, -0x4000	# We should have at least 16K.
+	stg	%r0, 0x38(%r1)	# Save it as the stack boundary.
+
+	lgr	%r2, %r15	# First argument: current stack pointer.
+	lghi	%r3, 0x4000	# Second argument: the assumed 16K.
+#ifdef __PIC__
+	jg	__generic_morestack_set_initial_sp@PLT	# Tail call
+#else
+	jg	__generic_morestack_set_initial_sp	# Tail call
+#endif
+
+#endif /* defined(__s390x__) */
+
+	.size	__stack_split_initialize, . - __stack_split_initialize
+
+# Routines to get and set the guard, for __splitstack_getcontext,
+# __splitstack_setcontext, and __splitstack_makecontext.
+
+# void *__morestack_get_guard (void) returns the current stack guard.
+	.text
+	.global	__morestack_get_guard
+	.hidden	__morestack_get_guard
+
+	.type	__morestack_get_guard,@function
+
+__morestack_get_guard:
+
+#ifndef __s390x__
+	ear	%r1, %a0	# Extract thread pointer.
+	l	%r2, 0x20(%r1)	# Load the stack boundary as the result.
+#else
+	ear	%r1, %a0	# High word of thread pointer.
+	sllg	%r1, %r1, 32	# Move it to the upper half.
+	ear	%r1, %a1	# Extract thread pointer.
+	lg	%r2, 0x38(%r1)	# Load the stack boundary as the result.
+#endif
+	br %r14
+
+	.size	__morestack_get_guard, . - __morestack_get_guard
+
+# void __morestack_set_guard (void *) sets the stack guard.
+	.global	__morestack_set_guard
+	.hidden	__morestack_set_guard
+
+	.type	__morestack_set_guard,@function
+
+__morestack_set_guard:
+
+#ifndef __s390x__
+	ear	%r1, %a0	# Extract thread pointer.
+	st	%r2, 0x20(%r1)	# Store the argument as the stack boundary.
+#else
+	ear	%r1, %a0	# High word of thread pointer.
+	sllg	%r1, %r1, 32	# Move it to the upper half.
+	ear	%r1, %a1	# Extract thread pointer.
+	stg	%r2, 0x38(%r1)	# Store the argument as the stack boundary.
+#endif
+	br	%r14
+
+	.size	__morestack_set_guard, . - __morestack_set_guard
+
+# void *__morestack_make_guard (void *, size_t) returns the stack
+# guard value for a stack.
+	.global	__morestack_make_guard
+	.hidden	__morestack_make_guard
+
+	.type	__morestack_make_guard,@function
+
+__morestack_make_guard:
+
+#ifndef __s390x__
+	sr	%r2, %r3	# First argument minus the size.
+	ahi	%r2, BACKOFF	# Back off a bit.
+#else
+	sgr	%r2, %r3	# First argument minus the size.
+	aghi	%r2, BACKOFF	# Back off a bit.
+#endif
+	br	%r14
+
+	.size	__morestack_make_guard, . - __morestack_make_guard
+
+# Make __stack_split_initialize a high priority constructor.
+
+	.section .ctors.65535,"aw",@progbits
+
+#ifndef __LP64__
+	.align	4
+	.long	__stack_split_initialize
+	.long	__morestack_load_mmap	# Also listed in this section.
+#else
+	.align	8
+	.quad	__stack_split_initialize
+	.quad	__morestack_load_mmap	# Also listed in this section.
+#endif
+
+	.section	.note.GNU-stack,"",@progbits
+	.section	.note.GNU-split-stack,"",@progbits
+	.section	.note.GNU-no-split-stack,"",@progbits
diff --git a/libgcc/config/s390/t-stack-s390 b/libgcc/config/s390/t-stack-s390
new file mode 100644
index 0000000..4c959b0
--- /dev/null
+++ b/libgcc/config/s390/t-stack-s390
@@ -0,0 +1,2 @@
+# Makefile fragment to support -fsplit-stack for s390.
+LIB2ADD_ST += $(srcdir)/config/s390/morestack.S
diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c
index 89765d4..b8eec4e 100644
--- a/libgcc/generic-morestack.c
+++ b/libgcc/generic-morestack.c
@@ -939,6 +939,10 @@ __splitstack_find (void *segment_arg, void *sp, size_t *len,
  #elif defined (__i386__)
        nsp -= 6 * sizeof (void *);
  #elif defined __powerpc64__
+#elif defined __s390x__
+      nsp -= 2 * 160;
+#elif defined __s390__
+      nsp -= 2 * 96;
  #else
  #error "unrecognized target"
  #endif




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]