This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][RFA/RFC] Stack clash mitigation patch 08/08 V2


I don't think this patch has changed in any significant way since the V1
patch.

I have tested a slightly different version which punts stack clash
protection for very large static stack frames -- otherwise tests which
have *huge* frames will timeout, run out of memory during compilation, etc.

--
s390's most interesting property is that the caller allocates space for
the callee to save registers into.

So we start with a very conservative assumption about the offset between
SP and the most recent stack probe.  As we encounter those register
saves we may be able to decrease that offset.  And like aarch64 as we
allocate space, the offset increases.  If the offset crosses
PROBE_INTERVAL, we must emit probes.

For large frames, I did not implement an allocate/probe in a loop.
Someone with a better understanding of the architecture is better suited
for that work.  I'll note that you're going to need another scratch
register.  The missing loop is the cause of the xfail of one test, which
expects to see a prologue allocate/probe loop.

s390 has a -mbackchain option.  I'm not sure where it's used, but we do
try to handle it in the initial offset computation.   However, we don't
handle it in the actual allocations that occur when
-fstack-clash-protection is enabled.

Other than the xfail noted above, the s390 uses the same tests as the
x86, ppc and aarch64 ports.

I suspect we're going to need further iteration here.
	* config/s390/s390.c (PROBE_INTERVAL): Define.
	(allocate_stack_space): New function, partially extracted from
	s390_emit_prologue.
	(s390_emit_prologue): Track offset to most recent stack probe.
	Code to allocate space moved into allocate_stack_space.
	Dump actions when no stack is allocated.

testsuite/

	* gcc.dg/stack-check-6.c: xfail for s390*-*-*.
	
commit 0d2fdca4d86238f2fc095c7d91013e927c6ecf0c
Author: Jeff Law <law@devel1.s390.bos.redhat.com>
Date:   Fri Jul 7 17:25:35 2017 +0000

    S390 implementation

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 958ee3b..7d4020c 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10999,6 +10999,107 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is a negative constant (its INTVAL is negated below).
+
+   TEMP_REG_IS_LIVE indicates that TEMP_REG actually holds a live
+   value and must be restored if we clobber it.  */
+static void
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+		      rtx temp_reg, bool temp_reg_is_live)
+{
+  rtx insn;
+
+  /* If we are emitting stack probes and a SIZE allocation would cross
+     the PROBE_INTERVAL boundary, then we need significantly different
+     sequences to allocate and probe the stack.  */
+  if (flag_stack_clash_protection
+      && last_probe_offset + -INTVAL (size) < PROBE_INTERVAL)
+    dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+  else if (flag_stack_clash_protection
+      && last_probe_offset + -INTVAL (size) >= PROBE_INTERVAL)
+    {
+      rtx memref;
+
+      HOST_WIDE_INT rounded_size = -INTVAL (size) & -PROBE_INTERVAL;
+
+      emit_move_insn (temp_reg, GEN_INT (PROBE_INTERVAL - 8));
+
+      /* We really should have a runtime loop version as well.  */
+      for (HOST_WIDE_INT i = 0; i < rounded_size; i += PROBE_INTERVAL)
+	{
+	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+					   GEN_INT (-PROBE_INTERVAL)));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+
+	  /* We just allocated PROBE_INTERVAL bytes of stack space.  Thus,
+	     a probe is mandatory here, but LAST_PROBE_OFFSET does not
+	     change.  The probe store is marked volatile.  */
+	  memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+							 stack_pointer_rtx));
+	  MEM_VOLATILE_P (memref) = 1;
+	  emit_move_insn (memref, temp_reg);
+	}
+
+      /* Handle any residual allocation request.  */
+      HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+				       GEN_INT (-residual)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+      last_probe_offset += residual;
+      if (last_probe_offset >= PROBE_INTERVAL)
+	{
+	  emit_move_insn (temp_reg, GEN_INT (residual
+					     - GET_MODE_SIZE (word_mode)));
+	  memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+						     stack_pointer_rtx));
+	  MEM_VOLATILE_P (memref) = 1;
+	  emit_move_insn (memref, temp_reg);
+	}
+
+      /* We clobbered TEMP_REG, but it really isn't a temporary at this point,
+	 restore its value (the stack pointer before this allocation).  */
+      if (temp_reg_is_live)
+	{
+	  emit_move_insn (temp_reg, GEN_INT (-INTVAL (size)));
+	  emit_insn (gen_add2_insn (temp_reg, stack_pointer_rtx));
+	}
+
+      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
+      emit_insn (gen_blockage ());
+      return;
+    }
+
+  /* Subtract frame size from stack pointer.  */
+
+  if (DISP_IN_RANGE (INTVAL (size)))
+    {
+      insn = gen_rtx_SET (stack_pointer_rtx,
+			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, size));
+      insn = emit_insn (insn);
+    }
+  else
+    {
+      if (!CONST_OK_FOR_K (INTVAL (size)))
+	size = force_const_mem (Pmode, size);
+
+      insn = emit_insn (gen_add2_insn (stack_pointer_rtx, size));
+      annotate_constant_pool_refs (&PATTERN (insn));
+    }
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  rtx real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+		gen_rtx_SET (stack_pointer_rtx,
+			     gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					   real_frame_off)));
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -11023,6 +11124,16 @@ s390_emit_prologue (void)
   else
     temp_reg = gen_rtx_REG (Pmode, 1);
 
+  /* When probing for stack-clash mitigation, we have to track the distance
+     between the stack pointer and closest known reference.
+
+     Most of the time we have to make a worst case assumption.  The
+     only exception is when TARGET_BACKCHAIN is active, in which case
+     we know *sp (offset 0) was written.  */
+  HOST_WIDE_INT last_probe_offset
+    = (TARGET_BACKCHAIN
+       ? 0 : PROBE_INTERVAL - (STACK_BOUNDARY / UNITS_PER_WORD));
+
   s390_save_gprs_to_fprs ();
 
   /* Save call saved gprs.  */
@@ -11034,6 +11145,14 @@ s390_emit_prologue (void)
 					  - cfun_frame_layout.first_save_gpr_slot),
 			cfun_frame_layout.first_save_gpr,
 			cfun_frame_layout.last_save_gpr);
+
+      /* This is not 100% correct.  If we have more than one register saved,
+	 then LAST_PROBE_OFFSET can move even closer to sp.  */
+      last_probe_offset
+	= (cfun_frame_layout.gprs_offset +
+	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
+			     - cfun_frame_layout.first_save_gpr_slot));
+
       emit_insn (insn);
     }
 
@@ -11050,6 +11169,8 @@ s390_emit_prologue (void)
       if (cfun_fpr_save_p (i))
 	{
 	  save_fpr (stack_pointer_rtx, offset, i);
+	  if (offset < last_probe_offset)
+	    last_probe_offset = offset;
 	  offset += 8;
 	}
       else if (!TARGET_PACKED_STACK || cfun->stdarg)
@@ -11063,6 +11184,8 @@ s390_emit_prologue (void)
       if (cfun_fpr_save_p (i))
 	{
 	  insn = save_fpr (stack_pointer_rtx, offset, i);
+	  if (offset < last_probe_offset)
+	    last_probe_offset = offset;
 	  offset += 8;
 
 	  /* If f4 and f6 are call clobbered they are saved due to
@@ -11085,6 +11208,8 @@ s390_emit_prologue (void)
 	if (cfun_fpr_save_p (i))
 	  {
 	    insn = save_fpr (stack_pointer_rtx, offset, i);
+	    if (offset < last_probe_offset)
+	      last_probe_offset = offset;
 
 	    RTX_FRAME_RELATED_P (insn) = 1;
 	    offset -= 8;
@@ -11104,7 +11229,6 @@ s390_emit_prologue (void)
   if (cfun_frame_layout.frame_size > 0)
     {
       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
-      rtx real_frame_off;
 
       if (s390_stack_size)
   	{
@@ -11177,31 +11301,8 @@ s390_emit_prologue (void)
       if (TARGET_BACKCHAIN || next_fpr)
 	insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
 
-      /* Subtract frame size from stack pointer.  */
-
-      if (DISP_IN_RANGE (INTVAL (frame_off)))
-	{
-	  insn = gen_rtx_SET (stack_pointer_rtx,
-			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
-					    frame_off));
-	  insn = emit_insn (insn);
-	}
-      else
-	{
-	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
-	    frame_off = force_const_mem (Pmode, frame_off);
-
-          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
-	  annotate_constant_pool_refs (&PATTERN (insn));
-	}
-
-      RTX_FRAME_RELATED_P (insn) = 1;
-      real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
-      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-		    gen_rtx_SET (stack_pointer_rtx,
-				 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
-					       real_frame_off)));
-
+      allocate_stack_space (frame_off, last_probe_offset, temp_reg,
+			    TARGET_BACKCHAIN || next_fpr);
       /* Set backchain.  */
 
       if (TARGET_BACKCHAIN)
@@ -11225,6 +11326,8 @@ s390_emit_prologue (void)
 	  emit_clobber (addr);
 	}
     }
+  else if (flag_stack_clash_protection)
+    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
 
   /* Save fprs 8 - 15 (64 bit ABI).  */
 
diff --git a/gcc/testsuite/gcc.dg/stack-check-6.c b/gcc/testsuite/gcc.dg/stack-check-6.c
index 1682385..487b8e2 100644
--- a/gcc/testsuite/gcc.dg/stack-check-6.c
+++ b/gcc/testsuite/gcc.dg/stack-check-6.c
@@ -45,8 +45,8 @@ f7 (void)
   foo (buf);
 }
 
-/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" } } */
-/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" } } */
+/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" { xfail s390*-*-*} } } */
+/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" { xfail s390*-*-*} } } */
 /* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */
 /* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]