[PATCH][RFA/RFC] Stack clash mitigation patch 02/08 - V3


This patch introduces generic mechanisms to protect the dynamically
allocated stack space against stack-clash attacks.

Changes since V2:

Dynamic allocations can be emitted as unrolled inline probes or as a
rotated loop; a rough C sketch of the emitted shape follows below.
Blockage insns are also properly emitted for the dynamic area probes, and
the dynamic area probing now supports targets that may make optimistic
assumptions in their prologues.  Finally, the probing interval is now
controlled by the new param.
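
Roughly, the shape of the code emitted for a dynamic allocation is the
hand-written C sketch below.  It is illustrative only: GCC emits RTL, not
C, the 4096 byte interval stands in for the new param, and the byte-sized
stores stand in for word-sized probes.

/* Illustrative sketch, assuming a downward-growing stack and a 4096 byte
   probe interval.  */
void
sketch_probe_dynamic_area (char *sp, unsigned long size)
{
  unsigned long rounded = size & -4096UL;   /* multiple of the interval */
  unsigned long residual = size - rounded;

  /* Constant sizes of at most four intervals are emitted as unrolled
     inline probes; larger constant sizes use this loop rotated so the
     exit test is at the bottom; non-constant sizes use a conventional
     top-tested loop.  */
  for (unsigned long done = 0; done < rounded; done += 4096)
    {
      sp -= 4096;                           /* allocate one interval */
      sp[4096 - sizeof (long)] = 0;         /* probe just past the prior allocation */
    }

  if (residual)
    {
      /* Mirrors the generic code: the probe lands just past the
	 previously allocated area even when RESIDUAL is smaller
	 than a word.  */
      long offset = (long) residual - (long) sizeof (long);
      sp -= residual;                       /* allocate the residual */
      sp[offset] = 0;                       /* and probe it */
    }
}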

Tests were updated to explicitly specify the guard and probing interval.
A new test checks the inline/unrolled probes as well as the rotated loop.

	* explow.c: Include "params.h".
	(anti_adjust_stack_and_probe_stack_clash): New function.
	(get_stack_check_protect): Likewise.
	(compute_stack_clash_protection_loop_data): Likewise.
	(emit_stack_clash_protection_probe_loop_start): Likewise.
	(emit_stack_clash_protection_probe_loop_end): Likewise.
	(allocate_dynamic_stack_space): Use get_stack_check_protect.
	Use anti_adjust_stack_and_probe_stack_clash.
	* explow.h (compute_stack_clash_protection_loop_data): Prototype.
	(emit_stack_clash_protection_probe_loop_start): Likewise.
	(emit_stack_clash_protection_probe_loop_end): Likewise.
	* rtl.h (get_stack_check_protect): Prototype.
	* defaults.h (STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE):
	Define new default.
	* doc/tm.texi.in (STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE):
	Define.
	* doc/tm.texi: Rebuilt.

	* config/aarch64/aarch64.c (aarch64_expand_prologue): Use
	get_stack_check_protect.
	* config/alpha/alpha.c (alpha_expand_prologue): Likewise.
	* config/arm/arm.c (arm_expand_prologue): Likewise.
	* config/i386/i386.c (ix86_expand_prologue): Likewise.
	* config/ia64/ia64.c (ia64_expand_prologue): Likewise.
	* config/mips/mips.c (mips_expand_prologue): Likewise.
	* config/powerpcspe/powerpcspe.c (rs6000_emit_prologue): Likewise.
	* config/rs6000/rs6000.c (rs6000_emit_prologue): Likewise.
	* config/sparc/sparc.c (sparc_expand_prologue): Likewise.


testsuite

	* gcc.dg/stack-check-3.c: New test.
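
For reviewers, here is a rough sketch of how a target backend might drive
the new helpers declared in explow.h.  The function name and framing are
hypothetical; the body simply mirrors what the generic code in explow.c
does, and it assumes the usual backend includes (rtl.h, explow.h, etc.)
are in scope.

/* Hypothetical target code, mirroring the generic loop in explow.c.  */
static void
example_allocate_and_probe (rtx size)
{
  rtx rounded_size, last_addr, residual;
  HOST_WIDE_INT probe_interval;

  compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
					    &residual, &probe_interval, size);

  if (rounded_size != CONST0_RTX (Pmode))
    {
      rtx loop_lab, end_loop;
      bool rotated = GET_CODE (rounded_size) == CONST_INT;

      emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
						    last_addr, rotated);

      /* Allocate one probe interval and probe just past the previously
	 allocated (and probed) area.  */
      anti_adjust_stack (GEN_INT (probe_interval));
      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       probe_interval
				       - GET_MODE_SIZE (word_mode)));

      emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
						  last_addr, rotated);
      emit_insn (gen_blockage ());
    }
}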

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ef1b5a8..0a8b40a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3676,12 +3676,14 @@ aarch64_expand_prologue (void)
     {
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
-	    aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					    frame_size - STACK_CHECK_PROTECT);
+	  if (frame_size > PROBE_INTERVAL
+	      && frame_size > get_stack_check_protect ())
+	    aarch64_emit_probe_stack_range (get_stack_check_protect (),
+					    (frame_size
+					     - get_stack_check_protect ()));
 	}
       else if (frame_size > 0)
-	aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
+	aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
     }
 
   aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 00a69c1..91f3d7c 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -7741,7 +7741,7 @@ alpha_expand_prologue (void)
 
   probed_size = frame_size;
   if (flag_stack_check)
-    probed_size += STACK_CHECK_PROTECT;
+    probed_size += get_stack_check_protect ();
 
   if (probed_size <= 32768)
     {
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c6101ef..9822ca7 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -21680,13 +21680,13 @@ arm_expand_prologue (void)
 
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					size - STACK_CHECK_PROTECT,
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    arm_emit_probe_stack_range (get_stack_check_protect (),
+					size - get_stack_check_protect (),
 					regno, live_regs_mask);
 	}
       else if (size > 0)
-	arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
+	arm_emit_probe_stack_range (get_stack_check_protect (), size,
 				    regno, live_regs_mask);
     }
 
@@ -27854,7 +27854,7 @@ arm_frame_pointer_required (void)
 	{
 	  /* We don't have the final size of the frame so adjust.  */
 	  size += 32 * UNITS_PER_WORD;
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
 	    return true;
 	}
       else
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1a8a3a3..0947b3c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14638,7 +14638,7 @@ ix86_expand_prologue (void)
 	  HOST_WIDE_INT size = allocate;
 
 	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
-	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+	    size = 0x80000000 - get_stack_check_protect () - 1;
 
 	  if (TARGET_STACK_PROBE)
 	    {
@@ -14648,18 +14648,20 @@ ix86_expand_prologue (void)
 		    ix86_emit_probe_stack_range (0, size);
 		}
 	      else
-		ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+		ix86_emit_probe_stack_range (0,
+					     size + get_stack_check_protect ());
 	    }
 	  else
 	    {
 	      if (crtl->is_leaf && !cfun->calls_alloca)
 		{
-		  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-		    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
-						 size - STACK_CHECK_PROTECT);
+		  if (size > PROBE_INTERVAL
+		      && size > get_stack_check_protect ())
+		    ix86_emit_probe_stack_range (get_stack_check_protect (),
+						 size - get_stack_check_protect ());
 		}
 	      else
-		ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+		ix86_emit_probe_stack_range (get_stack_check_protect (), size);
 	    }
 	}
     }
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 617d188..70aef34 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -3481,15 +3481,16 @@ ia64_expand_prologue (void)
 
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					 size - STACK_CHECK_PROTECT,
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    ia64_emit_probe_stack_range (get_stack_check_protect (),
+					 size - get_stack_check_protect (),
 					 bs_size);
-	  else if (size + bs_size > STACK_CHECK_PROTECT)
-	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
+	  else if (size + bs_size > get_stack_check_protect ())
+	    ia64_emit_probe_stack_range (get_stack_check_protect (),
+					 0, bs_size);
 	}
       else if (size + bs_size > 0)
-	ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
+	ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
     }
 
   if (dump_file) 
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 6bfd86a..7d85ce7 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -12081,12 +12081,12 @@ mips_expand_prologue (void)
     {
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    mips_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					 size - STACK_CHECK_PROTECT);
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    mips_emit_probe_stack_range (get_stack_check_protect (),
+					 size - get_stack_check_protect ());
 	}
       else if (size > 0)
-	mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	mips_emit_probe_stack_range (get_stack_check_protect (), size);
     }
 
   /* Save the registers.  Allocate up to MIPS_MAX_FIRST_STACK_STEP
diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c
index 06d66d7..df5d3cd 100644
--- a/gcc/config/powerpcspe/powerpcspe.c
+++ b/gcc/config/powerpcspe/powerpcspe.c
@@ -29597,12 +29597,12 @@ rs6000_emit_prologue (void)
 
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					   size - STACK_CHECK_PROTECT);
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    rs6000_emit_probe_stack_range (get_stack_check_protect (),
+					   size - get_stack_check_protect ());
 	}
       else if (size > 0)
-	rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
     }
 
   if (TARGET_FIX_AND_CONTINUE)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 63a6c80..aa70e30 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -26895,12 +26895,12 @@ rs6000_emit_prologue (void)
 
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					   size - STACK_CHECK_PROTECT);
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    rs6000_emit_probe_stack_range (get_stack_check_protect (),
+					   size - get_stack_check_protect ());
 	}
       else if (size > 0)
-	rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
     }
 
   if (TARGET_FIX_AND_CONTINUE)
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 790a036..1da032a 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -5552,12 +5552,12 @@ sparc_expand_prologue (void)
     {
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					  size - STACK_CHECK_PROTECT);
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    sparc_emit_probe_stack_range (get_stack_check_protect (),
+					  size - get_stack_check_protect ());
 	}
       else if (size > 0)
-	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
     }
 
   if (size == 0)
@@ -5663,12 +5663,12 @@ sparc_flat_expand_prologue (void)
     {
       if (crtl->is_leaf && !cfun->calls_alloca)
 	{
-	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
-	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
-					  size - STACK_CHECK_PROTECT);
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    sparc_emit_probe_stack_range (get_stack_check_protect (),
+					  size - get_stack_check_protect ());
 	}
       else if (size > 0)
-	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
     }
 
   if (sparc_save_local_in_regs_p)
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 7ad92d9..e5ba317 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1459,6 +1459,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define STACK_CHECK_MAX_VAR_SIZE (STACK_CHECK_MAX_FRAME_SIZE / 100)
 #endif
 
+/* Whether or not the stack clash protection code must probe
+   the last word in the dynamic stack space area.  Most targets
+   do not need this and thus we define it to zero.  */
+#ifndef STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE
+#define STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE(RESIDUAL) 0
+#endif
+
 /* By default, the C++ compiler will use function addresses in the
    vtable entries.  Setting this nonzero tells the compiler to use
    function descriptors instead.  The value of this macro says how
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 795e492..fa107de 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3419,6 +3419,15 @@ GCC computed the default from the values of the above macros and you will
 normally not need to override that default.
 @end defmac
 
+@defmac STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE (@var{residual})
+Some targets make optimistic assumptions about the state of stack
+probing when they emit their prologues.  On such targets a probe into
+the end of any dynamically allocated space is likely required for
+safety against stack clash style attacks.  Define this macro
+to return nonzero if such a probe is required or zero otherwise.  You
+need not define this macro if it would always have the value zero.
+@end defmac
+
 @need 2000
 @node Frame Registers
 @subsection Registers That Address the Stack Frame
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 98f2e6b..826bf04 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2999,6 +2999,15 @@ GCC computed the default from the values of the above macros and you will
 normally not need to override that default.
 @end defmac
 
+@defmac STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE (@var{residual})
+Some targets make optimistic assumptions about the state of stack
+probing when they emit their prologues.  On such targets a probe into
+the end of any dynamically allocated space is likely required for
+safety against stack clash style attacks.  Define this macro
+to return nonzero if such a probe is required or zero otherwise.  You
+need not define this macro if it would always have the value zero.
+@end defmac
+
 @need 2000
 @node Frame Registers
 @subsection Registers That Address the Stack Frame
diff --git a/gcc/explow.c b/gcc/explow.c
index 50074e2..73018ec 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -40,8 +40,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "expr.h"
 #include "common/common-target.h"
 #include "output.h"
+#include "params.h"
 
 static rtx break_out_memory_refs (rtx);
+static void anti_adjust_stack_and_probe_stack_clash (rtx);
 
 
 /* Truncate and perhaps sign-extend C as appropriate for MODE.  */
@@ -1272,6 +1274,29 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align,
   *psize = size;
 }
 
+/* Return the number of bytes to "protect" on the stack for -fstack-check.
+
+   "protect" in the context of -fstack-check means how many bytes we
+   should always ensure are available on the stack.  More importantly
+   this is how many bytes are skipped when probing the stack.
+
+   On some targets we want to reuse the -fstack-check prologue support
+   to give a degree of protection against stack clashing style attacks.
+
+   In that scenario we do not want to skip bytes before probing as that
+   would render the stack clash protections useless.
+
+   So we never use STACK_CHECK_PROTECT directly.  Instead we indirect through
+   this helper which allows us to provide different values for
+   -fstack-check and -fstack-clash-protection.  */
+HOST_WIDE_INT
+get_stack_check_protect (void)
+{
+  if (flag_stack_clash_protection)
+    return 0;
+  return STACK_CHECK_PROTECT;
+}
+
 /* Return an rtx representing the address of an area of memory dynamically
    pushed on the stack.
 
@@ -1430,7 +1455,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
 		       size);
   else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
-    probe_stack_range (STACK_CHECK_PROTECT, size);
+    probe_stack_range (get_stack_check_protect (), size);
 
   /* Don't let anti_adjust_stack emit notes.  */
   suppress_reg_args_size = true;
@@ -1483,6 +1508,8 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
 
       if (flag_stack_check && STACK_CHECK_MOVING_SP)
 	anti_adjust_stack_and_probe (size, false);
+      else if (flag_stack_clash_protection)
+	anti_adjust_stack_and_probe_stack_clash (size);
       else
 	anti_adjust_stack (size);
 
@@ -1758,6 +1785,219 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
   emit_insn (gen_blockage ());
 }
 
+/* Compute parameters for stack clash probing a dynamic stack
+   allocation of SIZE bytes.
+
+   We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL.
+
+   Additionally we conditionally dump the type of probing that will
+   be needed given the values computed.  */
+
+void
+compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr,
+					  rtx *residual,
+					  HOST_WIDE_INT *probe_interval,
+					  rtx size)
+{
+  /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL.  */
+  *probe_interval
+    = PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+  *rounded_size = simplify_gen_binary (AND, Pmode, size,
+				        GEN_INT (-*probe_interval));
+
+  /* Compute the value of the stack pointer for the last iteration.
+     It's just SP + ROUNDED_SIZE.  */
+  rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX);
+  *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
+					      stack_pointer_rtx,
+					      rounded_size_op),
+			      NULL_RTX);
+
+  /* Compute any residuals not allocated by the loop above.  Residuals
+     are just SIZE - ROUNDED_SIZE.  */
+  *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size);
+
+  /* Dump key information to make writing tests easy.  */
+  if (dump_file)
+    {
+      if (*rounded_size == CONST0_RTX (Pmode))
+	fprintf (dump_file,
+		 "Stack clash skipped dynamic allocation and probing loop.\n");
+      else if (GET_CODE (*rounded_size) == CONST_INT
+	       && INTVAL (*rounded_size) <= 4 * *probe_interval)
+	fprintf (dump_file,
+		 "Stack clash dynamic allocation and probing inline.\n");
+      else if (GET_CODE (*rounded_size) == CONST_INT)
+	fprintf (dump_file,
+		 "Stack clash dynamic allocation and probing in "
+		 "rotated loop.\n");
+      else
+	fprintf (dump_file,
+		 "Stack clash dynamic allocation and probing in loop.\n");
+
+      if (*residual != CONST0_RTX (Pmode))
+	fprintf (dump_file,
+		 "Stack clash dynamic allocation and probing residuals.\n");
+      else
+	fprintf (dump_file,
+		 "Stack clash skipped dynamic allocation and "
+		 "probing residuals.\n");
+    }
+}
+
+/* Emit the start of an allocate/probe loop for stack
+   clash protection.
+
+   LOOP_LAB and END_LAB are returned for use when we emit the
+   end of the loop.
+
+   LAST_ADDR is the value of SP which stops the loop.  */
+void
+emit_stack_clash_protection_probe_loop_start (rtx *loop_lab,
+					      rtx *end_lab,
+					      rtx last_addr,
+					      bool rotated)
+{
+  /* Essentially we want to emit any setup code, the top of loop
+     label and the comparison at the top of the loop.  */
+  *loop_lab = gen_label_rtx ();
+  *end_lab = gen_label_rtx ();
+
+  emit_label (*loop_lab);
+  if (!rotated)
+    emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX,
+			     Pmode, 1, *end_lab);
+}
+
+/* Emit the end of a stack clash probing loop.
+
+   This consists of just the jump back to LOOP_LAB and
+   emitting END_LOOP after the loop.  */
+
+void
+emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop,
+					    rtx last_addr, bool rotated)
+{
+  if (rotated)
+    emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX,
+			     Pmode, 1, loop_lab);
+  else
+    emit_jump (loop_lab);
+
+  emit_label (end_loop);
+
+}
+
+/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
+   while probing it.  This pushes when SIZE is positive.  SIZE need not
+   be constant.
+
+   This is subtly different from anti_adjust_stack_and_probe in order to
+   prevent stack-clash attacks:
+
+     1. It must assume no knowledge of the probing state, any allocation
+	must probe.
+
+	Consider the case of a 1 byte alloca in a loop.  If the sum of the
+	allocations is large, then this could be used to jump the guard if
+	probes were not emitted.
+
+     2. It never skips probes, whereas anti_adjust_stack_and_probe will
+	skip probes on the first couple PROBE_INTERVALs on the assumption
+	they're done elsewhere.
+
+     3. It only allocates and probes SIZE bytes, it does not need to
+	allocate/probe beyond that because this probing style does not
+	guarantee signal handling capability if the guard is hit.  */
+
+static void
+anti_adjust_stack_and_probe_stack_clash (rtx size)
+{
+  /* First ensure SIZE is Pmode.  */
+  if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
+    size = convert_to_mode (Pmode, size, 1);
+
+  /* We can get here with a constant size on some targets.  */
+  rtx rounded_size, last_addr, residual;
+  HOST_WIDE_INT probe_interval;
+  compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
+					    &residual, &probe_interval, size);
+
+  if (rounded_size != CONST0_RTX (Pmode))
+    {
+      if (INTVAL (rounded_size) <= 4 * probe_interval)
+	{
+	  for (HOST_WIDE_INT i = 0;
+	       i < INTVAL (rounded_size);
+	       i += probe_interval)
+	    {
+	      anti_adjust_stack (GEN_INT (probe_interval));
+
+	      /* The prologue does not probe residuals.  Thus the offset
+		 here probes just beyond what the prologue had already
+		 allocated.  */
+	      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+					       (probe_interval
+						- GET_MODE_SIZE (word_mode))));
+	      emit_insn (gen_blockage ());
+	    }
+	}
+      else
+	{
+	  rtx loop_lab, end_loop;
+	  bool rotate_loop = GET_CODE (rounded_size) == CONST_INT;
+	  emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
+							last_addr, rotate_loop);
+
+	  anti_adjust_stack (GEN_INT (probe_interval));
+
+	  /* The prologue does not probe residuals.  Thus the offset here
+	     probes just beyond what the prologue had already allocated.  */
+	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+					   (probe_interval
+					    - GET_MODE_SIZE (word_mode))));
+
+	  emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
+						      last_addr, rotate_loop);
+	  emit_insn (gen_blockage ());
+	}
+    }
+
+  if (residual != CONST0_RTX (Pmode))
+    {
+      rtx x = force_reg (Pmode, plus_constant (Pmode, residual,
+					       -GET_MODE_SIZE (word_mode)));
+      anti_adjust_stack (residual);
+      emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x));
+      emit_insn (gen_blockage ());
+    }
+
+  /* Some targets make optimistic assumptions in their prologues about
+     how the caller may have probed the stack.  Make sure we honor
+     those assumptions when needed.  */
+  if (size != CONST0_RTX (Pmode)
+      && STACK_CLASH_PROTECTION_NEEDS_FINAL_DYNAMIC_PROBE (residual))
+    {
+      /* Ideally we would just probe at *sp.  However, if SIZE is not
+	 a compile-time constant, but is zero at runtime, then *sp
+	 might hold live data.  So probe at *sp if we know that
+	 an allocation was made, otherwise probe into the red zone
+	 which is obviously undesirable.  */
+      if (GET_CODE (size) == CONST_INT)
+	{
+	  emit_stack_probe (stack_pointer_rtx);
+	  emit_insn (gen_blockage ());
+	}
+      else
+	{
+	  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+					   -GET_MODE_SIZE (word_mode)));
+	  emit_insn (gen_blockage ());
+	}
+    }
+}
+
+
 /* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
    while probing it.  This pushes when SIZE is positive.  SIZE need not
    be constant.  If ADJUST_BACK is true, adjust back the stack pointer
diff --git a/gcc/explow.h b/gcc/explow.h
index 217a322..b85c051 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx);
 /* Add some bytes to the stack while probing it.  An rtx says how many. */
 extern void anti_adjust_stack_and_probe (rtx, bool);
 
+/* Support for building allocation/probing loops for stack-clash
+   protection of dynamically allocated stack space.  */
+extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *,
+						      HOST_WIDE_INT *, rtx);
+extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *,
+							  rtx, bool);
+extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx,
+							rtx, bool);
+
 /* This enum is used for the following two functions.  */
 enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL};
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 59da995..24240fc 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2703,6 +2703,7 @@ get_full_set_src_cost (rtx x, machine_mode mode, struct full_rtx_costs *c)
 /* In explow.c */
 extern HOST_WIDE_INT trunc_int_for_mode	(HOST_WIDE_INT, machine_mode);
 extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
+extern HOST_WIDE_INT get_stack_check_protect (void);
 
 /* In rtl.c */
 extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL);
diff --git a/gcc/testsuite/gcc.dg/stack-check-3.c b/gcc/testsuite/gcc.dg/stack-check-3.c
new file mode 100644
index 0000000..58fb656
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/stack-check-3.c
@@ -0,0 +1,86 @@
+/* The goal here is to ensure that dynamic allocations via vlas or
+   alloca calls receive probing.
+
+   Scanning the RTL or assembly code seems like insanity here as does
+   checking for particular allocation sizes and probe offsets.  For
+   now we just verify that there's an allocation + probe loop and
+   residual allocation + probe for f?.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=4096 --param stack-clash-protection-guard-size=4096" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+__attribute__((noinline, noclone)) void
+foo (char *p)
+{
+  asm volatile ("" : : "r" (p) : "memory");
+}
+
+/* Simple VLA, no other locals. */
+__attribute__((noinline, noclone)) void
+f0 (int x)
+{
+  char vla[x];
+  foo (vla);
+}
+
+/* Simple VLA, small local frame.  */
+__attribute__((noinline, noclone)) void
+f1 (int x)
+{
+  char locals[128];
+  char vla[x];
+  foo (vla);
+}
+
+/* Small constant alloca, no other locals. */
+__attribute__((noinline, noclone)) void
+f2 (int x)
+{
+  char *vla = __builtin_alloca (128);
+  foo (vla);
+}
+
+/* Big constant alloca, small local frame.  */
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  char locals[128];
+  char *vla = __builtin_alloca (16384);
+  foo (vla);
+}
+
+/* Bigger constant alloca, small local frame.  */
+__attribute__((noinline, noclone)) void
+f3a (int x)
+{
+  char locals[128];
+  char *vla = __builtin_alloca (32768);
+  foo (vla);
+}
+
+/* Nonconstant alloca, no other locals. */
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+  char *vla = __builtin_alloca (x);
+  foo (vla);
+}
+
+/* Nonconstant alloca, small local frame.  */
+__attribute__((noinline, noclone)) void
+f5 (int x)
+{
+  char locals[128];
+  char *vla = __builtin_alloca (x);
+  foo (vla);
+}
+
+/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 7 "expand" } } */
+
+
+/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 7 "expand" { target callee_realigns_stack } } } */
+/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 4 "expand" { target { ! callee_realigns_stack } } } } */
+/* { dg-final { scan-rtl-dump-times "allocation and probing in rotated loop" 1 "expand" { target { ! callee_realigns_stack } } } } */
+/* { dg-final { scan-rtl-dump-times "allocation and probing inline" 1 "expand" { target { ! callee_realigns_stack } } } } */
+/* { dg-final { scan-rtl-dump-times "skipped dynamic allocation and probing loop" 1 "expand" { target { ! callee_realigns_stack } } } } */
