This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[committed][PATCH] Stack clash protection 08/08 - V4


Andreas K. indicated privately he had no concerns WRT the s390 stack
clash patches which are a combination of his work and mine.

I believe the only changes since V4 were the addition of scheduling
barriers, REG_STACK_CHECK notes and changes related to the params
changing from byte counts to log2 of the byte count.


I'm committing this to the trunk.  That leaves just the aarch64 and ppc
patches to wrap up.

Jeff
commit 00e021d7eb48f8aee76aece3444ca19e02c79a01
Author: Jeff Law <law@devel1.s390.bos.redhat.com>
Date:   Fri Jul 7 17:25:35 2017 +0000

            * config/s390/s390.c (MIN_UNROLL_PROBES): Define.
            (allocate_stack_space): New function, partially extracted from
            s390_emit_prologue.
            (s390_emit_prologue): Track offset to most recent stack probe.
            Code to allocate space moved into allocate_stack_space.
            Dump actions when no stack is allocated.
            (s390_prologue_plus_offset): New function.
            (s390_emit_stack_probe): Likewise.
    
            * gcc.dg/stack-check-5.c:  Add argument for s390.
            * lib/target-supports.exp:
            (check_effective_target_supports_stack_clash_protection): Enable for
            s390/s390x targets.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0c88145d5a7..bd8bb61f71b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2017-09-20  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+	    Jeff Law  <law@redhat.com>
+
+	* config/s390/s390.c (MIN_UNROLL_PROBES): Define.
+	(allocate_stack_space): New function, partially extracted from
+	s390_emit_prologue.
+	(s390_emit_prologue): Track offset to most recent stack probe.
+	Code to allocate space moved into allocate_stack_space.
+	Dump actions when no stack is allocated.
+	(s390_prologue_plus_offset): New function.
+	(s390_emit_stack_probe): Likewise.
+
 2017-09-20  Alexandre Oliva <aoliva@redhat.com>
 
 	* common.opt (Wa, Wl, Wp, g, gz=): Add
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index f8cf213901d..037e22e737e 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -11155,6 +11155,183 @@ pass_s390_early_mach::execute (function *fun)
 
 } // anon namespace
 
+/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
+   - push too big immediates to the literal pool and annotate the refs
+   - emit frame related notes for stack pointer changes.  */
+
+static rtx
+s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
+{
+  rtx insn;
+  rtx orig_offset = offset;
+
+  gcc_assert (REG_P (target));
+  gcc_assert (REG_P (reg));
+  gcc_assert (CONST_INT_P (offset));
+
+  if (offset == const0_rtx)                               /* lr/lgr */
+    {
+      insn = emit_move_insn (target, reg);
+    }
+  else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
+    {
+      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
+						   offset));
+    }
+  else
+    {
+      if (!satisfies_constraint_K (offset)                /* ahi/aghi */
+	  && (!TARGET_EXTIMM
+	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
+		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
+	offset = force_const_mem (Pmode, offset);
+
+      if (target != reg)
+	{
+	  insn = emit_move_insn (target, reg);
+	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
+	}
+
+      insn = emit_insn (gen_add2_insn (target, offset));
+
+      if (!CONST_INT_P (offset))
+	{
+	  annotate_constant_pool_refs (&PATTERN (insn));
+
+	  if (frame_related_p)
+	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			  gen_rtx_SET (target,
+				       gen_rtx_PLUS (Pmode, target,
+						     orig_offset)));
+	}
+    }
+
+  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
+
+  /* If this is a stack adjustment and we are generating a stack clash
+     prologue, then add a REG_STACK_CHECK note to signal that this insn
+     should be left alone.  */
+  if (flag_stack_clash_protection && target == stack_pointer_rtx)
+    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
+
+  return insn;
+}
+
+/* Emit a compare instruction with a volatile memory access as stack
+   probe.  It does not waste store tags and does not clobber any
+   registers apart from the condition code.  */
+static void
+s390_emit_stack_probe (rtx addr)
+{
+  rtx tmp = gen_rtx_MEM (Pmode, addr);
+  MEM_VOLATILE_P (tmp) = 1;
+  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
+  emit_insn (gen_blockage ());
+}
+
+/* Use a runtime loop if we have to emit more probes than this.  */
+#define MIN_UNROLL_PROBES 3
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is negative.
+
+   The return value is true if TEMP_REG has been clobbered.  */
+static bool
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+		      rtx temp_reg)
+{
+  bool temp_reg_clobbered_p = false;
+  HOST_WIDE_INT probe_interval
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+  HOST_WIDE_INT guard_size
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
+
+  if (flag_stack_clash_protection)
+    {
+      if (last_probe_offset + -INTVAL (size) < guard_size)
+	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+      else
+	{
+	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
+	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
+	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
+	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+
+	  if (num_probes < MIN_UNROLL_PROBES)
+	    {
+	      /* Emit unrolled probe statements.  */
+
+	      for (unsigned int i = 0; i < num_probes; i++)
+		{
+		  s390_prologue_plus_offset (stack_pointer_rtx,
+					     stack_pointer_rtx,
+					     GEN_INT (-probe_interval), true);
+		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
+						       stack_pointer_rtx,
+						       offset));
+		}
+	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
+	    }
+	  else
+	    {
+	      /* Emit a loop probing the pages.  */
+
+	      rtx_code_label *loop_start_label = gen_label_rtx ();
+
+	      /* From now on temp_reg will be the CFA register.  */
+	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
+					 GEN_INT (-rounded_size), true);
+	      emit_label (loop_start_label);
+
+	      s390_prologue_plus_offset (stack_pointer_rtx,
+					 stack_pointer_rtx,
+					 GEN_INT (-probe_interval), false);
+	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
+						   stack_pointer_rtx,
+						   offset));
+	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
+				       GT, NULL_RTX,
+				       Pmode, 1, loop_start_label);
+
+	      /* Without this make_edges ICEes.  */
+	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
+	      LABEL_NUSES (loop_start_label) = 1;
+
+	      /* That's going to be a NOP since stack pointer and
+		 temp_reg are supposed to be the same here.  We just
+		 emit it to set the CFA reg back to r15.  */
+	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
+					 const0_rtx, true);
+	      temp_reg_clobbered_p = true;
+	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
+	    }
+
+	  /* Handle any residual allocation request.  */
+	  s390_prologue_plus_offset (stack_pointer_rtx,
+				     stack_pointer_rtx,
+				     GEN_INT (-residual), true);
+	  last_probe_offset += residual;
+	  if (last_probe_offset >= probe_interval)
+	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
+						 stack_pointer_rtx,
+						 GEN_INT (residual
+							  - UNITS_PER_LONG)));
+
+	  return temp_reg_clobbered_p;
+	}
+    }
+
+  /* Subtract frame size from stack pointer.  */
+  s390_prologue_plus_offset (stack_pointer_rtx,
+			     stack_pointer_rtx,
+			     size, true);
+
+  return temp_reg_clobbered_p;
+}
+
 /* Expand the prologue into a bunch of separate insns.  */
 
 void
@@ -11179,6 +11356,19 @@ s390_emit_prologue (void)
   else
     temp_reg = gen_rtx_REG (Pmode, 1);
 
+  /* When probing for stack-clash mitigation, we have to track the distance
+     between the stack pointer and closest known reference.
+
+     Most of the time we have to make a worst case assumption.  The
+     only exception is when TARGET_BACKCHAIN is active, in which case
+     we know *sp (offset 0) was written.  */
+  HOST_WIDE_INT probe_interval
+    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+  HOST_WIDE_INT last_probe_offset
+    = (TARGET_BACKCHAIN
+       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
+       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
+
   s390_save_gprs_to_fprs ();
 
   /* Save call saved gprs.  */
@@ -11190,6 +11380,14 @@ s390_emit_prologue (void)
 					  - cfun_frame_layout.first_save_gpr_slot),
 			cfun_frame_layout.first_save_gpr,
 			cfun_frame_layout.last_save_gpr);
+
+      /* This is not 100% correct.  If we have more than one register saved,
+	 then LAST_PROBE_OFFSET can move even closer to sp.  */
+      last_probe_offset
+	= (cfun_frame_layout.gprs_offset +
+	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
+			     - cfun_frame_layout.first_save_gpr_slot));
+
       emit_insn (insn);
     }
 
@@ -11206,6 +11404,8 @@ s390_emit_prologue (void)
       if (cfun_fpr_save_p (i))
 	{
 	  save_fpr (stack_pointer_rtx, offset, i);
+	  if (offset < last_probe_offset)
+	    last_probe_offset = offset;
 	  offset += 8;
 	}
       else if (!TARGET_PACKED_STACK || cfun->stdarg)
@@ -11219,6 +11419,8 @@ s390_emit_prologue (void)
       if (cfun_fpr_save_p (i))
 	{
 	  insn = save_fpr (stack_pointer_rtx, offset, i);
+	  if (offset < last_probe_offset)
+	    last_probe_offset = offset;
 	  offset += 8;
 
 	  /* If f4 and f6 are call clobbered they are saved due to
@@ -11241,6 +11443,8 @@ s390_emit_prologue (void)
 	if (cfun_fpr_save_p (i))
 	  {
 	    insn = save_fpr (stack_pointer_rtx, offset, i);
+	    if (offset < last_probe_offset)
+	      last_probe_offset = offset;
 
 	    RTX_FRAME_RELATED_P (insn) = 1;
 	    offset -= 8;
@@ -11260,10 +11464,11 @@ s390_emit_prologue (void)
   if (cfun_frame_layout.frame_size > 0)
     {
       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
-      rtx real_frame_off;
+      rtx_insn *stack_pointer_backup_loc;
+      bool temp_reg_clobbered_p;
 
       if (s390_stack_size)
-  	{
+	{
 	  HOST_WIDE_INT stack_guard;
 
 	  if (s390_stack_guard)
@@ -11329,35 +11534,36 @@ s390_emit_prologue (void)
       if (s390_warn_dynamicstack_p && cfun->calls_alloca)
 	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
 
-      /* Save incoming stack pointer into temp reg.  */
-      if (TARGET_BACKCHAIN || next_fpr)
-	insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
+      /* Save the location where we could back up the incoming stack
+	 pointer.  */
+      stack_pointer_backup_loc = get_last_insn ();
 
-      /* Subtract frame size from stack pointer.  */
+      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
+						   temp_reg);
 
-      if (DISP_IN_RANGE (INTVAL (frame_off)))
-	{
-	  insn = gen_rtx_SET (stack_pointer_rtx,
-			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
-					    frame_off));
-	  insn = emit_insn (insn);
-	}
-      else
+      if (TARGET_BACKCHAIN || next_fpr)
 	{
-	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
-	    frame_off = force_const_mem (Pmode, frame_off);
-
-          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
-	  annotate_constant_pool_refs (&PATTERN (insn));
+	  if (temp_reg_clobbered_p)
+	    {
+	      /* allocate_stack_space had to make use of temp_reg and
+		 we need it to hold a backup of the incoming stack
+		 pointer.  Calculate back that value from the current
+		 stack pointer.  */
+	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
+					 GEN_INT (cfun_frame_layout.frame_size),
+					 false);
+	    }
+	  else
+	    {
+	      /* allocate_stack_space didn't actually require
+		 temp_reg.  Insert the stack pointer backup insn
+		 before the stack pointer decrement code - knowing now
+		 that the value will survive.  */
+	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
+			       stack_pointer_backup_loc);
+	    }
 	}
 
-      RTX_FRAME_RELATED_P (insn) = 1;
-      real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
-      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-		    gen_rtx_SET (stack_pointer_rtx,
-				 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
-					       real_frame_off)));
-
       /* Set backchain.  */
 
       if (TARGET_BACKCHAIN)
@@ -11381,6 +11587,8 @@ s390_emit_prologue (void)
 	  emit_clobber (addr);
 	}
     }
+  else if (flag_stack_clash_protection)
+    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
 
   /* Save fprs 8 - 15 (64 bit ABI).  */
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 94696c91b93..75c14d5b045 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2017-09-20  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+	    Jeff Law  <law@redhat.com>
+
+	* gcc.dg/stack-check-5.c:  Add argument for s390.
+	* lib/target-supports.exp: 
+	(check_effective_target_supports_stack_clash_protection): Enable for
+	s390/s390x targets.
+
 2017-09-20  Martin Sebor  <msebor@redhat.com>
 
 	PR c/81854
diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c
index 2171d9b6c23..3178f5d8ce5 100644
--- a/gcc/testsuite/gcc.dg/stack-check-5.c
+++ b/gcc/testsuite/gcc.dg/stack-check-5.c
@@ -3,6 +3,10 @@
 /* { dg-require-effective-target supports_stack_clash_protection } */
 
 
+/* Otherwise the S/390 back-end might save the stack pointer in f2 ()
+   into an FPR.  */
+/* { dg-additional-options "-msoft-float" { target { s390x-*-* } } } */
+
 extern void foo (char *);
 extern void bar (void);
 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5b40058e2d7..2e0e4d43bfa 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8640,12 +8640,12 @@ proc check_effective_target_supports_stack_clash_protection { } {
 
    # Temporary until the target bits are fully ACK'd.
 #  if { [istarget aarch*-*-*]
-#       || [istarget s390*-*-*]
 #       || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } {
 #	return 1
 #  }
 
-    if { [istarget x86_64-*-*] || [istarget i?86-*-*] } {
+    if { [istarget x86_64-*-*] || [istarget i?86-*-*] 
+	  || [istarget s390*-*-*] } {
 	return 1
     }
   return 0

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]