This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v5] Allocate constant size dynamic stack space in the prologue


On Wed, Jul 13, 2016 at 04:12:36PM -0600, Jeff Law wrote:
> On 07/11/2016 05:44 AM, Dominik Vogt wrote:
> >On Thu, Jul 07, 2016 at 12:57:16PM +0100, Dominik Vogt wrote:
> >>On Wed, Jul 06, 2016 at 02:01:06PM +0200, Bernd Schmidt wrote:
> >>>There's one thing I don't quite understand and which seems to have
> >>>changed since v1:
> >>>
> >>>On 07/04/2016 02:19 PM, Dominik Vogt wrote:
> >>>>@@ -1099,8 +1101,10 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
> >>>>
> >>>>      /* If there were any, allocate space.  */
> >>>>      if (large_size > 0)
> >>>>-	large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
> >>>>-						   large_align, true);
> >>>>+	{
> >>>>+	  large_allocsize = GEN_INT (large_size);
> >>>>+	  get_dynamic_stack_size (&large_allocsize, 0, large_align, NULL);
> >>>>+	}
> >>>>    }
> >>>>
> >>>>  for (si = 0; si < n; ++si)
> >>>>@@ -1186,6 +1190,19 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
> >>>>	  /* Large alignment is only processed in the last pass.  */
> >>>>	  if (pred)
> >>>>	    continue;
> >>>>+
> >>>>+	  if (large_allocsize && ! large_allocation_done)
> >>>>+	    {
> >>>>+	      /* Allocate space the virtual stack vars area in the
> >>>>+	         prologue.  */
> >>>>+	      HOST_WIDE_INT loffset;
> >>>>+
> >>>>+	      loffset = alloc_stack_frame_space
> >>>>+		(INTVAL (large_allocsize),
> >>>>+		 PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
> >>>>+	      large_base = get_dynamic_stack_base (loffset, large_align);
> >>>>+	      large_allocation_done = true;
> >>>>+	    }
> >>>>	  gcc_assert (large_base != NULL);
> >>>>
> >>>
> >>>Why is this code split between the two places here?
> >>
> >>This is a bugfix from v1 to v2.
> >>I think I had to move this code to the second loop so that the
> >>space for dynamically aligned variables is allocated at the "end"
> >>of the space for stack variables.  I cannot remember what the bug
> >>was, but maybe it was that the variables with fixed and static
> >>alignment ended up at the same address.
> >>
> >>Anyway, now that the new allocation strategy is used
> >>unconditionally, it should be possible to move the
> >>get_dynamic_stack_size call down to the second loop and simplify
> >>the code somewhat.  I'll look into that.
> >
> >Version 5 with some code moved from the first loop to the second.
> -ENOPATCH

Oops.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

Attachment: 0001-v5-ChangeLog
Description: Text document

>From 254581388640af34bcff55105ec13555043b62e5 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Wed, 25 Nov 2015 09:31:19 +0100
Subject: [PATCH] Allocate constant size dynamic stack space in the
 prologue ...

... and place it in the virtual stack vars area, if the platform supports it.
On S/390 this saves adjusting the stack pointer twice and forcing the frame
pointer into existence.  It also removes the warning with -mwarn-dynamicstack
that is triggered by cfun->calls_alloca == 1.

This fixes a problem with the Linux kernel which aligns the page structure to
16 bytes at run time using inefficient code and issuing a bogus warning.
---
 gcc/cfgexpand.c                                    |  22 +-
 gcc/explow.c                                       | 226 ++++++++++++++-------
 gcc/explow.h                                       |   8 +
 gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c      |  14 ++
 gcc/testsuite/gcc.dg/stack-usage-2.c               |   4 +-
 .../gcc.target/s390/warn-dynamicstack-1.c          |  17 ++
 6 files changed, 207 insertions(+), 84 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index e4ddb3a..5046d61 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1053,6 +1053,7 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
   HOST_WIDE_INT large_size = 0, large_alloc = 0;
   rtx large_base = NULL;
   unsigned large_align = 0;
+  bool large_allocation_done = false;
   tree decl;
 
   /* Determine if there are any variables requiring "large" alignment.
@@ -1096,11 +1097,6 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	  large_size &= -(HOST_WIDE_INT)alignb;
 	  large_size += stack_vars[i].size;
 	}
-
-      /* If there were any, allocate space.  */
-      if (large_size > 0)
-	large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
-						   large_align, true);
     }
 
   for (si = 0; si < n; ++si)
@@ -1186,6 +1182,22 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	  /* Large alignment is only processed in the last pass.  */
 	  if (pred)
 	    continue;
+
+	  /* If there were any variables requiring "large" alignment, allocate
+	     space.  */
+	  if (large_size > 0 && ! large_allocation_done)
+	    {
+	      HOST_WIDE_INT loffset;
+	      rtx large_allocsize;
+
+	      large_allocsize = GEN_INT (large_size);
+	      get_dynamic_stack_size (&large_allocsize, 0, large_align, NULL);
+	      loffset = alloc_stack_frame_space
+		(INTVAL (large_allocsize),
+		 PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+	      large_base = get_dynamic_stack_base (loffset, large_align);
+	      large_allocation_done = true;
+	    }
 	  gcc_assert (large_base != NULL);
 
 	  large_alloc += alignb - 1;
diff --git a/gcc/explow.c b/gcc/explow.c
index 09a0330..d505e98 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1146,82 +1146,55 @@ record_new_stack_level (void)
     update_sjlj_context ();
 }
 
-/* Return an rtx representing the address of an area of memory dynamically
-   pushed on the stack.
+/* Return an rtx doing runtime alignment to REQUIRED_ALIGN on TARGET.  */
+static rtx
+align_dynamic_address (rtx target, unsigned required_align)
+{
+  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+     but we know it can't.  So add ourselves and then do
+     TRUNC_DIV_EXPR.  */
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
+				       Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+			  gen_int_mode (required_align / BITS_PER_UNIT,
+					Pmode),
+			  NULL_RTX, 1);
+  target = expand_mult (Pmode, target,
+			gen_int_mode (required_align / BITS_PER_UNIT,
+				      Pmode),
+			NULL_RTX, 1);
 
-   Any required stack pointer alignment is preserved.
+  return target;
+}
 
-   SIZE is an rtx representing the size of the area.
+/* Return an rtx through *PSIZE, representing the size of an area of memory to
+   be dynamically pushed on the stack.
+
+   *PSIZE is an rtx representing the size of the area.
 
    SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
-   parameter may be zero.  If so, a proper value will be extracted 
+   parameter may be zero.  If so, a proper value will be extracted
    from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
 
    REQUIRED_ALIGN is the alignment (in bits) required for the region
    of memory.
 
-   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
-   stack space allocated by the generated code cannot be added with itself
-   in the course of the execution of the function.  It is always safe to
-   pass FALSE here and the following criterion is sufficient in order to
-   pass TRUE: every path in the CFG that starts at the allocation point and
-   loops to it executes the associated deallocation code.  */
-
-rtx
-allocate_dynamic_stack_space (rtx size, unsigned size_align,
-			      unsigned required_align, bool cannot_accumulate)
+   If PSTACK_USAGE_SIZE is not NULL it points to a value that is increased for
+   the additional size returned.  */
+void
+get_dynamic_stack_size (rtx *psize, unsigned size_align,
+			unsigned required_align,
+			HOST_WIDE_INT *pstack_usage_size)
 {
-  HOST_WIDE_INT stack_usage_size = -1;
-  rtx_code_label *final_label;
-  rtx final_target, target;
-  unsigned extra;
-
-  /* If we're asking for zero bytes, it doesn't matter what we point
-     to since we can't dereference it.  But return a reasonable
-     address anyway.  */
-  if (size == const0_rtx)
-    return virtual_stack_dynamic_rtx;
-
-  /* Otherwise, show we're calling alloca or equivalent.  */
-  cfun->calls_alloca = 1;
-
-  /* If stack usage info is requested, look into the size we are passed.
-     We need to do so this early to avoid the obfuscation that may be
-     introduced later by the various alignment operations.  */
-  if (flag_stack_usage_info)
-    {
-      if (CONST_INT_P (size))
-	stack_usage_size = INTVAL (size);
-      else if (REG_P (size))
-        {
-	  /* Look into the last emitted insn and see if we can deduce
-	     something for the register.  */
-	  rtx_insn *insn;
-	  rtx set, note;
-	  insn = get_last_insn ();
-	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
-	    {
-	      if (CONST_INT_P (SET_SRC (set)))
-		stack_usage_size = INTVAL (SET_SRC (set));
-	      else if ((note = find_reg_equal_equiv_note (insn))
-		       && CONST_INT_P (XEXP (note, 0)))
-		stack_usage_size = INTVAL (XEXP (note, 0));
-	    }
-	}
-
-      /* If the size is not constant, we can't say anything.  */
-      if (stack_usage_size == -1)
-	{
-	  current_function_has_unbounded_dynamic_stack_size = 1;
-	  stack_usage_size = 0;
-	}
-    }
+  unsigned extra = 0;
+  rtx size = *psize;
 
   /* Ensure the size is in the proper mode.  */
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
-  /* Adjust SIZE_ALIGN, if needed.  */
   if (CONST_INT_P (size))
     {
       unsigned HOST_WIDE_INT lsb;
@@ -1255,8 +1228,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   size = plus_constant (Pmode, size, extra);
   size = force_operand (size, NULL_RTX);
 
-  if (flag_stack_usage_info)
-    stack_usage_size += extra;
+  /*!!!*/
+  if (flag_stack_usage_info && pstack_usage_size)
+    *pstack_usage_size += extra;
 
   if (extra && size_align > BITS_PER_UNIT)
     size_align = BITS_PER_UNIT;
@@ -1278,13 +1252,89 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     {
       size = round_push (size);
 
-      if (flag_stack_usage_info)
+      if (flag_stack_usage_info && pstack_usage_size)
 	{
 	  int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
-	  stack_usage_size = (stack_usage_size + align - 1) / align * align;
+	  *pstack_usage_size =
+	    (*pstack_usage_size + align - 1) / align * align;
 	}
     }
 
+  *psize = size;
+}
+
+/* Return an rtx representing the address of an area of memory dynamically
+   pushed on the stack.
+
+   Any required stack pointer alignment is preserved.
+
+   SIZE is an rtx representing the size of the area.
+
+   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
+   parameter may be zero.  If so, a proper value will be extracted
+   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.
+
+   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
+   stack space allocated by the generated code cannot be added with itself
+   in the course of the execution of the function.  It is always safe to
+   pass FALSE here and the following criterion is sufficient in order to
+   pass TRUE: every path in the CFG that starts at the allocation point and
+   loops to it executes the associated deallocation code.  */
+
+rtx
+allocate_dynamic_stack_space (rtx size, unsigned size_align,
+			      unsigned required_align, bool cannot_accumulate)
+{
+  HOST_WIDE_INT stack_usage_size = -1;
+  rtx_code_label *final_label;
+  rtx final_target, target;
+
+  /* If we're asking for zero bytes, it doesn't matter what we point
+     to since we can't dereference it.  But return a reasonable
+     address anyway.  */
+  if (size == const0_rtx)
+    return virtual_stack_dynamic_rtx;
+
+  /* Otherwise, show we're calling alloca or equivalent.  */
+  cfun->calls_alloca = 1;
+
+  /* If stack usage info is requested, look into the size we are passed.
+     We need to do so this early to avoid the obfuscation that may be
+     introduced later by the various alignment operations.  */
+  if (flag_stack_usage_info)
+    {
+      if (CONST_INT_P (size))
+	stack_usage_size = INTVAL (size);
+      else if (REG_P (size))
+        {
+	  /* Look into the last emitted insn and see if we can deduce
+	     something for the register.  */
+	  rtx_insn *insn;
+	  rtx set, note;
+	  insn = get_last_insn ();
+	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
+	    {
+	      if (CONST_INT_P (SET_SRC (set)))
+		stack_usage_size = INTVAL (SET_SRC (set));
+	      else if ((note = find_reg_equal_equiv_note (insn))
+		       && CONST_INT_P (XEXP (note, 0)))
+		stack_usage_size = INTVAL (XEXP (note, 0));
+	    }
+	}
+
+      /* If the size is not constant, we can't say anything.  */
+      if (stack_usage_size == -1)
+	{
+	  current_function_has_unbounded_dynamic_stack_size = 1;
+	  stack_usage_size = 0;
+	}
+    }
+
+  get_dynamic_stack_size (&size, size_align, required_align, &stack_usage_size);
+
   target = gen_reg_rtx (Pmode);
 
   /* The size is supposed to be fully adjusted at this point so record it
@@ -1447,19 +1497,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       target = final_target;
     }
 
-  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-     but we know it can't.  So add ourselves and then do
-     TRUNC_DIV_EXPR.  */
-  target = expand_binop (Pmode, add_optab, target,
-			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
-				       Pmode),
-			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
-  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-			  gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
-			  NULL_RTX, 1);
-  target = expand_mult (Pmode, target,
-			gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
-			NULL_RTX, 1);
+  target = align_dynamic_address (target, required_align);
 
   /* Now that we've committed to a return value, mark its alignment.  */
   mark_reg_pointer (target, required_align);
@@ -1469,6 +1507,38 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
 
   return target;
 }
+
+/* Return an rtx representing the address of an area of memory already
+   statically pushed onto the stack in the virtual stack vars area.  (It is
+   assumed that the area is allocated in the function prologue.)
+
+   Any required stack pointer alignment is preserved.
+
+   OFFSET is the offset of the area into the virtual stack vars area.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.  */
+
+rtx
+get_dynamic_stack_base (HOST_WIDE_INT offset, unsigned required_align)
+{
+  rtx target;
+
+  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
+  target = gen_reg_rtx (Pmode);
+  emit_move_insn (target, virtual_stack_vars_rtx);
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (offset, Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = align_dynamic_address (target, required_align);
+
+  /* Now that we've committed to a return value, mark its alignment.  */
+  mark_reg_pointer (target, required_align);
+
+  return target;
+}
 
 /* A front end may want to override GCC's stack checking by providing a
    run-time routine to call to check the stack, so provide a mechanism for
diff --git a/gcc/explow.h b/gcc/explow.h
index 52113db..e12f90c 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -87,6 +87,14 @@ extern void record_new_stack_level (void);
 /* Allocate some space on the stack dynamically and return its address.  */
 extern rtx allocate_dynamic_stack_space (rtx, unsigned, unsigned, bool);
 
+/* Calculate the necessary size of a constant dynamic stack allocation from the
+   size of the variable area.  */
+extern void get_dynamic_stack_size (rtx *, unsigned, unsigned, HOST_WIDE_INT *);
+
+/* Returns the address of the dynamic stack space without allocating it.  */
+extern rtx get_dynamic_stack_base (HOST_WIDE_INT offset,
+				   unsigned required_align);
+
 /* Emit one stack probe at ADDRESS, an address within the stack.  */
 extern void emit_stack_probe (rtx);
 
diff --git a/gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c b/gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c
new file mode 100644
index 0000000..6dc17af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c
@@ -0,0 +1,14 @@
+/* Verify that run time aligned local variables are aloocated in the prologue
+   in one pass together with normal local variables.  */
+/* { dg-do compile } */
+/* { dg-options "-O0 -fomit-frame-pointer" } */
+
+extern void bar (void *, void *, void *);
+void foo (void)
+{
+  int i;
+  __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
+  __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
+  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
+}
+/* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */
diff --git a/gcc/testsuite/gcc.dg/stack-usage-2.c b/gcc/testsuite/gcc.dg/stack-usage-2.c
index 86e2a65..1a9e7f3 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-2.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-2.c
@@ -16,7 +16,9 @@ int foo2 (void)  /* { dg-warning "stack usage is \[0-9\]* bytes" } */
   return 0;
 }
 
-int foo3 (void) /* { dg-warning "stack usage might be \[0-9\]* bytes" } */
+/* The actual warning depends on whether stack space is allocated dynamically
+   or statically.  */
+int foo3 (void) /* { dg-warning "stack usage (might be)|(is) \[0-9\]* bytes" } */
 {
   char arr[1024] __attribute__((aligned (512)));
   arr[0] = 1;
diff --git a/gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c b/gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c
new file mode 100644
index 0000000..66913f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c
@@ -0,0 +1,17 @@
+/* Check that the stack pointer is decreased only once in a funtion with
+   runtime aligned stack variables and -mwarn-dynamicstack does not generate a
+   warning.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O2 -mwarn-dynamicstack" } */
+
+extern int bar (char *pl);
+
+int foo (long size)
+{
+  char __attribute__ ((aligned(16))) l = size;
+
+  return bar (&l);
+}
+
+/* { dg-final { scan-assembler-times "%r15,-" 1 } } */
-- 
2.3.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]