This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v2] Allocate constant size dynamic stack space in the prologue


Version 4 with the following change:

 * Rebased on top of the "Minor cleanup to
   allocate_dynamic_stack_space" patch.  The "Drop excess size
   used for run time allocated stack variables." path needs an
   update because it touches the dsame code as the patch in this
   message.

Ran the testsuite on s390x biarch, s390 and x86_64.

On Fri, Jun 24, 2016 at 01:30:44PM +0100, Dominik Vogt wrote:
> > The only open question I'm aware of is the
> > stack-usage-2.c test.  I guess foo3() will not generate
> > 
> >   stack usage might be ... bytes
> > 
> > On any target anymore, and using alloca() with a constant size
> > results in "unbounded".  It's unclear to me whether that message
> > is ever generated, and if so, how to trigger it.

This point is still open.  If nobody has more comments Andreas
will commit the (afaik already approved) patch soon and we can
clean up the test case in a follow up patch.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

Attachment: 0001-v4-ChangeLog
Description: Text document

>From 83fafd37883e1af3deb2ff13b9fcaefc9d3b7c7e Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Wed, 25 Nov 2015 09:31:19 +0100
Subject: [PATCH] Allocate constant size dynamic stack space in the
 prologue ...

... and place it in the virtual stack vars area, if the platform supports it.
On S/390 this saves adjusting the stack pointer twice and forcing the frame
pointer into existence.  It also removes the warning with -mwarn-dynamicstack
that is triggered by cfun->calls_alloca == 1.

This fixes a problem with the Linux kernel which aligns the page structure to
16 bytes at run time using inefficient code and issuing a bogus warning.
---
 gcc/cfgexpand.c                                    |  21 +-
 gcc/explow.c                                       | 226 ++++++++++++++-------
 gcc/explow.h                                       |   8 +
 gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c      |  14 ++
 gcc/testsuite/gcc.dg/stack-usage-2.c               |   4 +-
 .../gcc.target/s390/warn-dynamicstack-1.c          |  17 ++
 6 files changed, 209 insertions(+), 81 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 56ef71d..f0ef80f 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1052,7 +1052,9 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
   size_t si, i, j, n = stack_vars_num;
   HOST_WIDE_INT large_size = 0, large_alloc = 0;
   rtx large_base = NULL;
+  rtx large_allocsize = NULL;
   unsigned large_align = 0;
+  bool large_allocation_done = false;
   tree decl;
 
   /* Determine if there are any variables requiring "large" alignment.
@@ -1099,8 +1101,10 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 
       /* If there were any, allocate space.  */
       if (large_size > 0)
-	large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
-						   large_align, true);
+	{
+	  large_allocsize = GEN_INT (large_size);
+	  get_dynamic_stack_size (&large_allocsize, 0, large_align, NULL);
+	}
     }
 
   for (si = 0; si < n; ++si)
@@ -1186,6 +1190,19 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	  /* Large alignment is only processed in the last pass.  */
 	  if (pred)
 	    continue;
+
+	  if (large_allocsize && ! large_allocation_done)
+	    {
+	      /* Allocate space the virtual stack vars area in the
+	         prologue.  */
+	      HOST_WIDE_INT loffset;
+
+	      loffset = alloc_stack_frame_space
+		(INTVAL (large_allocsize),
+		 PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+	      large_base = get_dynamic_stack_base (loffset, large_align);
+	      large_allocation_done = true;
+	    }
 	  gcc_assert (large_base != NULL);
 
 	  large_alloc += alignb - 1;
diff --git a/gcc/explow.c b/gcc/explow.c
index 09a0330..d505e98 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1146,82 +1146,55 @@ record_new_stack_level (void)
     update_sjlj_context ();
 }
 
-/* Return an rtx representing the address of an area of memory dynamically
-   pushed on the stack.
+/* Return an rtx doing runtime alignment to REQUIRED_ALIGN on TARGET.  */
+static rtx
+align_dynamic_address (rtx target, unsigned required_align)
+{
+  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+     but we know it can't.  So add ourselves and then do
+     TRUNC_DIV_EXPR.  */
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
+				       Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+			  gen_int_mode (required_align / BITS_PER_UNIT,
+					Pmode),
+			  NULL_RTX, 1);
+  target = expand_mult (Pmode, target,
+			gen_int_mode (required_align / BITS_PER_UNIT,
+				      Pmode),
+			NULL_RTX, 1);
 
-   Any required stack pointer alignment is preserved.
+  return target;
+}
 
-   SIZE is an rtx representing the size of the area.
+/* Return an rtx through *PSIZE, representing the size of an area of memory to
+   be dynamically pushed on the stack.
+
+   *PSIZE is an rtx representing the size of the area.
 
    SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
-   parameter may be zero.  If so, a proper value will be extracted 
+   parameter may be zero.  If so, a proper value will be extracted
    from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
 
    REQUIRED_ALIGN is the alignment (in bits) required for the region
    of memory.
 
-   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
-   stack space allocated by the generated code cannot be added with itself
-   in the course of the execution of the function.  It is always safe to
-   pass FALSE here and the following criterion is sufficient in order to
-   pass TRUE: every path in the CFG that starts at the allocation point and
-   loops to it executes the associated deallocation code.  */
-
-rtx
-allocate_dynamic_stack_space (rtx size, unsigned size_align,
-			      unsigned required_align, bool cannot_accumulate)
+   If PSTACK_USAGE_SIZE is not NULL it points to a value that is increased for
+   the additional size returned.  */
+void
+get_dynamic_stack_size (rtx *psize, unsigned size_align,
+			unsigned required_align,
+			HOST_WIDE_INT *pstack_usage_size)
 {
-  HOST_WIDE_INT stack_usage_size = -1;
-  rtx_code_label *final_label;
-  rtx final_target, target;
-  unsigned extra;
-
-  /* If we're asking for zero bytes, it doesn't matter what we point
-     to since we can't dereference it.  But return a reasonable
-     address anyway.  */
-  if (size == const0_rtx)
-    return virtual_stack_dynamic_rtx;
-
-  /* Otherwise, show we're calling alloca or equivalent.  */
-  cfun->calls_alloca = 1;
-
-  /* If stack usage info is requested, look into the size we are passed.
-     We need to do so this early to avoid the obfuscation that may be
-     introduced later by the various alignment operations.  */
-  if (flag_stack_usage_info)
-    {
-      if (CONST_INT_P (size))
-	stack_usage_size = INTVAL (size);
-      else if (REG_P (size))
-        {
-	  /* Look into the last emitted insn and see if we can deduce
-	     something for the register.  */
-	  rtx_insn *insn;
-	  rtx set, note;
-	  insn = get_last_insn ();
-	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
-	    {
-	      if (CONST_INT_P (SET_SRC (set)))
-		stack_usage_size = INTVAL (SET_SRC (set));
-	      else if ((note = find_reg_equal_equiv_note (insn))
-		       && CONST_INT_P (XEXP (note, 0)))
-		stack_usage_size = INTVAL (XEXP (note, 0));
-	    }
-	}
-
-      /* If the size is not constant, we can't say anything.  */
-      if (stack_usage_size == -1)
-	{
-	  current_function_has_unbounded_dynamic_stack_size = 1;
-	  stack_usage_size = 0;
-	}
-    }
+  unsigned extra = 0;
+  rtx size = *psize;
 
   /* Ensure the size is in the proper mode.  */
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
-  /* Adjust SIZE_ALIGN, if needed.  */
   if (CONST_INT_P (size))
     {
       unsigned HOST_WIDE_INT lsb;
@@ -1255,8 +1228,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   size = plus_constant (Pmode, size, extra);
   size = force_operand (size, NULL_RTX);
 
-  if (flag_stack_usage_info)
-    stack_usage_size += extra;
+  /*!!!*/
+  if (flag_stack_usage_info && pstack_usage_size)
+    *pstack_usage_size += extra;
 
   if (extra && size_align > BITS_PER_UNIT)
     size_align = BITS_PER_UNIT;
@@ -1278,13 +1252,89 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     {
       size = round_push (size);
 
-      if (flag_stack_usage_info)
+      if (flag_stack_usage_info && pstack_usage_size)
 	{
 	  int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
-	  stack_usage_size = (stack_usage_size + align - 1) / align * align;
+	  *pstack_usage_size =
+	    (*pstack_usage_size + align - 1) / align * align;
 	}
     }
 
+  *psize = size;
+}
+
+/* Return an rtx representing the address of an area of memory dynamically
+   pushed on the stack.
+
+   Any required stack pointer alignment is preserved.
+
+   SIZE is an rtx representing the size of the area.
+
+   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
+   parameter may be zero.  If so, a proper value will be extracted
+   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.
+
+   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
+   stack space allocated by the generated code cannot be added with itself
+   in the course of the execution of the function.  It is always safe to
+   pass FALSE here and the following criterion is sufficient in order to
+   pass TRUE: every path in the CFG that starts at the allocation point and
+   loops to it executes the associated deallocation code.  */
+
+rtx
+allocate_dynamic_stack_space (rtx size, unsigned size_align,
+			      unsigned required_align, bool cannot_accumulate)
+{
+  HOST_WIDE_INT stack_usage_size = -1;
+  rtx_code_label *final_label;
+  rtx final_target, target;
+
+  /* If we're asking for zero bytes, it doesn't matter what we point
+     to since we can't dereference it.  But return a reasonable
+     address anyway.  */
+  if (size == const0_rtx)
+    return virtual_stack_dynamic_rtx;
+
+  /* Otherwise, show we're calling alloca or equivalent.  */
+  cfun->calls_alloca = 1;
+
+  /* If stack usage info is requested, look into the size we are passed.
+     We need to do so this early to avoid the obfuscation that may be
+     introduced later by the various alignment operations.  */
+  if (flag_stack_usage_info)
+    {
+      if (CONST_INT_P (size))
+	stack_usage_size = INTVAL (size);
+      else if (REG_P (size))
+        {
+	  /* Look into the last emitted insn and see if we can deduce
+	     something for the register.  */
+	  rtx_insn *insn;
+	  rtx set, note;
+	  insn = get_last_insn ();
+	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
+	    {
+	      if (CONST_INT_P (SET_SRC (set)))
+		stack_usage_size = INTVAL (SET_SRC (set));
+	      else if ((note = find_reg_equal_equiv_note (insn))
+		       && CONST_INT_P (XEXP (note, 0)))
+		stack_usage_size = INTVAL (XEXP (note, 0));
+	    }
+	}
+
+      /* If the size is not constant, we can't say anything.  */
+      if (stack_usage_size == -1)
+	{
+	  current_function_has_unbounded_dynamic_stack_size = 1;
+	  stack_usage_size = 0;
+	}
+    }
+
+  get_dynamic_stack_size (&size, size_align, required_align, &stack_usage_size);
+
   target = gen_reg_rtx (Pmode);
 
   /* The size is supposed to be fully adjusted at this point so record it
@@ -1447,19 +1497,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       target = final_target;
     }
 
-  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-     but we know it can't.  So add ourselves and then do
-     TRUNC_DIV_EXPR.  */
-  target = expand_binop (Pmode, add_optab, target,
-			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
-				       Pmode),
-			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
-  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-			  gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
-			  NULL_RTX, 1);
-  target = expand_mult (Pmode, target,
-			gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
-			NULL_RTX, 1);
+  target = align_dynamic_address (target, required_align);
 
   /* Now that we've committed to a return value, mark its alignment.  */
   mark_reg_pointer (target, required_align);
@@ -1469,6 +1507,38 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
 
   return target;
 }
+
+/* Return an rtx representing the address of an area of memory already
+   statically pushed onto the stack in the virtual stack vars area.  (It is
+   assumed that the area is allocated in the function prologue.)
+
+   Any required stack pointer alignment is preserved.
+
+   OFFSET is the offset of the area into the virtual stack vars area.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.  */
+
+rtx
+get_dynamic_stack_base (HOST_WIDE_INT offset, unsigned required_align)
+{
+  rtx target;
+
+  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
+  target = gen_reg_rtx (Pmode);
+  emit_move_insn (target, virtual_stack_vars_rtx);
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (offset, Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = align_dynamic_address (target, required_align);
+
+  /* Now that we've committed to a return value, mark its alignment.  */
+  mark_reg_pointer (target, required_align);
+
+  return target;
+}
 
 /* A front end may want to override GCC's stack checking by providing a
    run-time routine to call to check the stack, so provide a mechanism for
diff --git a/gcc/explow.h b/gcc/explow.h
index 52113db..e12f90c 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -87,6 +87,14 @@ extern void record_new_stack_level (void);
 /* Allocate some space on the stack dynamically and return its address.  */
 extern rtx allocate_dynamic_stack_space (rtx, unsigned, unsigned, bool);
 
+/* Calculate the necessary size of a constant dynamic stack allocation from the
+   size of the variable area.  */
+extern void get_dynamic_stack_size (rtx *, unsigned, unsigned, HOST_WIDE_INT *);
+
+/* Returns the address of the dynamic stack space without allocating it.  */
+extern rtx get_dynamic_stack_base (HOST_WIDE_INT offset,
+				   unsigned required_align);
+
 /* Emit one stack probe at ADDRESS, an address within the stack.  */
 extern void emit_stack_probe (rtx);
 
diff --git a/gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c b/gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c
new file mode 100644
index 0000000..6dc17af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c
@@ -0,0 +1,14 @@
+/* Verify that run time aligned local variables are aloocated in the prologue
+   in one pass together with normal local variables.  */
+/* { dg-do compile } */
+/* { dg-options "-O0 -fomit-frame-pointer" } */
+
+extern void bar (void *, void *, void *);
+void foo (void)
+{
+  int i;
+  __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
+  __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
+  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
+}
+/* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */
diff --git a/gcc/testsuite/gcc.dg/stack-usage-2.c b/gcc/testsuite/gcc.dg/stack-usage-2.c
index 86e2a65..1a9e7f3 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-2.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-2.c
@@ -16,7 +16,9 @@ int foo2 (void)  /* { dg-warning "stack usage is \[0-9\]* bytes" } */
   return 0;
 }
 
-int foo3 (void) /* { dg-warning "stack usage might be \[0-9\]* bytes" } */
+/* The actual warning depends on whether stack space is allocated dynamically
+   or statically.  */
+int foo3 (void) /* { dg-warning "stack usage (might be)|(is) \[0-9\]* bytes" } */
 {
   char arr[1024] __attribute__((aligned (512)));
   arr[0] = 1;
diff --git a/gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c b/gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c
new file mode 100644
index 0000000..66913f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c
@@ -0,0 +1,17 @@
+/* Check that the stack pointer is decreased only once in a funtion with
+   runtime aligned stack variables and -mwarn-dynamicstack does not generate a
+   warning.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O2 -mwarn-dynamicstack" } */
+
+extern int bar (char *pl);
+
+int foo (long size)
+{
+  char __attribute__ ((aligned(16))) l = size;
+
+  return bar (&l);
+}
+
+/* { dg-final { scan-assembler-times "%r15,-" 1 } } */
-- 
2.3.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]