This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Drop excess size used for run time allocated stack variables.


Version two of the patch including a test case.

On Mon, May 02, 2016 at 09:10:25AM -0600, Jeff Law wrote:
> On 04/29/2016 04:12 PM, Dominik Vogt wrote:
> >The attached patch removes excess stack space allocation with
> >alloca in some situations.  Plese check the commit message in the
> >patch for details.

> However, I would strongly recommend some tests, even if they are
> target specific.  You can always copy pr36728-1 into the s390x
> directory and look at size of the generated stack.  Simliarly for
> pr50938 for x86.

However, x86 uses the "else" branch in round_push, i.e. it uses
"virtual_preferred_stack_boundary_rtx" to calculate the number of
bytes to add for stack alignment.  That value is unknown at the
time round_push is called, so the test case fails on such targets,
and I've no idea how to fix this properly.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

Attachment: 0001-v2-ChangeLog
Description: Text document

>From f4d252eb2c860450b3738591fca36f568c958232 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 29 Apr 2016 08:36:59 +0100
Subject: [PATCH] Drop excess size used for run time allocated stack
 variables.

The present calculation sometimes led to more stack memory being used than
necessary with alloca.  First, (STACK_BOUNDARY -1) would be added to the
allocated size:

  size = plus_constant (Pmode, size, extra);
  size = force_operand (size, NULL_RTX);

Then round_push was called and added another (STACK_BOUNDARY - 1) before
rounding down to a multiple of STACK_BOUNDARY.  On s390x this resulted in
adding 14 before rounding down for "x" in the test case pr36728-1.c.

round_push() now takes an argument to inform it about what has already been
added to size.
---
 gcc/explow.c                   | 33 ++++++++++++++++-----------
 gcc/testsuite/gcc.dg/pr50938.c | 52 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr50938.c

diff --git a/gcc/explow.c b/gcc/explow.c
index e0ce201..1858597 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -949,24 +949,30 @@ anti_adjust_stack (rtx adjust)
 }
 
 /* Round the size of a block to be pushed up to the boundary required
-   by this machine.  SIZE is the desired size, which need not be constant.  */
+   by this machine.  SIZE is the desired size, which need not be constant.
+   ALREADY_ADDED is the number of units that have already been added to SIZE
+   for other alignment reasons.
+*/
 
 static rtx
-round_push (rtx size)
+round_push (rtx size, int already_added)
 {
-  rtx align_rtx, alignm1_rtx;
+  rtx align_rtx, add_rtx;
 
   if (!SUPPORTS_STACK_ALIGNMENT
       || crtl->preferred_stack_boundary == MAX_SUPPORTED_STACK_ALIGNMENT)
     {
       int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+      int add;
 
       if (align == 1)
 	return size;
 
+      add = (align > already_added) ? align - already_added - 1 : 0;
+
       if (CONST_INT_P (size))
 	{
-	  HOST_WIDE_INT new_size = (INTVAL (size) + align - 1) / align * align;
+	  HOST_WIDE_INT new_size = (INTVAL (size) + add) / align * align;
 
 	  if (INTVAL (size) != new_size)
 	    size = GEN_INT (new_size);
@@ -974,7 +980,7 @@ round_push (rtx size)
 	}
 
       align_rtx = GEN_INT (align);
-      alignm1_rtx = GEN_INT (align - 1);
+      add_rtx = (add > 0) ? GEN_INT (add) : const0_rtx;
     }
   else
     {
@@ -983,15 +989,15 @@ round_push (rtx size)
 	 substituted by the right value in vregs pass and optimized
 	 during combine.  */
       align_rtx = virtual_preferred_stack_boundary_rtx;
-      alignm1_rtx = force_operand (plus_constant (Pmode, align_rtx, -1),
-				   NULL_RTX);
+      add_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), NULL_RTX);
     }
 
   /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
      but we know it can't.  So add ourselves and then do
      TRUNC_DIV_EXPR.  */
-  size = expand_binop (Pmode, add_optab, size, alignm1_rtx,
-		       NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  if (add_rtx != const0_rtx)
+    size = expand_binop (Pmode, add_optab, size, add_rtx,
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
   size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx,
 			NULL_RTX, 1);
   size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1);
@@ -1175,6 +1181,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   rtx_code_label *final_label;
   rtx final_target, target;
   unsigned extra_align = 0;
+  unsigned extra = 0;
   bool must_align;
 
   /* If we're asking for zero bytes, it doesn't matter what we point
@@ -1275,9 +1282,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   extra_align = BITS_PER_UNIT;
 #endif
 
-  if (must_align)
+  if (must_align && required_align > extra_align)
     {
-      unsigned extra = (required_align - extra_align) / BITS_PER_UNIT;
+      extra = (required_align - extra_align) / BITS_PER_UNIT;
 
       size = plus_constant (Pmode, size, extra);
       size = force_operand (size, NULL_RTX);
@@ -1285,7 +1292,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       if (flag_stack_usage_info)
 	stack_usage_size += extra;
 
-      if (extra && size_align > extra_align)
+      if (size_align > extra_align)
 	size_align = extra_align;
     }
 
@@ -1304,7 +1311,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
      momentarily mis-aligning the stack.  */
   if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
     {
-      size = round_push (size);
+      size = round_push (size, extra);
 
       if (flag_stack_usage_info)
 	{
diff --git a/gcc/testsuite/gcc.dg/pr50938.c b/gcc/testsuite/gcc.dg/pr50938.c
new file mode 100644
index 0000000..14b7540
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr50938.c
@@ -0,0 +1,52 @@
+/* PR/50938: Check that alloca () reserves the correct amount of stack space.
+ */
+
+/* { dg-do run } */
+/* { dg-options "-O0" } */
+/* { dg-require-effective-target alloca } */
+
+__attribute__ ((noinline))
+static void
+dummy (void)
+{
+}
+
+__attribute__ ((noinline))
+static char *
+get_frame_addr (void *p)
+{
+  void *faddr = __builtin_frame_address (0);
+  /* Call a function to make sure that a stack frame exists.  */
+  dummy ();
+  return faddr;
+}
+
+__attribute__ ((noinline))
+static void *
+stack_var (void)
+{
+  /* 1024 bytes on the stack.  */
+  char s[1024];
+  /* One stack slot.  */
+  void *p = (void *)s;
+  /* Keep stack memory alive by passing it to the function.  */
+  return get_frame_addr (p);
+}
+
+__attribute__ ((noinline))
+static void *
+alloca_var (void)
+{
+  /* 1024 bytes on the stack plus one stack slot for a.  */
+  void *a = __builtin_alloca (1024);
+  return get_frame_addr (a);
+}
+
+int main (void)
+{
+  if (stack_var () != alloca_var ())
+    /* The functions used a different amount of stack space.  */
+    __builtin_abort ();
+
+  return 0;
+}
-- 
2.3.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]