This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 1/2][v3] Drop excess size used for run time allocated stack variables.


On Tue, May 03, 2016 at 03:17:53PM +0100, Dominik Vogt wrote:
> Version two of the patch including a test case.
> 
> On Mon, May 02, 2016 at 09:10:25AM -0600, Jeff Law wrote:
> > On 04/29/2016 04:12 PM, Dominik Vogt wrote:
> > >The attached patch removes excess stack space allocation with
> > >alloca in some situations.  Plese check the commit message in the
> > >patch for details.
> 
> > However, I would strongly recommend some tests, even if they are
> > target specific.  You can always copy pr36728-1 into the s390x
> > directory and look at size of the generated stack.  Simliarly for
> > pr50938 for x86.
> 
> However, x86 uses the "else" branch in round_push, i.e. it uses
> "virtual_preferred_stack_boundary_rtx" to calculate the number of
> bytes to add for stack alignment.  That value is unknown at the
> time round_push is called, so the test case fails on such targets,
> and I've no idea how to fix this properly.

Third version of the patch with the suggested cleanup in the first
patch and the functional stuff in the second one.  The first patch
is based on Jeff's draft with the change suggested by Eric and
more cleanup added by me.

Tested and bootstrapped on s390x biarch (but did not look for
performance regressions as the change should be a no-op).

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

Attachment: 0001-ChangeLog
Description: Text document

>From 01264dac2f62c16a2c7e684a674aa9e9bc27b434 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Wed, 25 May 2016 10:23:57 +0100
Subject: [PATCH 1/2] Minor cleanup to allocate_dynamic_stack_space.

---
 gcc/explow.c | 96 +++++++++++++++++++-----------------------------------------
 1 file changed, 30 insertions(+), 66 deletions(-)

diff --git a/gcc/explow.c b/gcc/explow.c
index e0ce201..09a0330 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1174,8 +1174,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   HOST_WIDE_INT stack_usage_size = -1;
   rtx_code_label *final_label;
   rtx final_target, target;
-  unsigned extra_align = 0;
-  bool must_align;
+  unsigned extra;
 
   /* If we're asking for zero bytes, it doesn't matter what we point
      to since we can't dereference it.  But return a reasonable
@@ -1246,48 +1245,21 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
 
   /* We will need to ensure that the address we return is aligned to
-     REQUIRED_ALIGN.  If STACK_DYNAMIC_OFFSET is defined, we don't
-     always know its final value at this point in the compilation (it
-     might depend on the size of the outgoing parameter lists, for
-     example), so we must align the value to be returned in that case.
-     (Note that STACK_DYNAMIC_OFFSET will have a default nonzero value if
-     STACK_POINTER_OFFSET or ACCUMULATE_OUTGOING_ARGS are defined).
-     We must also do an alignment operation on the returned value if
-     the stack pointer alignment is less strict than REQUIRED_ALIGN.
-
-     If we have to align, we must leave space in SIZE for the hole
-     that might result from the alignment operation.  */
-
-  must_align = (crtl->preferred_stack_boundary < required_align);
-  if (must_align)
-    {
-      if (required_align > PREFERRED_STACK_BOUNDARY)
-	extra_align = PREFERRED_STACK_BOUNDARY;
-      else if (required_align > STACK_BOUNDARY)
-	extra_align = STACK_BOUNDARY;
-      else
-	extra_align = BITS_PER_UNIT;
-    }
+     REQUIRED_ALIGN.  At this point in the compilation, we don't always
+     know the final value of the STACK_DYNAMIC_OFFSET used in function.c
+     (it might depend on the size of the outgoing parameter lists, for
+     example), so we must preventively align the value.  We leave space
+     in SIZE for the hole that might result from the alignment operation.  */
 
-  /* ??? STACK_POINTER_OFFSET is always defined now.  */
-#if defined (STACK_DYNAMIC_OFFSET) || defined (STACK_POINTER_OFFSET)
-  must_align = true;
-  extra_align = BITS_PER_UNIT;
-#endif
-
-  if (must_align)
-    {
-      unsigned extra = (required_align - extra_align) / BITS_PER_UNIT;
+  extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT;
+  size = plus_constant (Pmode, size, extra);
+  size = force_operand (size, NULL_RTX);
 
-      size = plus_constant (Pmode, size, extra);
-      size = force_operand (size, NULL_RTX);
-
-      if (flag_stack_usage_info)
-	stack_usage_size += extra;
+  if (flag_stack_usage_info)
+    stack_usage_size += extra;
 
-      if (extra && size_align > extra_align)
-	size_align = extra_align;
-    }
+  if (extra && size_align > BITS_PER_UNIT)
+    size_align = BITS_PER_UNIT;
 
   /* Round the size to a multiple of the required stack alignment.
      Since the stack if presumed to be rounded before this allocation,
@@ -1361,13 +1333,10 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       if (MALLOC_ABI_ALIGNMENT >= required_align)
 	ask = size;
       else
-	{
-	  ask = expand_binop (Pmode, add_optab, size,
-			      gen_int_mode (required_align / BITS_PER_UNIT - 1,
-					    Pmode),
-			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
-	  must_align = true;
-	}
+	ask = expand_binop (Pmode, add_optab, size,
+			    gen_int_mode (required_align / BITS_PER_UNIT - 1,
+					  Pmode),
+			    NULL_RTX, 1, OPTAB_LIB_WIDEN);
 
       func = init_one_libfunc ("__morestack_allocate_stack_space");
 
@@ -1478,24 +1447,19 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       target = final_target;
     }
 
-  if (must_align)
-    {
-      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-	 but we know it can't.  So add ourselves and then do
-	 TRUNC_DIV_EXPR.  */
-      target = expand_binop (Pmode, add_optab, target,
-			     gen_int_mode (required_align / BITS_PER_UNIT - 1,
-					   Pmode),
-			     NULL_RTX, 1, OPTAB_LIB_WIDEN);
-      target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-			      gen_int_mode (required_align / BITS_PER_UNIT,
-					    Pmode),
-			      NULL_RTX, 1);
-      target = expand_mult (Pmode, target,
-			    gen_int_mode (required_align / BITS_PER_UNIT,
-					  Pmode),
-			    NULL_RTX, 1);
-    }
+  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+     but we know it can't.  So add ourselves and then do
+     TRUNC_DIV_EXPR.  */
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
+				       Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+			  gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
+			  NULL_RTX, 1);
+  target = expand_mult (Pmode, target,
+			gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
+			NULL_RTX, 1);
 
   /* Now that we've committed to a return value, mark its alignment.  */
   mark_reg_pointer (target, required_align);
-- 
2.3.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]