This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Drop excess size used for run time allocated stack variables.


The attached patch removes excess stack space allocation with
alloca in some situations.  Please check the commit message in the
patch for details.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

Attachment: 0001-ChangeLog
Description: Text document

>From 9ea451aef0f1f2fb0a36a7b718f910cfe285541d Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 29 Apr 2016 08:36:59 +0100
Subject: [PATCH] Drop excess size used for run time allocated stack
 variables.

The present calculation sometimes led to more stack memory being used than
necessary with alloca.  First, (STACK_BOUNDARY - 1) would be added to the
allocated size:

  size = plus_constant (Pmode, size, extra);
  size = force_operand (size, NULL_RTX);

Then round_push was called and added another (STACK_BOUNDARY - 1) before
rounding down to a multiple of STACK_BOUNDARY.  On s390x this resulted in
adding 14 before rounding down for "x" in the test case pr36728-1.c.

round_push() now takes an argument to inform it about what has already been
added to size.
---
 gcc/explow.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/gcc/explow.c b/gcc/explow.c
index e0ce201..a039295 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -949,24 +949,30 @@ anti_adjust_stack (rtx adjust)
 }
 
 /* Round the size of a block to be pushed up to the boundary required
-   by this machine.  SIZE is the desired size, which need not be constant.  */
+   by this machine.  SIZE is the desired size, which need not be constant.
+   ALREADY_ADDED is the number of units that have already been added to SIZE for
+   other alignment reasons.
+*/
 
 static rtx
-round_push (rtx size)
+round_push (rtx size, int already_added)
 {
-  rtx align_rtx, alignm1_rtx;
+  rtx align_rtx, add_rtx;
 
   if (!SUPPORTS_STACK_ALIGNMENT
       || crtl->preferred_stack_boundary == MAX_SUPPORTED_STACK_ALIGNMENT)
     {
       int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+      int add;
 
       if (align == 1)
 	return size;
 
+      add = (align > already_added) ? align - already_added - 1 : 0;
+
       if (CONST_INT_P (size))
 	{
-	  HOST_WIDE_INT new_size = (INTVAL (size) + align - 1) / align * align;
+	  HOST_WIDE_INT new_size = (INTVAL (size) + add) / align * align;
 
 	  if (INTVAL (size) != new_size)
 	    size = GEN_INT (new_size);
@@ -974,7 +980,7 @@ round_push (rtx size)
 	}
 
       align_rtx = GEN_INT (align);
-      alignm1_rtx = GEN_INT (align - 1);
+      add_rtx = (add > 0) ? GEN_INT (add) : const0_rtx;
     }
   else
     {
@@ -983,15 +989,15 @@ round_push (rtx size)
 	 substituted by the right value in vregs pass and optimized
 	 during combine.  */
       align_rtx = virtual_preferred_stack_boundary_rtx;
-      alignm1_rtx = force_operand (plus_constant (Pmode, align_rtx, -1),
-				   NULL_RTX);
+      add_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), NULL_RTX);
     }
 
   /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
      but we know it can't.  So add ourselves and then do
      TRUNC_DIV_EXPR.  */
-  size = expand_binop (Pmode, add_optab, size, alignm1_rtx,
-		       NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  if (add_rtx != const0_rtx)
+    size = expand_binop (Pmode, add_optab, size, add_rtx,
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
   size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx,
 			NULL_RTX, 1);
   size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1);
@@ -1175,6 +1181,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   rtx_code_label *final_label;
   rtx final_target, target;
   unsigned extra_align = 0;
+  unsigned extra = 0;
   bool must_align;
 
   /* If we're asking for zero bytes, it doesn't matter what we point
@@ -1275,9 +1282,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   extra_align = BITS_PER_UNIT;
 #endif
 
-  if (must_align)
+  if (must_align && required_align > extra_align)
     {
-      unsigned extra = (required_align - extra_align) / BITS_PER_UNIT;
+      extra = (required_align - extra_align) / BITS_PER_UNIT;
 
       size = plus_constant (Pmode, size, extra);
       size = force_operand (size, NULL_RTX);
@@ -1285,7 +1292,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       if (flag_stack_usage_info)
 	stack_usage_size += extra;
 
-      if (extra && size_align > extra_align)
+      if (size_align > extra_align)
 	size_align = extra_align;
     }
 
@@ -1304,7 +1311,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
      momentarily mis-aligning the stack.  */
   if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
     {
-      size = round_push (size);
+      size = round_push (size, extra);
 
       if (flag_stack_usage_info)
 	{
-- 
2.3.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]