This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Allocate constant size dynamic stack space in the prologue


New patch with the following changes:

* Fixed comment about dynamic var area placement.
* The area is now placed further away from the stack pointer than
  the non-dynamic stack variables (tested only with
  STACK_GROWS_DOWNWARD).  This is a possible performance
  improvement on S/390 (hoping that more variables will be
  addressable using a displacement).
* Moved the code that calculates the size to actually allocate
  from the size required by dynamic stack variables to a separate
  function.  Use that function from allocate_dynamic_stack_space()
  and expand_stack_vars() so the size calculations are the same
  for both.
* Use a target hook to activate the feature (for now).
  (This is just meant to make it more feasible to be included in
  GCC 6.  If it's too late for that, the code may as well be used
  for all targets.)

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany

Attachment: 0001-ChangeLog
Description: Text document

>From 55b9ba6882dbd2d8deed6c337b0e7de65617d7b3 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Wed, 25 Nov 2015 09:31:19 +0100
Subject: [PATCH] v2: Allocate constant size dynamic stack space in the
 prologue ...

... and place it in the virtual stack vars area, if the platform supports it.
On S/390 this saves adjusting the stack pointer twice and forcing the frame
pointer into existence.  It also removes the warning with -mwarn-dynamicstack
that is triggered by cfun->calls_alloca == 1.

This fixes a problem with the Linux kernel, which aligns the page structure to
16 bytes at run time; for that, GCC generated inefficient code and issued a
bogus warning.
---
 gcc/cfgexpand.c                      |  26 +++-
 gcc/config/s390/s390.c               |   3 +
 gcc/config/s390/s390.h               |   4 +
 gcc/defaults.h                       |   4 +
 gcc/doc/tm.texi                      |   5 +
 gcc/doc/tm.texi.in                   |   2 +
 gcc/explow.c                         | 232 +++++++++++++++++++++++------------
 gcc/explow.h                         |   9 ++
 gcc/target.def                       |   9 ++
 gcc/testsuite/gcc.dg/stack-usage-2.c |   4 +-
 10 files changed, 214 insertions(+), 84 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 1990e10..81a7aac 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1032,7 +1032,9 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
   size_t si, i, j, n = stack_vars_num;
   HOST_WIDE_INT large_size = 0, large_alloc = 0;
   rtx large_base = NULL;
+  rtx large_allocsize = NULL;
   unsigned large_align = 0;
+  bool large_allocation_done = false;
   tree decl;
 
   /* Determine if there are any variables requiring "large" alignment.
@@ -1079,8 +1081,17 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 
       /* If there were any, allocate space.  */
       if (large_size > 0)
-	large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
-						   large_align, true);
+	{
+	  if (targetm.calls.allocate_dynamic_stack_space_in_prologue_p ())
+	    {
+	      large_allocsize = GEN_INT (large_size);
+	      get_dynamic_stack_size (&large_allocsize, 0, large_align, NULL);
+	    }
+	  else
+	    /* Allocate space now.  */
+	    large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
+						       large_align, true);
+	}
     }
 
   for (si = 0; si < n; ++si)
@@ -1166,6 +1177,17 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	  /* Large alignment is only processed in the last pass.  */
 	  if (pred)
 	    continue;
+
+	  if (large_allocsize && ! large_allocation_done)
+	    {
+	      /* Allocate space in the virtual stack vars area in the prologue.
+	       */
+	      HOST_WIDE_INT loffset;
+
+	      loffset = alloc_stack_frame_space (INTVAL (large_allocsize), 1);
+	      large_base = get_dynamic_stack_base (loffset, large_align);
+	      large_allocation_done = true;
+	    }
 	  gcc_assert (large_base != NULL);
 
 	  large_alloc += alignb - 1;
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 40ee2f7..61793ca 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -14096,6 +14096,9 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree ty
 #undef TARGET_ASM_FILE_END
 #define TARGET_ASM_FILE_END s390_asm_file_end
 
+#undef TARGET_ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE_P
+#define TARGET_ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE_P hook_bool_void_true
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-s390.h"
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index a0faf13..073ce5c 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -594,6 +594,10 @@ extern const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER];
 #define STACK_DYNAMIC_OFFSET(FUNDECL) \
   (STACK_POINTER_OFFSET + crtl->outgoing_args_size)
 
+/* Constant size dynamic stack space can be allocated through the function
+   prologue to save the extra instructions to adjust the stack pointer.  */
+#define ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE 1
+
 /* Offset of first parameter from the argument pointer register value.
    We have a fake argument pointer register that points directly to
    the argument area.  */
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 0f1c713..2c4ab7d 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1055,6 +1055,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define STACK_POINTER_OFFSET    0
 #endif
 
+#ifndef ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE_P
+#define ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE_P 0
+#endif
+
 #ifndef LOCAL_REGNO
 #define LOCAL_REGNO(REGNO)  0
 #endif
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index bde808b..4ede2d7 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11568,6 +11568,11 @@ to the stack.  Therefore, this hook should return true in general, but
 false for naked functions.  The default implementation always returns true.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE_P (void)
+This hook indicates whether the target supports allocating runtime aligned
+stack variable with constant size through the function prologue.
+@end deftypefn
+
 @deftypevr {Target Hook} {unsigned HOST_WIDE_INT} TARGET_CONST_ANCHOR
 On some architectures it can take multiple instructions to synthesize
 a constant.  If there is another constant already in a register that
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 0677fc1..8c5c68d 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8201,6 +8201,8 @@ and the associated definitions of those functions.
 
 @hook TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
 
+@hook TARGET_ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE_P
+
 @hook TARGET_CONST_ANCHOR
 
 @hook TARGET_ASAN_SHADOW_OFFSET
diff --git a/gcc/explow.c b/gcc/explow.c
index e6a69e0..88d58d4 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1140,83 +1140,57 @@ record_new_stack_level (void)
     update_sjlj_context ();
 }
 
-/* Return an rtx representing the address of an area of memory dynamically
-   pushed on the stack.
+/* Return an rtx doing runtime alignment to REQUIRED_ALIGN on TARGET.  */
+static rtx
+align_dynamic_address (rtx target, unsigned required_align)
+{
+  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+     but we know it can't.  So add ourselves and then do
+     TRUNC_DIV_EXPR.  */
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
+				       Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+			  gen_int_mode (required_align / BITS_PER_UNIT,
+					Pmode),
+			  NULL_RTX, 1);
+  target = expand_mult (Pmode, target,
+			gen_int_mode (required_align / BITS_PER_UNIT,
+				      Pmode),
+			NULL_RTX, 1);
 
-   Any required stack pointer alignment is preserved.
+  return target;
+}
 
-   SIZE is an rtx representing the size of the area.
+/* Return an rtx through *PSIZE, representing the size of an area of memory to
+   be dynamically pushed on the stack.  The bool return value of this function
+   indicates whether any alignment has been done.
+
+   *PSIZE is an rtx representing the size of the area.
 
    SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
-   parameter may be zero.  If so, a proper value will be extracted 
+   parameter may be zero.  If so, a proper value will be extracted
    from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
 
    REQUIRED_ALIGN is the alignment (in bits) required for the region
    of memory.
 
-   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
-   stack space allocated by the generated code cannot be added with itself
-   in the course of the execution of the function.  It is always safe to
-   pass FALSE here and the following criterion is sufficient in order to
-   pass TRUE: every path in the CFG that starts at the allocation point and
-   loops to it executes the associated deallocation code.  */
-
-rtx
-allocate_dynamic_stack_space (rtx size, unsigned size_align,
-			      unsigned required_align, bool cannot_accumulate)
+   If PSTACK_USAGE_SIZE is not NULL it points to a value that is increased for
+   the additional size returned.  */
+bool
+get_dynamic_stack_size (rtx *psize, unsigned size_align,
+			unsigned required_align,
+			HOST_WIDE_INT *pstack_usage_size)
 {
-  HOST_WIDE_INT stack_usage_size = -1;
-  rtx_code_label *final_label;
-  rtx final_target, target;
   unsigned extra_align = 0;
   bool must_align;
-
-  /* If we're asking for zero bytes, it doesn't matter what we point
-     to since we can't dereference it.  But return a reasonable
-     address anyway.  */
-  if (size == const0_rtx)
-    return virtual_stack_dynamic_rtx;
-
-  /* Otherwise, show we're calling alloca or equivalent.  */
-  cfun->calls_alloca = 1;
-
-  /* If stack usage info is requested, look into the size we are passed.
-     We need to do so this early to avoid the obfuscation that may be
-     introduced later by the various alignment operations.  */
-  if (flag_stack_usage_info)
-    {
-      if (CONST_INT_P (size))
-	stack_usage_size = INTVAL (size);
-      else if (REG_P (size))
-        {
-	  /* Look into the last emitted insn and see if we can deduce
-	     something for the register.  */
-	  rtx_insn *insn;
-	  rtx set, note;
-	  insn = get_last_insn ();
-	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
-	    {
-	      if (CONST_INT_P (SET_SRC (set)))
-		stack_usage_size = INTVAL (SET_SRC (set));
-	      else if ((note = find_reg_equal_equiv_note (insn))
-		       && CONST_INT_P (XEXP (note, 0)))
-		stack_usage_size = INTVAL (XEXP (note, 0));
-	    }
-	}
-
-      /* If the size is not constant, we can't say anything.  */
-      if (stack_usage_size == -1)
-	{
-	  current_function_has_unbounded_dynamic_stack_size = 1;
-	  stack_usage_size = 0;
-	}
-    }
+  rtx size = *psize;
 
   /* Ensure the size is in the proper mode.  */
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
-  /* Adjust SIZE_ALIGN, if needed.  */
   if (CONST_INT_P (size))
     {
       unsigned HOST_WIDE_INT lsb;
@@ -1276,8 +1250,8 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       size = plus_constant (Pmode, size, extra);
       size = force_operand (size, NULL_RTX);
 
-      if (flag_stack_usage_info)
-	stack_usage_size += extra;
+      if (flag_stack_usage_info && pstack_usage_size)
+	*pstack_usage_size += extra;
 
       if (extra && size_align > extra_align)
 	size_align = extra_align;
@@ -1300,13 +1274,93 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     {
       size = round_push (size);
 
-      if (flag_stack_usage_info)
+      if (flag_stack_usage_info && pstack_usage_size)
 	{
 	  int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
-	  stack_usage_size = (stack_usage_size + align - 1) / align * align;
+	  *pstack_usage_size =
+	    (*pstack_usage_size + align - 1) / align * align;
+	}
+    }
+
+  *psize = size;
+
+  return must_align;
+}
+
+/* Return an rtx representing the address of an area of memory dynamically
+   pushed on the stack.
+
+   Any required stack pointer alignment is preserved.
+
+   SIZE is an rtx representing the size of the area.
+
+   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
+   parameter may be zero.  If so, a proper value will be extracted
+   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.
+
+   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
+   stack space allocated by the generated code cannot be added with itself
+   in the course of the execution of the function.  It is always safe to
+   pass FALSE here and the following criterion is sufficient in order to
+   pass TRUE: every path in the CFG that starts at the allocation point and
+   loops to it executes the associated deallocation code.  */
+
+rtx
+allocate_dynamic_stack_space (rtx size, unsigned size_align,
+			      unsigned required_align, bool cannot_accumulate)
+{
+  HOST_WIDE_INT stack_usage_size = -1;
+  rtx_code_label *final_label;
+  rtx final_target, target;
+  bool must_align;
+
+  /* If we're asking for zero bytes, it doesn't matter what we point
+     to since we can't dereference it.  But return a reasonable
+     address anyway.  */
+  if (size == const0_rtx)
+    return virtual_stack_dynamic_rtx;
+
+  /* Otherwise, show we're calling alloca or equivalent.  */
+  cfun->calls_alloca = 1;
+
+  /* If stack usage info is requested, look into the size we are passed.
+     We need to do so this early to avoid the obfuscation that may be
+     introduced later by the various alignment operations.  */
+  if (flag_stack_usage_info)
+    {
+      if (CONST_INT_P (size))
+	stack_usage_size = INTVAL (size);
+      else if (REG_P (size))
+        {
+	  /* Look into the last emitted insn and see if we can deduce
+	     something for the register.  */
+	  rtx_insn *insn;
+	  rtx set, note;
+	  insn = get_last_insn ();
+	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
+	    {
+	      if (CONST_INT_P (SET_SRC (set)))
+		stack_usage_size = INTVAL (SET_SRC (set));
+	      else if ((note = find_reg_equal_equiv_note (insn))
+		       && CONST_INT_P (XEXP (note, 0)))
+		stack_usage_size = INTVAL (XEXP (note, 0));
+	    }
+	}
+
+      /* If the size is not constant, we can't say anything.  */
+      if (stack_usage_size == -1)
+	{
+	  current_function_has_unbounded_dynamic_stack_size = 1;
+	  stack_usage_size = 0;
 	}
     }
 
+  must_align = get_dynamic_stack_size (&size, size_align, required_align,
+				       &stack_usage_size);
+
   target = gen_reg_rtx (Pmode);
 
   /* The size is supposed to be fully adjusted at this point so record it
@@ -1473,23 +1527,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     }
 
   if (must_align)
-    {
-      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-	 but we know it can't.  So add ourselves and then do
-	 TRUNC_DIV_EXPR.  */
-      target = expand_binop (Pmode, add_optab, target,
-			     gen_int_mode (required_align / BITS_PER_UNIT - 1,
-					   Pmode),
-			     NULL_RTX, 1, OPTAB_LIB_WIDEN);
-      target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-			      gen_int_mode (required_align / BITS_PER_UNIT,
-					    Pmode),
-			      NULL_RTX, 1);
-      target = expand_mult (Pmode, target,
-			    gen_int_mode (required_align / BITS_PER_UNIT,
-					  Pmode),
-			    NULL_RTX, 1);
-    }
+    target = align_dynamic_address (target, required_align);
 
   /* Now that we've committed to a return value, mark its alignment.  */
   mark_reg_pointer (target, required_align);
@@ -1499,6 +1537,38 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
 
   return target;
 }
+
+/* Return an rtx representing the address of an area of memory already
+   statically pushed onto the stack in the virtual stack vars area.  (It is
+   assumed that the area is allocated in the function prologue.)
+
+   Any required stack pointer alignment is preserved.
+
+   OFFSET is the offset of the area into the virtual stack vars area.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.  */
+
+rtx
+get_dynamic_stack_base (HOST_WIDE_INT offset, unsigned required_align)
+{
+  rtx target;
+
+  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
+  target = gen_reg_rtx (Pmode);
+  emit_move_insn (target, virtual_stack_vars_rtx);
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (offset, Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = align_dynamic_address (target, required_align);
+
+  /* Now that we've committed to a return value, mark its alignment.  */
+  mark_reg_pointer (target, required_align);
+
+  return target;
+}
 
 /* A front end may want to override GCC's stack checking by providing a
    run-time routine to call to check the stack, so provide a mechanism for
diff --git a/gcc/explow.h b/gcc/explow.h
index 52113db..6a89387 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -87,6 +87,15 @@ extern void record_new_stack_level (void);
 /* Allocate some space on the stack dynamically and return its address.  */
 extern rtx allocate_dynamic_stack_space (rtx, unsigned, unsigned, bool);
 
+/* Calculate the necessary size of a constant dynamic stack allocation from the
+   size of the variable area.  */
+extern bool get_dynamic_stack_size (rtx *, unsigned, unsigned,
+				    HOST_WIDE_INT *);
+
+/* Returns the address of the dynamic stack space without allocating it.  */
+extern rtx get_dynamic_stack_base (HOST_WIDE_INT offset,
+				   unsigned required_align);
+
 /* Emit one stack probe at ADDRESS, an address within the stack.  */
 extern void emit_stack_probe (rtx);
 
diff --git a/gcc/target.def b/gcc/target.def
index b0ad09e..2b30082 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4651,6 +4651,15 @@ false for naked functions.  The default implementation always returns true.",
  bool, (void),
  hook_bool_void_true)
 
+/* Return true if the target supports allocation of runtime aligned stack
+   variables in the prologue.  */
+DEFHOOK
+(allocate_dynamic_stack_space_in_prologue_p,
+ "This hook indicates whether the target supports allocating runtime aligned\n\
+stack variable with constant size through the function prologue.",
+ bool, (void),
+ hook_bool_void_false)
+
 /* Return an rtx for the static chain for FNDECL_OR_TYPE.  If INCOMING_P
    is true, then it should be for the callee; otherwise for the caller.  */
 DEFHOOK
diff --git a/gcc/testsuite/gcc.dg/stack-usage-2.c b/gcc/testsuite/gcc.dg/stack-usage-2.c
index c2527d2..7d246ec 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-2.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-2.c
@@ -17,7 +17,9 @@ int foo2 (void)  /* { dg-warning "stack usage is \[0-9\]* bytes" } */
   return 0;
 }
 
-int foo3 (void) /* { dg-warning "stack usage might be \[0-9\]* bytes" } */
+/* The actual warning depends on whether stack space is allocated dynamically
+   or statically.  */
+int foo3 (void) /* { dg-warning "stack usage (might be)|(is) \[0-9\]* bytes" } */
 {
   char arr[1024] __attribute__((aligned (512)));
   arr[0] = 1;
-- 
2.3.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]