This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Remember/restore ALLOCA_FOR_VAR_P over tuples


Hello,

as my patch for stack-arrays in Fortran reveals, we have a problem: VLA 
objects basically prevent all inlining from happening.  They're transformed 
into alloca calls, and those are considered to disable inlining.  The 
(correct) fear is that inlining a bare alloca call into a loop leads to 
unbounded stack growth.

But the situation is different for alloca calls emitted for dealing with 
VLA objects.  They always are wrapped with stack_save/stack_restore calls.  
Inlining such regions always is okay, even into loops.  The stack space 
usage will be exactly the same at runtime.

We have a flag for this already on the CALL_EXPR.  But we don't retain it 
when converting to GIMPLE tuples, and hence we also don't look at it in 
inline_forbidden_p_stmt.  This patch fixes both.  (The strange testing of 
builtin-ness is needed because in CALL_EXPR the ALLOCA_FOR_VAR_P and 
CALL_FROM_THUNK_P flags are overloaded.)

(This fixes the regression of fatigue with the stack-arrays patch)

regstrapping on x86_64-linux in progress, okay for trunk?


Ciao,
Michael.

	* gimple.h (enum gf_mask): Add GF_CALL_ALLOCA_FOR_VAR.
	(gimple_call_set_alloca_for_var): New inline function.
	(gimple_call_alloca_for_var_p): Ditto.
	* gimple.c (gimple_build_call_from_tree): Remember ALLOCA_FOR_VAR_P
	state.
	* cfgexpand.c (expand_call_stmt): Restore ALLOCA_FOR_VAR_P state.

	* tree-inline.c (inline_forbidden_p_stmt): Don't reject alloca
	calls if they were for VLA objects.

Index: cfgexpand.c
===================================================================
*** cfgexpand.c	(revision 172431)
--- cfgexpand.c	(working copy)
*************** expand_call_stmt (gimple stmt)
*** 1873,1879 ****
  
    CALL_EXPR_TAILCALL (exp) = gimple_call_tail_p (stmt);
    CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt);
!   CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt);
    CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt);
    CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt);
    SET_EXPR_LOCATION (exp, gimple_location (stmt));
--- 1873,1884 ----
  
    CALL_EXPR_TAILCALL (exp) = gimple_call_tail_p (stmt);
    CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt);
!   if (decl
!       && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
!       && DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA)
!     ALLOCA_FOR_VAR_P (exp) = gimple_call_alloca_for_var_p (stmt);
!   else
!     CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt);
    CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt);
    CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt);
    SET_EXPR_LOCATION (exp, gimple_location (stmt));
Index: tree-inline.c
===================================================================
*** tree-inline.c	(revision 172431)
--- tree-inline.c	(working copy)
*************** inline_forbidden_p_stmt (gimple_stmt_ite
*** 2997,3004 ****
  	 this may change program's memory overhead drastically when the
  	 function using alloca is called in loop.  In GCC present in
  	 SPEC2000 inlining into schedule_block cause it to require 2GB of
! 	 RAM instead of 256MB.  */
        if (gimple_alloca_call_p (stmt)
  	  && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))
  	{
  	  inline_forbidden_reason
--- 2997,3007 ----
  	 this may change program's memory overhead drastically when the
  	 function using alloca is called in loop.  In GCC present in
  	 SPEC2000 inlining into schedule_block cause it to require 2GB of
! 	 RAM instead of 256MB.  Don't do so for alloca calls emitted for
! 	 VLA objects as those can't cause unbounded growth (they're always
! 	 wrapped inside stack_save/stack_restore regions).  */
        if (gimple_alloca_call_p (stmt)
+ 	  && !gimple_call_alloca_for_var_p (stmt)
  	  && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))
  	{
  	  inline_forbidden_reason
Index: gimple.c
===================================================================
*** gimple.c	(revision 172431)
--- gimple.c	(working copy)
*************** gimple_build_call_from_tree (tree t)
*** 303,309 ****
    gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t));
    gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t));
    gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t));
!   gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t));
    gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t));
    gimple_call_set_nothrow (call, TREE_NOTHROW (t));
    gimple_set_no_warning (call, TREE_NO_WARNING (t));
--- 303,314 ----
    gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t));
    gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t));
    gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t));
!   if (fndecl
!       && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
!       && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA)
!     gimple_call_set_alloca_for_var (call, ALLOCA_FOR_VAR_P (t));
!   else
!     gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t));
    gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t));
    gimple_call_set_nothrow (call, TREE_NOTHROW (t));
    gimple_set_no_warning (call, TREE_NO_WARNING (t));
Index: gimple.h
===================================================================
*** gimple.h	(revision 172431)
--- gimple.h	(working copy)
*************** enum gf_mask {
*** 102,107 ****
--- 102,108 ----
      GF_CALL_TAILCALL		= 1 << 3,
      GF_CALL_VA_ARG_PACK		= 1 << 4,
      GF_CALL_NOTHROW		= 1 << 5,
+     GF_CALL_ALLOCA_FOR_VAR	= 1 << 6,
      GF_OMP_PARALLEL_COMBINED	= 1 << 0,
  
      /* True on an GIMPLE_OMP_RETURN statement if the return does not require
*************** gimple_call_nothrow_p (gimple s)
*** 2329,2334 ****
--- 2330,2358 ----
    return (gimple_call_flags (s) & ECF_NOTHROW) != 0;
  }
  
+ /* If FOR_VAR is true, GIMPLE_CALL S is a call to builtin_alloca that
+    is known to be emitted for VLA objects.  Those are wrapped by
+    stack_save/stack_restore calls and hence can't lead to unbounded
+    stack growth even when they occur in loops.  */
+ 
+ static inline void
+ gimple_call_set_alloca_for_var (gimple s, bool for_var)
+ {
+   GIMPLE_CHECK (s, GIMPLE_CALL);
+   if (for_var)
+     s->gsbase.subcode |= GF_CALL_ALLOCA_FOR_VAR;
+   else
+     s->gsbase.subcode &= ~GF_CALL_ALLOCA_FOR_VAR;
+ }
+ 
+ /* Return true if S is a call to builtin_alloca emitted for VLA objects.  */
+ 
+ static inline bool
+ gimple_call_alloca_for_var_p (gimple s)
+ {
+   GIMPLE_CHECK (s, GIMPLE_CALL);
+   return (s->gsbase.subcode & GF_CALL_ALLOCA_FOR_VAR) != 0;
+ }
  
  /* Copy all the GF_CALL_* flags from ORIG_CALL to DEST_CALL.  */
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]