This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, PR50527] Don't assume alignment of vla-related allocas.
- From: Tom de Vries <Tom_deVries at mentor dot com>
- To: Richard Guenther <richard dot guenther at gmail dot com>
- Cc: "gcc-patches >> \"gcc-patches at gcc dot gnu dot org\"" <gcc-patches at gcc dot gnu dot org>
- Date: Sat, 01 Oct 2011 17:46:37 +0200
- Subject: Re: [PATCH, PR50527] Don't assume alignment of vla-related allocas.
- References: <4E82EA3E.4050000@mentor.com> <CAFiYyc1pjDPwZS3-yp+xN7+PPZoQdD9FWAGs2LZ8kxzCvFAtDQ@mail.gmail.com> <4E846F6F.7070002@mentor.com> <CAFiYyc2oW-=1sUqXYtYtPBAcNYjYbf-Edix2ES18BdNW2TwbOg@mail.gmail.com>
On 09/30/2011 03:29 PM, Richard Guenther wrote:
> On Thu, Sep 29, 2011 at 3:15 PM, Tom de Vries <Tom_deVries@mentor.com> wrote:
>> On 09/28/2011 11:53 AM, Richard Guenther wrote:
>>> On Wed, Sep 28, 2011 at 11:34 AM, Tom de Vries <Tom_deVries@mentor.com> wrote:
>>>> Richard,
>>>>
>>>> I got a patch for PR50527.
>>>>
>>>> The patch prevents the alignment of vla-related allocas to be set to
>>>> BIGGEST_ALIGNMENT in ccp. The alignment may turn out smaller after folding
>>>> the alloca.
>>>>
>>>> Bootstrapped and regtested on x86_64.
>>>>
>>>> OK for trunk?
>>>
>>> Hmm. As gfortran with -fstack-arrays uses VLAs it's probably bad that
>>> the vectorizer then will no longer see that the arrays are properly aligned.
>>>
>>> I'm not sure what the best thing to do is here, other than trying to record
>>> the alignment requirement of the VLA somewhere.
>>>
>>> Forcing the alignment of the alloca replacement decl to BIGGEST_ALIGNMENT
>>> has the issue that it will force stack-realignment which isn't free (and the
>>> point was to make the decl cheaper than the alloca). But that might
>>> possibly be the better choice.
>>>
>>> Any other thoughts?
>>
>> How about the approach in this (untested) patch? Using the DECL_ALIGN of the vla
>> for the new array prevents stack realignment for folded vla-allocas, also for
>> large vlas.
>>
>> This will not help in vectorizing large folded vla-allocas, but I think it's not
>> reasonable to expect BIGGEST_ALIGNMENT when writing a vla (although that has
>> been the case up until we started to fold). If you want to trigger vectorization
>> for a vla, you can still use the aligned attribute on the declaration.
>>
>> Still, the unfolded vla-allocas will have BIGGEST_ALIGNMENT, also without using
>> an attribute on the decl. This patch exploits this by setting it at the end of
>> the 3rd pass_ccp, renamed to pass_ccp_last. This is not very effective in
>> propagation though, because although the ptr_info of the lhs is propagated via
>> copy_prop afterwards, it's not propagated anymore via ccp.
>>
>> Another way to do this would be to set BIGGEST_ALIGNMENT at the end of ccp2 and
>> not fold during ccp3.
>
> Ugh, somehow I like this the least ;)
>
> How about lowering VLAs to
>
> p = __builtin_alloca (...);
> p = __builtin_assume_aligned (p, DECL_ALIGN (vla));
>
> and not assume anything for alloca itself if it feeds a
> __builtin_assume_aligned?
>
> Or rather introduce a __builtin_alloca_with_align () and for VLAs do
>
> p = __builtin_alloca_with_align (..., DECL_ALIGN (vla));
>
> that's less awkward to use?
>
> Sorry for not having a clear plan here ;)
>
Using assume_aligned is a more orthogonal way to represent this in gimple, but
indeed harder to use.
Another possibility is to add a 'tree vla_decl' field to struct
gimple_statement_call, which is probably the easiest to implement.
But I think __builtin_alloca_with_align might have a use beyond vlas, so I
decided to try this one. Attached patch implements my first stab at this (now
testing on x86_64).
Is this an acceptable approach?
Thanks,
- Tom
> Richard.
>
Thanks,
- Tom
Index: gcc/tree.c
===================================================================
--- gcc/tree.c (revision 179210)
+++ gcc/tree.c (working copy)
@@ -9483,9 +9483,18 @@ build_common_builtin_nodes (void)
"alloca", ECF_MALLOC | ECF_NOTHROW | ECF_LEAF);
}
+ ftype = build_function_type_list (ptr_type_node, size_type_node,
+ size_type_node, NULL_TREE);
+ local_define_builtin ("__builtin_alloca_with_align", ftype,
+ BUILT_IN_ALLOCA_WITH_ALIGN, "alloca_with_align",
+ ECF_MALLOC | ECF_NOTHROW | ECF_LEAF);
+
/* If we're checking the stack, `alloca' can throw. */
if (flag_stack_check)
- TREE_NOTHROW (built_in_decls[BUILT_IN_ALLOCA]) = 0;
+ {
+ TREE_NOTHROW (built_in_decls[BUILT_IN_ALLOCA]) = 0;
+ TREE_NOTHROW (built_in_decls[BUILT_IN_ALLOCA_WITH_ALIGN]) = 0;
+ }
ftype = build_function_type_list (void_type_node,
ptr_type_node, ptr_type_node,
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h (revision 179210)
+++ gcc/tree-pass.h (working copy)
@@ -389,6 +389,7 @@ extern struct gimple_opt_pass pass_iv_op
extern struct gimple_opt_pass pass_tree_loop_done;
extern struct gimple_opt_pass pass_ch;
extern struct gimple_opt_pass pass_ccp;
+extern struct gimple_opt_pass pass_ccp_last;
extern struct gimple_opt_pass pass_phi_only_cprop;
extern struct gimple_opt_pass pass_build_ssa;
extern struct gimple_opt_pass pass_build_alias;
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c (revision 179210)
+++ gcc/builtins.c (working copy)
@@ -5304,6 +5304,7 @@ expand_builtin (tree exp, rtx target, rt
&& !called_as_built_in (fndecl)
&& DECL_ASSEMBLER_NAME_SET_P (fndecl)
&& fcode != BUILT_IN_ALLOCA
+ && fcode != BUILT_IN_ALLOCA_WITH_ALIGN
&& fcode != BUILT_IN_FREE)
return expand_call (exp, target, ignore);
@@ -5559,6 +5560,7 @@ expand_builtin (tree exp, rtx target, rt
return XEXP (DECL_RTL (DECL_RESULT (current_function_decl)), 0);
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
/* If the allocation stems from the declaration of a variable-sized
object, it cannot accumulate. */
target = expand_builtin_alloca (exp, CALL_ALLOCA_FOR_VAR_P (exp));
@@ -13561,6 +13563,7 @@ is_inexpensive_builtin (tree decl)
{
case BUILT_IN_ABS:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
case BUILT_IN_CLZ:
Index: gcc/tree-ssa-ccp.c
===================================================================
--- gcc/tree-ssa-ccp.c (revision 179210)
+++ gcc/tree-ssa-ccp.c (working copy)
@@ -167,8 +167,13 @@ typedef struct prop_value_d prop_value_t
doing the store). */
static prop_value_t *const_val;
+/* Indicates whether we're in pass_ccp_last. */
+static bool ccp_last;
+
static void canonicalize_float_value (prop_value_t *);
static bool ccp_fold_stmt (gimple_stmt_iterator *);
+static bool fold_builtin_alloca_with_align_p (gimple);
+static bool builtin_alloca_with_align_p (gimple);
/* Dump constant propagation value VAL to file OUTF prefixed by PREFIX. */
@@ -813,6 +818,18 @@ ccp_finalize (void)
|| !POINTER_TYPE_P (TREE_TYPE (name)))
continue;
+ if (ccp_last && !SSA_NAME_IS_DEFAULT_DEF (name))
+ {
+ gimple def = SSA_NAME_DEF_STMT (name);
+ if (builtin_alloca_with_align_p (def)
+ && !fold_builtin_alloca_with_align_p (def))
+ {
+ pi = get_ptr_info (name);
+ pi->align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
+ continue;
+ }
+ }
+
val = get_value (name);
if (val->lattice_val != CONSTANT
|| TREE_CODE (val->value) != INTEGER_CST)
@@ -1492,6 +1509,7 @@ evaluate_stmt (gimple stmt)
tree simplified = NULL_TREE;
ccp_lattice_t likelyvalue = likely_value (stmt);
bool is_constant = false;
+ unsigned int align;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1632,10 +1650,14 @@ evaluate_stmt (gimple stmt)
break;
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
+ align = (builtin_alloca_with_align_p (stmt)
+ ? TREE_INT_CST_LOW (gimple_call_arg (stmt, 1))
+ : BIGGEST_ALIGNMENT);
val.lattice_val = CONSTANT;
val.value = build_int_cst (TREE_TYPE (gimple_get_lhs (stmt)), 0);
val.mask = shwi_to_double_int
- (~(((HOST_WIDE_INT) BIGGEST_ALIGNMENT)
+ (~(((HOST_WIDE_INT) align)
/ BITS_PER_UNIT - 1));
break;
@@ -1685,27 +1707,45 @@ evaluate_stmt (gimple stmt)
return val;
}
-/* Detects a vla-related alloca with a constant argument. Declares fixed-size
- array and return the address, if found, otherwise returns NULL_TREE. */
+/* Return true if stmt is a call to __builtin_alloca_with_align. */
-static tree
-fold_builtin_alloca_for_var (gimple stmt)
+static bool
+builtin_alloca_with_align_p (gimple stmt)
{
- unsigned HOST_WIDE_INT size, threshold, n_elem;
- tree lhs, arg, block, var, elem_type, array_type;
- unsigned int align;
+ tree fndecl;
+ if (!is_gimple_call (stmt))
+ return false;
+
+ fndecl = gimple_call_fndecl (stmt);
+
+ return (fndecl != NULL_TREE
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA_WITH_ALIGN);
+}
+
+/* Returns true if STMT is a call to __builtin_alloca_with_align that will be
+   folded.  */
+
+static bool
+fold_builtin_alloca_with_align_p (gimple stmt)
+{
+ unsigned HOST_WIDE_INT size, threshold;
+ tree lhs, arg, block;
+
+ if (!builtin_alloca_with_align_p (stmt)
+ || !gimple_call_alloca_for_var_p (stmt))
+ return false;
/* Get lhs. */
lhs = gimple_call_lhs (stmt);
if (lhs == NULL_TREE)
- return NULL_TREE;
+ return false;
/* Detect constant argument. */
arg = get_constant_value (gimple_call_arg (stmt, 0));
if (arg == NULL_TREE
|| TREE_CODE (arg) != INTEGER_CST
|| !host_integerp (arg, 1))
- return NULL_TREE;
+ return false;
size = TREE_INT_CST_LOW (arg);
@@ -1718,28 +1758,49 @@ fold_builtin_alloca_for_var (gimple stmt
&& TREE_CODE (BLOCK_SUPERCONTEXT (block)) == FUNCTION_DECL))
threshold /= 10;
if (size > threshold)
- return NULL_TREE;
+ return false;
+
+ return true;
+}
+
+/* Detects a __builtin_alloca_with_align with constant size argument. Declares
+ fixed-size array and returns the address, if found, otherwise returns
+ NULL_TREE. */
+
+static tree
+fold_builtin_alloca_with_align (gimple stmt)
+{
+ unsigned HOST_WIDE_INT size, n_elem;
+ tree lhs, arg, var, elem_type, array_type;
+ struct ptr_info_def *pi;
+
+ if (!fold_builtin_alloca_with_align_p (stmt))
+ return NULL;
+
+ lhs = gimple_call_lhs (stmt);
+ pi = SSA_NAME_PTR_INFO (lhs);
+ arg = get_constant_value (gimple_call_arg (stmt, 0));
+ size = TREE_INT_CST_LOW (arg);
/* Declare array. */
elem_type = build_nonstandard_integer_type (BITS_PER_UNIT, 1);
n_elem = size * 8 / BITS_PER_UNIT;
- align = MIN (size * 8, BIGGEST_ALIGNMENT);
- if (align < BITS_PER_UNIT)
- align = BITS_PER_UNIT;
array_type = build_array_type_nelts (elem_type, n_elem);
var = create_tmp_var (array_type, NULL);
- DECL_ALIGN (var) = align;
- {
- struct ptr_info_def *pi = SSA_NAME_PTR_INFO (lhs);
- if (pi != NULL && !pi->pt.anything)
- {
- bool singleton_p;
- unsigned uid;
- singleton_p = pt_solution_singleton_p (&pi->pt, &uid);
- gcc_assert (singleton_p);
- SET_DECL_PT_UID (var, uid);
- }
- }
+ DECL_ALIGN (var) = TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
+
+  /* Make sure DECL_ALIGN guarantees the alignment propagated from the ptr_info
+     of lhs. */
+ gcc_checking_assert (DECL_ALIGN (var) >= pi->align);
+
+ if (!pi->pt.anything)
+ {
+ bool singleton_p;
+ unsigned uid;
+ singleton_p = pt_solution_singleton_p (&pi->pt, &uid);
+ gcc_assert (singleton_p);
+ SET_DECL_PT_UID (var, uid);
+ }
/* Fold alloca to the address of the array. */
return fold_convert (TREE_TYPE (lhs), build_fold_addr_expr (var));
@@ -1816,9 +1877,9 @@ ccp_fold_stmt (gimple_stmt_iterator *gsi
/* The heuristic of fold_builtin_alloca_for_var differs before and after
inlining, so we don't require the arg to be changed into a constant
for folding, but just to be constant. */
- if (gimple_call_alloca_for_var_p (stmt))
+ if (builtin_alloca_with_align_p (stmt))
{
- tree new_rhs = fold_builtin_alloca_for_var (stmt);
+ tree new_rhs = fold_builtin_alloca_with_align (stmt);
if (new_rhs)
{
bool res = update_call_from_tree (gsi, new_rhs);
@@ -2030,6 +2091,20 @@ do_ssa_ccp (void)
return 0;
}
+/* Last invocation of SSA Conditional Constant Propagation.
+ At the end of the last invocation, we know which builtin_alloca_with_align
+ will not be folded, and we can assign BIGGEST_ALIGNMENT for those in
+ ccp_finalize. */
+
+static unsigned int
+do_ssa_ccp_last (void)
+{
+ unsigned int res;
+ ccp_last = true;
+ res = do_ssa_ccp ();
+ ccp_last = false;
+ return res;
+}
static bool
gate_ccp (void)
@@ -2058,6 +2133,25 @@ struct gimple_opt_pass pass_ccp =
}
};
+struct gimple_opt_pass pass_ccp_last =
+{
+ {
+ GIMPLE_PASS,
+ "ccp3", /* name */
+ gate_ccp, /* gate */
+ do_ssa_ccp_last, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_CCP, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_verify_ssa
+ | TODO_verify_stmts | TODO_ggc_collect/* todo_flags_finish */
+ }
+};
/* Try to optimize out __builtin_stack_restore. Optimize it out
@@ -2093,7 +2187,8 @@ optimize_stack_restore (gimple_stmt_iter
if (!callee
|| DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
/* All regular builtins are ok, just obviously not alloca. */
- || DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA)
+ || DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA
+ || DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA_WITH_ALIGN)
return NULL_TREE;
if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE)
Index: gcc/builtins.def
===================================================================
--- gcc/builtins.def (revision 179210)
+++ gcc/builtins.def (working copy)
@@ -616,6 +616,7 @@ DEF_LIB_BUILTIN (BUILT_IN_ABORT,
DEF_LIB_BUILTIN (BUILT_IN_ABS, "abs", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_AGGREGATE_INCOMING_ADDRESS, "aggregate_incoming_address", BT_FN_PTR_VAR, ATTR_LEAF_LIST)
DEF_EXT_LIB_BUILTIN (BUILT_IN_ALLOCA, "alloca", BT_FN_PTR_SIZE, ATTR_MALLOC_NOTHROW_LEAF_LIST)
+DEF_BUILTIN_STUB (BUILT_IN_ALLOCA_WITH_ALIGN, "alloca_with_align")
DEF_GCC_BUILTIN (BUILT_IN_APPLY, "apply", BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE, ATTR_NULL)
DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
Index: gcc/ipa-pure-const.c
===================================================================
--- gcc/ipa-pure-const.c (revision 179210)
+++ gcc/ipa-pure-const.c (working copy)
@@ -437,6 +437,7 @@ special_builtin_state (enum pure_const_s
case BUILT_IN_RETURN:
case BUILT_IN_UNREACHABLE:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
case BUILT_IN_STACK_SAVE:
case BUILT_IN_STACK_RESTORE:
case BUILT_IN_EH_POINTER:
Index: gcc/tree-ssa-alias.c
===================================================================
--- gcc/tree-ssa-alias.c (revision 179210)
+++ gcc/tree-ssa-alias.c (working copy)
@@ -1231,6 +1231,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
case BUILT_IN_MALLOC:
case BUILT_IN_CALLOC:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
case BUILT_IN_STACK_SAVE:
case BUILT_IN_STACK_RESTORE:
case BUILT_IN_MEMSET:
@@ -1513,6 +1514,7 @@ call_may_clobber_ref_p_1 (gimple call, a
return false;
case BUILT_IN_STACK_SAVE:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
case BUILT_IN_ASSUME_ALIGNED:
return false;
/* Freeing memory kills the pointed-to memory. More importantly
Index: gcc/function.c
===================================================================
--- gcc/function.c (revision 179210)
+++ gcc/function.c (working copy)
@@ -3638,8 +3638,10 @@ gimplify_parameters (void)
DECL_IGNORED_P (addr) = 0;
local = build_fold_indirect_ref (addr);
- t = built_in_decls[BUILT_IN_ALLOCA];
- t = build_call_expr (t, 1, DECL_SIZE_UNIT (parm));
+ t = built_in_decls[BUILT_IN_ALLOCA_WITH_ALIGN];
+ t = build_call_expr (t, 2, DECL_SIZE_UNIT (parm),
+ build_int_cstu (size_type_node,
+ DECL_ALIGN (parm)));
/* The call has been built for a variable-sized object. */
CALL_ALLOCA_FOR_VAR_P (t) = 1;
t = fold_convert (ptr_type, t);
Index: gcc/gimplify.c
===================================================================
--- gcc/gimplify.c (revision 179210)
+++ gcc/gimplify.c (working copy)
@@ -1329,8 +1329,9 @@ gimplify_vla_decl (tree decl, gimple_seq
SET_DECL_VALUE_EXPR (decl, t);
DECL_HAS_VALUE_EXPR_P (decl) = 1;
- t = built_in_decls[BUILT_IN_ALLOCA];
- t = build_call_expr (t, 1, DECL_SIZE_UNIT (decl));
+ t = built_in_decls[BUILT_IN_ALLOCA_WITH_ALIGN];
+ t = build_call_expr (t, 2, DECL_SIZE_UNIT (decl),
+ build_int_cstu (size_type_node, DECL_ALIGN (decl)));
/* The call has been built for a variable-sized object. */
CALL_ALLOCA_FOR_VAR_P (t) = 1;
t = fold_convert (ptr_type, t);
Index: gcc/cfgexpand.c
===================================================================
--- gcc/cfgexpand.c (revision 179210)
+++ gcc/cfgexpand.c (working copy)
@@ -1858,7 +1858,8 @@ expand_call_stmt (gimple stmt)
CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt);
if (decl
&& DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
- && DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA)
+ && (DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA
+ || DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA_WITH_ALIGN))
CALL_ALLOCA_FOR_VAR_P (exp) = gimple_call_alloca_for_var_p (stmt);
else
CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt);
Index: gcc/tree-mudflap.c
===================================================================
--- gcc/tree-mudflap.c (revision 179210)
+++ gcc/tree-mudflap.c (working copy)
@@ -969,7 +969,9 @@ mf_xform_statements (void)
case GIMPLE_CALL:
{
tree fndecl = gimple_call_fndecl (s);
- if (fndecl && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA))
+ if (fndecl && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA
+ || (DECL_FUNCTION_CODE (fndecl)
+ == BUILT_IN_ALLOCA_WITH_ALIGN)))
gimple_call_set_cannot_inline (s, true);
}
break;
Index: gcc/tree-ssa-dce.c
===================================================================
--- gcc/tree-ssa-dce.c (revision 179210)
+++ gcc/tree-ssa-dce.c (working copy)
@@ -308,6 +308,7 @@ mark_stmt_if_obviously_necessary (gimple
case BUILT_IN_MALLOC:
case BUILT_IN_CALLOC:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
return;
default:;
@@ -639,6 +640,7 @@ mark_all_reaching_defs_necessary_1 (ao_r
case BUILT_IN_MALLOC:
case BUILT_IN_CALLOC:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
case BUILT_IN_FREE:
return false;
@@ -890,6 +892,8 @@ propagate_necessity (struct edge_list *e
|| DECL_FUNCTION_CODE (callee) == BUILT_IN_FREE
|| DECL_FUNCTION_CODE (callee) == BUILT_IN_VA_END
|| DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA
+ || (DECL_FUNCTION_CODE (callee)
+ == BUILT_IN_ALLOCA_WITH_ALIGN)
|| DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_SAVE
|| DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE
|| DECL_FUNCTION_CODE (callee) == BUILT_IN_ASSUME_ALIGNED))
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c (revision 179210)
+++ gcc/varasm.c (working copy)
@@ -2104,7 +2104,8 @@ incorporeal_function_p (tree decl)
const char *name;
if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
- && DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA)
+ && (DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA
+ || DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA_WITH_ALIGN))
return true;
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
Index: gcc/tree-object-size.c
===================================================================
--- gcc/tree-object-size.c (revision 179210)
+++ gcc/tree-object-size.c (working copy)
@@ -411,6 +411,7 @@ alloc_object_size (const_gimple call, in
/* fall through */
case BUILT_IN_MALLOC:
case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
arg1 = 0;
default:
break;
Index: gcc/gimple.c
===================================================================
--- gcc/gimple.c (revision 179210)
+++ gcc/gimple.c (working copy)
@@ -374,7 +374,8 @@ gimple_build_call_from_tree (tree t)
gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t));
if (fndecl
&& DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
- && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA)
+ && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA
+ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA_WITH_ALIGN))
gimple_call_set_alloca_for_var (call, CALL_ALLOCA_FOR_VAR_P (t));
else
gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t));
Index: gcc/passes.c
===================================================================
--- gcc/passes.c (revision 179210)
+++ gcc/passes.c (working copy)
@@ -1321,7 +1321,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_forwprop);
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_object_sizes);
- NEXT_PASS (pass_ccp);
+ NEXT_PASS (pass_ccp_last);
NEXT_PASS (pass_copy_prop);
NEXT_PASS (pass_cse_sincos);
NEXT_PASS (pass_optimize_bswap);
Index: gcc/testsuite/gcc.dg/pr50527.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/pr50527.c (revision 0)
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-Os --param large-stack-frame=30" } */
+
+extern void abort (void);
+
+void __attribute__((noinline))
+bar (char *a)
+{
+}
+
+void __attribute__((noinline))
+foo (char *a, int b)
+{
+}
+
+void __attribute__((noinline))
+test_align (char *p, int aligned, unsigned int mask)
+{
+ int p_aligned = ((unsigned long int)p & mask) == 0;
+ if (aligned != p_aligned)
+ abort ();
+}
+
+int
+main ()
+{
+ const int kIterations = 4;
+ char results[kIterations];
+ int i;
+ unsigned int mask;
+
+ mask = 0xf;
+ test_align (results, ((unsigned long int)results & mask) == 0, mask);
+ mask = 0x7;
+ test_align (results, ((unsigned long int)results & mask) == 0, mask);
+ mask = 0x3;
+ test_align (results, ((unsigned long int)results & mask) == 0, mask);
+ mask = 0x1;
+ test_align (results, ((unsigned long int)results & mask) == 0, mask);
+
+ bar (results);
+ for (i = 0; i < kIterations; i++)
+ foo ("%d ", results[i]);
+
+ return 0;
+}