From 0a1c58a25ab5df1a3e4596024774641ebae8be2a Mon Sep 17 00:00:00 2001 From: Jeffrey A Law Date: Fri, 17 Mar 2000 14:40:45 -0800 Subject: [PATCH] Sibling call optimizations. Co-Authored-By: Richard Henderson From-SVN: r32612 --- gcc/ChangeLog | 54 ++ gcc/Makefile.in | 5 +- gcc/calls.c | 1499 ++++++++++++++++++++++++++++------------------- gcc/final.c | 6 +- gcc/flow.c | 25 +- gcc/function.c | 444 +++++++++----- gcc/genflags.c | 8 +- gcc/integrate.c | 294 +++++++--- gcc/jump.c | 40 +- gcc/rtl.c | 12 +- gcc/rtl.def | 7 +- gcc/rtl.h | 22 +- gcc/sibcall.c | 578 ++++++++++++++++++ gcc/toplev.c | 11 +- gcc/tree.c | 79 +++ gcc/tree.h | 8 + 16 files changed, 2232 insertions(+), 860 deletions(-) create mode 100644 gcc/sibcall.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b35f1aae9436..3ac1d635426b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,57 @@ +2000-03-17 Jeff Law + Richard Henderson + + * Makefile.in (OBJS): Add sibcall.o. + (sibcall.o): New. + * sibcall.c: New file. + * calls.c (FUNCTION_OK_FOR_SIBCALL): Provide default. + (ECF_IS_CONST, ECF_NOTHROW, ECF_SIBCALL): New. + (emit_call_1): Replace `is_const' and `nothrow' with `ecf_flags'. + Emit sibcall patterns when requested. Update all callers. + (expand_call): Generate CALL_PLACEHOLDER insns when tail call + elimination seems feasable. + * final.c (leaf_function_p): Sibling calls don't discount being + a leaf function. + * flow.c (HAVE_sibcall_epilogue): Provide default. + (find_basic_blocks_1): Sibling calls don't throw. + (make_edges): Make edge from sibling call to EXIT. + (propagate_block): Don't remove sibcall_epilogue insns. + * function.c (prologue, epilogue): Turn into varrays. Update all uses. + (sibcall_epilogue): New. + (fixup_var_refs): Scan CALL_PLACEHOLDER sub-sequences. + (identify_blocks_1): Likewise. Break out from ... + (identify_blocks): ... here. + (reorder_blocks_1): Scan CALL_PLACEHOLDER. Break out from ... + (reorder_blocks): ... here. + (init_function_for_compilation): Zap prologue/epilogue as varrays. + (record_insns): Extend a varray instead of mallocing new memory. + (contains): Read a varray not array of ints. + (sibcall_epilogue_contains): New. + (thread_prologue_and_epilogue_insns): Emit and record + sibcall_epilogue patterns. + (init_function_once): Allocate prologue/epilogue varrays. + * genflags.c (gen_insn): Treat sibcall patterns as calls. + * integrate.c (save_parm_insns): Recurse on CALL_PLACEHOLDER patterns. + Broken out from ... + (save_for_inline_nocopy): ... here. + (copy_insn_list): Recurse on CALL_PLACEHOLDER patterns. + Broken out from ... + (expand_inline_function): ... here. + (copy_rtx_and_substitute): Handle NOTE_INSN_DELETED_LABEL. + (subst_constants): Handle 'n' formats. + * jump.c (jump_optimize_minimal): New. + (jump_optimize_1): New arg `minimal'; update callers. Elide most + optimizations if it's set. + * rtl.c (copy_rtx): Do copy jump & call for insns. + * rtl.h (struct rtx_def): Document use of jump and call for insns. + (SIBLING_CALL_P): New. + (sibcall_use_t): New. + * toplev.c (rest_of_compilation): Do init_EXPR_INSN_LIST_cache earlier. + Invoke optimize_sibling_and_tail_recursive_calls. + * tree.c (lang_safe_for_unsave): New. + (safe_for_unsave): New. + * tree.h (lang_safe_for_unsave, safe_for_unsave): Declare. 
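A note on terminology for readers of this patch: a "sibling call" is a call in tail position, i.e. one whose value (if any) is immediately returned by the caller, made to a function that can reuse the caller's stack frame and incoming argument area. Such a call can be emitted as a jump, so the callee returns directly to the caller's caller. "Tail recursion" is the special case where the callee is the current function itself; that case can become a plain jump back to the function entry. A minimal C illustration of the calls this patch targets (foo and bar are hypothetical, not taken from the patch):

    extern int bar (int);

    int
    foo (int x)
    {
      /* Tail position: bar's value is foo's value and nothing in foo
         is live after the call.  With sibcall optimization, foo's
         epilogue runs and control jumps to bar; bar then returns
         straight to foo's caller, and foo's frame is never rebuilt.  */
      return bar (x + 1);
    }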
+ 2000-03-17 Mark Mitchell * objc/objc-act.c (encode_method_prototype): Pass types, not diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b41eebb44321..51ff35f72626 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -675,7 +675,8 @@ OBJS = diagnostic.o \ insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o lcm.o \ profile.o insn-attrtab.o $(out_object_file) $(EXTRA_OBJS) convert.o \ mbchar.o dyn-string.o splay-tree.o graph.o sbitmap.o resource.o hash.o \ - predict.o lists.o ggc-common.o $(GGC) simplify-rtx.o ssa.o bb-reorder.o + predict.o lists.o ggc-common.o $(GGC) simplify-rtx.o ssa.o bb-reorder.o \ + sibcall.o # GEN files are listed separately, so they can be built before doing parallel # makes for cc1 or cc1plus. Otherwise sequent parallel make attempts to load @@ -1562,6 +1563,8 @@ cse.o : cse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h flags.h \ gcse.o : gcse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h \ flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) \ function.h output.h toplev.h +sibcall.o : sibcall.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) function.h \ + hard-reg-set.h flags.h insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) resource.o : resource.c $(CONFIG_H) $(RTL_H) hard-reg-set.h system.h \ $(BASIC_BLOCK_H) $(REGS_H) flags.h output.h resource.h function.h toplev.h \ insn-attr.h diff --git a/gcc/calls.c b/gcc/calls.c index 6a4c1484ffc7..21feeed39ac1 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -32,6 +32,10 @@ Boston, MA 02111-1307, USA. */ #include "output.h" #include "tm_p.h" +#if !defined FUNCTION_OK_FOR_SIBCALL +#define FUNCTION_OK_FOR_SIBCALL(DECL) 1 +#endif + #if !defined PREFERRED_STACK_BOUNDARY && defined STACK_BOUNDARY #define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY #endif @@ -132,9 +136,13 @@ int stack_arg_under_construction; static int calls_function PARAMS ((tree, int)); static int calls_function_1 PARAMS ((tree, int)); + +#define ECF_IS_CONST 1 +#define ECF_NOTHROW 2 +#define ECF_SIBCALL 4 static void emit_call_1 PARAMS ((rtx, tree, tree, HOST_WIDE_INT, HOST_WIDE_INT, HOST_WIDE_INT, rtx, - rtx, int, rtx, int, int)); + rtx, int, rtx, int)); static void precompute_register_parameters PARAMS ((int, struct arg_data *, int *)); @@ -384,7 +392,7 @@ prepare_call_address (funexp, fndecl, call_fusage, reg_parm_seen) static void emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, struct_value_size, next_arg_reg, valreg, old_inhibit_defer_pop, - call_fusage, is_const, nothrow) + call_fusage, ecf_flags) rtx funexp; tree fndecl ATTRIBUTE_UNUSED; tree funtype ATTRIBUTE_UNUSED; @@ -395,7 +403,7 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, rtx valreg; int old_inhibit_defer_pop; rtx call_fusage; - int is_const, nothrow; + int ecf_flags; { rtx rounded_stack_size_rtx = GEN_INT (rounded_stack_size); #if defined (HAVE_call) && defined (HAVE_call_value) @@ -414,6 +422,33 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, funexp = memory_address (FUNCTION_MODE, funexp); #ifndef ACCUMULATE_OUTGOING_ARGS +#if defined (HAVE_sibcall_pop) && defined (HAVE_sibcall_value_pop) + if ((ecf_flags & ECF_SIBCALL) + && HAVE_sibcall_pop && HAVE_sibcall_value_pop + && (RETURN_POPS_ARGS (fndecl, funtype, stack_size) > 0 + || stack_size == 0)) + { + rtx n_pop = GEN_INT (RETURN_POPS_ARGS (fndecl, funtype, stack_size)); + rtx pat; + + /* If this subroutine pops its own args, record that in the call insn + if possible, for the sake of frame pointer elimination. 
*/ + if (valreg) + pat = gen_sibcall_value_pop (valreg, + gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, + n_pop); + else + pat = gen_sibcall_pop (gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, n_pop); + + emit_call_insn (pat); + already_popped = 1; + } + else +#endif + #if defined (HAVE_call_pop) && defined (HAVE_call_value_pop) /* If the target has "call" or "call_value" insns, then prefer them if no arguments are actually popped. If the target does not have "call" or "call_value" insns, then we must use the popping versions even if the call has no arguments to pop. */ @@ -447,6 +482,23 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, #endif #endif +#if defined (HAVE_sibcall) && defined (HAVE_sibcall_value) + if ((ecf_flags & ECF_SIBCALL) + && HAVE_sibcall && HAVE_sibcall_value) + { + if (valreg) + emit_call_insn (gen_sibcall_value (valreg, + gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, + next_arg_reg, NULL_RTX)); + else + emit_call_insn (gen_sibcall (gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, + struct_value_size_rtx)); + } + else +#endif + #if defined (HAVE_call) && defined (HAVE_call_value) if (HAVE_call && HAVE_call_value) { @@ -489,15 +541,17 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, CALL_INSN_FUNCTION_USAGE (call_insn) = call_fusage; /* If this is a const call, then set the insn's unchanging bit. */ - if (is_const) + if (ecf_flags & ECF_IS_CONST) CONST_CALL_P (call_insn) = 1; /* If this call can't throw, attach a REG_EH_REGION reg note to that effect. */ - if (nothrow) + if (ecf_flags & ECF_NOTHROW) REG_NOTES (call_insn) = gen_rtx_EXPR_LIST (REG_EH_REGION, const0_rtx, REG_NOTES (call_insn)); + SIBLING_CALL_P (call_insn) = ((ecf_flags & ECF_SIBCALL) != 0); + /* Restore this now, so that we do defer pops for this call's args if the context of the call as a whole permits. */ inhibit_defer_pop = old_inhibit_defer_pop; @@ -527,7 +581,8 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, if (rounded_stack_size != 0) { - if (flag_defer_pop && inhibit_defer_pop == 0 && !is_const) + if (flag_defer_pop && inhibit_defer_pop == 0 + && !(ecf_flags & ECF_IS_CONST)) pending_stack_adjust += rounded_stack_size; else adjust_stack (rounded_stack_size_rtx); @@ -1227,6 +1282,8 @@ compute_argument_block_size (reg_parm_stack_space, args_size, { #ifdef PREFERRED_STACK_BOUNDARY preferred_stack_boundary /= BITS_PER_UNIT; + if (preferred_stack_boundary < 1) + preferred_stack_boundary = 1; args_size->constant = (((args_size->constant + arg_space_so_far + pending_stack_adjust @@ -1602,17 +1659,31 @@ expand_call (exp, target, ignore) rtx target; int ignore; { + /* Nonzero if we are currently expanding a call. */ + static int currently_expanding_call = 0; + /* List of actual parameters. */ tree actparms = TREE_OPERAND (exp, 1); /* RTX for the function to be called. */ rtx funexp; + /* Sequence of insns to perform a tail recursive "call". */ + rtx tail_recursion_insns = NULL_RTX; + /* Sequence of insns to perform a normal "call". */ + rtx normal_call_insns = NULL_RTX; + /* Sequence of insns to perform a sibling "call" (tail call). */ + rtx tail_call_insns = NULL_RTX; /* Data type of the function. */ tree funtype; /* Declaration of the function being called, or 0 if the function is computed (not known by name). 
*/ tree fndecl = 0; char *name = 0; +#ifdef ACCUMULATE_OUTGOING_ARGS rtx before_call; +#endif + rtx insn; + int safe_for_reeval; + int pass; /* Register in which non-BLKmode value will be returned, or 0 if no value or if value is BLKmode. */ @@ -1708,17 +1779,10 @@ expand_call (exp, target, ignore) rtx old_stack_level = 0; int old_pending_adj = 0; int old_inhibit_defer_pop = inhibit_defer_pop; - rtx call_fusage = 0; + rtx call_fusage; register tree p; register int i; -#ifdef PREFERRED_STACK_BOUNDARY - int preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; -#else - /* In this case preferred_stack_boundary variable is meaningless. - It is used only in order to keep ifdef noise down when calling - compute_argument_block_size. */ - int preferred_stack_boundary = 0; -#endif + int preferred_stack_boundary; /* The value of the function call can be put in a hard register. But if -fcheck-memory-usage, code which invokes functions (and thus @@ -1947,11 +2011,91 @@ expand_call (exp, target, ignore) mark_addressable (fndecl); } + currently_expanding_call++; + + /* If we're considering tail recursion optimizations, verify that the + arguments are safe for re-evaluation. If we can unsave them, wrap + each argument in an UNSAVE_EXPR. */ + + safe_for_reeval = 0; + if (optimize >= 2 + && currently_expanding_call == 1 + && stmt_loop_nest_empty ()) + { + /* Verify that each argument is safe for re-evaluation. */ + for (p = actparms; p; p = TREE_CHAIN (p)) + if (! safe_for_unsave (TREE_VALUE (p))) + break; + + if (p == NULL_TREE) + { + tree new_actparms = NULL_TREE, q; + + for (p = actparms; p ; p = TREE_CHAIN (p)) + { + tree np = build_tree_list (TREE_PURPOSE (p), + unsave_expr (TREE_VALUE (p))); + if (new_actparms) + TREE_CHAIN (q) = np; + else + new_actparms = np; + q = np; + } + + actparms = new_actparms; + safe_for_reeval = 1; + } + } + + /* Generate a tail recursion sequence when calling ourselves. */ + + if (safe_for_reeval + && TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR + && TREE_OPERAND (TREE_OPERAND (exp, 0), 0) == current_function_decl) + { + /* We want to emit any pending stack adjustments before the tail + recursion "call". That way we know any adjustment after the tail + recursion call can be ignored if we indeed use the tail recursion + call expansion. */ + int save_pending_stack_adjust = pending_stack_adjust; + rtx last; + + /* Use a new sequence to hold any RTL we generate. We do not even + know if we will use this RTL yet. The final decision can not be + made until after RTL generation for the entire function is + complete. */ + push_to_sequence (0); + + /* Emit the pending stack adjustments before we expand any arguments. */ + do_pending_stack_adjust (); + + optimize_tail_recursion (exp, get_last_insn ()); + + last = get_last_insn (); + tail_recursion_insns = get_insns (); + end_sequence (); + + /* If the last insn on the tail recursion sequence is not a + BARRIER, then tail recursion optimization failed. */ + if (last == NULL_RTX || GET_CODE (last) != BARRIER) + tail_recursion_insns = NULL_RTX; + + /* Restore the original pending stack adjustment for the sibling and + normal call cases below. */ + pending_stack_adjust = save_pending_stack_adjust; + } + function_call_count++; if (fndecl && DECL_NAME (fndecl)) name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); +#ifdef PREFERRED_STACK_BOUNDARY + preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; +#else + preferred_stack_boundary = STACK_BOUNDARY; +#endif + /* Ensure current function's preferred stack boundary is at least what we need. 
We don't have to increase alignment for recursive functions. */ @@ -1973,708 +2117,881 @@ expand_call (exp, target, ignore) abort (); funtype = TREE_TYPE (funtype); - /* When calling a const function, we must pop the stack args right away, - so that the pop is deleted or moved with the call. */ - if (is_const) - NO_DEFER_POP; + /* We want to make two insn chains; one for a sibling call, the other + for a normal call. We will select one of the two chains after + initial RTL generation is complete. */ + for (pass = 0; pass < 2; pass++) + { + int sibcall_failure = 0; + /* We want to emit any pending stack adjustments before the tail + recursion "call". That way we know any adjustment after the tail + recursion call can be ignored if we indeed use the tail recursion + call expansion. */ + int save_pending_stack_adjust; + rtx insns; + rtx before_call; - /* Don't let pending stack adjusts add up to too much. - Also, do all pending adjustments now - if there is any chance this might be a call to alloca. */ + if (pass == 0) + { + /* Various reasons we can not use a sibling call. */ + if (! safe_for_reeval +#ifdef HAVE_sibcall_epilogue + || ! HAVE_sibcall_epilogue +#else + || 1 +#endif + /* The structure value address is used and modified in the + loop below. It does not seem worth the effort to save and + restore it as a state variable since few optimizable + sibling calls will return a structure. */ + || structure_value_addr != NULL_RTX + /* If the register holding the address is a callee saved + register, then we lose. We have no way to prevent that, + so we only allow calls to named functions. */ + || fndecl == NULL_TREE + || ! FUNCTION_OK_FOR_SIBCALL (fndecl)) + continue; - if (pending_stack_adjust >= 32 - || (pending_stack_adjust > 0 && may_be_alloca)) - do_pending_stack_adjust (); + /* State variables we need to save and restore between + iterations. */ + save_pending_stack_adjust = pending_stack_adjust; + } - if (profile_arc_flag && fork_or_exec) - { - /* A fork duplicates the profile information, and an exec discards - it. We can't rely on fork/exec to be paired. So write out the - profile information we have gathered so far, and clear it. */ - emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__bb_fork_func"), 0, - VOIDmode, 0); - - /* ??? When __clone is called with CLONE_VM set, profiling is - subject to race conditions, just as with multithreaded programs. */ - } + /* Other state variables that we must reinitialize each time + through the loop (that are not initialized by the loop itself). */ + argblock = 0; + call_fusage = 0; - /* Push the temporary stack slot level so that we can free any temporaries - we make. */ - push_temp_slots (); + /* Start a new sequence for the normal call case. - /* Start updating where the next arg would go. + From this point on, if the sibling call fails, we want to set + sibcall_failure instead of continuing the loop. */ + start_sequence (); - On some machines (such as the PA) indirect calls have a different - calling convention than normal calls. The last argument in - INIT_CUMULATIVE_ARGS tells the backend if this is an indirect call - or not. */ - INIT_CUMULATIVE_ARGS (args_so_far, funtype, NULL_RTX, (fndecl == 0)); + /* When calling a const function, we must pop the stack args right away, + so that the pop is deleted or moved with the call. */ + if (is_const) + NO_DEFER_POP; - /* If struct_value_rtx is 0, it means pass the address - as if it were an extra parameter. 
*/ - if (structure_value_addr && struct_value_rtx == 0) - { - /* If structure_value_addr is a REG other than - virtual_outgoing_args_rtx, we can use always use it. If it - is not a REG, we must always copy it into a register. - If it is virtual_outgoing_args_rtx, we must copy it to another - register in some cases. */ - rtx temp = (GET_CODE (structure_value_addr) != REG -#ifdef ACCUMULATE_OUTGOING_ARGS - || (stack_arg_under_construction - && structure_value_addr == virtual_outgoing_args_rtx) -#endif - ? copy_addr_to_reg (structure_value_addr) - : structure_value_addr); - - actparms - = tree_cons (error_mark_node, - make_tree (build_pointer_type (TREE_TYPE (funtype)), - temp), - actparms); - structure_value_addr_parm = 1; - } + /* Don't let pending stack adjusts add up to too much. + Also, do all pending adjustments now if there is any chance + this might be a call to alloca or if we are expanding a sibling + call sequence. */ + if (pending_stack_adjust >= 32 + || (pending_stack_adjust > 0 && may_be_alloca) + || pass == 0) + do_pending_stack_adjust (); - /* Count the arguments and set NUM_ACTUALS. */ - for (p = actparms, i = 0; p; p = TREE_CHAIN (p)) i++; - num_actuals = i; - - /* Compute number of named args. - Normally, don't include the last named arg if anonymous args follow. - We do include the last named arg if STRICT_ARGUMENT_NAMING is nonzero. - (If no anonymous args follow, the result of list_length is actually - one too large. This is harmless.) - - If PRETEND_OUTGOING_VARARGS_NAMED is set and STRICT_ARGUMENT_NAMING is - zero, this machine will be able to place unnamed args that were passed in - registers into the stack. So treat all args as named. This allows the - insns emitting for a specific argument list to be independent of the - function declaration. - - If PRETEND_OUTGOING_VARARGS_NAMED is not set, we do not have any reliable - way to pass unnamed args in registers, so we must force them into - memory. */ - - if ((STRICT_ARGUMENT_NAMING - || ! PRETEND_OUTGOING_VARARGS_NAMED) - && TYPE_ARG_TYPES (funtype) != 0) - n_named_args - = (list_length (TYPE_ARG_TYPES (funtype)) - /* Don't include the last named arg. */ - - (STRICT_ARGUMENT_NAMING ? 0 : 1) - /* Count the struct value address, if it is passed as a parm. */ - + structure_value_addr_parm); - else - /* If we know nothing, treat all args as named. */ - n_named_args = num_actuals; + if (profile_arc_flag && fork_or_exec) + { + /* A fork duplicates the profile information, and an exec discards + it. We can't rely on fork/exec to be paired. So write out the + profile information we have gathered so far, and clear it. */ + /* ??? When Linux's __clone is called with CLONE_VM set, profiling + is subject to race conditions, just as with multithreaded + programs. */ + + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__bb_fork_func"), 0, + VOIDmode, 0); + } - /* Make a vector to hold all the information about each arg. */ - args = (struct arg_data *) alloca (num_actuals * sizeof (struct arg_data)); - bzero ((char *) args, num_actuals * sizeof (struct arg_data)); + /* Push the temporary stack slot level so that we can free any + temporaries we make. */ + push_temp_slots (); + + /* Start updating where the next arg would go. + + On some machines (such as the PA) indirect calls have a different + calling convention than normal calls. The last argument in + INIT_CUMULATIVE_ARGS tells the backend if this is an indirect call + or not. 
*/ + INIT_CUMULATIVE_ARGS (args_so_far, funtype, NULL_RTX, (fndecl == 0)); + + /* If struct_value_rtx is 0, it means pass the address + as if it were an extra parameter. */ + if (structure_value_addr && struct_value_rtx == 0) + { + /* If structure_value_addr is a REG other than + virtual_outgoing_args_rtx, we can use always use it. If it + is not a REG, we must always copy it into a register. + If it is virtual_outgoing_args_rtx, we must copy it to another + register in some cases. */ + rtx temp = (GET_CODE (structure_value_addr) != REG +#ifdef ACCUMULATE_OUTGOING_ARGS + || (stack_arg_under_construction + && structure_value_addr == virtual_outgoing_args_rtx) +#endif + ? copy_addr_to_reg (structure_value_addr) + : structure_value_addr); + + actparms + = tree_cons (error_mark_node, + make_tree (build_pointer_type (TREE_TYPE (funtype)), + temp), + actparms); + structure_value_addr_parm = 1; + } - /* Build up entries inthe ARGS array, compute the size of the arguments - into ARGS_SIZE, etc. */ - initialize_argument_information (num_actuals, args, &args_size, n_named_args, - actparms, fndecl, &args_so_far, - reg_parm_stack_space, &old_stack_level, - &old_pending_adj, &must_preallocate, - &is_const); + /* Count the arguments and set NUM_ACTUALS. */ + for (p = actparms, i = 0; p; p = TREE_CHAIN (p)) i++; + num_actuals = i; + + /* Compute number of named args. + Normally, don't include the last named arg if anonymous args follow. + We do include the last named arg if STRICT_ARGUMENT_NAMING is nonzero. + (If no anonymous args follow, the result of list_length is actually + one too large. This is harmless.) + + If PRETEND_OUTGOING_VARARGS_NAMED is set and STRICT_ARGUMENT_NAMING is + zero, this machine will be able to place unnamed args that were + passed in registers into the stack. So treat all args as named. + This allows the insns emitting for a specific argument list to be + independent of the function declaration. + + If PRETEND_OUTGOING_VARARGS_NAMED is not set, we do not have any + reliable way to pass unnamed args in registers, so we must force + them into memory. */ + + if ((STRICT_ARGUMENT_NAMING + || ! PRETEND_OUTGOING_VARARGS_NAMED) + && TYPE_ARG_TYPES (funtype) != 0) + n_named_args + = (list_length (TYPE_ARG_TYPES (funtype)) + /* Don't include the last named arg. */ + - (STRICT_ARGUMENT_NAMING ? 0 : 1) + /* Count the struct value address, if it is passed as a parm. */ + + structure_value_addr_parm); + else + /* If we know nothing, treat all args as named. */ + n_named_args = num_actuals; + + /* Make a vector to hold all the information about each arg. */ + args = (struct arg_data *) alloca (num_actuals + * sizeof (struct arg_data)); + bzero ((char *) args, num_actuals * sizeof (struct arg_data)); + + /* Build up entries inthe ARGS array, compute the size of the arguments + into ARGS_SIZE, etc. */ + initialize_argument_information (num_actuals, args, &args_size, + n_named_args, actparms, fndecl, + &args_so_far, reg_parm_stack_space, + &old_stack_level, &old_pending_adj, + &must_preallocate, &is_const); #ifdef FINAL_REG_PARM_STACK_SPACE - reg_parm_stack_space = FINAL_REG_PARM_STACK_SPACE (args_size.constant, - args_size.var); + reg_parm_stack_space = FINAL_REG_PARM_STACK_SPACE (args_size.constant, + args_size.var); #endif - if (args_size.var) - { - /* If this function requires a variable-sized argument list, don't try to - make a cse'able block for this call. 
We may be able to do this - eventually, but it is too complicated to keep track of what insns go - in the cse'able block and which don't. */ + if (args_size.var) + { + /* If this function requires a variable-sized argument list, don't + try to make a cse'able block for this call. We may be able to + do this eventually, but it is too complicated to keep track of + what insns go in the cse'able block and which don't. - is_const = 0; - must_preallocate = 1; - } + Also do not make a sibling call. */ - /* Compute the actual size of the argument block required. The variable - and constant sizes must be combined, the size may have to be rounded, - and there may be a minimum required size. */ - unadjusted_args_size - = compute_argument_block_size (reg_parm_stack_space, &args_size, - preferred_stack_boundary); - - /* Now make final decision about preallocating stack space. */ - must_preallocate = finalize_must_preallocate (must_preallocate, - num_actuals, args, &args_size); - - /* If the structure value address will reference the stack pointer, we must - stabilize it. We don't need to do this if we know that we are not going - to adjust the stack pointer in processing this call. */ - - if (structure_value_addr - && (reg_mentioned_p (virtual_stack_dynamic_rtx, structure_value_addr) - || reg_mentioned_p (virtual_outgoing_args_rtx, structure_value_addr)) - && (args_size.var + is_const = 0; + must_preallocate = 1; + sibcall_failure = 1; + } + + /* Compute the actual size of the argument block required. The variable + and constant sizes must be combined, the size may have to be rounded, + and there may be a minimum required size. When generating a sibcall + pattern, do not round up, since we'll be re-using whatever space our + caller provided. */ + unadjusted_args_size + = compute_argument_block_size (reg_parm_stack_space, &args_size, + (pass == 0 ? 0 + : preferred_stack_boundary)); + + /* If the callee pops its own arguments, then it must pop exactly + the same number of arguments as the current function. */ + if (RETURN_POPS_ARGS (fndecl, funtype, unadjusted_args_size) + != RETURN_POPS_ARGS (current_function_decl, + TREE_TYPE (current_function_decl), + current_function_args_size)) + sibcall_failure = 1; + + /* Now make final decision about preallocating stack space. */ + must_preallocate = finalize_must_preallocate (must_preallocate, + num_actuals, args, + &args_size); + + /* If the structure value address will reference the stack pointer, we + must stabilize it. We don't need to do this if we know that we are + not going to adjust the stack pointer in processing this call. */ + + if (structure_value_addr + && (reg_mentioned_p (virtual_stack_dynamic_rtx, structure_value_addr) + || reg_mentioned_p (virtual_outgoing_args_rtx, + structure_value_addr)) + && (args_size.var #ifndef ACCUMULATE_OUTGOING_ARGS - || args_size.constant + || args_size.constant #endif - )) - structure_value_addr = copy_to_reg (structure_value_addr); + )) + structure_value_addr = copy_to_reg (structure_value_addr); - /* Precompute any arguments as needed. */ - precompute_arguments (is_const, must_preallocate, num_actuals, - args, &args_size); + /* Precompute any arguments as needed. */ + precompute_arguments (is_const, must_preallocate, num_actuals, + args, &args_size); - /* Now we are about to start emitting insns that can be deleted - if a libcall is deleted. */ - if (is_const || is_malloc) - start_sequence (); + /* Now we are about to start emitting insns that can be deleted + if a libcall is deleted. 
*/ + if (is_const || is_malloc) + start_sequence (); - /* If we have no actual push instructions, or shouldn't use them, - make space for all args right now. */ + /* If we have no actual push instructions, or shouldn't use them, + make space for all args right now. */ - if (args_size.var != 0) - { - if (old_stack_level == 0) + if (args_size.var != 0) { - emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); - old_pending_adj = pending_stack_adjust; - pending_stack_adjust = 0; + if (old_stack_level == 0) + { + emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); + old_pending_adj = pending_stack_adjust; + pending_stack_adjust = 0; #ifdef ACCUMULATE_OUTGOING_ARGS - /* stack_arg_under_construction says whether a stack arg is - being constructed at the old stack level. Pushing the stack - gets a clean outgoing argument block. */ - old_stack_arg_under_construction = stack_arg_under_construction; - stack_arg_under_construction = 0; + /* stack_arg_under_construction says whether a stack arg is + being constructed at the old stack level. Pushing the stack + gets a clean outgoing argument block. */ + old_stack_arg_under_construction = stack_arg_under_construction; + stack_arg_under_construction = 0; #endif + } + argblock = push_block (ARGS_SIZE_RTX (args_size), 0, 0); } - argblock = push_block (ARGS_SIZE_RTX (args_size), 0, 0); - } - else - { - /* Note that we must go through the motions of allocating an argument - block even if the size is zero because we may be storing args - in the area reserved for register arguments, which may be part of - the stack frame. */ + else + { + /* Note that we must go through the motions of allocating an argument + block even if the size is zero because we may be storing args + in the area reserved for register arguments, which may be part of + the stack frame. */ - int needed = args_size.constant; + int needed = args_size.constant; - /* Store the maximum argument space used. It will be pushed by - the prologue (if ACCUMULATE_OUTGOING_ARGS, or stack overflow - checking). */ + /* Store the maximum argument space used. It will be pushed by + the prologue (if ACCUMULATE_OUTGOING_ARGS, or stack overflow + checking). */ - if (needed > current_function_outgoing_args_size) - current_function_outgoing_args_size = needed; + if (needed > current_function_outgoing_args_size) + current_function_outgoing_args_size = needed; - if (must_preallocate) - { + if (must_preallocate) + { #ifdef ACCUMULATE_OUTGOING_ARGS - /* Since the stack pointer will never be pushed, it is possible for - the evaluation of a parm to clobber something we have already - written to the stack. Since most function calls on RISC machines - do not use the stack, this is uncommon, but must work correctly. + /* Since the stack pointer will never be pushed, it is possible + for the evaluation of a parm to clobber something we have + already written to the stack. Since most function calls on + RISC machines do not use the stack, this is uncommon, but + must work correctly. - Therefore, we save any area of the stack that was already written - and that we are using. Here we set up to do this by making a new - stack usage map from the old one. The actual save will be done - by store_one_arg. + Therefore, we save any area of the stack that was already + written and that we are using. Here we set up to do this by + making a new stack usage map from the old one. The actual + save will be done by store_one_arg. 
- Another approach might be to try to reorder the argument - evaluations to avoid this conflicting stack usage. */ + Another approach might be to try to reorder the argument + evaluations to avoid this conflicting stack usage. */ #ifndef OUTGOING_REG_PARM_STACK_SPACE - /* Since we will be writing into the entire argument area, the - map must be allocated for its entire size, not just the part that - is the responsibility of the caller. */ - needed += reg_parm_stack_space; + /* Since we will be writing into the entire argument area, the + map must be allocated for its entire size, not just the part + that is the responsibility of the caller. */ + needed += reg_parm_stack_space; #endif #ifdef ARGS_GROW_DOWNWARD - highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, - needed + 1); + highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, + needed + 1); #else - highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, - needed); + highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, + needed); #endif - stack_usage_map = (char *) alloca (highest_outgoing_arg_in_use); + stack_usage_map = (char *) alloca (highest_outgoing_arg_in_use); - if (initial_highest_arg_in_use) - bcopy (initial_stack_usage_map, stack_usage_map, - initial_highest_arg_in_use); + if (initial_highest_arg_in_use) + bcopy (initial_stack_usage_map, stack_usage_map, + initial_highest_arg_in_use); - if (initial_highest_arg_in_use != highest_outgoing_arg_in_use) - bzero (&stack_usage_map[initial_highest_arg_in_use], - highest_outgoing_arg_in_use - initial_highest_arg_in_use); - needed = 0; + if (initial_highest_arg_in_use != highest_outgoing_arg_in_use) + bzero (&stack_usage_map[initial_highest_arg_in_use], + (highest_outgoing_arg_in_use + - initial_highest_arg_in_use)); + needed = 0; - /* The address of the outgoing argument list must not be copied to a - register here, because argblock would be left pointing to the - wrong place after the call to allocate_dynamic_stack_space below. - */ + /* The address of the outgoing argument list must not be copied + to a register here, because argblock would be left pointing + to the wrong place after the call to + allocate_dynamic_stack_space below. */ - argblock = virtual_outgoing_args_rtx; + argblock = virtual_outgoing_args_rtx; #else /* not ACCUMULATE_OUTGOING_ARGS */ - if (inhibit_defer_pop == 0) - { - /* Try to reuse some or all of the pending_stack_adjust - to get this space. Maybe we can avoid any pushing. */ - if (needed > pending_stack_adjust) + if (inhibit_defer_pop == 0) { - needed -= pending_stack_adjust; - pending_stack_adjust = 0; + /* Try to reuse some or all of the pending_stack_adjust + to get this space. Maybe we can avoid any pushing. */ + if (needed > pending_stack_adjust) + { + needed -= pending_stack_adjust; + pending_stack_adjust = 0; + } + else + { + pending_stack_adjust -= needed; + needed = 0; + } } + /* Special case this because overhead of `push_block' in this + case is non-trivial. */ + if (needed == 0) + argblock = virtual_outgoing_args_rtx; else - { - pending_stack_adjust -= needed; - needed = 0; - } - } - /* Special case this because overhead of `push_block' in this - case is non-trivial. */ - if (needed == 0) - argblock = virtual_outgoing_args_rtx; - else - argblock = push_block (GEN_INT (needed), 0, 0); - - /* We only really need to call `copy_to_reg' in the case where push - insns are going to be used to pass ARGBLOCK to a function - call in ARGS. 
In that case, the stack pointer changes value - from the allocation point to the call point, and hence - the value of VIRTUAL_OUTGOING_ARGS_RTX changes as well. - But might as well always do it. */ - argblock = copy_to_reg (argblock); + argblock = push_block (GEN_INT (needed), 0, 0); + + /* We only really need to call `copy_to_reg' in the case where + push insns are going to be used to pass ARGBLOCK to a function + call in ARGS. In that case, the stack pointer changes value + from the allocation point to the call point, and hence + the value of VIRTUAL_OUTGOING_ARGS_RTX changes as well. + But might as well always do it. */ + argblock = copy_to_reg (argblock); #endif /* not ACCUMULATE_OUTGOING_ARGS */ + } + } + + /* The argument block when performing a sibling call is the + incoming argument block. */ + if (pass == 0) + { + rtx temp = plus_constant (arg_pointer_rtx, + FIRST_PARM_OFFSET (current_function_decl)); + argblock = force_reg (Pmode, force_operand (temp, NULL_RTX)); } - } #ifdef ACCUMULATE_OUTGOING_ARGS - /* The save/restore code in store_one_arg handles all cases except one: - a constructor call (including a C function returning a BLKmode struct) - to initialize an argument. */ - if (stack_arg_under_construction) - { + /* The save/restore code in store_one_arg handles all cases except one: + a constructor call (including a C function returning a BLKmode struct) + to initialize an argument. */ + if (stack_arg_under_construction) + { #ifndef OUTGOING_REG_PARM_STACK_SPACE - rtx push_size = GEN_INT (reg_parm_stack_space + args_size.constant); + rtx push_size = GEN_INT (reg_parm_stack_space + args_size.constant); #else - rtx push_size = GEN_INT (args_size.constant); + rtx push_size = GEN_INT (args_size.constant); #endif - if (old_stack_level == 0) - { - emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); - old_pending_adj = pending_stack_adjust; - pending_stack_adjust = 0; - /* stack_arg_under_construction says whether a stack arg is - being constructed at the old stack level. Pushing the stack - gets a clean outgoing argument block. */ - old_stack_arg_under_construction = stack_arg_under_construction; - stack_arg_under_construction = 0; - /* Make a new map for the new argument list. */ - stack_usage_map = (char *)alloca (highest_outgoing_arg_in_use); - bzero (stack_usage_map, highest_outgoing_arg_in_use); - highest_outgoing_arg_in_use = 0; + if (old_stack_level == 0) + { + emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); + old_pending_adj = pending_stack_adjust; + pending_stack_adjust = 0; + /* stack_arg_under_construction says whether a stack arg is + being constructed at the old stack level. Pushing the stack + gets a clean outgoing argument block. */ + old_stack_arg_under_construction = stack_arg_under_construction; + stack_arg_under_construction = 0; + /* Make a new map for the new argument list. */ + stack_usage_map = (char *)alloca (highest_outgoing_arg_in_use); + bzero (stack_usage_map, highest_outgoing_arg_in_use); + highest_outgoing_arg_in_use = 0; + } + allocate_dynamic_stack_space (push_size, NULL_RTX, BITS_PER_UNIT); } - allocate_dynamic_stack_space (push_size, NULL_RTX, BITS_PER_UNIT); - } - /* If argument evaluation might modify the stack pointer, copy the - address of the argument list to a register. */ - for (i = 0; i < num_actuals; i++) - if (args[i].pass_on_stack) - { - argblock = copy_addr_to_reg (argblock); - break; - } + /* If argument evaluation might modify the stack pointer, copy the + address of the argument list to a register. 
*/ + for (i = 0; i < num_actuals; i++) + if (args[i].pass_on_stack) + { + argblock = copy_addr_to_reg (argblock); + break; + } #endif - compute_argument_addresses (args, argblock, num_actuals); + compute_argument_addresses (args, argblock, num_actuals); #ifdef PUSH_ARGS_REVERSED #ifdef PREFERRED_STACK_BOUNDARY - /* If we push args individually in reverse order, perform stack alignment - before the first push (the last arg). */ - if (args_size.constant != unadjusted_args_size) - { - /* When the stack adjustment is pending, - we get better code by combining the adjustments. */ - if (pending_stack_adjust && !is_const - && !inhibit_defer_pop) + /* If we push args individually in reverse order, perform stack alignment + before the first push (the last arg). */ + if (args_size.constant != unadjusted_args_size) { - args_size.constant = (unadjusted_args_size - + ((pending_stack_adjust + args_size.constant - + arg_space_so_far - - unadjusted_args_size) - % (preferred_stack_boundary / BITS_PER_UNIT))); - pending_stack_adjust -= args_size.constant - unadjusted_args_size; - do_pending_stack_adjust (); - } - else if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - unadjusted_args_size)); - arg_space_so_far += args_size.constant - unadjusted_args_size; + /* When the stack adjustment is pending, we get better code + by combining the adjustments. */ + if (pending_stack_adjust && ! is_const + && ! inhibit_defer_pop) + { + args_size.constant = (unadjusted_args_size + + ((pending_stack_adjust + + args_size.constant + + arg_space_so_far + - unadjusted_args_size) + % (preferred_stack_boundary + / BITS_PER_UNIT))); + pending_stack_adjust -= (args_size.constant + - unadjusted_args_size); + do_pending_stack_adjust (); + } + else if (argblock == 0) + anti_adjust_stack (GEN_INT (args_size.constant + - unadjusted_args_size)); + arg_space_so_far += args_size.constant - unadjusted_args_size; - /* Now that the stack is properly aligned, pops can't safely - be deferred during the evaluation of the arguments. */ - NO_DEFER_POP; - } + /* Now that the stack is properly aligned, pops can't safely + be deferred during the evaluation of the arguments. */ + NO_DEFER_POP; + } #endif #endif - /* Don't try to defer pops if preallocating, not even from the first arg, - since ARGBLOCK probably refers to the SP. */ - if (argblock) - NO_DEFER_POP; + /* Don't try to defer pops if preallocating, not even from the first arg, + since ARGBLOCK probably refers to the SP. */ + if (argblock) + NO_DEFER_POP; - funexp = rtx_for_function_call (fndecl, exp); + funexp = rtx_for_function_call (fndecl, exp); - /* Figure out the register where the value, if any, will come back. */ - valreg = 0; - if (TYPE_MODE (TREE_TYPE (exp)) != VOIDmode - && ! structure_value_addr) - { - if (pcc_struct_value) - valreg = hard_function_value (build_pointer_type (TREE_TYPE (exp)), - fndecl, 0); - else - valreg = hard_function_value (TREE_TYPE (exp), fndecl, 0); - } + /* Figure out the register where the value, if any, will come back. */ + valreg = 0; + if (TYPE_MODE (TREE_TYPE (exp)) != VOIDmode + && ! structure_value_addr) + { + if (pcc_struct_value) + valreg = hard_function_value (build_pointer_type (TREE_TYPE (exp)), + fndecl, 0); + else + valreg = hard_function_value (TREE_TYPE (exp), fndecl, 0); + } - /* Precompute all register parameters. It isn't safe to compute anything - once we have started filling any specific hard regs. */ - precompute_register_parameters (num_actuals, args, ®_parm_seen); + /* Precompute all register parameters. 
It isn't safe to compute anything + once we have started filling any specific hard regs. */ + precompute_register_parameters (num_actuals, args, ®_parm_seen); #if defined(ACCUMULATE_OUTGOING_ARGS) && defined(REG_PARM_STACK_SPACE) - - /* Save the fixed argument area if it's part of the caller's frame and - is clobbered by argument setup for this call. */ - save_area = save_fixed_argument_area (reg_parm_stack_space, argblock, - &low_to_save, &high_to_save); + /* Save the fixed argument area if it's part of the caller's frame and + is clobbered by argument setup for this call. */ + save_area = save_fixed_argument_area (reg_parm_stack_space, argblock, + &low_to_save, &high_to_save); #endif - - /* Now store (and compute if necessary) all non-register parms. - These come before register parms, since they can require block-moves, - which could clobber the registers used for register parms. - Parms which have partial registers are not stored here, - but we do preallocate space here if they want that. */ + /* Now store (and compute if necessary) all non-register parms. + These come before register parms, since they can require block-moves, + which could clobber the registers used for register parms. + Parms which have partial registers are not stored here, + but we do preallocate space here if they want that. */ - for (i = 0; i < num_actuals; i++) - if (args[i].reg == 0 || args[i].pass_on_stack) - store_one_arg (&args[i], argblock, may_be_alloca, - args_size.var != 0, reg_parm_stack_space); - - /* If we have a parm that is passed in registers but not in memory - and whose alignment does not permit a direct copy into registers, - make a group of pseudos that correspond to each register that we - will later fill. */ - if (STRICT_ALIGNMENT) - store_unaligned_arguments_into_pseudos (args, num_actuals); - - /* Now store any partially-in-registers parm. - This is the last place a block-move can happen. */ - if (reg_parm_seen) - for (i = 0; i < num_actuals; i++) - if (args[i].partial != 0 && ! args[i].pass_on_stack) - store_one_arg (&args[i], argblock, may_be_alloca, - args_size.var != 0, reg_parm_stack_space); + for (i = 0; i < num_actuals; i++) + if (args[i].reg == 0 || args[i].pass_on_stack) + store_one_arg (&args[i], argblock, may_be_alloca, + args_size.var != 0, reg_parm_stack_space); + + /* If we have a parm that is passed in registers but not in memory + and whose alignment does not permit a direct copy into registers, + make a group of pseudos that correspond to each register that we + will later fill. */ + if (STRICT_ALIGNMENT) + store_unaligned_arguments_into_pseudos (args, num_actuals); + + /* Now store any partially-in-registers parm. + This is the last place a block-move can happen. */ + if (reg_parm_seen) + for (i = 0; i < num_actuals; i++) + if (args[i].partial != 0 && ! args[i].pass_on_stack) + store_one_arg (&args[i], argblock, may_be_alloca, + args_size.var != 0, reg_parm_stack_space); #ifndef PUSH_ARGS_REVERSED #ifdef PREFERRED_STACK_BOUNDARY - /* If we pushed args in forward order, perform stack alignment - after pushing the last arg. */ - if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - unadjusted_args_size)); + /* If we pushed args in forward order, perform stack alignment + after pushing the last arg. */ + /* ??? Fix for arg_space_so_far. 
*/ + if (argblock == 0) + anti_adjust_stack (GEN_INT (args_size.constant + - unadjusted_args_size)); #endif #endif - /* If register arguments require space on the stack and stack space - was not preallocated, allocate stack space here for arguments - passed in registers. */ + /* If register arguments require space on the stack and stack space + was not preallocated, allocate stack space here for arguments + passed in registers. */ #if ! defined(ACCUMULATE_OUTGOING_ARGS) && defined(OUTGOING_REG_PARM_STACK_SPACE) - if (must_preallocate == 0 && reg_parm_stack_space > 0) - anti_adjust_stack (GEN_INT (reg_parm_stack_space)); + if (must_preallocate == 0 && reg_parm_stack_space > 0) + anti_adjust_stack (GEN_INT (reg_parm_stack_space)); #endif - /* Pass the function the address in which to return a structure value. */ - if (structure_value_addr && ! structure_value_addr_parm) - { - emit_move_insn (struct_value_rtx, - force_reg (Pmode, - force_operand (structure_value_addr, - NULL_RTX))); - - /* Mark the memory for the aggregate as write-only. */ - if (current_function_check_memory_usage) - emit_library_call (chkr_set_right_libfunc, 1, - VOIDmode, 3, - structure_value_addr, Pmode, - GEN_INT (struct_value_size), TYPE_MODE (sizetype), - GEN_INT (MEMORY_USE_WO), - TYPE_MODE (integer_type_node)); - - if (GET_CODE (struct_value_rtx) == REG) - use_reg (&call_fusage, struct_value_rtx); - } - - funexp = prepare_call_address (funexp, fndecl, &call_fusage, reg_parm_seen); - - load_register_parameters (args, num_actuals, &call_fusage); - - /* Perform postincrements before actually calling the function. */ - emit_queue (); - - /* Save a pointer to the last insn before the call, so that we can - later safely search backwards to find the CALL_INSN. */ - before_call = get_last_insn (); + /* Pass the function the address in which to return a + structure value. */ + if (pass != 0 && structure_value_addr && ! structure_value_addr_parm) + { + emit_move_insn (struct_value_rtx, + force_reg (Pmode, + force_operand (structure_value_addr, + NULL_RTX))); + + /* Mark the memory for the aggregate as write-only. */ + if (current_function_check_memory_usage) + emit_library_call (chkr_set_right_libfunc, 1, + VOIDmode, 3, + structure_value_addr, ptr_mode, + GEN_INT (struct_value_size), + TYPE_MODE (sizetype), + GEN_INT (MEMORY_USE_WO), + TYPE_MODE (integer_type_node)); + + if (GET_CODE (struct_value_rtx) == REG) + use_reg (&call_fusage, struct_value_rtx); + } - /* All arguments and registers used for the call must be set up by now! */ + funexp = prepare_call_address (funexp, fndecl, &call_fusage, + reg_parm_seen); - /* Generate the actual call instruction. */ - emit_call_1 (funexp, fndecl, funtype, unadjusted_args_size, - args_size.constant, struct_value_size, - FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), - valreg, old_inhibit_defer_pop, call_fusage, is_const, nothrow); + load_register_parameters (args, num_actuals, &call_fusage); + + /* Perform postincrements before actually calling the function. */ + emit_queue (); - /* If call is cse'able, make appropriate pair of reg-notes around it. - Test valreg so we don't crash; may safely ignore `const' - if return type is void. Disable for PARALLEL return values, because - we have no way to move such values into a pseudo register. 
*/ - if (is_const && valreg != 0 && GET_CODE (valreg) != PARALLEL) - { - rtx note = 0; - rtx temp = gen_reg_rtx (GET_MODE (valreg)); - rtx insns; + /* Save a pointer to the last insn before the call, so that we can + later safely search backwards to find the CALL_INSN. */ + before_call = get_last_insn (); - /* Mark the return value as a pointer if needed. */ - if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) + /* All arguments and registers used for the call must be set up by + now! */ + + /* Generate the actual call instruction. */ + emit_call_1 (funexp, fndecl, funtype, unadjusted_args_size, + args_size.constant, struct_value_size, + FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), + valreg, old_inhibit_defer_pop, call_fusage, + ((is_const ? ECF_IS_CONST : 0) + | (nothrow ? ECF_NOTHROW : 0) + | (pass == 0 ? ECF_SIBCALL : 0))); + + /* If call is cse'able, make appropriate pair of reg-notes around it. + Test valreg so we don't crash; may safely ignore `const' + if return type is void. Disable for PARALLEL return values, because + we have no way to move such values into a pseudo register. */ + if (is_const && valreg != 0 && GET_CODE (valreg) != PARALLEL) { - tree pointed_to = TREE_TYPE (TREE_TYPE (exp)); - mark_reg_pointer (temp, TYPE_ALIGN (pointed_to) / BITS_PER_UNIT); - } + rtx note = 0; + rtx temp = gen_reg_rtx (GET_MODE (valreg)); + rtx insns; - /* Construct an "equal form" for the value which mentions all the - arguments in order as well as the function name. */ + /* Mark the return value as a pointer if needed. */ + if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) + { + tree pointed_to = TREE_TYPE (TREE_TYPE (exp)); + mark_reg_pointer (temp, TYPE_ALIGN (pointed_to) / BITS_PER_UNIT); + } + + /* Construct an "equal form" for the value which mentions all the + arguments in order as well as the function name. */ #ifdef PUSH_ARGS_REVERSED - for (i = 0; i < num_actuals; i++) - note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); + for (i = 0; i < num_actuals; i++) + note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); #else - for (i = num_actuals - 1; i >= 0; i--) - note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); + for (i = num_actuals - 1; i >= 0; i--) + note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); #endif - note = gen_rtx_EXPR_LIST (VOIDmode, funexp, note); - - insns = get_insns (); - end_sequence (); - - emit_libcall_block (insns, temp, valreg, note); + note = gen_rtx_EXPR_LIST (VOIDmode, funexp, note); - valreg = temp; - } - else if (is_const) - { - /* Otherwise, just write out the sequence without a note. */ - rtx insns = get_insns (); - - end_sequence (); - emit_insns (insns); - } - else if (is_malloc) - { - rtx temp = gen_reg_rtx (GET_MODE (valreg)); - rtx last, insns; + insns = get_insns (); + end_sequence (); - /* The return value from a malloc-like function is a pointer. */ - if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) - mark_reg_pointer (temp, BIGGEST_ALIGNMENT / BITS_PER_UNIT); - - emit_move_insn (temp, valreg); - - /* The return value from a malloc-like function can not alias - anything else. */ - last = get_last_insn (); - REG_NOTES (last) = - gen_rtx_EXPR_LIST (REG_NOALIAS, temp, REG_NOTES (last)); + emit_libcall_block (insns, temp, valreg, note); + + valreg = temp; + } + else if (is_const) + { + /* Otherwise, just write out the sequence without a note. */ + rtx insns = get_insns (); - /* Write out the sequence. 
*/ - insns = get_insns (); - end_sequence (); - emit_insns (insns); - valreg = temp; - } + end_sequence (); + emit_insns (insns); + } + else if (is_malloc) + { + rtx temp = gen_reg_rtx (GET_MODE (valreg)); + rtx last, insns; + + /* The return value from a malloc-like function is a pointer. */ + if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) + mark_reg_pointer (temp, BIGGEST_ALIGNMENT / BITS_PER_UNIT); + + emit_move_insn (temp, valreg); + + /* The return value from a malloc-like function can not alias + anything else. */ + last = get_last_insn (); + REG_NOTES (last) = + gen_rtx_EXPR_LIST (REG_NOALIAS, temp, REG_NOTES (last)); + + /* Write out the sequence. */ + insns = get_insns (); + end_sequence (); + emit_insns (insns); + valreg = temp; + } - /* For calls to `setjmp', etc., inform flow.c it should complain - if nonvolatile values are live. */ + /* For calls to `setjmp', etc., inform flow.c it should complain + if nonvolatile values are live. For functions that cannot return, + inform flow that control does not fall through. */ - if (returns_twice) - { - /* The NOTE_INSN_SETJMP note must be emitted immediately after the - CALL_INSN. Some ports emit more than just a CALL_INSN above, so - we must search for it here. */ - rtx last = get_last_insn (); - while (GET_CODE (last) != CALL_INSN) + if (returns_twice || is_volatile || is_longjmp || pass == 0) { - last = PREV_INSN (last); - /* There was no CALL_INSN? */ - if (last == before_call) - abort (); - } - emit_note_after (NOTE_INSN_SETJMP, last); - current_function_calls_setjmp = 1; - } + /* The barrier or NOTE_INSN_SETJMP note must be emitted + immediately after the CALL_INSN. Some ports emit more + than just a CALL_INSN above, so we must search for it here. */ - if (is_longjmp) - current_function_calls_longjmp = 1; + rtx last = get_last_insn (); + while (GET_CODE (last) != CALL_INSN) + { + last = PREV_INSN (last); + /* There was no CALL_INSN? */ + if (last == before_call) + abort (); + } - /* Notice functions that cannot return. - If optimizing, insns emitted below will be dead. - If not optimizing, they will exist, which is useful - if the user uses the `return' command in the debugger. */ + if (returns_twice) + { + emit_note_after (NOTE_INSN_SETJMP, last); + current_function_calls_setjmp = 1; + sibcall_failure = 1; + } + else + emit_barrier_after (last); + } - if (is_volatile || is_longjmp) - emit_barrier (); + if (is_longjmp) + current_function_calls_longjmp = 1, sibcall_failure = 1; - /* If value type not void, return an rtx for the value. */ + /* If value type not void, return an rtx for the value. */ - /* If there are cleanups to be called, don't use a hard reg as target. - We need to double check this and see if it matters anymore. */ - if (any_pending_cleanups (1) - && target && REG_P (target) - && REGNO (target) < FIRST_PSEUDO_REGISTER) - target = 0; + /* If there are cleanups to be called, don't use a hard reg as target. + We need to double check this and see if it matters anymore. 
*/ + if (any_pending_cleanups (1) + && target && REG_P (target) + && REGNO (target) < FIRST_PSEUDO_REGISTER) + target = 0, sibcall_failure = 1; - if (TYPE_MODE (TREE_TYPE (exp)) == VOIDmode - || ignore) - { - target = const0_rtx; - } - else if (structure_value_addr) - { - if (target == 0 || GET_CODE (target) != MEM) + if (TYPE_MODE (TREE_TYPE (exp)) == VOIDmode + || ignore) { - target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), - memory_address (TYPE_MODE (TREE_TYPE (exp)), - structure_value_addr)); - MEM_SET_IN_STRUCT_P (target, - AGGREGATE_TYPE_P (TREE_TYPE (exp))); + target = const0_rtx; } - } - else if (pcc_struct_value) - { - /* This is the special C++ case where we need to - know what the true target was. We take care to - never use this value more than once in one expression. */ - target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), - copy_to_reg (valreg)); - MEM_SET_IN_STRUCT_P (target, AGGREGATE_TYPE_P (TREE_TYPE (exp))); - } - /* Handle calls that return values in multiple non-contiguous locations. - The Irix 6 ABI has examples of this. */ - else if (GET_CODE (valreg) == PARALLEL) - { - int bytes = int_size_in_bytes (TREE_TYPE (exp)); - - if (target == 0) + else if (structure_value_addr) { - target = assign_stack_temp (TYPE_MODE (TREE_TYPE (exp)), bytes, 0); + if (target == 0 || GET_CODE (target) != MEM) + { + target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), + memory_address (TYPE_MODE (TREE_TYPE (exp)), + structure_value_addr)); + MEM_SET_IN_STRUCT_P (target, + AGGREGATE_TYPE_P (TREE_TYPE (exp))); + } + } + else if (pcc_struct_value) + { + /* This is the special C++ case where we need to + know what the true target was. We take care to + never use this value more than once in one expression. */ + target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), + copy_to_reg (valreg)); MEM_SET_IN_STRUCT_P (target, AGGREGATE_TYPE_P (TREE_TYPE (exp))); - preserve_temp_slots (target); } + /* Handle calls that return values in multiple non-contiguous locations. + The Irix 6 ABI has examples of this. */ + else if (GET_CODE (valreg) == PARALLEL) + { + int bytes = int_size_in_bytes (TREE_TYPE (exp)); - if (! rtx_equal_p (target, valreg)) - emit_group_store (target, valreg, bytes, - TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT); - } - else if (target && GET_MODE (target) == TYPE_MODE (TREE_TYPE (exp)) - && GET_MODE (target) == GET_MODE (valreg)) - /* TARGET and VALREG cannot be equal at this point because the latter - would not have REG_FUNCTION_VALUE_P true, while the former would if - it were referring to the same register. - - If they refer to the same register, this move will be a no-op, except - when function inlining is being done. */ - emit_move_insn (target, valreg); - else if (TYPE_MODE (TREE_TYPE (exp)) == BLKmode) - target = copy_blkmode_from_reg (target, valreg, TREE_TYPE (exp)); - else - target = copy_to_reg (valreg); + if (target == 0) + { + target = assign_stack_temp (TYPE_MODE (TREE_TYPE (exp)), + bytes, 0); + MEM_SET_IN_STRUCT_P (target, AGGREGATE_TYPE_P (TREE_TYPE (exp))); + preserve_temp_slots (target); + } + + if (! rtx_equal_p (target, valreg)) + emit_group_store (target, valreg, bytes, + TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT); + /* We can not support sibling calls for this case. 
*/ + sibcall_failure = 1; + } + else if (target + && GET_MODE (target) == TYPE_MODE (TREE_TYPE (exp)) + && GET_MODE (target) == GET_MODE (valreg)) + { + /* TARGET and VALREG cannot be equal at this point because the + latter would not have REG_FUNCTION_VALUE_P true, while the + former would if it were referring to the same register. + + If they refer to the same register, this move will be a no-op, + except when function inlining is being done. */ + emit_move_insn (target, valreg); + } + else if (TYPE_MODE (TREE_TYPE (exp)) == BLKmode) + target = copy_blkmode_from_reg (target, valreg, TREE_TYPE (exp)); + else + target = copy_to_reg (valreg); #ifdef PROMOTE_FUNCTION_RETURN - /* If we promoted this return value, make the proper SUBREG. TARGET - might be const0_rtx here, so be careful. */ - if (GET_CODE (target) == REG - && TYPE_MODE (TREE_TYPE (exp)) != BLKmode - && GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp))) - { - tree type = TREE_TYPE (exp); - int unsignedp = TREE_UNSIGNED (type); + /* If we promoted this return value, make the proper SUBREG. TARGET + might be const0_rtx here, so be careful. */ + if (GET_CODE (target) == REG + && TYPE_MODE (TREE_TYPE (exp)) != BLKmode + && GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp))) + { + tree type = TREE_TYPE (exp); + int unsignedp = TREE_UNSIGNED (type); - /* If we don't promote as expected, something is wrong. */ - if (GET_MODE (target) - != promote_mode (type, TYPE_MODE (type), &unsignedp, 1)) - abort (); + /* If we don't promote as expected, something is wrong. */ + if (GET_MODE (target) + != promote_mode (type, TYPE_MODE (type), &unsignedp, 1)) + abort (); - target = gen_rtx_SUBREG (TYPE_MODE (type), target, 0); - SUBREG_PROMOTED_VAR_P (target) = 1; - SUBREG_PROMOTED_UNSIGNED_P (target) = unsignedp; - } + target = gen_rtx_SUBREG (TYPE_MODE (type), target, 0); + SUBREG_PROMOTED_VAR_P (target) = 1; + SUBREG_PROMOTED_UNSIGNED_P (target) = unsignedp; + } #endif - /* If size of args is variable or this was a constructor call for a stack - argument, restore saved stack-pointer value. */ + /* If size of args is variable or this was a constructor call for a stack + argument, restore saved stack-pointer value. */ - if (old_stack_level) - { - emit_stack_restore (SAVE_BLOCK, old_stack_level, NULL_RTX); - pending_stack_adjust = old_pending_adj; + if (old_stack_level) + { + emit_stack_restore (SAVE_BLOCK, old_stack_level, NULL_RTX); + pending_stack_adjust = old_pending_adj; #ifdef ACCUMULATE_OUTGOING_ARGS - stack_arg_under_construction = old_stack_arg_under_construction; - highest_outgoing_arg_in_use = initial_highest_arg_in_use; - stack_usage_map = initial_stack_usage_map; + stack_arg_under_construction = old_stack_arg_under_construction; + highest_outgoing_arg_in_use = initial_highest_arg_in_use; + stack_usage_map = initial_stack_usage_map; #endif - } + sibcall_failure = 1; + } #ifdef ACCUMULATE_OUTGOING_ARGS - else - { + else + { #ifdef REG_PARM_STACK_SPACE - if (save_area) - restore_fixed_argument_area (save_area, argblock, - high_to_save, low_to_save); + if (save_area) + { + restore_fixed_argument_area (save_area, argblock, + high_to_save, low_to_save); + sibcall_failure = 1; + } #endif - /* If we saved any argument areas, restore them. 
*/
-  for (i = 0; i < num_actuals; i++)
-    if (args[i].save_area)
-      {
-	enum machine_mode save_mode = GET_MODE (args[i].save_area);
-	rtx stack_area
-	  = gen_rtx_MEM (save_mode,
-			 memory_address (save_mode,
-					 XEXP (args[i].stack_slot, 0)));
-
-	if (save_mode != BLKmode)
-	  emit_move_insn (stack_area, args[i].save_area);
-	else
-	  emit_block_move (stack_area, validize_mem (args[i].save_area),
-			   GEN_INT (args[i].size.constant),
-			   PARM_BOUNDARY / BITS_PER_UNIT);
-      }
+	  /* If we saved any argument areas, restore them.  */
+	  for (i = 0; i < num_actuals; i++)
+	    if (args[i].save_area)
+	      {
+		enum machine_mode save_mode = GET_MODE (args[i].save_area);
+		rtx stack_area
+		  = gen_rtx_MEM (save_mode,
+				 memory_address (save_mode,
+						 XEXP (args[i].stack_slot, 0)));
+
+		if (save_mode != BLKmode)
+		  emit_move_insn (stack_area, args[i].save_area);
+		else
+		  emit_block_move (stack_area,
+				   validize_mem (args[i].save_area),
+				   GEN_INT (args[i].size.constant),
+				   PARM_BOUNDARY / BITS_PER_UNIT);
+		sibcall_failure = 1;
+	      }
-      highest_outgoing_arg_in_use = initial_highest_arg_in_use;
-      stack_usage_map = initial_stack_usage_map;
-    }
+	  highest_outgoing_arg_in_use = initial_highest_arg_in_use;
+	  stack_usage_map = initial_stack_usage_map;
+	}
 #endif
-  /* If this was alloca, record the new stack level for nonlocal gotos.
-     Check for the handler slots since we might not have a save area
-     for non-local gotos.  */
+      /* If this was alloca, record the new stack level for nonlocal gotos.
+	 Check for the handler slots since we might not have a save area
+	 for non-local gotos.  */
-  if (may_be_alloca && nonlocal_goto_handler_slots != 0)
-    emit_stack_save (SAVE_NONLOCAL, &nonlocal_goto_stack_level, NULL_RTX);
+      if (may_be_alloca && nonlocal_goto_handler_slots != 0)
+	emit_stack_save (SAVE_NONLOCAL, &nonlocal_goto_stack_level, NULL_RTX);
-  pop_temp_slots ();
+      pop_temp_slots ();
+
+      /* Free up storage we no longer need.  */
+      for (i = 0; i < num_actuals; ++i)
+	if (args[i].aligned_regs)
+	  free (args[i].aligned_regs);
+
+      insns = get_insns ();
+      end_sequence ();
+
+      if (pass == 0)
+	{
+	  tail_call_insns = insns;
+
+	  /* If the current function's argument block is not large enough
+	     to hold the outgoing arguments, or we encountered some other
+	     situation we couldn't handle, zero out the sequence.  */
+	  if (current_function_args_size < args_size.constant
+	      || sibcall_failure)
+	    tail_call_insns = NULL_RTX;
+
+	  /* Restore the pending stack adjustment now that we have
+	     finished generating the sibling call sequence.  */
+	  pending_stack_adjust = save_pending_stack_adjust;
+	}
+      else
+	normal_call_insns = insns;
+    }
+
+  /* The function optimize_sibling_and_tail_recursive_calls doesn't
+     handle CALL_PLACEHOLDERs inside other CALL_PLACEHOLDERs.  This
+     can happen if the arguments to this function call an inline
+     function whose expansion contains another CALL_PLACEHOLDER.
+
+     If there are any C_Ps in any of these sequences, replace them
+     with their normal call.
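+
+     For example (an illustrative sketch, not code from this change):
+
+	 int f (int x) { return g (h (x)); }
+
+     If h is expanded inline while the argument of g is being computed,
+     and h's expansion contains a CALL_PLACEHOLDER of its own, that
+     placeholder is buried inside the sequences recorded here; the loops
+     below resolve any such placeholder to its normal call.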
*/ + + for (insn = normal_call_insns; insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + replace_call_placeholder (insn, sibcall_use_normal); + + for (insn = tail_call_insns; insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + replace_call_placeholder (insn, sibcall_use_normal); + + for (insn = tail_recursion_insns; insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + replace_call_placeholder (insn, sibcall_use_normal); + + /* If this was a potential tail recursion site, then emit a + CALL_PLACEHOLDER with the normal and the tail recursion streams. + One of them will be selected later. */ + if (tail_recursion_insns || tail_call_insns) + { + /* The tail recursion label must be kept around. We could expose + its use in the CALL_PLACEHOLDER, but that creates unwanted edges + and makes determining true tail recursion sites difficult. + + So we set LABEL_PRESERVE_P here, then clear it when we select + one of the call sequences after rtl generation is complete. */ + if (tail_recursion_insns) + LABEL_PRESERVE_P (tail_recursion_label) = 1; + emit_call_insn (gen_rtx_CALL_PLACEHOLDER (VOIDmode, normal_call_insns, + tail_call_insns, + tail_recursion_insns, + tail_recursion_label)); + } + else + emit_insns (normal_call_insns); - /* Free up storage we no longer need. */ - for (i = 0; i < num_actuals; ++i) - if (args[i].aligned_regs) - free (args[i].aligned_regs); + currently_expanding_call--; return target; } @@ -3171,7 +3488,9 @@ emit_library_call VPARAMS((rtx orgfun, int no_queue, enum machine_mode outmode, original_args_size.constant, args_size.constant, 0, FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), outmode != VOIDmode ? hard_libcall_value (outmode) : NULL_RTX, - old_inhibit_defer_pop + 1, call_fusage, no_queue, nothrow); + old_inhibit_defer_pop + 1, call_fusage, + ((no_queue ? ECF_IS_CONST : 0) + | (nothrow ? ECF_NOTHROW : 0))); pop_temp_slots (); @@ -3772,7 +4091,9 @@ emit_library_call_value VPARAMS((rtx orgfun, rtx value, int no_queue, struct_value_size, FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), mem_value == 0 ? hard_libcall_value (outmode) : NULL_RTX, - old_inhibit_defer_pop + 1, call_fusage, is_const, nothrow); + old_inhibit_defer_pop + 1, call_fusage, + ((is_const ? ECF_IS_CONST : 0) + | (nothrow ? ECF_NOTHROW : 0))); /* Now restore inhibit_defer_pop to its actual original value. */ OK_DEFER_POP; diff --git a/gcc/final.c b/gcc/final.c index 01dd1ba6b830..90437e0eb020 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -4019,7 +4019,8 @@ leaf_function_p () return 0; if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE - && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == CALL_INSN) + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == CALL_INSN + && ! SIBLING_CALL_P (XVECEXP (PATTERN (insn), 0, 0))) return 0; } for (insn = current_function_epilogue_delay_list; insn; insn = XEXP (insn, 1)) @@ -4028,7 +4029,8 @@ leaf_function_p () return 0; if (GET_CODE (XEXP (insn, 0)) == INSN && GET_CODE (PATTERN (XEXP (insn, 0))) == SEQUENCE - && GET_CODE (XVECEXP (PATTERN (XEXP (insn, 0)), 0, 0)) == CALL_INSN) + && GET_CODE (XVECEXP (PATTERN (XEXP (insn, 0)), 0, 0)) == CALL_INSN + && ! 
SIBLING_CALL_P (XVECEXP (PATTERN (XEXP (insn, 0)), 0, 0))) return 0; } diff --git a/gcc/flow.c b/gcc/flow.c index 7030c1100622..14189b9953b7 100644 --- a/gcc/flow.c +++ b/gcc/flow.c @@ -154,10 +154,12 @@ Boston, MA 02111-1307, USA. */ #ifndef HAVE_epilogue #define HAVE_epilogue 0 #endif - #ifndef HAVE_prologue #define HAVE_prologue 0 #endif +#ifndef HAVE_sibcall_epilogue +#define HAVE_sibcall_epilogue 0 +#endif /* The contents of the current function definition are allocated in this obstack, and all are freed at the end of the function. @@ -592,7 +594,8 @@ find_basic_blocks_1 (f) does not imply an abnormal edge, it will be a bit before everything can be updated. So continue to emit a noop at the end of such a block. */ - if (GET_CODE (end) == CALL_INSN) + if (GET_CODE (end) == CALL_INSN + && ! SIBLING_CALL_P (end)) { rtx nop = gen_rtx_USE (VOIDmode, const0_rtx); end = emit_insn_after (nop, end); @@ -644,7 +647,8 @@ find_basic_blocks_1 (f) imply an abnormal edge, it will be a bit before everything can be updated. So continue to emit a noop at the end of such a block. */ - if (GET_CODE (end) == CALL_INSN) + if (GET_CODE (end) == CALL_INSN + && ! SIBLING_CALL_P (end)) { rtx nop = gen_rtx_USE (VOIDmode, const0_rtx); end = emit_insn_after (nop, end); @@ -973,6 +977,15 @@ make_edges (label_value_list) } } + /* If this is a sibling call insn, then this is in effect a + combined call and return, and so we need an edge to the + exit block. No need to worry about EH edges, since we + wouldn't have created the sibling call in the first place. */ + + if (code == CALL_INSN && SIBLING_CALL_P (insn)) + make_edge (edge_cache, bb, EXIT_BLOCK_PTR, 0); + else + /* If this is a CALL_INSN, then mark it as reaching the active EH handler for this CALL_INSN. If we're handling asynchronous exceptions then any insn can reach any of the active handlers. @@ -3249,8 +3262,10 @@ propagate_block (bb, old, significant, flags) instructions. Warn about probable compiler losage. */ if (insn_is_dead && reload_completed - && (HAVE_epilogue || HAVE_prologue) - && prologue_epilogue_contains (insn)) + && (((HAVE_epilogue || HAVE_prologue) + && prologue_epilogue_contains (insn)) + || (HAVE_sibcall_epilogue + && sibcall_epilogue_contains (insn)))) { if (flags & PROP_KILL_DEAD_CODE) { diff --git a/gcc/function.c b/gcc/function.c index e92b58154f2f..dfd87618396d 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -152,8 +152,12 @@ struct function *cfun = 0; struct function *all_functions = 0; /* These arrays record the INSN_UIDs of the prologue and epilogue insns. */ -static int *prologue; -static int *epilogue; +static varray_type prologue; +static varray_type epilogue; + +/* Array of INSN_UIDs to hold the INSN_UIDs for each sibcall epilogue + in this function. */ +static varray_type sibcall_epilogue; /* In order to evaluate some expressions, such as function calls returning structures in memory, we need to temporarily allocate stack locations. @@ -271,13 +275,15 @@ static void pad_below PARAMS ((struct args_size *, enum machine_mode, static tree round_down PARAMS ((tree, int)); #endif static rtx round_trampoline_addr PARAMS ((rtx)); +static tree *identify_blocks_1 PARAMS ((rtx, tree *, tree *, tree *)); +static void reorder_blocks_1 PARAMS ((rtx, tree, varray_type *)); static tree blocks_nreverse PARAMS ((tree)); static int all_blocks PARAMS ((tree, tree *)); static tree *get_block_vector PARAMS ((tree, int *)); /* We always define `record_insns' even if its not used so that we can always export `prologue_epilogue_contains'. 
*/ -static int *record_insns PARAMS ((rtx)) ATTRIBUTE_UNUSED; -static int contains PARAMS ((rtx, int *)); +static void record_insns PARAMS ((rtx, varray_type *)) ATTRIBUTE_UNUSED; +static int contains PARAMS ((rtx, varray_type)); #ifdef HAVE_return static void emit_return_into_block PARAMS ((basic_block)); #endif @@ -1507,6 +1513,7 @@ fixup_var_refs (var, promoted_mode, unsignedp, ht) rtx first_insn = get_insns (); struct sequence_stack *stack = seq_stack; tree rtl_exps = rtl_expr_chain; + rtx insn; /* Must scan all insns for stack-refs that exceed the limit. */ fixup_var_refs_insns (var, promoted_mode, unsignedp, first_insn, @@ -1545,6 +1552,31 @@ fixup_var_refs (var, promoted_mode, unsignedp, ht) fixup_var_refs_insns (var, promoted_mode, unsignedp, catch_clauses, 0, 0); end_sequence (); + + /* Scan sequences saved in CALL_PLACEHOLDERS too. */ + for (insn = first_insn; insn; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + int i; + + /* Look at the Normal call, sibling call and tail recursion + sequences attached to the CALL_PLACEHOLDER. */ + for (i = 0; i < 3; i++) + { + rtx seq = XEXP (PATTERN (insn), i); + if (seq) + { + push_to_sequence (seq); + fixup_var_refs_insns (var, promoted_mode, unsignedp, + seq, 0, 0); + XEXP (PATTERN (insn), i) = get_insns (); + end_sequence (); + } + } + } + } } /* REPLACEMENTS is a pointer to a list of the struct fixup_replacement and X is @@ -5494,11 +5526,8 @@ identify_blocks (block, insns) rtx insns; { int n_blocks; - tree *block_vector; + tree *block_vector, *last_block_vector; tree *block_stack; - int depth = 0; - int current_block_number = 1; - rtx insn; if (block == 0) return; @@ -5508,35 +5537,83 @@ identify_blocks (block, insns) block_vector = get_block_vector (block, &n_blocks); block_stack = (tree *) xmalloc (n_blocks * sizeof (tree)); + last_block_vector = identify_blocks_1 (insns, block_vector + 1, + block_vector + n_blocks, block_stack); + + /* If we didn't use all of the subblocks, we've misplaced block notes. */ + /* ??? This appears to happen all the time. Latent bugs elsewhere? */ + if (0 && last_block_vector != block_vector + n_blocks) + abort (); + + free (block_vector); + free (block_stack); +} + +/* Subroutine of identify_blocks. Do the block substitution on the + insn chain beginning with INSNS. Recurse for CALL_PLACEHOLDER chains. + + BLOCK_STACK is pushed and popped for each BLOCK_BEGIN/BLOCK_END pair. + BLOCK_VECTOR is incremented for each block seen. */ + +static tree * +identify_blocks_1 (insns, block_vector, end_block_vector, orig_block_stack) + rtx insns; + tree *block_vector; + tree *end_block_vector; + tree *orig_block_stack; +{ + rtx insn; + tree *block_stack = orig_block_stack; + for (insn = insns; insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == NOTE) - { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) - { - tree b; + { + if (GET_CODE (insn) == NOTE) + { + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) + { + tree b; - /* If there are more block notes than BLOCKs, something - is badly wrong. */ - if (current_block_number == n_blocks) - abort (); + /* If there are more block notes than BLOCKs, something + is badly wrong. */ + if (block_vector == end_block_vector) + abort (); - b = block_vector[current_block_number++]; - NOTE_BLOCK (insn) = b; - block_stack[depth++] = b; - } - else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) - { - if (depth == 0) - /* There are more NOTE_INSN_BLOCK_ENDs that - NOTE_INSN_BLOCK_BEGs. 
Something is badly wrong. */ - abort (); + b = *block_vector++; + NOTE_BLOCK (insn) = b; + *block_stack++ = b; + } + else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) + { + /* If there are more NOTE_INSN_BLOCK_ENDs than + NOTE_INSN_BLOCK_BEGs, something is badly wrong. */ + if (block_stack == orig_block_stack) + abort (); - NOTE_BLOCK (insn) = block_stack[--depth]; - } - } + NOTE_BLOCK (insn) = *--block_stack; + } + } + else if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + rtx cp = PATTERN (insn); + + block_vector = identify_blocks_1 (XEXP (cp, 0), block_vector, + end_block_vector, block_stack); + if (XEXP (cp, 1)) + block_vector = identify_blocks_1 (XEXP (cp, 1), block_vector, + end_block_vector, block_stack); + if (XEXP (cp, 2)) + block_vector = identify_blocks_1 (XEXP (cp, 2), block_vector, + end_block_vector, block_stack); + } + } - free (block_vector); - free (block_stack); + /* If there are more NOTE_INSN_BLOCK_BEGINs than NOTE_INSN_BLOCK_ENDs, + something is badly wrong. */ + if (block_stack != orig_block_stack) + abort (); + + return block_vector; } /* Given a revised instruction chain, rebuild the tree structure of @@ -5550,7 +5627,6 @@ reorder_blocks (block, insns) rtx insns; { tree current_block = block; - rtx insn; varray_type block_stack; if (block == NULL_TREE) @@ -5562,35 +5638,7 @@ reorder_blocks (block, insns) BLOCK_SUBBLOCKS (current_block) = 0; BLOCK_CHAIN (current_block) = 0; - for (insn = insns; insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == NOTE) - { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) - { - tree block = NOTE_BLOCK (insn); - /* If we have seen this block before, copy it. */ - if (TREE_ASM_WRITTEN (block)) - { - block = copy_node (block); - NOTE_BLOCK (insn) = block; - } - BLOCK_SUBBLOCKS (block) = 0; - TREE_ASM_WRITTEN (block) = 1; - BLOCK_SUPERCONTEXT (block) = current_block; - BLOCK_CHAIN (block) = BLOCK_SUBBLOCKS (current_block); - BLOCK_SUBBLOCKS (current_block) = block; - current_block = block; - VARRAY_PUSH_TREE (block_stack, block); - } - else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) - { - NOTE_BLOCK (insn) = VARRAY_TOP_TREE (block_stack); - VARRAY_POP (block_stack); - BLOCK_SUBBLOCKS (current_block) - = blocks_nreverse (BLOCK_SUBBLOCKS (current_block)); - current_block = BLOCK_SUPERCONTEXT (current_block); - } - } + reorder_blocks_1 (insns, current_block, &block_stack); BLOCK_SUBBLOCKS (current_block) = blocks_nreverse (BLOCK_SUBBLOCKS (current_block)); @@ -5600,6 +5648,60 @@ reorder_blocks (block, insns) return current_block; } +/* Helper function for reorder_blocks. Process the insn chain beginning + at INSNS. Recurse for CALL_PLACEHOLDER insns. */ + +static void +reorder_blocks_1 (insns, current_block, p_block_stack) + rtx insns; + tree current_block; + varray_type *p_block_stack; +{ + rtx insn; + + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == NOTE) + { + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) + { + tree block = NOTE_BLOCK (insn); + /* If we have seen this block before, copy it. 
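+
+		     (TREE_ASM_WRITTEN is set below once a block has been
+		     placed; seeing it already set means these insns were
+		     duplicated, e.g. among the sequences of a
+		     CALL_PLACEHOLDER, so each copy needs its own node.)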
*/ + if (TREE_ASM_WRITTEN (block)) + { + block = copy_node (block); + NOTE_BLOCK (insn) = block; + } + BLOCK_SUBBLOCKS (block) = 0; + TREE_ASM_WRITTEN (block) = 1; + BLOCK_SUPERCONTEXT (block) = current_block; + BLOCK_CHAIN (block) = BLOCK_SUBBLOCKS (current_block); + BLOCK_SUBBLOCKS (current_block) = block; + current_block = block; + VARRAY_PUSH_TREE (*p_block_stack, block); + } + else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) + { + NOTE_BLOCK (insn) = VARRAY_TOP_TREE (*p_block_stack); + VARRAY_POP (*p_block_stack); + BLOCK_SUBBLOCKS (current_block) + = blocks_nreverse (BLOCK_SUBBLOCKS (current_block)); + current_block = BLOCK_SUPERCONTEXT (current_block); + } + } + else if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + rtx cp = PATTERN (insn); + reorder_blocks_1 (XEXP (cp, 0), current_block, p_block_stack); + if (XEXP (cp, 1)) + reorder_blocks_1 (XEXP (cp, 1), current_block, p_block_stack); + if (XEXP (cp, 2)) + reorder_blocks_1 (XEXP (cp, 2), current_block, p_block_stack); + } + } +} + /* Reverse the order of elements in the chain T of blocks, and return the new head of the chain (old last element). */ @@ -5757,6 +5859,7 @@ prepare_function_start () cfun->preferred_stack_boundary = STACK_BOUNDARY; #else cfun->stack_alignment_needed = 0; + cfun->preferred_stack_boundary = 0; #endif /* Set if a call to setjmp is seen. */ @@ -5900,8 +6003,11 @@ void init_function_for_compilation () { reg_renumber = 0; + /* No prologue/epilogue insns yet. */ - prologue = epilogue = 0; + VARRAY_GROW (prologue, 0); + VARRAY_GROW (epilogue, 0); + VARRAY_GROW (sibcall_epilogue, 0); } /* Indicate that the current function uses extra args @@ -6586,30 +6692,32 @@ expand_function_end (filename, line, end_bindings) expand_fixups (get_insns ()); } -/* Create an array that records the INSN_UIDs of INSNS (either a sequence - or a single insn). */ +/* Extend a vector that records the INSN_UIDs of INSNS (either a + sequence or a single insn). */ -static int * -record_insns (insns) +static void +record_insns (insns, vecp) rtx insns; + varray_type *vecp; { - int *vec; - if (GET_CODE (insns) == SEQUENCE) { int len = XVECLEN (insns, 0); - vec = (int *) oballoc ((len + 1) * sizeof (int)); - vec[len] = 0; + int i = VARRAY_SIZE (*vecp); + + VARRAY_GROW (*vecp, i + len); while (--len >= 0) - vec[len] = INSN_UID (XVECEXP (insns, 0, len)); + { + VARRAY_INT (*vecp, i) = INSN_UID (XVECEXP (insns, 0, len)); + ++i; + } } else { - vec = (int *) oballoc (2 * sizeof (int)); - vec[0] = INSN_UID (insns); - vec[1] = 0; + int i = VARRAY_SIZE (*vecp); + VARRAY_GROW (*vecp, i + 1); + VARRAY_INT (*vecp, i) = INSN_UID (insns); } - return vec; } /* Determine how many INSN_UIDs in VEC are part of INSN. 
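
    (The result is a count rather than a flag: callers such as
    reposition_prologue_and_epilogue_notes subtract it from the number
    of prologue or epilogue insns still to be found.)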
*/ @@ -6617,7 +6725,7 @@ record_insns (insns) static int contains (insn, vec) rtx insn; - int *vec; + varray_type vec; { register int i, j; @@ -6626,15 +6734,15 @@ contains (insn, vec) { int count = 0; for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--) - for (j = 0; vec[j]; j++) - if (INSN_UID (XVECEXP (PATTERN (insn), 0, i)) == vec[j]) + for (j = VARRAY_SIZE (vec) - 1; j >= 0; --j) + if (INSN_UID (XVECEXP (PATTERN (insn), 0, i)) == VARRAY_INT (vec, j)) count++; return count; } else { - for (j = 0; vec[j]; j++) - if (INSN_UID (insn) == vec[j]) + for (j = VARRAY_SIZE (vec) - 1; j >= 0; --j) + if (INSN_UID (insn) == VARRAY_INT (vec, j)) return 1; } return 0; @@ -6644,13 +6752,22 @@ int prologue_epilogue_contains (insn) rtx insn; { - if (prologue && contains (insn, prologue)) + if (contains (insn, prologue)) return 1; - if (epilogue && contains (insn, epilogue)) + if (contains (insn, epilogue)) return 1; return 0; } +int +sibcall_epilogue_contains (insn) + rtx insn; +{ + if (sibcall_epilogue) + return contains (insn, sibcall_epilogue); + return 0; +} + #ifdef HAVE_return /* Insert gen_return at the end of block BB. This also means updating block_for_insn appropriately. */ @@ -6698,7 +6815,7 @@ thread_prologue_and_epilogue_insns (f) /* Retain a map of the prologue insns. */ if (GET_CODE (seq) != SEQUENCE) seq = get_insns (); - prologue = record_insns (seq); + record_insns (seq, &prologue); emit_note (NULL, NOTE_INSN_PROLOGUE_END); /* GDB handles `break f' by setting a breakpoint on the first @@ -6875,7 +6992,7 @@ thread_prologue_and_epilogue_insns (f) /* Retain a map of the epilogue insns. */ if (GET_CODE (seq) != SEQUENCE) seq = get_insns (); - epilogue = record_insns (seq); + record_insns (seq, &epilogue); seq = gen_sequence (); end_sequence(); @@ -6888,6 +7005,35 @@ epilogue_done: if (insertted) commit_edge_insertions (); + +#ifdef HAVE_sibcall_epilogue + /* Emit sibling epilogues before any sibling call sites. */ + for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) + { + basic_block bb = e->src; + rtx insn = bb->end; + rtx i; + + if (GET_CODE (insn) != CALL_INSN + || ! SIBLING_CALL_P (insn)) + continue; + + start_sequence (); + seq = gen_sibcall_epilogue (); + end_sequence (); + + i = PREV_INSN (insn); + emit_insn_before (seq, insn); + + /* Update the UID to basic block map. */ + for (i = NEXT_INSN (i); i != insn; i = NEXT_INSN (i)) + set_block_for_insn (i, bb); + + /* Retain a map of the epilogue insns. Used in life analysis to + avoid getting rid of sibcall epilogue insns. */ + record_insns (seq, &sibcall_epilogue); + } +#endif } /* Reposition the prologue-end and epilogue-begin notes after instruction @@ -6898,90 +7044,82 @@ reposition_prologue_and_epilogue_notes (f) rtx f ATTRIBUTE_UNUSED; { #if defined (HAVE_prologue) || defined (HAVE_epilogue) - /* Reposition the prologue and epilogue notes. */ - if (n_basic_blocks) + int len; + + if ((len = VARRAY_SIZE (prologue)) > 0) { - int len; + register rtx insn, note = 0; - if (prologue) + /* Scan from the beginning until we reach the last prologue insn. + We apparently can't depend on basic_block_{head,end} after + reorg has run. */ + for (insn = f; len && insn; insn = NEXT_INSN (insn)) { - register rtx insn, note = 0; - - /* Scan from the beginning until we reach the last prologue insn. - We apparently can't depend on basic_block_{head,end} after - reorg has run. 
*/ - for (len = 0; prologue[len]; len++) - ; - for (insn = f; len && insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == NOTE) { - if (GET_CODE (insn) == NOTE) + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PROLOGUE_END) + note = insn; + } + else if ((len -= contains (insn, prologue)) == 0) + { + rtx next; + /* Find the prologue-end note if we haven't already, and + move it to just after the last prologue insn. */ + if (note == 0) { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PROLOGUE_END) - note = insn; + for (note = insn; (note = NEXT_INSN (note));) + if (GET_CODE (note) == NOTE + && NOTE_LINE_NUMBER (note) == NOTE_INSN_PROLOGUE_END) + break; } - else if ((len -= contains (insn, prologue)) == 0) - { - rtx next; - /* Find the prologue-end note if we haven't already, and - move it to just after the last prologue insn. */ - if (note == 0) - { - for (note = insn; (note = NEXT_INSN (note));) - if (GET_CODE (note) == NOTE - && NOTE_LINE_NUMBER (note) == NOTE_INSN_PROLOGUE_END) - break; - } - next = NEXT_INSN (note); + next = NEXT_INSN (note); - /* Whether or not we can depend on BLOCK_HEAD, - attempt to keep it up-to-date. */ - if (BLOCK_HEAD (0) == note) - BLOCK_HEAD (0) = next; + /* Whether or not we can depend on BLOCK_HEAD, + attempt to keep it up-to-date. */ + if (BLOCK_HEAD (0) == note) + BLOCK_HEAD (0) = next; - remove_insn (note); - add_insn_after (note, insn); - } + remove_insn (note); + add_insn_after (note, insn); } } + } + + if ((len = VARRAY_SIZE (epilogue)) > 0) + { + register rtx insn, note = 0; - if (epilogue) + /* Scan from the end until we reach the first epilogue insn. + We apparently can't depend on basic_block_{head,end} after + reorg has run. */ + for (insn = get_last_insn (); len && insn; insn = PREV_INSN (insn)) { - register rtx insn, note = 0; - - /* Scan from the end until we reach the first epilogue insn. - We apparently can't depend on basic_block_{head,end} after - reorg has run. */ - for (len = 0; epilogue[len]; len++) - ; - for (insn = get_last_insn (); len && insn; insn = PREV_INSN (insn)) + if (GET_CODE (insn) == NOTE) { - if (GET_CODE (insn) == NOTE) + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EPILOGUE_BEG) + note = insn; + } + else if ((len -= contains (insn, epilogue)) == 0) + { + /* Find the epilogue-begin note if we haven't already, and + move it to just before the first epilogue insn. */ + if (note == 0) { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EPILOGUE_BEG) - note = insn; + for (note = insn; (note = PREV_INSN (note));) + if (GET_CODE (note) == NOTE + && NOTE_LINE_NUMBER (note) == NOTE_INSN_EPILOGUE_BEG) + break; } - else if ((len -= contains (insn, epilogue)) == 0) - { - /* Find the epilogue-begin note if we haven't already, and - move it to just before the first epilogue insn. */ - if (note == 0) - { - for (note = insn; (note = PREV_INSN (note));) - if (GET_CODE (note) == NOTE - && NOTE_LINE_NUMBER (note) == NOTE_INSN_EPILOGUE_BEG) - break; - } - /* Whether or not we can depend on BLOCK_HEAD, - attempt to keep it up-to-date. */ - if (n_basic_blocks - && BLOCK_HEAD (n_basic_blocks-1) == insn) - BLOCK_HEAD (n_basic_blocks-1) = note; + /* Whether or not we can depend on BLOCK_HEAD, + attempt to keep it up-to-date. 
*/ + if (n_basic_blocks + && BLOCK_HEAD (n_basic_blocks-1) == insn) + BLOCK_HEAD (n_basic_blocks-1) = note; - remove_insn (note); - add_insn_before (note, insn); - } + remove_insn (note); + add_insn_before (note, insn); } } } @@ -7095,4 +7233,8 @@ init_function_once () { ggc_add_root (&all_functions, 1, sizeof all_functions, mark_function_chain); + + VARRAY_INT_INIT (prologue, 0, "prologue"); + VARRAY_INT_INIT (epilogue, 0, "epilogue"); + VARRAY_INT_INIT (sibcall_epilogue, 0, "sibcall_epilogue"); } diff --git a/gcc/genflags.c b/gcc/genflags.c index d1f19e08b815..39b5354e7dc2 100644 --- a/gcc/genflags.c +++ b/gcc/genflags.c @@ -174,11 +174,15 @@ gen_insn (insn) call_value_pop) ignoring the extra arguments that are passed for some machines, so by default, turn off the prototype. */ - obstack_ptr = (name[0] == 'c' + obstack_ptr = ((name[0] == 'c' || name[0] == 's') && (!strcmp (name, "call") || !strcmp (name, "call_value") || !strcmp (name, "call_pop") - || !strcmp (name, "call_value_pop"))) + || !strcmp (name, "call_value_pop") + || !strcmp (name, "sibcall") + || !strcmp (name, "sibcall_value") + || !strcmp (name, "sibcall_pop") + || !strcmp (name, "sibcall_value_pop"))) ? &call_obstack : &normal_obstack; obstack_grow (obstack_ptr, &insn, sizeof (rtx)); diff --git a/gcc/integrate.c b/gcc/integrate.c index cf2200ec9599..e0374e1d9ff2 100644 --- a/gcc/integrate.c +++ b/gcc/integrate.c @@ -66,19 +66,22 @@ extern struct obstack *function_maybepermanent_obstack; static rtvec initialize_for_inline PARAMS ((tree)); static void note_modified_parmregs PARAMS ((rtx, rtx, void *)); static void integrate_parm_decls PARAMS ((tree, struct inline_remap *, - rtvec)); + rtvec)); static tree integrate_decl_tree PARAMS ((tree, - struct inline_remap *)); + struct inline_remap *)); static void subst_constants PARAMS ((rtx *, rtx, - struct inline_remap *, int)); + struct inline_remap *, int)); static void set_block_origin_self PARAMS ((tree)); static void set_decl_origin_self PARAMS ((tree)); static void set_block_abstract_flags PARAMS ((tree, int)); static void process_reg_param PARAMS ((struct inline_remap *, rtx, - rtx)); + rtx)); void set_decl_abstract_flags PARAMS ((tree, int)); static rtx expand_inline_function_eh_labelmap PARAMS ((rtx)); static void mark_stores PARAMS ((rtx, rtx, void *)); +static void save_parm_insns PARAMS ((rtx, rtx)); +static void copy_insn_list PARAMS ((rtx, struct inline_remap *, + rtx)); static int compare_blocks PARAMS ((const PTR, const PTR)); static int find_block PARAMS ((const PTR, const PTR)); @@ -423,16 +426,7 @@ save_for_inline_nocopy (fndecl) perform constant folding when its incoming value is constant). Otherwise, we have to copy its value into a new register and track the new register's life. */ - in_nonparm_insns = 0; - for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn)) - { - if (insn == first_nonparm_insn) - in_nonparm_insns = 1; - - if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') - /* Record what interesting things happen to our parameters. */ - note_stores (PATTERN (insn), note_modified_parmregs, NULL); - } + save_parm_insns (insn, first_nonparm_insn); /* We have now allocated all that needs to be allocated permanently on the rtx obstack. Set our high-water mark, so that we @@ -449,6 +443,48 @@ save_for_inline_nocopy (fndecl) /* Clean up. */ free (parmdecl_map); } + +/* Scan the chain of insns to see what happens to our PARM_DECLs. 
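(This scan was
+   broken out of save_for_inline_nocopy so that it can recurse into the
+   sequences attached to CALL_PLACEHOLDER insns.)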
If a + PARM_DECL is used but never modified, we can substitute its rtl directly + when expanding inline (and perform constant folding when its incoming + value is constant). Otherwise, we have to copy its value into a new + register and track the new register's life. */ + +static void +save_parm_insns (insn, first_nonparm_insn) + rtx insn; + rtx first_nonparm_insn; +{ + in_nonparm_insns = 0; + + if (insn == NULL_RTX) + return; + + for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn)) + { + if (insn == first_nonparm_insn) + in_nonparm_insns = 1; + + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + /* Record what interesting things happen to our parameters. */ + note_stores (PATTERN (insn), note_modified_parmregs, NULL); + + /* If this is a CALL_PLACEHOLDER insn then we need to look into the + three attached sequences: normal call, sibling call and tail + recursion. */ + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + int i; + + for (i = 0; i < 3; i++) + save_parm_insns (XEXP (PATTERN (insn), i), + first_nonparm_insn); + } + } + } +} /* Note whether a parameter is modified or not. */ @@ -577,13 +613,11 @@ expand_inline_function (fndecl, parms, target, ignore, type, : parm_insns); tree *arg_trees; rtx *arg_vals; - rtx insn; int max_regno; register int i; int min_labelno = inl_f->emit->x_first_label_num; int max_labelno = inl_f->inl_max_label_num; int nargs; - rtx local_return_label = 0; rtx loc; rtx stack_save = 0; rtx temp; @@ -1089,7 +1123,100 @@ expand_inline_function (fndecl, parms, target, ignore, type, if (inl_f->calls_alloca) emit_stack_save (SAVE_BLOCK, &stack_save, NULL_RTX); - /* Now copy the insns one by one. Do this in two passes, first the insns and + /* Now copy the insns one by one. */ + copy_insn_list (insns, map, static_chain_value); + + /* Restore the stack pointer if we saved it above. */ + if (inl_f->calls_alloca) + emit_stack_restore (SAVE_BLOCK, stack_save, NULL_RTX); + + if (! cfun->x_whole_function_mode_p) + /* In statement-at-a-time mode, we just tell the front-end to add + this block to the list of blocks at this binding level. We + can't do it the way it's done for function-at-a-time mode the + superblocks have not been created yet. */ + insert_block (block); + else + { + BLOCK_CHAIN (block) + = BLOCK_CHAIN (DECL_INITIAL (current_function_decl)); + BLOCK_CHAIN (DECL_INITIAL (current_function_decl)) = block; + } + + /* End the scope containing the copied formal parameter variables + and copied LABEL_DECLs. We pass NULL_TREE for the variables list + here so that expand_end_bindings will not check for unused + variables. That's already been checked for when the inlined + function was defined. */ + expand_end_bindings (NULL_TREE, 1, 1); + + /* Must mark the line number note after inlined functions as a repeat, so + that the test coverage code can avoid counting the call twice. This + just tells the code to ignore the immediately following line note, since + there already exists a copy of this note before the expanded inline call. + This line number note is still needed for debugging though, so we can't + delete it. */ + if (flag_test_coverage) + emit_note (0, NOTE_REPEATED_LINE_NUMBER); + + emit_line_note (input_filename, lineno); + + /* If the function returns a BLKmode object in a register, copy it + out of the temp register into a BLKmode memory object. */ + if (target + && TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))) == BLKmode + && ! 
aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl)))) + target = copy_blkmode_from_reg (0, target, TREE_TYPE (TREE_TYPE (fndecl))); + + if (structure_value_addr) + { + target = gen_rtx_MEM (TYPE_MODE (type), + memory_address (TYPE_MODE (type), + structure_value_addr)); + MEM_SET_IN_STRUCT_P (target, 1); + } + + /* Make sure we free the things we explicitly allocated with xmalloc. */ + if (real_label_map) + free (real_label_map); + VARRAY_FREE (map->const_equiv_varray); + free (map->reg_map); + VARRAY_FREE (map->block_map); + free (map->insn_map); + free (map); + free (arg_vals); + free (arg_trees); + + inlining = inlining_previous; + + return target; +} + +/* Make copies of each insn in the given list using the mapping + computed in expand_inline_function. This function may call itself for + insns containing sequences. + + Copying is done in two passes, first the insns and then their REG_NOTES, + just like save_for_inline. + + If static_chain_value is non-zero, it represents the context-pointer + register for the function. */ + +static void +copy_insn_list (insns, map, static_chain_value) + rtx insns; + struct inline_remap *map; + rtx static_chain_value; +{ + register int i; + rtx insn; + rtx temp; + rtx local_return_label = NULL_RTX; +#ifdef HAVE_cc0 + rtx cc0_insn = 0; +#endif + + /* Copy the insns one by one. Do this in two passes, first the insns and then their REG_NOTES, just like save_for_inline. */ /* This loop is very similar to the loop in copy_loop_body in unroll.c. */ @@ -1283,11 +1410,50 @@ expand_inline_function (fndecl, parms, target, ignore, type, break; case CALL_INSN: + /* If this is a CALL_PLACEHOLDER insn then we need to copy the + three attached sequences: normal call, sibling call and tail + recursion. */ + if (GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + rtx sequence[3]; + rtx tail_label; + + for (i = 0; i < 3; i++) + { + rtx seq; + + sequence[i] = NULL_RTX; + seq = XEXP (PATTERN (insn), i); + if (seq) + { + start_sequence (); + copy_insn_list (seq, map, static_chain_value); + sequence[i] = get_insns (); + end_sequence (); + } + } + + /* Find the new tail recursion label. + It will already be substituted into sequence[2]. */ + tail_label = copy_rtx_and_substitute (XEXP (PATTERN (insn), 3), + map, 0); + + copy = emit_call_insn (gen_rtx_CALL_PLACEHOLDER (VOIDmode, + sequence[0], + sequence[1], + sequence[2], + tail_label)); + break; + } + pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0); copy = emit_call_insn (pattern); + SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn); + /* Because the USAGE information potentially contains objects other than hard registers, we need to copy it. */ + CALL_INSN_FUNCTION_USAGE (copy) = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map, 0); @@ -1299,7 +1465,7 @@ expand_inline_function (fndecl, parms, target, ignore, type, #endif try_constants (copy, map); - /* Be lazy and assume CALL_INSNs clobber all hard registers. */ + /* Be lazy and assume CALL_INSNs clobber all hard registers. */ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0; break; @@ -1316,14 +1482,23 @@ expand_inline_function (fndecl, parms, target, ignore, type, break; case NOTE: - /* It is important to discard function-end and function-beg notes, - so we have only one of each in the current function. 
- Also, NOTE_INSN_DELETED notes aren't useful (save_for_inline + /* NOTE_INSN_FUNCTION_END and NOTE_INSN_FUNCTION_BEG are + discarded because it is important to have only one of + each in the current function. + + NOTE_INSN_DELETED notes aren't useful (save_for_inline deleted these in the copy used for continuing compilation, - not the copy used for inlining). */ + not the copy used for inlining). + + NOTE_INSN_BASIC_BLOCK is discarded because the saved bb + pointer (which will soon be dangling) confuses flow's + attempts to preserve bb structures during the compilation + of a function. */ + if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_FUNCTION_END && NOTE_LINE_NUMBER (insn) != NOTE_INSN_FUNCTION_BEG - && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED) + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK) { copy = emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn)); @@ -1403,71 +1578,6 @@ expand_inline_function (fndecl, parms, target, ignore, type, if (local_return_label) emit_label (local_return_label); - - /* Restore the stack pointer if we saved it above. */ - if (inl_f->calls_alloca) - emit_stack_restore (SAVE_BLOCK, stack_save, NULL_RTX); - - if (! cfun->x_whole_function_mode_p) - /* In statement-at-a-time mode, we just tell the front-end to add - this block to the list of blocks at this binding level. We - can't do it the way it's done for function-at-a-time mode the - superblocks have not been created yet. */ - insert_block (block); - else - { - BLOCK_CHAIN (block) - = BLOCK_CHAIN (DECL_INITIAL (current_function_decl)); - BLOCK_CHAIN (DECL_INITIAL (current_function_decl)) = block; - } - - /* End the scope containing the copied formal parameter variables - and copied LABEL_DECLs. We pass NULL_TREE for the variables list - here so that expand_end_bindings will not check for unused - variables. That's already been checked for when the inlined - function was defined. */ - expand_end_bindings (NULL_TREE, 1, 1); - - /* Must mark the line number note after inlined functions as a repeat, so - that the test coverage code can avoid counting the call twice. This - just tells the code to ignore the immediately following line note, since - there already exists a copy of this note before the expanded inline call. - This line number note is still needed for debugging though, so we can't - delete it. */ - if (flag_test_coverage) - emit_note (0, NOTE_REPEATED_LINE_NUMBER); - - emit_line_note (input_filename, lineno); - - /* If the function returns a BLKmode object in a register, copy it - out of the temp register into a BLKmode memory object. */ - if (target - && TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))) == BLKmode - && ! aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl)))) - target = copy_blkmode_from_reg (0, target, TREE_TYPE (TREE_TYPE (fndecl))); - - if (structure_value_addr) - { - target = gen_rtx_MEM (TYPE_MODE (type), - memory_address (TYPE_MODE (type), - structure_value_addr)); - MEM_SET_IN_STRUCT_P (target, 1); - } - - /* Make sure we free the things we explicitly allocated with xmalloc. 
*/ - if (real_label_map) - free (real_label_map); - VARRAY_FREE (map->const_equiv_varray); - free (map->reg_map); - VARRAY_FREE (map->block_map); - free (map->insn_map); - free (map); - free (arg_vals); - free (arg_trees); - - inlining = inlining_previous; - - return target; } /* Given a chain of PARM_DECLs, ARGS, copy each decl into a VAR_DECL, @@ -1790,6 +1900,13 @@ copy_rtx_and_substitute (orig, map, for_lhs) = LABEL_PRESERVE_P (orig); return get_label_from_map (map, CODE_LABEL_NUMBER (orig)); + /* We need to handle "deleted" labels that appear in the DECL_RTL + of a LABEL_DECL. */ + case NOTE: + if (NOTE_LINE_NUMBER (orig) == NOTE_INSN_DELETED_LABEL) + return map->insn_map[INSN_UID (orig)]; + break; + case LABEL_REF: copy = gen_rtx_LABEL_REF @@ -2348,6 +2465,7 @@ subst_constants (loc, insn, map, memonly) case 'i': case 's': case 'w': + case 'n': case 't': break; diff --git a/gcc/jump.c b/gcc/jump.c index fc6f7df17def..b5352f43542a 100644 --- a/gcc/jump.c +++ b/gcc/jump.c @@ -125,13 +125,13 @@ static void delete_from_jump_chain PARAMS ((rtx)); static int delete_labelref_insn PARAMS ((rtx, rtx, int)); static void mark_modified_reg PARAMS ((rtx, rtx, void *)); static void redirect_tablejump PARAMS ((rtx, rtx)); -static void jump_optimize_1 PARAMS ((rtx, int, int, int, int)); +static void jump_optimize_1 PARAMS ((rtx, int, int, int, int, int)); #if ! defined(HAVE_cc0) && ! defined(HAVE_conditional_arithmetic) static rtx find_insert_position PARAMS ((rtx, rtx)); #endif static int returnjump_p_1 PARAMS ((rtx *, void *)); static void delete_prior_computation PARAMS ((rtx, rtx)); - + /* Main external entry point into the jump optimizer. See comments before jump_optimize_1 for descriptions of the arguments. */ void @@ -141,7 +141,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan) int noop_moves; int after_regscan; { - jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, 0); + jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, 0, 0); } /* Alternate entry into the jump optimizer. This entry point only rebuilds @@ -151,9 +151,16 @@ void rebuild_jump_labels (f) rtx f; { - jump_optimize_1 (f, 0, 0, 0, 1); + jump_optimize_1 (f, 0, 0, 0, 1, 0); } +/* Alternate entry into the jump optimizer. Do only trivial optimizations. */ +void +jump_optimize_minimal (f) + rtx f; +{ + jump_optimize_1 (f, 0, 0, 0, 0, 1); +} /* Delete no-op jumps and optimize jumps to jumps and jumps around jumps. @@ -175,15 +182,29 @@ rebuild_jump_labels (f) just determine whether control drops off the end of the function. This case occurs when we have -W and not -O. It works because `delete_insn' checks the value of `optimize' - and refrains from actually deleting when that is 0. */ + and refrains from actually deleting when that is 0. + + If MINIMAL is nonzero, then we only perform trivial optimizations: + + * Removal of unreachable code after BARRIERs. + * Removal of unreferenced CODE_LABELs. + * Removal of a jump to the next instruction. + * Removal of a conditional jump followed by an unconditional jump + to the same target as the conditional jump. + * Simplify a conditional jump around an unconditional jump. + * Simplify a jump to a jump. + * Delete extraneous line number notes. 
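+
+     For instance, "simplify a conditional jump around an unconditional
+     jump" rewrites (sketch only)
+
+	 if (cc) goto L1;  goto L2;  L1: ...
+
+     into "if (!cc) goto L2" with L1 falling through.  The sibcall pass
+     uses this entry point to tidy the insn chain just enough to build
+     a reliable CFG without disturbing the code it is about to analyze.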
+ */ static void -jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) +jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, + mark_labels_only, minimal) rtx f; int cross_jump; int noop_moves; int after_regscan; int mark_labels_only; + int minimal; { register rtx insn, next; int changed; @@ -230,7 +251,8 @@ jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) if (mark_labels_only) goto end; - exception_optimize (); + if (! minimal) + exception_optimize (); last_insn = delete_unreferenced_labels (f); @@ -320,7 +342,7 @@ jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) if (nlabel != JUMP_LABEL (insn)) changed |= redirect_jump (insn, nlabel); - if (! optimize) + if (! optimize || ! minimal) continue; /* If a dispatch table always goes to the same place, @@ -2135,7 +2157,7 @@ jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) not be cleared. This is especially true for the case where we delete the NOTE_FUNCTION_END note. CAN_REACH_END is cleared by the front-end before compiling each function. */ - if (calculate_can_reach_end (last_insn, optimize != 0)) + if (! minimal && calculate_can_reach_end (last_insn, optimize != 0)) can_reach_end = 1; end: diff --git a/gcc/rtl.c b/gcc/rtl.c index 7f7906e4f7d4..a8b1a9de63ac 100644 --- a/gcc/rtl.c +++ b/gcc/rtl.c @@ -406,14 +406,12 @@ copy_rtx (orig) walks over the RTL. */ copy->used = 0; - /* We do not copy JUMP, CALL, or FRAME_RELATED for INSNs. */ + /* We do not copy FRAME_RELATED for INSNs. */ if (GET_RTX_CLASS (code) == 'i') - { - copy->jump = 0; - copy->call = 0; - copy->frame_related = 0; - } - + copy->frame_related = 0; + copy->jump = orig->jump; + copy->call = orig->call; + format_ptr = GET_RTX_FORMAT (GET_CODE (copy)); for (i = 0; i < GET_RTX_LENGTH (GET_CODE (copy)); i++) diff --git a/gcc/rtl.def b/gcc/rtl.def index ba68ca0eba2c..570abdc1a971 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -1,7 +1,7 @@ /* This file contains the definitions and documentation for the Register Transfer Expressions (rtx's) that make up the Register Transfer Language (rtl) used in the Back End of the GNU compiler. - Copyright (C) 1987, 88, 92, 94, 95, 97, 98, 1999 + Copyright (C) 1987, 88, 92, 94, 95, 97, 98, 1999, 2000 Free Software Foundation, Inc. This file is part of GNU CC. @@ -880,7 +880,10 @@ DEF_RTL_EXPR(CONSTANT_P_RTX, "constant_p_rtx", "e", 'x') potential tail recursive calls were found. The tail recursion label is needed so that we can clear LABEL_PRESERVE_P - after we select a call method. */ + after we select a call method. + + This method of tail-call elimination is intended to be replaced by + tree-based optimizations once front-end conversions are complete. */ DEF_RTL_EXPR(CALL_PLACEHOLDER, "call_placeholder", "uuuu", 'x') /* The SSA phi operator. diff --git a/gcc/rtl.h b/gcc/rtl.h index 158ea7ead0fb..46750ac060dd 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -119,9 +119,12 @@ typedef struct rtx_def #else enum machine_mode mode : 8; #endif - /* LINK_COST_ZERO in an INSN_LIST. */ + /* 1 in an INSN if it can alter flow of control + within this function. + LINK_COST_ZERO in an INSN_LIST. */ unsigned int jump : 1; - /* LINK_COST_FREE in an INSN_LIST. */ + /* 1 in an INSN if it can call another function. + LINK_COST_FREE in an INSN_LIST. 
*/ unsigned int call : 1; /* 1 in a MEM or REG if value of this expression will never change during the current function, even though it is not @@ -380,6 +383,9 @@ extern void rtvec_check_failed_bounds PARAMS ((rtvec, int, /* 1 if insn is a call to a const function. */ #define CONST_CALL_P(INSN) ((INSN)->unchanging) +/* 1 if insn (assumed to be a CALL_INSN) is a sibling call. */ +#define SIBLING_CALL_P(INSN) ((INSN)->jump) + /* 1 if insn is a branch that should not unconditionally execute its delay slots, i.e., it is an annulled branch. */ #define INSN_ANNULLED_BRANCH_P(INSN) ((INSN)->unchanging) @@ -1416,6 +1422,7 @@ extern int rtx_renumbered_equal_p PARAMS ((rtx, rtx)); extern int true_regnum PARAMS ((rtx)); extern int redirect_jump PARAMS ((rtx, rtx)); extern void jump_optimize PARAMS ((rtx, int, int, int)); +extern void jump_optimize_minimal PARAMS ((rtx)); extern void rebuild_jump_labels PARAMS ((rtx)); extern void thread_jumps PARAMS ((rtx, int, int)); extern int redirect_exp PARAMS ((rtx *, rtx, rtx, rtx)); @@ -1513,6 +1520,7 @@ extern void record_excess_regs PARAMS ((rtx, rtx, rtx *)); extern void reposition_prologue_and_epilogue_notes PARAMS ((rtx)); extern void thread_prologue_and_epilogue_insns PARAMS ((rtx)); extern int prologue_epilogue_contains PARAMS ((rtx)); +extern int sibcall_epilogue_contains PARAMS ((rtx)); extern HOST_WIDE_INT get_frame_size PARAMS ((void)); extern void preserve_rtl_expr_result PARAMS ((rtx)); extern void mark_temp_addr_taken PARAMS ((rtx)); @@ -1713,6 +1721,16 @@ extern void record_base_value PARAMS ((int, rtx, int)); extern void record_alias_subset PARAMS ((int, int)); extern rtx addr_side_effect_eval PARAMS ((rtx, int, int)); +/* In sibcall.c */ +typedef enum { + sibcall_use_normal = 1, + sibcall_use_tail_recursion, + sibcall_use_sibcall +} sibcall_use_t; + +extern void optimize_sibling_and_tail_recursive_calls PARAMS ((void)); +extern void replace_call_placeholder PARAMS ((rtx, sibcall_use_t)); + #ifdef STACK_REGS extern int stack_regs_mentioned PARAMS ((rtx insn)); #endif diff --git a/gcc/sibcall.c b/gcc/sibcall.c new file mode 100644 index 000000000000..b399137a0bf0 --- /dev/null +++ b/gcc/sibcall.c @@ -0,0 +1,578 @@ +/* Generic sibling call optimization support + Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. 
*/ + +#include "config.h" +#include "system.h" + +#include "rtl.h" +#include "regs.h" +#include "function.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "insn-config.h" +#include "recog.h" +#include "basic-block.h" +#include "output.h" +#include "except.h" + +static int identify_call_return_value PARAMS ((rtx, rtx *, rtx *)); +static rtx skip_copy_to_return_value PARAMS ((rtx, rtx, rtx)); +static rtx skip_use_of_return_value PARAMS ((rtx, enum rtx_code)); +static rtx skip_stack_adjustment PARAMS ((rtx)); +static rtx skip_jump_insn PARAMS ((rtx)); +static int uses_addressof PARAMS ((rtx)); +static int sequence_uses_addressof PARAMS ((rtx)); +static void purge_reg_equiv_notes PARAMS ((void)); + +/* Examine a CALL_PLACEHOLDER pattern and determine where the call's + return value is located. P_HARD_RETURN receives the hard register + that the function used; P_SOFT_RETURN receives the pseudo register + that the sequence used. Return non-zero if the values were located. */ + +static int +identify_call_return_value (cp, p_hard_return, p_soft_return) + rtx cp; + rtx *p_hard_return, *p_soft_return; +{ + rtx insn, set, hard, soft; + + /* Search forward through the "normal" call sequence to the CALL insn. */ + insn = XEXP (cp, 0); + while (GET_CODE (insn) != CALL_INSN) + insn = NEXT_INSN (insn); + + /* Assume the pattern is (set (dest) (call ...)), or that the first + member of a parallel is. This is the hard return register used + by the function. */ + if (GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == CALL) + hard = SET_DEST (PATTERN (insn)); + else if (GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == SET + && GET_CODE (SET_SRC (XVECEXP (PATTERN (insn), 0, 0))) == CALL) + hard = SET_DEST (XVECEXP (PATTERN (insn), 0, 0)); + else + return 0; + + /* If we didn't get a single hard register (e.g. a parallel), give up. */ + if (GET_CODE (hard) != REG) + return 0; + + /* If there's nothing after, there's no soft return value. */ + insn = NEXT_INSN (insn); + if (! insn) + return 0; + + /* We're looking for a source of the hard return register. */ + set = single_set (insn); + if (! set || SET_SRC (set) != hard) + return 0; + + soft = SET_DEST (set); + insn = NEXT_INSN (insn); + + /* Allow this first destination to be copied to a second register, + as might happen if the first register wasn't the particular pseudo + we'd been expecting. */ + if (insn + && (set = single_set (insn)) != NULL_RTX + && SET_SRC (set) == soft) + { + soft = SET_DEST (set); + insn = NEXT_INSN (insn); + } + + /* Don't fool with anything but pseudo registers. */ + if (GET_CODE (soft) != REG || REGNO (soft) < FIRST_PSEUDO_REGISTER) + return 0; + + /* This value must not be modified before the end of the sequence. */ + if (reg_set_between_p (soft, insn, NULL_RTX)) + return 0; + + *p_hard_return = hard; + *p_soft_return = soft; + + return 1; +} + +/* If the first real insn after ORIG_INSN copies to this function's + return value from RETVAL, then return the insn which performs the + copy. Otherwise return ORIG_INSN. */ + +static rtx +skip_copy_to_return_value (orig_insn, hardret, softret) + rtx orig_insn; + rtx hardret, softret; +{ + rtx insn, set = NULL_RTX; + + insn = next_nonnote_insn (orig_insn); + if (! insn) + return orig_insn; + + set = single_set (insn); + if (! 
set)
+    return orig_insn;
+
+  /* The destination must be the same as the called function's return
+     value to ensure that any return value is put in the same place by the
+     current function and the function we're calling.
+
+     Further, the source must be the same as the pseudo into which the
+     called function's return value was copied.  Otherwise we're returning
+     some other value.  */
+
+  if (SET_DEST (set) == current_function_return_rtx
+      && REG_P (SET_DEST (set))
+      && REGNO (SET_DEST (set)) == REGNO (hardret)
+      && SET_SRC (set) == softret)
+    return insn;
+
+  /* It did not look like a copy of the return value, so return the
+     same insn we were passed.  */
+  return orig_insn;
+}
+
+/* If the first real insn after ORIG_INSN is a CODE (e.g. a USE or CLOBBER)
+   of this function's return value, return that insn.  Otherwise return
+   ORIG_INSN.  */
+
+static rtx
+skip_use_of_return_value (orig_insn, code)
+     rtx orig_insn;
+     enum rtx_code code;
+{
+  rtx insn;
+
+  insn = next_nonnote_insn (orig_insn);
+
+  if (insn
+      && GET_CODE (insn) == INSN
+      && GET_CODE (PATTERN (insn)) == code
+      && (XEXP (PATTERN (insn), 0) == current_function_return_rtx
+	  || XEXP (PATTERN (insn), 0) == const0_rtx))
+    return insn;
+
+  return orig_insn;
+}
+
+/* If the first real insn after ORIG_INSN adjusts the stack pointer
+   by a constant, return the insn with the stack pointer adjustment.
+   Otherwise return ORIG_INSN.  */
+
+static rtx
+skip_stack_adjustment (orig_insn)
+     rtx orig_insn;
+{
+  rtx insn, set = NULL_RTX;
+
+  insn = next_nonnote_insn (orig_insn);
+
+  if (insn)
+    set = single_set (insn);
+
+  /* We are looking for a single SET that adjusts the stack pointer by
+     a constant, i.e. (set (reg sp) (plus (reg sp) (const_int N))).  */
+  if (insn
+      && set
+      && GET_CODE (SET_SRC (set)) == PLUS
+      && XEXP (SET_SRC (set), 0) == stack_pointer_rtx
+      && GET_CODE (XEXP (SET_SRC (set), 1)) == CONST_INT
+      && SET_DEST (set) == stack_pointer_rtx)
+    return insn;
+
+  /* It did not look like a stack pointer adjustment, so return the
+     same insn we were passed.  */
+  return orig_insn;
+}
+
+/* If the first real insn after ORIG_INSN is a jump, return the JUMP_INSN.
+   Otherwise return ORIG_INSN.  */
+
+static rtx
+skip_jump_insn (orig_insn)
+     rtx orig_insn;
+{
+  rtx insn;
+
+  insn = next_nonnote_insn (orig_insn);
+
+  if (insn
+      && GET_CODE (insn) == JUMP_INSN
+      && simplejump_p (insn))
+    return insn;
+
+  return orig_insn;
+}
+
+/* Scan the rtx X for ADDRESSOF expressions.  Return nonzero if an ADDRESSOF
+   expression is found, else return zero.  */
+
+static int
+uses_addressof (x)
+     rtx x;
+{
+  RTX_CODE code;
+  int i, j;
+  const char *fmt;
+
+  if (x == NULL_RTX)
+    return 0;
+
+  code = GET_CODE (x);
+
+  if (code == ADDRESSOF)
+    return 1;
+
+  /* Scan all subexpressions.  */
+  fmt = GET_RTX_FORMAT (code);
+  for (i = 0; i < GET_RTX_LENGTH (code); i++, fmt++)
+    {
+      if (*fmt == 'e')
+	{
+	  if (uses_addressof (XEXP (x, i)))
+	    return 1;
+	}
+      else if (*fmt == 'E')
+	{
+	  for (j = 0; j < XVECLEN (x, i); j++)
+	    if (uses_addressof (XVECEXP (x, i, j)))
+	      return 1;
+	}
+    }
+  return 0;
+}
+
+/* Scan the sequence of insns in SEQ to see if any have an ADDRESSOF
+   rtl expression.  If an ADDRESSOF expression is found, return nonzero,
+   else return zero.
+
+   This function handles CALL_PLACEHOLDERs which contain multiple sequences
+   of insns.
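+
+   (An ADDRESSOF rtx stands for the address of a pseudo before reload
+   has decided where the pseudo will live; for instance the &i in
+
+	int f (void) { int i = 0; g (&i); return i; }
+
+   may still be an ADDRESSOF at this point, so the value may yet be
+   given a slot in the current frame.)  */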
+   */
+
+static int
+sequence_uses_addressof (seq)
+     rtx seq;
+{
+  rtx insn;
+
+  for (insn = seq; insn; insn = NEXT_INSN (insn))
+    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+      {
+        /* If this is a CALL_PLACEHOLDER, then recursively call ourselves
+           with each nonempty sequence attached to the CALL_PLACEHOLDER.  */
+        if (GET_CODE (insn) == CALL_INSN
+            && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER)
+          {
+            if (XEXP (PATTERN (insn), 0) != NULL_RTX
+                && sequence_uses_addressof (XEXP (PATTERN (insn), 0)))
+              return 1;
+            if (XEXP (PATTERN (insn), 1) != NULL_RTX
+                && sequence_uses_addressof (XEXP (PATTERN (insn), 1)))
+              return 1;
+            if (XEXP (PATTERN (insn), 2) != NULL_RTX
+                && sequence_uses_addressof (XEXP (PATTERN (insn), 2)))
+              return 1;
+          }
+        else if (uses_addressof (PATTERN (insn))
+                 || (REG_NOTES (insn) && uses_addressof (REG_NOTES (insn))))
+          return 1;
+      }
+  return 0;
+}
+
+/* Remove all REG_EQUIV notes found in the insn chain.  */
+
+static void
+purge_reg_equiv_notes ()
+{
+  rtx insn;
+
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      while (1)
+        {
+          rtx note = find_reg_note (insn, REG_EQUIV, 0);
+          if (note)
+            {
+              /* Remove the note and keep looking at the notes for
+                 this insn.  */
+              remove_note (insn, note);
+              continue;
+            }
+          break;
+        }
+    }
+}
+
+/* Replace the CALL_PLACEHOLDER with one of its children.  INSN should be
+   the CALL_PLACEHOLDER insn; USE tells which child to use.  */
+
+void
+replace_call_placeholder (insn, use)
+     rtx insn;
+     sibcall_use_t use;
+{
+  if (use == sibcall_use_tail_recursion)
+    emit_insns_before (XEXP (PATTERN (insn), 2), insn);
+  else if (use == sibcall_use_sibcall)
+    emit_insns_before (XEXP (PATTERN (insn), 1), insn);
+  else if (use == sibcall_use_normal)
+    emit_insns_before (XEXP (PATTERN (insn), 0), insn);
+  else
+    abort ();
+
+  /* Turn off LABEL_PRESERVE_P for the tail recursion label if it
+     exists.  We only had to set it long enough to keep the jump
+     pass above from deleting it as unused.  */
+  if (XEXP (PATTERN (insn), 3))
+    LABEL_PRESERVE_P (XEXP (PATTERN (insn), 3)) = 0;
+
+  /* "Delete" the placeholder insn.  */
+  PUT_CODE (insn, NOTE);
+  NOTE_SOURCE_FILE (insn) = 0;
+  NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+}
+
+
+/* Given a (possibly empty) set of potential sibling or tail recursion call
+   sites, determine if optimization is possible.
+
+   Potential sibling or tail recursion calls are marked with CALL_PLACEHOLDER
+   insns.  The CALL_PLACEHOLDER insn holds chains of insns to implement a
+   normal call, sibling call or tail recursive call.
+
+   Replace the CALL_PLACEHOLDER with an appropriate insn chain.  */
+
+void
+optimize_sibling_and_tail_recursive_calls ()
+{
+  rtx insn, insns;
+  basic_block alternate_exit = EXIT_BLOCK_PTR;
+  int current_function_uses_addressof;
+  int successful_sibling_call = 0;
+  int replaced_call_placeholder = 0;
+  edge e;
+
+  insns = get_insns ();
+
+  /* We do not perform these optimizations when flag_exceptions is true,
+     so this is probably a NOP at the current time.  However, we may want
+     to support sibling and tail recursion optimizations in the future,
+     so let's plan ahead and find all the EH labels.  */
+  find_exception_handler_labels ();
+
+  /* Run a jump optimization pass to clean up the CFG.  We primarily want
+     this to thread jumps so that it is obvious which blocks jump to the
+     epilogue.  */
+  jump_optimize_minimal (insns);
+
+  /* We need cfg information to determine which blocks are succeeded
+     only by the epilogue.
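+
+     For example, in
+
+        int bar (int x) { return foo (x + 1); }
+
+     the block containing the call to foo reaches only the epilogue,
+     so that call is a candidate for the optimizations below.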
+     */
+  find_basic_blocks (insns, max_reg_num (), 0);
+  cleanup_cfg (insns);
+
+  /* If there are no basic blocks, then there is nothing to do.  */
+  if (n_basic_blocks == 0)
+    return;
+
+  /* Find the exit block.
+
+     It is possible that we have blocks which can reach the exit block
+     directly.  However, most of the time a block will jump to (or fall
+     into) block N_BASIC_BLOCKS - 1, which in turn falls into the exit
+     block.  */
+  for (e = EXIT_BLOCK_PTR->pred;
+       e && alternate_exit == EXIT_BLOCK_PTR;
+       e = e->pred_next)
+    {
+      rtx insn;
+
+      if (e->dest != EXIT_BLOCK_PTR || e->succ_next != NULL)
+        continue;
+
+      /* Walk forwards through the last normal block and see if it
+         does nothing except fall into the exit block.  */
+      for (insn = BLOCK_HEAD (n_basic_blocks - 1);
+           insn;
+           insn = NEXT_INSN (insn))
+        {
+          /* This should only happen once, at the start of this block.  */
+          if (GET_CODE (insn) == CODE_LABEL)
+            continue;
+
+          if (GET_CODE (insn) == NOTE)
+            continue;
+
+          if (GET_CODE (insn) == INSN
+              && GET_CODE (PATTERN (insn)) == USE)
+            continue;
+
+          break;
+        }
+
+      /* If INSN is zero, then the search walked all the way through the
+         block without hitting anything interesting.  This block is a
+         valid alternate exit block.  */
+      if (insn == NULL)
+        alternate_exit = e->src;
+    }
+
+  /* If the function uses ADDRESSOF, we can't (easily) determine
+     at this point if the value will end up on the stack.  */
+  current_function_uses_addressof = sequence_uses_addressof (insns);
+
+  /* Walk the insn chain and find any CALL_PLACEHOLDER insns.  We need to
+     select one of the insn sequences attached to each CALL_PLACEHOLDER.
+
+     The different sequences represent different ways to implement the call,
+     i.e., tail recursion, sibling call or normal call.
+
+     Since we do not create nested CALL_PLACEHOLDERs, the scan
+     continues with the insn that was after a replaced CALL_PLACEHOLDER;
+     we don't rescan the replacement insns.  */
+  for (insn = insns; insn; insn = NEXT_INSN (insn))
+    {
+      if (GET_CODE (insn) == CALL_INSN
+          && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER)
+        {
+          int sibcall = (XEXP (PATTERN (insn), 1) != NULL_RTX);
+          int tailrecursion = (XEXP (PATTERN (insn), 2) != NULL_RTX);
+          basic_block succ_block, call_block;
+          rtx temp, hardret, softret;
+
+          /* We must be careful with stack slots which are live at
+             potential optimization sites.
+
+             ?!? This test is overly conservative and will be replaced.  */
+          if (frame_offset)
+            goto failure;
+
+          /* alloca (until we have stack slot life analysis) inhibits
+             sibling call optimizations, but not tail recursion.
+
+             Similarly if we have ADDRESSOF expressions.
+
+             Similarly if we use varargs or stdarg since they implicitly
+             may take the address of an argument.  */
+          if (current_function_calls_alloca || current_function_uses_addressof
+              || current_function_varargs || current_function_stdarg)
+            sibcall = 0;
+
+          call_block = BLOCK_FOR_INSN (insn);
+
+          /* If the block has more than one successor, then we can not
+             perform sibcall or tail recursion optimizations.  */
+          if (call_block->succ == NULL
+              || call_block->succ->succ_next != NULL)
+            goto failure;
+
+          /* If the single successor is not the exit block, then we can not
+             perform sibcall or tail recursion optimizations.
+
+             Note that this test combined with the previous is sufficient
+             to prevent tail call optimization in the presence of active
+             exception handlers.
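+
+             A call inside an active exception region would have an
+             additional outgoing edge to its handler, so its block could
+             not have the exit block as its only successor.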
+             */
+          succ_block = call_block->succ->dest;
+          if (succ_block != EXIT_BLOCK_PTR && succ_block != alternate_exit)
+            goto failure;
+
+          /* If the call was the end of the block, then we're OK.  */
+          temp = insn;
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over copying from the call's return value pseudo into
+             this function's hard return register.  */
+          if (identify_call_return_value (PATTERN (insn), &hardret, &softret))
+            {
+              temp = skip_copy_to_return_value (temp, hardret, softret);
+              if (temp == call_block->end)
+                goto success;
+            }
+
+          /* Skip any stack adjustment.  */
+          temp = skip_stack_adjustment (temp);
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over a CLOBBER of the return value (as a hard reg).  */
+          temp = skip_use_of_return_value (temp, CLOBBER);
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over a USE of the return value (as a hard reg).  */
+          temp = skip_use_of_return_value (temp, USE);
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over the JUMP_INSN at the end of the block.  */
+          temp = skip_jump_insn (temp);
+          if (GET_CODE (temp) == NOTE)
+            temp = next_nonnote_insn (temp);
+          if (temp == call_block->end)
+            goto success;
+
+          /* There are operations at the end of the block which we must
+             execute after returning from the function call.  So this call
+             can not be optimized.  */
+        failure:
+          sibcall = 0, tailrecursion = 0;
+        success:
+
+          /* Select a set of insns to implement the call and emit them.
+             Tail recursion is the most efficient, so select it over
+             a sibling call.  */
+
+          if (sibcall)
+            successful_sibling_call = 1;
+          replaced_call_placeholder = 1;
+          replace_call_placeholder (insn,
+                                    tailrecursion != 0
+                                    ? sibcall_use_tail_recursion
+                                    : sibcall != 0
+                                    ? sibcall_use_sibcall
+                                    : sibcall_use_normal);
+        }
+    }
+
+  /* A sibling call sequence invalidates any REG_EQUIV notes made for
+     this function's incoming arguments.
+
+     At the start of RTL generation we know the only REG_EQUIV notes
+     in the rtl chain are those for incoming arguments, so we can safely
+     flush any REG_EQUIV note.
+
+     This is (slight) overkill.  We could keep track of the highest argument
+     we clobber and be more selective in removing notes, but it does not
+     seem to be worth the effort.  */
+  if (successful_sibling_call)
+    purge_reg_equiv_notes ();
+
+  /* There may have been NOTE_INSN_BLOCK_{BEGIN,END} notes in the
+     CALL_PLACEHOLDER alternatives that we didn't emit.  Rebuild the
+     lexical block tree to correspond to the notes that still exist.  */
+  if (replaced_call_placeholder)
+    unroll_block_trees ();
+
+  /* This information will be invalid after inline expansion.  Kill it now.  */
+  free_basic_block_vars (0);
+}
diff --git a/gcc/toplev.c b/gcc/toplev.c
index fa7494d0e21f..1e1a650139c5 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -2986,6 +2986,15 @@ rest_of_compilation (decl)
       goto exit_rest_of_compilation;
     }
 
+  /* We may have potential sibling or tail recursion sites.  Select one
+     (of possibly multiple) methods of performing the call.  */
+  init_EXPR_INSN_LIST_cache ();
+  if (optimize)
+    optimize_sibling_and_tail_recursive_calls ();
+
+  if (ggc_p)
+    ggc_collect ();
+
   /* Initialize some variables used by the optimizers.  */
   init_function_for_compilation ();
 
@@ -3030,8 +3039,6 @@ rest_of_compilation (decl)
 
   unshare_all_rtl (current_function_decl, insns);
 
-  init_EXPR_INSN_LIST_cache ();
-
 #ifdef SETJMP_VIA_SAVE_AREA
   /* This must be performed before virtual register instantiation.
      */
   if (current_function_calls_alloca)
diff --git a/gcc/tree.c b/gcc/tree.c
index 72d85ee0ba96..c6045fffffea 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -285,6 +285,9 @@ static void mark_type_hash PARAMS ((void *));
 void (*lang_unsave) PARAMS ((tree *));
 void (*lang_unsave_expr_now) PARAMS ((tree));
 
+/* If non-null, a language-specific version of safe_for_unsave.  */
+int (*lang_safe_for_unsave) PARAMS ((tree));
+
 /* The string used as a placeholder instead of a source file name for
    built-in tree nodes.  The variable, which is dynamically allocated,
    should be used; the macro is only used to initialize it.  */
@@ -2666,6 +2669,82 @@ unsave_expr_now (expr)
   return expr;
 }
 
+
+/* Return nonzero if it is safe to unsave EXPR, else return zero.
+   It is not safe to unsave EXPR if it contains any embedded RTL_EXPRs.  */
+
+int
+safe_for_unsave (expr)
+     tree expr;
+{
+  enum tree_code code;
+  register int i;
+  int first_rtl;
+
+  if (expr == NULL_TREE)
+    return 1;
+
+  code = TREE_CODE (expr);
+  first_rtl = first_rtl_op (code);
+  switch (code)
+    {
+    case RTL_EXPR:
+      return 0;
+
+    case CALL_EXPR:
+      if (TREE_OPERAND (expr, 1)
+          && TREE_CODE (TREE_OPERAND (expr, 1)) == TREE_LIST)
+        {
+          tree exp = TREE_OPERAND (expr, 1);
+          while (exp)
+            {
+              if (! safe_for_unsave (TREE_VALUE (exp)))
+                return 0;
+              exp = TREE_CHAIN (exp);
+            }
+        }
+      break;
+
+    default:
+      if (lang_safe_for_unsave)
+        switch ((*lang_safe_for_unsave) (expr))
+          {
+          case -1:
+            break;
+          case 0:
+            return 0;
+          case 1:
+            return 1;
+          default:
+            abort ();
+          }
+      break;
+    }
+
+  switch (TREE_CODE_CLASS (code))
+    {
+    case 'c':  /* a constant */
+    case 't':  /* a type node */
+    case 'x':  /* something random, like an identifier or an ERROR_MARK.  */
+    case 'd':  /* a decl node */
+    case 'b':  /* a block node */
+      return 1;
+
+    case 'e':  /* an expression */
+    case 'r':  /* a reference */
+    case 's':  /* an expression with side effects */
+    case '<':  /* a comparison expression */
+    case '2':  /* a binary arithmetic expression */
+    case '1':  /* a unary arithmetic expression */
+      for (i = first_rtl - 1; i >= 0; i--)
+        if (! safe_for_unsave (TREE_OPERAND (expr, i)))
+          return 0;
+      return 1;
+
+    default:
+      return 0;
+    }
+}
 
 /* Return 1 if EXP contains a PLACEHOLDER_EXPR; i.e., if it represents
    a size or offset that depends on a field within a record.
diff --git a/gcc/tree.h b/gcc/tree.h
index c8cfdf900b21..f9503ceca88e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1983,6 +1983,11 @@ extern int first_rtl_op PARAMS ((enum tree_code));
 
 extern tree unsave_expr PARAMS ((tree));
 
+/* safe_for_unsave (EXP) returns nonzero if it is possible to
+   expand EXP multiple times.  */
+
+extern int safe_for_unsave PARAMS ((tree));
+
 /* Reset EXP in place so that it can be expanded again.  Does not
    recurse into subtrees.  */
 
@@ -2000,6 +2005,9 @@ extern tree unsave_expr_now PARAMS ((tree));
 extern void (*lang_unsave) PARAMS ((tree *));
 extern void (*lang_unsave_expr_now) PARAMS ((tree));
 
+/* If non-null, a language-specific version of safe_for_unsave.  */
+extern int (*lang_safe_for_unsave) PARAMS ((tree));
+
 /* Return 1 if EXP contains a PLACEHOLDER_EXPR; i.e., if it represents
    a size or offset that depends on a field within a record.
-- 
2.43.5