From 0a1c58a25ab5df1a3e4596024774641ebae8be2a Mon Sep 17 00:00:00 2001 From: Jeffrey A Law Date: Fri, 17 Mar 2000 14:40:45 -0800 Subject: [PATCH] Sibling call optimizations. Co-Authored-By: Richard Henderson From-SVN: r32612 --- gcc/ChangeLog | 54 ++ gcc/Makefile.in | 5 +- gcc/calls.c | 1499 ++++++++++++++++++++++++++++------------------- gcc/final.c | 6 +- gcc/flow.c | 25 +- gcc/function.c | 444 +++++++++----- gcc/genflags.c | 8 +- gcc/integrate.c | 294 +++++++--- gcc/jump.c | 40 +- gcc/rtl.c | 12 +- gcc/rtl.def | 7 +- gcc/rtl.h | 22 +- gcc/sibcall.c | 578 ++++++++++++++++++ gcc/toplev.c | 11 +- gcc/tree.c | 79 +++ gcc/tree.h | 8 + 16 files changed, 2232 insertions(+), 860 deletions(-) create mode 100644 gcc/sibcall.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b35f1aae9436..3ac1d635426b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,57 @@ +2000-03-17 Jeff Law + Richard Henderson + + * Makefile.in (OBJS): Add sibcall.o. + (sibcall.o): New. + * sibcall.c: New file. + * calls.c (FUNCTION_OK_FOR_SIBCALL): Provide default. + (ECF_IS_CONST, ECF_NOTHROW, ECF_SIBCALL): New. + (emit_call_1): Replace `is_const' and `nothrow' with `ecf_flags'. + Emit sibcall patterns when requested. Update all callers. + (expand_call): Generate CALL_PLACEHOLDER insns when tail call + elimination seems feasable. + * final.c (leaf_function_p): Sibling calls don't discount being + a leaf function. + * flow.c (HAVE_sibcall_epilogue): Provide default. + (find_basic_blocks_1): Sibling calls don't throw. + (make_edges): Make edge from sibling call to EXIT. + (propagate_block): Don't remove sibcall_epilogue insns. + * function.c (prologue, epilogue): Turn into varrays. Update all uses. + (sibcall_epilogue): New. + (fixup_var_refs): Scan CALL_PLACEHOLDER sub-sequences. + (identify_blocks_1): Likewise. Break out from ... + (identify_blocks): ... here. + (reorder_blocks_1): Scan CALL_PLACEHOLDER. Break out from ... + (reorder_blocks): ... here. + (init_function_for_compilation): Zap prologue/epilogue as varrays. + (record_insns): Extend a varray instead of mallocing new memory. + (contains): Read a varray not array of ints. + (sibcall_epilogue_contains): New. + (thread_prologue_and_epilogue_insns): Emit and record + sibcall_epilogue patterns. + (init_function_once): Allocate prologue/epilogue varrays. + * genflags.c (gen_insn): Treat sibcall patterns as calls. + * integrate.c (save_parm_insns): Recurse on CALL_PLACEHOLDER patterns. + Broken out from ... + (save_for_inline_nocopy): ... here. + (copy_insn_list): Recurse on CALL_PLACEHOLDER patterns. + Broken out from ... + (expand_inline_function): ... here. + (copy_rtx_and_substitute): Handle NOTE_INSN_DELETED_LABEL. + (subst_constants): Handle 'n' formats. + * jump.c (jump_optimize_minimal): New. + (jump_optimize_1): New arg `minimal'; update callers. Elide most + optimizations if it's set. + * rtl.c (copy_rtx): Do copy jump & call for insns. + * rtl.h (struct rtx_def): Document use of jump and call for insns. + (SIBLING_CALL_P): New. + (sibcall_use_t): New. + * toplev.c (rest_of_compilation): Do init_EXPR_INSN_LIST_cache earlier. + Invoke optimize_sibling_and_tail_recursive_calls. + * tree.c (lang_safe_for_unsave): New. + (safe_for_unsave): New. + * tree.h (lang_safe_for_unsave, safe_for_unsave): Declare. 
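A note on terminology for readers of this patch: a "sibling call" is a call in tail position, i.e. one whose value (if any) is immediately returned by the caller, made to a function that can reuse the caller's stack frame and incoming argument area. Such a call can be emitted as a jump, so the callee returns directly to the caller's caller. "Tail recursion" is the special case where the callee is the current function itself; that case can become a plain jump back to the function entry. A minimal C illustration of the calls this patch targets (foo and bar are hypothetical, not taken from the patch):

    extern int bar (int);

    int
    foo (int x)
    {
      /* Tail position: bar's value is foo's value and nothing in foo
         is live after the call.  With sibcall optimization, foo's
         epilogue runs and control jumps to bar; bar then returns
         straight to foo's caller, and foo's frame is never rebuilt.  */
      return bar (x + 1);
    }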
+ 2000-03-17 Mark Mitchell * objc/objc-act.c (encode_method_prototype): Pass types, not diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b41eebb44321..51ff35f72626 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -675,7 +675,8 @@ OBJS = diagnostic.o \ insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o lcm.o \ profile.o insn-attrtab.o $(out_object_file) $(EXTRA_OBJS) convert.o \ mbchar.o dyn-string.o splay-tree.o graph.o sbitmap.o resource.o hash.o \ - predict.o lists.o ggc-common.o $(GGC) simplify-rtx.o ssa.o bb-reorder.o + predict.o lists.o ggc-common.o $(GGC) simplify-rtx.o ssa.o bb-reorder.o \ + sibcall.o # GEN files are listed separately, so they can be built before doing parallel # makes for cc1 or cc1plus. Otherwise sequent parallel make attempts to load @@ -1562,6 +1563,8 @@ cse.o : cse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h flags.h \ gcse.o : gcse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h \ flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) \ function.h output.h toplev.h +sibcall.o : sibcall.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) function.h \ + hard-reg-set.h flags.h insn-config.h $(RECOG_H) $(BASIC_BLOCK_H) resource.o : resource.c $(CONFIG_H) $(RTL_H) hard-reg-set.h system.h \ $(BASIC_BLOCK_H) $(REGS_H) flags.h output.h resource.h function.h toplev.h \ insn-attr.h diff --git a/gcc/calls.c b/gcc/calls.c index 6a4c1484ffc7..21feeed39ac1 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -32,6 +32,10 @@ Boston, MA 02111-1307, USA. */ #include "output.h" #include "tm_p.h" +#if !defined FUNCTION_OK_FOR_SIBCALL +#define FUNCTION_OK_FOR_SIBCALL(DECL) 1 +#endif + #if !defined PREFERRED_STACK_BOUNDARY && defined STACK_BOUNDARY #define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY #endif @@ -132,9 +136,13 @@ int stack_arg_under_construction; static int calls_function PARAMS ((tree, int)); static int calls_function_1 PARAMS ((tree, int)); + +#define ECF_IS_CONST 1 +#define ECF_NOTHROW 2 +#define ECF_SIBCALL 4 static void emit_call_1 PARAMS ((rtx, tree, tree, HOST_WIDE_INT, HOST_WIDE_INT, HOST_WIDE_INT, rtx, - rtx, int, rtx, int, int)); + rtx, int, rtx, int)); static void precompute_register_parameters PARAMS ((int, struct arg_data *, int *)); @@ -384,7 +392,7 @@ prepare_call_address (funexp, fndecl, call_fusage, reg_parm_seen) static void emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, struct_value_size, next_arg_reg, valreg, old_inhibit_defer_pop, - call_fusage, is_const, nothrow) + call_fusage, ecf_flags) rtx funexp; tree fndecl ATTRIBUTE_UNUSED; tree funtype ATTRIBUTE_UNUSED; @@ -395,7 +403,7 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, rtx valreg; int old_inhibit_defer_pop; rtx call_fusage; - int is_const, nothrow; + int ecf_flags; { rtx rounded_stack_size_rtx = GEN_INT (rounded_stack_size); #if defined (HAVE_call) && defined (HAVE_call_value) @@ -414,6 +422,33 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, funexp = memory_address (FUNCTION_MODE, funexp); #ifndef ACCUMULATE_OUTGOING_ARGS +#if defined (HAVE_sibcall_pop) && defined (HAVE_sibcall_value_pop) + if ((ecf_flags & ECF_SIBCALL) + && HAVE_sibcall_pop && HAVE_sibcall_value_pop + && (RETURN_POPS_ARGS (fndecl, funtype, stack_size) > 0 + || stack_size == 0)) + { + rtx n_pop = GEN_INT (RETURN_POPS_ARGS (fndecl, funtype, stack_size)); + rtx pat; + + /* If this subroutine pops its own args, record that in the call insn + if possible, for the sake of frame pointer elimination. 
*/ + if (valreg) + pat = gen_sibcall_value_pop (valreg, + gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, + n_pop); + else + pat = gen_sibcall_pop (gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, n_pop); + + emit_call_insn (pat); + already_popped = 1; + } + else +#endif + #if defined (HAVE_call_pop) && defined (HAVE_call_value_pop) /* If the target has "call" or "call_value" insns, then prefer them if no arguments are actually popped. If the target does not have "call" or "call_value" insns, then we must use the popping versions even if the call has no arguments to pop. */ @@ -447,6 +482,23 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, #endif #endif +#if defined (HAVE_sibcall) && defined (HAVE_sibcall_value) + if ((ecf_flags & ECF_SIBCALL) + && HAVE_sibcall && HAVE_sibcall_value) + { + if (valreg) + emit_call_insn (gen_sibcall_value (valreg, + gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, + next_arg_reg, NULL_RTX)); + else + emit_call_insn (gen_sibcall (gen_rtx_MEM (FUNCTION_MODE, funexp), + rounded_stack_size_rtx, next_arg_reg, + struct_value_size_rtx)); + } + else +#endif + #if defined (HAVE_call) && defined (HAVE_call_value) if (HAVE_call && HAVE_call_value) { @@ -489,15 +541,17 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, CALL_INSN_FUNCTION_USAGE (call_insn) = call_fusage; /* If this is a const call, then set the insn's unchanging bit. */ - if (is_const) + if (ecf_flags & ECF_IS_CONST) CONST_CALL_P (call_insn) = 1; /* If this call can't throw, attach a REG_EH_REGION reg note to that effect. */ - if (nothrow) + if (ecf_flags & ECF_NOTHROW) REG_NOTES (call_insn) = gen_rtx_EXPR_LIST (REG_EH_REGION, const0_rtx, REG_NOTES (call_insn)); + SIBLING_CALL_P (call_insn) = ((ecf_flags & ECF_SIBCALL) != 0); + /* Restore this now, so that we do defer pops for this call's args if the context of the call as a whole permits. */ inhibit_defer_pop = old_inhibit_defer_pop; @@ -527,7 +581,8 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size, if (rounded_stack_size != 0) { - if (flag_defer_pop && inhibit_defer_pop == 0 && !is_const) + if (flag_defer_pop && inhibit_defer_pop == 0 + && !(ecf_flags & ECF_IS_CONST)) pending_stack_adjust += rounded_stack_size; else adjust_stack (rounded_stack_size_rtx); @@ -1227,6 +1282,8 @@ compute_argument_block_size (reg_parm_stack_space, args_size, { #ifdef PREFERRED_STACK_BOUNDARY preferred_stack_boundary /= BITS_PER_UNIT; + if (preferred_stack_boundary < 1) + preferred_stack_boundary = 1; args_size->constant = (((args_size->constant + arg_space_so_far + pending_stack_adjust @@ -1602,17 +1659,31 @@ expand_call (exp, target, ignore) rtx target; int ignore; { + /* Nonzero if we are currently expanding a call. */ + static int currently_expanding_call = 0; + /* List of actual parameters. */ tree actparms = TREE_OPERAND (exp, 1); /* RTX for the function to be called. */ rtx funexp; + /* Sequence of insns to perform a tail recursive "call". */ + rtx tail_recursion_insns = NULL_RTX; + /* Sequence of insns to perform a normal "call". */ + rtx normal_call_insns = NULL_RTX; + /* Sequence of insns to perform a sibling "call" (tail call). */ + rtx tail_call_insns = NULL_RTX; /* Data type of the function. */ tree funtype; /* Declaration of the function being called, or 0 if the function is computed (not known by name). 
*/ tree fndecl = 0; char *name = 0; +#ifdef ACCUMULATE_OUTGOING_ARGS rtx before_call; +#endif + rtx insn; + int safe_for_reeval; + int pass; /* Register in which non-BLKmode value will be returned, or 0 if no value or if value is BLKmode. */ @@ -1708,17 +1779,10 @@ expand_call (exp, target, ignore) rtx old_stack_level = 0; int old_pending_adj = 0; int old_inhibit_defer_pop = inhibit_defer_pop; - rtx call_fusage = 0; + rtx call_fusage; register tree p; register int i; -#ifdef PREFERRED_STACK_BOUNDARY - int preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; -#else - /* In this case preferred_stack_boundary variable is meaningless. - It is used only in order to keep ifdef noise down when calling - compute_argument_block_size. */ - int preferred_stack_boundary = 0; -#endif + int preferred_stack_boundary; /* The value of the function call can be put in a hard register. But if -fcheck-memory-usage, code which invokes functions (and thus @@ -1947,11 +2011,91 @@ expand_call (exp, target, ignore) mark_addressable (fndecl); } + currently_expanding_call++; + + /* If we're considering tail recursion optimizations, verify that the + arguments are safe for re-evaluation. If we can unsave them, wrap + each argument in an UNSAVE_EXPR. */ + + safe_for_reeval = 0; + if (optimize >= 2 + && currently_expanding_call == 1 + && stmt_loop_nest_empty ()) + { + /* Verify that each argument is safe for re-evaluation. */ + for (p = actparms; p; p = TREE_CHAIN (p)) + if (! safe_for_unsave (TREE_VALUE (p))) + break; + + if (p == NULL_TREE) + { + tree new_actparms = NULL_TREE, q; + + for (p = actparms; p ; p = TREE_CHAIN (p)) + { + tree np = build_tree_list (TREE_PURPOSE (p), + unsave_expr (TREE_VALUE (p))); + if (new_actparms) + TREE_CHAIN (q) = np; + else + new_actparms = np; + q = np; + } + + actparms = new_actparms; + safe_for_reeval = 1; + } + } + + /* Generate a tail recursion sequence when calling ourselves. */ + + if (safe_for_reeval + && TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR + && TREE_OPERAND (TREE_OPERAND (exp, 0), 0) == current_function_decl) + { + /* We want to emit any pending stack adjustments before the tail + recursion "call". That way we know any adjustment after the tail + recursion call can be ignored if we indeed use the tail recursion + call expansion. */ + int save_pending_stack_adjust = pending_stack_adjust; + rtx last; + + /* Use a new sequence to hold any RTL we generate. We do not even + know if we will use this RTL yet. The final decision can not be + made until after RTL generation for the entire function is + complete. */ + push_to_sequence (0); + + /* Emit the pending stack adjustments before we expand any arguments. */ + do_pending_stack_adjust (); + + optimize_tail_recursion (exp, get_last_insn ()); + + last = get_last_insn (); + tail_recursion_insns = get_insns (); + end_sequence (); + + /* If the last insn on the tail recursion sequence is not a + BARRIER, then tail recursion optimization failed. */ + if (last == NULL_RTX || GET_CODE (last) != BARRIER) + tail_recursion_insns = NULL_RTX; + + /* Restore the original pending stack adjustment for the sibling and + normal call cases below. */ + pending_stack_adjust = save_pending_stack_adjust; + } + function_call_count++; if (fndecl && DECL_NAME (fndecl)) name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); +#ifdef PREFERRED_STACK_BOUNDARY + preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; +#else + preferred_stack_boundary = STACK_BOUNDARY; +#endif + /* Ensure current function's preferred stack boundary is at least what we need. 
We don't have to increase alignment for recursive functions. */ @@ -1973,708 +2117,881 @@ expand_call (exp, target, ignore) abort (); funtype = TREE_TYPE (funtype); - /* When calling a const function, we must pop the stack args right away, - so that the pop is deleted or moved with the call. */ - if (is_const) - NO_DEFER_POP; + /* We want to make two insn chains; one for a sibling call, the other + for a normal call. We will select one of the two chains after + initial RTL generation is complete. */ + for (pass = 0; pass < 2; pass++) + { + int sibcall_failure = 0; + /* We want to emit any pending stack adjustments before the tail + recursion "call". That way we know any adjustment after the tail + recursion call can be ignored if we indeed use the tail recursion + call expansion. */ + int save_pending_stack_adjust; + rtx insns; + rtx before_call; - /* Don't let pending stack adjusts add up to too much. - Also, do all pending adjustments now - if there is any chance this might be a call to alloca. */ + if (pass == 0) + { + /* Various reasons we can not use a sibling call. */ + if (! safe_for_reeval +#ifdef HAVE_sibcall_epilogue + || ! HAVE_sibcall_epilogue +#else + || 1 +#endif + /* The structure value address is used and modified in the + loop below. It does not seem worth the effort to save and + restore it as a state variable since few optimizable + sibling calls will return a structure. */ + || structure_value_addr != NULL_RTX + /* If the register holding the address is a callee saved + register, then we lose. We have no way to prevent that, + so we only allow calls to named functions. */ + || fndecl == NULL_TREE + || ! FUNCTION_OK_FOR_SIBCALL (fndecl)) + continue; - if (pending_stack_adjust >= 32 - || (pending_stack_adjust > 0 && may_be_alloca)) - do_pending_stack_adjust (); + /* State variables we need to save and restore between + iterations. */ + save_pending_stack_adjust = pending_stack_adjust; + } - if (profile_arc_flag && fork_or_exec) - { - /* A fork duplicates the profile information, and an exec discards - it. We can't rely on fork/exec to be paired. So write out the - profile information we have gathered so far, and clear it. */ - emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__bb_fork_func"), 0, - VOIDmode, 0); - - /* ??? When __clone is called with CLONE_VM set, profiling is - subject to race conditions, just as with multithreaded programs. */ - } + /* Other state variables that we must reinitialize each time + through the loop (that are not initialized by the loop itself). */ + argblock = 0; + call_fusage = 0; - /* Push the temporary stack slot level so that we can free any temporaries - we make. */ - push_temp_slots (); + /* Start a new sequence for the normal call case. - /* Start updating where the next arg would go. + From this point on, if the sibling call fails, we want to set + sibcall_failure instead of continuing the loop. */ + start_sequence (); - On some machines (such as the PA) indirect calls have a different - calling convention than normal calls. The last argument in - INIT_CUMULATIVE_ARGS tells the backend if this is an indirect call - or not. */ - INIT_CUMULATIVE_ARGS (args_so_far, funtype, NULL_RTX, (fndecl == 0)); + /* When calling a const function, we must pop the stack args right away, + so that the pop is deleted or moved with the call. */ + if (is_const) + NO_DEFER_POP; - /* If struct_value_rtx is 0, it means pass the address - as if it were an extra parameter. 
*/ - if (structure_value_addr && struct_value_rtx == 0) - { - /* If structure_value_addr is a REG other than - virtual_outgoing_args_rtx, we can use always use it. If it - is not a REG, we must always copy it into a register. - If it is virtual_outgoing_args_rtx, we must copy it to another - register in some cases. */ - rtx temp = (GET_CODE (structure_value_addr) != REG -#ifdef ACCUMULATE_OUTGOING_ARGS - || (stack_arg_under_construction - && structure_value_addr == virtual_outgoing_args_rtx) -#endif - ? copy_addr_to_reg (structure_value_addr) - : structure_value_addr); - - actparms - = tree_cons (error_mark_node, - make_tree (build_pointer_type (TREE_TYPE (funtype)), - temp), - actparms); - structure_value_addr_parm = 1; - } + /* Don't let pending stack adjusts add up to too much. + Also, do all pending adjustments now if there is any chance + this might be a call to alloca or if we are expanding a sibling + call sequence. */ + if (pending_stack_adjust >= 32 + || (pending_stack_adjust > 0 && may_be_alloca) + || pass == 0) + do_pending_stack_adjust (); - /* Count the arguments and set NUM_ACTUALS. */ - for (p = actparms, i = 0; p; p = TREE_CHAIN (p)) i++; - num_actuals = i; - - /* Compute number of named args. - Normally, don't include the last named arg if anonymous args follow. - We do include the last named arg if STRICT_ARGUMENT_NAMING is nonzero. - (If no anonymous args follow, the result of list_length is actually - one too large. This is harmless.) - - If PRETEND_OUTGOING_VARARGS_NAMED is set and STRICT_ARGUMENT_NAMING is - zero, this machine will be able to place unnamed args that were passed in - registers into the stack. So treat all args as named. This allows the - insns emitting for a specific argument list to be independent of the - function declaration. - - If PRETEND_OUTGOING_VARARGS_NAMED is not set, we do not have any reliable - way to pass unnamed args in registers, so we must force them into - memory. */ - - if ((STRICT_ARGUMENT_NAMING - || ! PRETEND_OUTGOING_VARARGS_NAMED) - && TYPE_ARG_TYPES (funtype) != 0) - n_named_args - = (list_length (TYPE_ARG_TYPES (funtype)) - /* Don't include the last named arg. */ - - (STRICT_ARGUMENT_NAMING ? 0 : 1) - /* Count the struct value address, if it is passed as a parm. */ - + structure_value_addr_parm); - else - /* If we know nothing, treat all args as named. */ - n_named_args = num_actuals; + if (profile_arc_flag && fork_or_exec) + { + /* A fork duplicates the profile information, and an exec discards + it. We can't rely on fork/exec to be paired. So write out the + profile information we have gathered so far, and clear it. */ + /* ??? When Linux's __clone is called with CLONE_VM set, profiling + is subject to race conditions, just as with multithreaded + programs. */ + + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__bb_fork_func"), 0, + VOIDmode, 0); + } - /* Make a vector to hold all the information about each arg. */ - args = (struct arg_data *) alloca (num_actuals * sizeof (struct arg_data)); - bzero ((char *) args, num_actuals * sizeof (struct arg_data)); + /* Push the temporary stack slot level so that we can free any + temporaries we make. */ + push_temp_slots (); + + /* Start updating where the next arg would go. + + On some machines (such as the PA) indirect calls have a different + calling convention than normal calls. The last argument in + INIT_CUMULATIVE_ARGS tells the backend if this is an indirect call + or not. 
*/ + INIT_CUMULATIVE_ARGS (args_so_far, funtype, NULL_RTX, (fndecl == 0)); + + /* If struct_value_rtx is 0, it means pass the address + as if it were an extra parameter. */ + if (structure_value_addr && struct_value_rtx == 0) + { + /* If structure_value_addr is a REG other than + virtual_outgoing_args_rtx, we can use always use it. If it + is not a REG, we must always copy it into a register. + If it is virtual_outgoing_args_rtx, we must copy it to another + register in some cases. */ + rtx temp = (GET_CODE (structure_value_addr) != REG +#ifdef ACCUMULATE_OUTGOING_ARGS + || (stack_arg_under_construction + && structure_value_addr == virtual_outgoing_args_rtx) +#endif + ? copy_addr_to_reg (structure_value_addr) + : structure_value_addr); + + actparms + = tree_cons (error_mark_node, + make_tree (build_pointer_type (TREE_TYPE (funtype)), + temp), + actparms); + structure_value_addr_parm = 1; + } - /* Build up entries inthe ARGS array, compute the size of the arguments - into ARGS_SIZE, etc. */ - initialize_argument_information (num_actuals, args, &args_size, n_named_args, - actparms, fndecl, &args_so_far, - reg_parm_stack_space, &old_stack_level, - &old_pending_adj, &must_preallocate, - &is_const); + /* Count the arguments and set NUM_ACTUALS. */ + for (p = actparms, i = 0; p; p = TREE_CHAIN (p)) i++; + num_actuals = i; + + /* Compute number of named args. + Normally, don't include the last named arg if anonymous args follow. + We do include the last named arg if STRICT_ARGUMENT_NAMING is nonzero. + (If no anonymous args follow, the result of list_length is actually + one too large. This is harmless.) + + If PRETEND_OUTGOING_VARARGS_NAMED is set and STRICT_ARGUMENT_NAMING is + zero, this machine will be able to place unnamed args that were + passed in registers into the stack. So treat all args as named. + This allows the insns emitting for a specific argument list to be + independent of the function declaration. + + If PRETEND_OUTGOING_VARARGS_NAMED is not set, we do not have any + reliable way to pass unnamed args in registers, so we must force + them into memory. */ + + if ((STRICT_ARGUMENT_NAMING + || ! PRETEND_OUTGOING_VARARGS_NAMED) + && TYPE_ARG_TYPES (funtype) != 0) + n_named_args + = (list_length (TYPE_ARG_TYPES (funtype)) + /* Don't include the last named arg. */ + - (STRICT_ARGUMENT_NAMING ? 0 : 1) + /* Count the struct value address, if it is passed as a parm. */ + + structure_value_addr_parm); + else + /* If we know nothing, treat all args as named. */ + n_named_args = num_actuals; + + /* Make a vector to hold all the information about each arg. */ + args = (struct arg_data *) alloca (num_actuals + * sizeof (struct arg_data)); + bzero ((char *) args, num_actuals * sizeof (struct arg_data)); + + /* Build up entries inthe ARGS array, compute the size of the arguments + into ARGS_SIZE, etc. */ + initialize_argument_information (num_actuals, args, &args_size, + n_named_args, actparms, fndecl, + &args_so_far, reg_parm_stack_space, + &old_stack_level, &old_pending_adj, + &must_preallocate, &is_const); #ifdef FINAL_REG_PARM_STACK_SPACE - reg_parm_stack_space = FINAL_REG_PARM_STACK_SPACE (args_size.constant, - args_size.var); + reg_parm_stack_space = FINAL_REG_PARM_STACK_SPACE (args_size.constant, + args_size.var); #endif - if (args_size.var) - { - /* If this function requires a variable-sized argument list, don't try to - make a cse'able block for this call. 
We may be able to do this - eventually, but it is too complicated to keep track of what insns go - in the cse'able block and which don't. */ + if (args_size.var) + { + /* If this function requires a variable-sized argument list, don't + try to make a cse'able block for this call. We may be able to + do this eventually, but it is too complicated to keep track of + what insns go in the cse'able block and which don't. - is_const = 0; - must_preallocate = 1; - } + Also do not make a sibling call. */ - /* Compute the actual size of the argument block required. The variable - and constant sizes must be combined, the size may have to be rounded, - and there may be a minimum required size. */ - unadjusted_args_size - = compute_argument_block_size (reg_parm_stack_space, &args_size, - preferred_stack_boundary); - - /* Now make final decision about preallocating stack space. */ - must_preallocate = finalize_must_preallocate (must_preallocate, - num_actuals, args, &args_size); - - /* If the structure value address will reference the stack pointer, we must - stabilize it. We don't need to do this if we know that we are not going - to adjust the stack pointer in processing this call. */ - - if (structure_value_addr - && (reg_mentioned_p (virtual_stack_dynamic_rtx, structure_value_addr) - || reg_mentioned_p (virtual_outgoing_args_rtx, structure_value_addr)) - && (args_size.var + is_const = 0; + must_preallocate = 1; + sibcall_failure = 1; + } + + /* Compute the actual size of the argument block required. The variable + and constant sizes must be combined, the size may have to be rounded, + and there may be a minimum required size. When generating a sibcall + pattern, do not round up, since we'll be re-using whatever space our + caller provided. */ + unadjusted_args_size + = compute_argument_block_size (reg_parm_stack_space, &args_size, + (pass == 0 ? 0 + : preferred_stack_boundary)); + + /* If the callee pops its own arguments, then it must pop exactly + the same number of arguments as the current function. */ + if (RETURN_POPS_ARGS (fndecl, funtype, unadjusted_args_size) + != RETURN_POPS_ARGS (current_function_decl, + TREE_TYPE (current_function_decl), + current_function_args_size)) + sibcall_failure = 1; + + /* Now make final decision about preallocating stack space. */ + must_preallocate = finalize_must_preallocate (must_preallocate, + num_actuals, args, + &args_size); + + /* If the structure value address will reference the stack pointer, we + must stabilize it. We don't need to do this if we know that we are + not going to adjust the stack pointer in processing this call. */ + + if (structure_value_addr + && (reg_mentioned_p (virtual_stack_dynamic_rtx, structure_value_addr) + || reg_mentioned_p (virtual_outgoing_args_rtx, + structure_value_addr)) + && (args_size.var #ifndef ACCUMULATE_OUTGOING_ARGS - || args_size.constant + || args_size.constant #endif - )) - structure_value_addr = copy_to_reg (structure_value_addr); + )) + structure_value_addr = copy_to_reg (structure_value_addr); - /* Precompute any arguments as needed. */ - precompute_arguments (is_const, must_preallocate, num_actuals, - args, &args_size); + /* Precompute any arguments as needed. */ + precompute_arguments (is_const, must_preallocate, num_actuals, + args, &args_size); - /* Now we are about to start emitting insns that can be deleted - if a libcall is deleted. */ - if (is_const || is_malloc) - start_sequence (); + /* Now we are about to start emitting insns that can be deleted + if a libcall is deleted. 
*/ + if (is_const || is_malloc) + start_sequence (); - /* If we have no actual push instructions, or shouldn't use them, - make space for all args right now. */ + /* If we have no actual push instructions, or shouldn't use them, + make space for all args right now. */ - if (args_size.var != 0) - { - if (old_stack_level == 0) + if (args_size.var != 0) { - emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); - old_pending_adj = pending_stack_adjust; - pending_stack_adjust = 0; + if (old_stack_level == 0) + { + emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); + old_pending_adj = pending_stack_adjust; + pending_stack_adjust = 0; #ifdef ACCUMULATE_OUTGOING_ARGS - /* stack_arg_under_construction says whether a stack arg is - being constructed at the old stack level. Pushing the stack - gets a clean outgoing argument block. */ - old_stack_arg_under_construction = stack_arg_under_construction; - stack_arg_under_construction = 0; + /* stack_arg_under_construction says whether a stack arg is + being constructed at the old stack level. Pushing the stack + gets a clean outgoing argument block. */ + old_stack_arg_under_construction = stack_arg_under_construction; + stack_arg_under_construction = 0; #endif + } + argblock = push_block (ARGS_SIZE_RTX (args_size), 0, 0); } - argblock = push_block (ARGS_SIZE_RTX (args_size), 0, 0); - } - else - { - /* Note that we must go through the motions of allocating an argument - block even if the size is zero because we may be storing args - in the area reserved for register arguments, which may be part of - the stack frame. */ + else + { + /* Note that we must go through the motions of allocating an argument + block even if the size is zero because we may be storing args + in the area reserved for register arguments, which may be part of + the stack frame. */ - int needed = args_size.constant; + int needed = args_size.constant; - /* Store the maximum argument space used. It will be pushed by - the prologue (if ACCUMULATE_OUTGOING_ARGS, or stack overflow - checking). */ + /* Store the maximum argument space used. It will be pushed by + the prologue (if ACCUMULATE_OUTGOING_ARGS, or stack overflow + checking). */ - if (needed > current_function_outgoing_args_size) - current_function_outgoing_args_size = needed; + if (needed > current_function_outgoing_args_size) + current_function_outgoing_args_size = needed; - if (must_preallocate) - { + if (must_preallocate) + { #ifdef ACCUMULATE_OUTGOING_ARGS - /* Since the stack pointer will never be pushed, it is possible for - the evaluation of a parm to clobber something we have already - written to the stack. Since most function calls on RISC machines - do not use the stack, this is uncommon, but must work correctly. + /* Since the stack pointer will never be pushed, it is possible + for the evaluation of a parm to clobber something we have + already written to the stack. Since most function calls on + RISC machines do not use the stack, this is uncommon, but + must work correctly. - Therefore, we save any area of the stack that was already written - and that we are using. Here we set up to do this by making a new - stack usage map from the old one. The actual save will be done - by store_one_arg. + Therefore, we save any area of the stack that was already + written and that we are using. Here we set up to do this by + making a new stack usage map from the old one. The actual + save will be done by store_one_arg. 
- Another approach might be to try to reorder the argument - evaluations to avoid this conflicting stack usage. */ + Another approach might be to try to reorder the argument + evaluations to avoid this conflicting stack usage. */ #ifndef OUTGOING_REG_PARM_STACK_SPACE - /* Since we will be writing into the entire argument area, the - map must be allocated for its entire size, not just the part that - is the responsibility of the caller. */ - needed += reg_parm_stack_space; + /* Since we will be writing into the entire argument area, the + map must be allocated for its entire size, not just the part + that is the responsibility of the caller. */ + needed += reg_parm_stack_space; #endif #ifdef ARGS_GROW_DOWNWARD - highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, - needed + 1); + highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, + needed + 1); #else - highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, - needed); + highest_outgoing_arg_in_use = MAX (initial_highest_arg_in_use, + needed); #endif - stack_usage_map = (char *) alloca (highest_outgoing_arg_in_use); + stack_usage_map = (char *) alloca (highest_outgoing_arg_in_use); - if (initial_highest_arg_in_use) - bcopy (initial_stack_usage_map, stack_usage_map, - initial_highest_arg_in_use); + if (initial_highest_arg_in_use) + bcopy (initial_stack_usage_map, stack_usage_map, + initial_highest_arg_in_use); - if (initial_highest_arg_in_use != highest_outgoing_arg_in_use) - bzero (&stack_usage_map[initial_highest_arg_in_use], - highest_outgoing_arg_in_use - initial_highest_arg_in_use); - needed = 0; + if (initial_highest_arg_in_use != highest_outgoing_arg_in_use) + bzero (&stack_usage_map[initial_highest_arg_in_use], + (highest_outgoing_arg_in_use + - initial_highest_arg_in_use)); + needed = 0; - /* The address of the outgoing argument list must not be copied to a - register here, because argblock would be left pointing to the - wrong place after the call to allocate_dynamic_stack_space below. - */ + /* The address of the outgoing argument list must not be copied + to a register here, because argblock would be left pointing + to the wrong place after the call to + allocate_dynamic_stack_space below. */ - argblock = virtual_outgoing_args_rtx; + argblock = virtual_outgoing_args_rtx; #else /* not ACCUMULATE_OUTGOING_ARGS */ - if (inhibit_defer_pop == 0) - { - /* Try to reuse some or all of the pending_stack_adjust - to get this space. Maybe we can avoid any pushing. */ - if (needed > pending_stack_adjust) + if (inhibit_defer_pop == 0) { - needed -= pending_stack_adjust; - pending_stack_adjust = 0; + /* Try to reuse some or all of the pending_stack_adjust + to get this space. Maybe we can avoid any pushing. */ + if (needed > pending_stack_adjust) + { + needed -= pending_stack_adjust; + pending_stack_adjust = 0; + } + else + { + pending_stack_adjust -= needed; + needed = 0; + } } + /* Special case this because overhead of `push_block' in this + case is non-trivial. */ + if (needed == 0) + argblock = virtual_outgoing_args_rtx; else - { - pending_stack_adjust -= needed; - needed = 0; - } - } - /* Special case this because overhead of `push_block' in this - case is non-trivial. */ - if (needed == 0) - argblock = virtual_outgoing_args_rtx; - else - argblock = push_block (GEN_INT (needed), 0, 0); - - /* We only really need to call `copy_to_reg' in the case where push - insns are going to be used to pass ARGBLOCK to a function - call in ARGS. 
In that case, the stack pointer changes value - from the allocation point to the call point, and hence - the value of VIRTUAL_OUTGOING_ARGS_RTX changes as well. - But might as well always do it. */ - argblock = copy_to_reg (argblock); + argblock = push_block (GEN_INT (needed), 0, 0); + + /* We only really need to call `copy_to_reg' in the case where + push insns are going to be used to pass ARGBLOCK to a function + call in ARGS. In that case, the stack pointer changes value + from the allocation point to the call point, and hence + the value of VIRTUAL_OUTGOING_ARGS_RTX changes as well. + But might as well always do it. */ + argblock = copy_to_reg (argblock); #endif /* not ACCUMULATE_OUTGOING_ARGS */ + } + } + + /* The argument block when performing a sibling call is the + incoming argument block. */ + if (pass == 0) + { + rtx temp = plus_constant (arg_pointer_rtx, + FIRST_PARM_OFFSET (current_function_decl)); + argblock = force_reg (Pmode, force_operand (temp, NULL_RTX)); } - } #ifdef ACCUMULATE_OUTGOING_ARGS - /* The save/restore code in store_one_arg handles all cases except one: - a constructor call (including a C function returning a BLKmode struct) - to initialize an argument. */ - if (stack_arg_under_construction) - { + /* The save/restore code in store_one_arg handles all cases except one: + a constructor call (including a C function returning a BLKmode struct) + to initialize an argument. */ + if (stack_arg_under_construction) + { #ifndef OUTGOING_REG_PARM_STACK_SPACE - rtx push_size = GEN_INT (reg_parm_stack_space + args_size.constant); + rtx push_size = GEN_INT (reg_parm_stack_space + args_size.constant); #else - rtx push_size = GEN_INT (args_size.constant); + rtx push_size = GEN_INT (args_size.constant); #endif - if (old_stack_level == 0) - { - emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); - old_pending_adj = pending_stack_adjust; - pending_stack_adjust = 0; - /* stack_arg_under_construction says whether a stack arg is - being constructed at the old stack level. Pushing the stack - gets a clean outgoing argument block. */ - old_stack_arg_under_construction = stack_arg_under_construction; - stack_arg_under_construction = 0; - /* Make a new map for the new argument list. */ - stack_usage_map = (char *)alloca (highest_outgoing_arg_in_use); - bzero (stack_usage_map, highest_outgoing_arg_in_use); - highest_outgoing_arg_in_use = 0; + if (old_stack_level == 0) + { + emit_stack_save (SAVE_BLOCK, &old_stack_level, NULL_RTX); + old_pending_adj = pending_stack_adjust; + pending_stack_adjust = 0; + /* stack_arg_under_construction says whether a stack arg is + being constructed at the old stack level. Pushing the stack + gets a clean outgoing argument block. */ + old_stack_arg_under_construction = stack_arg_under_construction; + stack_arg_under_construction = 0; + /* Make a new map for the new argument list. */ + stack_usage_map = (char *)alloca (highest_outgoing_arg_in_use); + bzero (stack_usage_map, highest_outgoing_arg_in_use); + highest_outgoing_arg_in_use = 0; + } + allocate_dynamic_stack_space (push_size, NULL_RTX, BITS_PER_UNIT); } - allocate_dynamic_stack_space (push_size, NULL_RTX, BITS_PER_UNIT); - } - /* If argument evaluation might modify the stack pointer, copy the - address of the argument list to a register. */ - for (i = 0; i < num_actuals; i++) - if (args[i].pass_on_stack) - { - argblock = copy_addr_to_reg (argblock); - break; - } + /* If argument evaluation might modify the stack pointer, copy the + address of the argument list to a register. 
*/ + for (i = 0; i < num_actuals; i++) + if (args[i].pass_on_stack) + { + argblock = copy_addr_to_reg (argblock); + break; + } #endif - compute_argument_addresses (args, argblock, num_actuals); + compute_argument_addresses (args, argblock, num_actuals); #ifdef PUSH_ARGS_REVERSED #ifdef PREFERRED_STACK_BOUNDARY - /* If we push args individually in reverse order, perform stack alignment - before the first push (the last arg). */ - if (args_size.constant != unadjusted_args_size) - { - /* When the stack adjustment is pending, - we get better code by combining the adjustments. */ - if (pending_stack_adjust && !is_const - && !inhibit_defer_pop) + /* If we push args individually in reverse order, perform stack alignment + before the first push (the last arg). */ + if (args_size.constant != unadjusted_args_size) { - args_size.constant = (unadjusted_args_size - + ((pending_stack_adjust + args_size.constant - + arg_space_so_far - - unadjusted_args_size) - % (preferred_stack_boundary / BITS_PER_UNIT))); - pending_stack_adjust -= args_size.constant - unadjusted_args_size; - do_pending_stack_adjust (); - } - else if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - unadjusted_args_size)); - arg_space_so_far += args_size.constant - unadjusted_args_size; + /* When the stack adjustment is pending, we get better code + by combining the adjustments. */ + if (pending_stack_adjust && ! is_const + && ! inhibit_defer_pop) + { + args_size.constant = (unadjusted_args_size + + ((pending_stack_adjust + + args_size.constant + + arg_space_so_far + - unadjusted_args_size) + % (preferred_stack_boundary + / BITS_PER_UNIT))); + pending_stack_adjust -= (args_size.constant + - unadjusted_args_size); + do_pending_stack_adjust (); + } + else if (argblock == 0) + anti_adjust_stack (GEN_INT (args_size.constant + - unadjusted_args_size)); + arg_space_so_far += args_size.constant - unadjusted_args_size; - /* Now that the stack is properly aligned, pops can't safely - be deferred during the evaluation of the arguments. */ - NO_DEFER_POP; - } + /* Now that the stack is properly aligned, pops can't safely + be deferred during the evaluation of the arguments. */ + NO_DEFER_POP; + } #endif #endif - /* Don't try to defer pops if preallocating, not even from the first arg, - since ARGBLOCK probably refers to the SP. */ - if (argblock) - NO_DEFER_POP; + /* Don't try to defer pops if preallocating, not even from the first arg, + since ARGBLOCK probably refers to the SP. */ + if (argblock) + NO_DEFER_POP; - funexp = rtx_for_function_call (fndecl, exp); + funexp = rtx_for_function_call (fndecl, exp); - /* Figure out the register where the value, if any, will come back. */ - valreg = 0; - if (TYPE_MODE (TREE_TYPE (exp)) != VOIDmode - && ! structure_value_addr) - { - if (pcc_struct_value) - valreg = hard_function_value (build_pointer_type (TREE_TYPE (exp)), - fndecl, 0); - else - valreg = hard_function_value (TREE_TYPE (exp), fndecl, 0); - } + /* Figure out the register where the value, if any, will come back. */ + valreg = 0; + if (TYPE_MODE (TREE_TYPE (exp)) != VOIDmode + && ! structure_value_addr) + { + if (pcc_struct_value) + valreg = hard_function_value (build_pointer_type (TREE_TYPE (exp)), + fndecl, 0); + else + valreg = hard_function_value (TREE_TYPE (exp), fndecl, 0); + } - /* Precompute all register parameters. It isn't safe to compute anything - once we have started filling any specific hard regs. */ - precompute_register_parameters (num_actuals, args, ®_parm_seen); + /* Precompute all register parameters. 
It isn't safe to compute anything + once we have started filling any specific hard regs. */ + precompute_register_parameters (num_actuals, args, ®_parm_seen); #if defined(ACCUMULATE_OUTGOING_ARGS) && defined(REG_PARM_STACK_SPACE) - - /* Save the fixed argument area if it's part of the caller's frame and - is clobbered by argument setup for this call. */ - save_area = save_fixed_argument_area (reg_parm_stack_space, argblock, - &low_to_save, &high_to_save); + /* Save the fixed argument area if it's part of the caller's frame and + is clobbered by argument setup for this call. */ + save_area = save_fixed_argument_area (reg_parm_stack_space, argblock, + &low_to_save, &high_to_save); #endif - - /* Now store (and compute if necessary) all non-register parms. - These come before register parms, since they can require block-moves, - which could clobber the registers used for register parms. - Parms which have partial registers are not stored here, - but we do preallocate space here if they want that. */ + /* Now store (and compute if necessary) all non-register parms. + These come before register parms, since they can require block-moves, + which could clobber the registers used for register parms. + Parms which have partial registers are not stored here, + but we do preallocate space here if they want that. */ - for (i = 0; i < num_actuals; i++) - if (args[i].reg == 0 || args[i].pass_on_stack) - store_one_arg (&args[i], argblock, may_be_alloca, - args_size.var != 0, reg_parm_stack_space); - - /* If we have a parm that is passed in registers but not in memory - and whose alignment does not permit a direct copy into registers, - make a group of pseudos that correspond to each register that we - will later fill. */ - if (STRICT_ALIGNMENT) - store_unaligned_arguments_into_pseudos (args, num_actuals); - - /* Now store any partially-in-registers parm. - This is the last place a block-move can happen. */ - if (reg_parm_seen) - for (i = 0; i < num_actuals; i++) - if (args[i].partial != 0 && ! args[i].pass_on_stack) - store_one_arg (&args[i], argblock, may_be_alloca, - args_size.var != 0, reg_parm_stack_space); + for (i = 0; i < num_actuals; i++) + if (args[i].reg == 0 || args[i].pass_on_stack) + store_one_arg (&args[i], argblock, may_be_alloca, + args_size.var != 0, reg_parm_stack_space); + + /* If we have a parm that is passed in registers but not in memory + and whose alignment does not permit a direct copy into registers, + make a group of pseudos that correspond to each register that we + will later fill. */ + if (STRICT_ALIGNMENT) + store_unaligned_arguments_into_pseudos (args, num_actuals); + + /* Now store any partially-in-registers parm. + This is the last place a block-move can happen. */ + if (reg_parm_seen) + for (i = 0; i < num_actuals; i++) + if (args[i].partial != 0 && ! args[i].pass_on_stack) + store_one_arg (&args[i], argblock, may_be_alloca, + args_size.var != 0, reg_parm_stack_space); #ifndef PUSH_ARGS_REVERSED #ifdef PREFERRED_STACK_BOUNDARY - /* If we pushed args in forward order, perform stack alignment - after pushing the last arg. */ - if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - unadjusted_args_size)); + /* If we pushed args in forward order, perform stack alignment + after pushing the last arg. */ + /* ??? Fix for arg_space_so_far. 
*/ + if (argblock == 0) + anti_adjust_stack (GEN_INT (args_size.constant + - unadjusted_args_size)); #endif #endif - /* If register arguments require space on the stack and stack space - was not preallocated, allocate stack space here for arguments - passed in registers. */ + /* If register arguments require space on the stack and stack space + was not preallocated, allocate stack space here for arguments + passed in registers. */ #if ! defined(ACCUMULATE_OUTGOING_ARGS) && defined(OUTGOING_REG_PARM_STACK_SPACE) - if (must_preallocate == 0 && reg_parm_stack_space > 0) - anti_adjust_stack (GEN_INT (reg_parm_stack_space)); + if (must_preallocate == 0 && reg_parm_stack_space > 0) + anti_adjust_stack (GEN_INT (reg_parm_stack_space)); #endif - /* Pass the function the address in which to return a structure value. */ - if (structure_value_addr && ! structure_value_addr_parm) - { - emit_move_insn (struct_value_rtx, - force_reg (Pmode, - force_operand (structure_value_addr, - NULL_RTX))); - - /* Mark the memory for the aggregate as write-only. */ - if (current_function_check_memory_usage) - emit_library_call (chkr_set_right_libfunc, 1, - VOIDmode, 3, - structure_value_addr, Pmode, - GEN_INT (struct_value_size), TYPE_MODE (sizetype), - GEN_INT (MEMORY_USE_WO), - TYPE_MODE (integer_type_node)); - - if (GET_CODE (struct_value_rtx) == REG) - use_reg (&call_fusage, struct_value_rtx); - } - - funexp = prepare_call_address (funexp, fndecl, &call_fusage, reg_parm_seen); - - load_register_parameters (args, num_actuals, &call_fusage); - - /* Perform postincrements before actually calling the function. */ - emit_queue (); - - /* Save a pointer to the last insn before the call, so that we can - later safely search backwards to find the CALL_INSN. */ - before_call = get_last_insn (); + /* Pass the function the address in which to return a + structure value. */ + if (pass != 0 && structure_value_addr && ! structure_value_addr_parm) + { + emit_move_insn (struct_value_rtx, + force_reg (Pmode, + force_operand (structure_value_addr, + NULL_RTX))); + + /* Mark the memory for the aggregate as write-only. */ + if (current_function_check_memory_usage) + emit_library_call (chkr_set_right_libfunc, 1, + VOIDmode, 3, + structure_value_addr, ptr_mode, + GEN_INT (struct_value_size), + TYPE_MODE (sizetype), + GEN_INT (MEMORY_USE_WO), + TYPE_MODE (integer_type_node)); + + if (GET_CODE (struct_value_rtx) == REG) + use_reg (&call_fusage, struct_value_rtx); + } - /* All arguments and registers used for the call must be set up by now! */ + funexp = prepare_call_address (funexp, fndecl, &call_fusage, + reg_parm_seen); - /* Generate the actual call instruction. */ - emit_call_1 (funexp, fndecl, funtype, unadjusted_args_size, - args_size.constant, struct_value_size, - FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), - valreg, old_inhibit_defer_pop, call_fusage, is_const, nothrow); + load_register_parameters (args, num_actuals, &call_fusage); + + /* Perform postincrements before actually calling the function. */ + emit_queue (); - /* If call is cse'able, make appropriate pair of reg-notes around it. - Test valreg so we don't crash; may safely ignore `const' - if return type is void. Disable for PARALLEL return values, because - we have no way to move such values into a pseudo register. 
*/ - if (is_const && valreg != 0 && GET_CODE (valreg) != PARALLEL) - { - rtx note = 0; - rtx temp = gen_reg_rtx (GET_MODE (valreg)); - rtx insns; + /* Save a pointer to the last insn before the call, so that we can + later safely search backwards to find the CALL_INSN. */ + before_call = get_last_insn (); - /* Mark the return value as a pointer if needed. */ - if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) + /* All arguments and registers used for the call must be set up by + now! */ + + /* Generate the actual call instruction. */ + emit_call_1 (funexp, fndecl, funtype, unadjusted_args_size, + args_size.constant, struct_value_size, + FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), + valreg, old_inhibit_defer_pop, call_fusage, + ((is_const ? ECF_IS_CONST : 0) + | (nothrow ? ECF_NOTHROW : 0) + | (pass == 0 ? ECF_SIBCALL : 0))); + + /* If call is cse'able, make appropriate pair of reg-notes around it. + Test valreg so we don't crash; may safely ignore `const' + if return type is void. Disable for PARALLEL return values, because + we have no way to move such values into a pseudo register. */ + if (is_const && valreg != 0 && GET_CODE (valreg) != PARALLEL) { - tree pointed_to = TREE_TYPE (TREE_TYPE (exp)); - mark_reg_pointer (temp, TYPE_ALIGN (pointed_to) / BITS_PER_UNIT); - } + rtx note = 0; + rtx temp = gen_reg_rtx (GET_MODE (valreg)); + rtx insns; - /* Construct an "equal form" for the value which mentions all the - arguments in order as well as the function name. */ + /* Mark the return value as a pointer if needed. */ + if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) + { + tree pointed_to = TREE_TYPE (TREE_TYPE (exp)); + mark_reg_pointer (temp, TYPE_ALIGN (pointed_to) / BITS_PER_UNIT); + } + + /* Construct an "equal form" for the value which mentions all the + arguments in order as well as the function name. */ #ifdef PUSH_ARGS_REVERSED - for (i = 0; i < num_actuals; i++) - note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); + for (i = 0; i < num_actuals; i++) + note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); #else - for (i = num_actuals - 1; i >= 0; i--) - note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); + for (i = num_actuals - 1; i >= 0; i--) + note = gen_rtx_EXPR_LIST (VOIDmode, args[i].initial_value, note); #endif - note = gen_rtx_EXPR_LIST (VOIDmode, funexp, note); - - insns = get_insns (); - end_sequence (); - - emit_libcall_block (insns, temp, valreg, note); + note = gen_rtx_EXPR_LIST (VOIDmode, funexp, note); - valreg = temp; - } - else if (is_const) - { - /* Otherwise, just write out the sequence without a note. */ - rtx insns = get_insns (); - - end_sequence (); - emit_insns (insns); - } - else if (is_malloc) - { - rtx temp = gen_reg_rtx (GET_MODE (valreg)); - rtx last, insns; + insns = get_insns (); + end_sequence (); - /* The return value from a malloc-like function is a pointer. */ - if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) - mark_reg_pointer (temp, BIGGEST_ALIGNMENT / BITS_PER_UNIT); - - emit_move_insn (temp, valreg); - - /* The return value from a malloc-like function can not alias - anything else. */ - last = get_last_insn (); - REG_NOTES (last) = - gen_rtx_EXPR_LIST (REG_NOALIAS, temp, REG_NOTES (last)); + emit_libcall_block (insns, temp, valreg, note); + + valreg = temp; + } + else if (is_const) + { + /* Otherwise, just write out the sequence without a note. */ + rtx insns = get_insns (); - /* Write out the sequence. 
*/ - insns = get_insns (); - end_sequence (); - emit_insns (insns); - valreg = temp; - } + end_sequence (); + emit_insns (insns); + } + else if (is_malloc) + { + rtx temp = gen_reg_rtx (GET_MODE (valreg)); + rtx last, insns; + + /* The return value from a malloc-like function is a pointer. */ + if (TREE_CODE (TREE_TYPE (exp)) == POINTER_TYPE) + mark_reg_pointer (temp, BIGGEST_ALIGNMENT / BITS_PER_UNIT); + + emit_move_insn (temp, valreg); + + /* The return value from a malloc-like function can not alias + anything else. */ + last = get_last_insn (); + REG_NOTES (last) = + gen_rtx_EXPR_LIST (REG_NOALIAS, temp, REG_NOTES (last)); + + /* Write out the sequence. */ + insns = get_insns (); + end_sequence (); + emit_insns (insns); + valreg = temp; + } - /* For calls to `setjmp', etc., inform flow.c it should complain - if nonvolatile values are live. */ + /* For calls to `setjmp', etc., inform flow.c it should complain + if nonvolatile values are live. For functions that cannot return, + inform flow that control does not fall through. */ - if (returns_twice) - { - /* The NOTE_INSN_SETJMP note must be emitted immediately after the - CALL_INSN. Some ports emit more than just a CALL_INSN above, so - we must search for it here. */ - rtx last = get_last_insn (); - while (GET_CODE (last) != CALL_INSN) + if (returns_twice || is_volatile || is_longjmp || pass == 0) { - last = PREV_INSN (last); - /* There was no CALL_INSN? */ - if (last == before_call) - abort (); - } - emit_note_after (NOTE_INSN_SETJMP, last); - current_function_calls_setjmp = 1; - } + /* The barrier or NOTE_INSN_SETJMP note must be emitted + immediately after the CALL_INSN. Some ports emit more + than just a CALL_INSN above, so we must search for it here. */ - if (is_longjmp) - current_function_calls_longjmp = 1; + rtx last = get_last_insn (); + while (GET_CODE (last) != CALL_INSN) + { + last = PREV_INSN (last); + /* There was no CALL_INSN? */ + if (last == before_call) + abort (); + } - /* Notice functions that cannot return. - If optimizing, insns emitted below will be dead. - If not optimizing, they will exist, which is useful - if the user uses the `return' command in the debugger. */ + if (returns_twice) + { + emit_note_after (NOTE_INSN_SETJMP, last); + current_function_calls_setjmp = 1; + sibcall_failure = 1; + } + else + emit_barrier_after (last); + } - if (is_volatile || is_longjmp) - emit_barrier (); + if (is_longjmp) + current_function_calls_longjmp = 1, sibcall_failure = 1; - /* If value type not void, return an rtx for the value. */ + /* If value type not void, return an rtx for the value. */ - /* If there are cleanups to be called, don't use a hard reg as target. - We need to double check this and see if it matters anymore. */ - if (any_pending_cleanups (1) - && target && REG_P (target) - && REGNO (target) < FIRST_PSEUDO_REGISTER) - target = 0; + /* If there are cleanups to be called, don't use a hard reg as target. + We need to double check this and see if it matters anymore. 
*/ + if (any_pending_cleanups (1) + && target && REG_P (target) + && REGNO (target) < FIRST_PSEUDO_REGISTER) + target = 0, sibcall_failure = 1; - if (TYPE_MODE (TREE_TYPE (exp)) == VOIDmode - || ignore) - { - target = const0_rtx; - } - else if (structure_value_addr) - { - if (target == 0 || GET_CODE (target) != MEM) + if (TYPE_MODE (TREE_TYPE (exp)) == VOIDmode + || ignore) { - target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), - memory_address (TYPE_MODE (TREE_TYPE (exp)), - structure_value_addr)); - MEM_SET_IN_STRUCT_P (target, - AGGREGATE_TYPE_P (TREE_TYPE (exp))); + target = const0_rtx; } - } - else if (pcc_struct_value) - { - /* This is the special C++ case where we need to - know what the true target was. We take care to - never use this value more than once in one expression. */ - target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), - copy_to_reg (valreg)); - MEM_SET_IN_STRUCT_P (target, AGGREGATE_TYPE_P (TREE_TYPE (exp))); - } - /* Handle calls that return values in multiple non-contiguous locations. - The Irix 6 ABI has examples of this. */ - else if (GET_CODE (valreg) == PARALLEL) - { - int bytes = int_size_in_bytes (TREE_TYPE (exp)); - - if (target == 0) + else if (structure_value_addr) { - target = assign_stack_temp (TYPE_MODE (TREE_TYPE (exp)), bytes, 0); + if (target == 0 || GET_CODE (target) != MEM) + { + target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), + memory_address (TYPE_MODE (TREE_TYPE (exp)), + structure_value_addr)); + MEM_SET_IN_STRUCT_P (target, + AGGREGATE_TYPE_P (TREE_TYPE (exp))); + } + } + else if (pcc_struct_value) + { + /* This is the special C++ case where we need to + know what the true target was. We take care to + never use this value more than once in one expression. */ + target = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (exp)), + copy_to_reg (valreg)); MEM_SET_IN_STRUCT_P (target, AGGREGATE_TYPE_P (TREE_TYPE (exp))); - preserve_temp_slots (target); } + /* Handle calls that return values in multiple non-contiguous locations. + The Irix 6 ABI has examples of this. */ + else if (GET_CODE (valreg) == PARALLEL) + { + int bytes = int_size_in_bytes (TREE_TYPE (exp)); - if (! rtx_equal_p (target, valreg)) - emit_group_store (target, valreg, bytes, - TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT); - } - else if (target && GET_MODE (target) == TYPE_MODE (TREE_TYPE (exp)) - && GET_MODE (target) == GET_MODE (valreg)) - /* TARGET and VALREG cannot be equal at this point because the latter - would not have REG_FUNCTION_VALUE_P true, while the former would if - it were referring to the same register. - - If they refer to the same register, this move will be a no-op, except - when function inlining is being done. */ - emit_move_insn (target, valreg); - else if (TYPE_MODE (TREE_TYPE (exp)) == BLKmode) - target = copy_blkmode_from_reg (target, valreg, TREE_TYPE (exp)); - else - target = copy_to_reg (valreg); + if (target == 0) + { + target = assign_stack_temp (TYPE_MODE (TREE_TYPE (exp)), + bytes, 0); + MEM_SET_IN_STRUCT_P (target, AGGREGATE_TYPE_P (TREE_TYPE (exp))); + preserve_temp_slots (target); + } + + if (! rtx_equal_p (target, valreg)) + emit_group_store (target, valreg, bytes, + TYPE_ALIGN (TREE_TYPE (exp)) / BITS_PER_UNIT); + /* We can not support sibling calls for this case. 
*/ + sibcall_failure = 1; + } + else if (target + && GET_MODE (target) == TYPE_MODE (TREE_TYPE (exp)) + && GET_MODE (target) == GET_MODE (valreg)) + { + /* TARGET and VALREG cannot be equal at this point because the + latter would not have REG_FUNCTION_VALUE_P true, while the + former would if it were referring to the same register. + + If they refer to the same register, this move will be a no-op, + except when function inlining is being done. */ + emit_move_insn (target, valreg); + } + else if (TYPE_MODE (TREE_TYPE (exp)) == BLKmode) + target = copy_blkmode_from_reg (target, valreg, TREE_TYPE (exp)); + else + target = copy_to_reg (valreg); #ifdef PROMOTE_FUNCTION_RETURN - /* If we promoted this return value, make the proper SUBREG. TARGET - might be const0_rtx here, so be careful. */ - if (GET_CODE (target) == REG - && TYPE_MODE (TREE_TYPE (exp)) != BLKmode - && GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp))) - { - tree type = TREE_TYPE (exp); - int unsignedp = TREE_UNSIGNED (type); + /* If we promoted this return value, make the proper SUBREG. TARGET + might be const0_rtx here, so be careful. */ + if (GET_CODE (target) == REG + && TYPE_MODE (TREE_TYPE (exp)) != BLKmode + && GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp))) + { + tree type = TREE_TYPE (exp); + int unsignedp = TREE_UNSIGNED (type); - /* If we don't promote as expected, something is wrong. */ - if (GET_MODE (target) - != promote_mode (type, TYPE_MODE (type), &unsignedp, 1)) - abort (); + /* If we don't promote as expected, something is wrong. */ + if (GET_MODE (target) + != promote_mode (type, TYPE_MODE (type), &unsignedp, 1)) + abort (); - target = gen_rtx_SUBREG (TYPE_MODE (type), target, 0); - SUBREG_PROMOTED_VAR_P (target) = 1; - SUBREG_PROMOTED_UNSIGNED_P (target) = unsignedp; - } + target = gen_rtx_SUBREG (TYPE_MODE (type), target, 0); + SUBREG_PROMOTED_VAR_P (target) = 1; + SUBREG_PROMOTED_UNSIGNED_P (target) = unsignedp; + } #endif - /* If size of args is variable or this was a constructor call for a stack - argument, restore saved stack-pointer value. */ + /* If size of args is variable or this was a constructor call for a stack + argument, restore saved stack-pointer value. */ - if (old_stack_level) - { - emit_stack_restore (SAVE_BLOCK, old_stack_level, NULL_RTX); - pending_stack_adjust = old_pending_adj; + if (old_stack_level) + { + emit_stack_restore (SAVE_BLOCK, old_stack_level, NULL_RTX); + pending_stack_adjust = old_pending_adj; #ifdef ACCUMULATE_OUTGOING_ARGS - stack_arg_under_construction = old_stack_arg_under_construction; - highest_outgoing_arg_in_use = initial_highest_arg_in_use; - stack_usage_map = initial_stack_usage_map; + stack_arg_under_construction = old_stack_arg_under_construction; + highest_outgoing_arg_in_use = initial_highest_arg_in_use; + stack_usage_map = initial_stack_usage_map; #endif - } + sibcall_failure = 1; + } #ifdef ACCUMULATE_OUTGOING_ARGS - else - { + else + { #ifdef REG_PARM_STACK_SPACE - if (save_area) - restore_fixed_argument_area (save_area, argblock, - high_to_save, low_to_save); + if (save_area) + { + restore_fixed_argument_area (save_area, argblock, + high_to_save, low_to_save); + sibcall_failure = 1; + } #endif - /* If we saved any argument areas, restore them. 
*/
-  for (i = 0; i < num_actuals; i++)
-    if (args[i].save_area)
-      {
-	enum machine_mode save_mode = GET_MODE (args[i].save_area);
-	rtx stack_area
-	  = gen_rtx_MEM (save_mode,
-			 memory_address (save_mode,
-					 XEXP (args[i].stack_slot, 0)));
-
-	if (save_mode != BLKmode)
-	  emit_move_insn (stack_area, args[i].save_area);
-	else
-	  emit_block_move (stack_area, validize_mem (args[i].save_area),
-			   GEN_INT (args[i].size.constant),
-			   PARM_BOUNDARY / BITS_PER_UNIT);
-      }
+	  /* If we saved any argument areas, restore them.  */
+	  for (i = 0; i < num_actuals; i++)
+	    if (args[i].save_area)
+	      {
+		enum machine_mode save_mode = GET_MODE (args[i].save_area);
+		rtx stack_area
+		  = gen_rtx_MEM (save_mode,
+				 memory_address (save_mode,
+						 XEXP (args[i].stack_slot, 0)));
+
+		if (save_mode != BLKmode)
+		  emit_move_insn (stack_area, args[i].save_area);
+		else
+		  emit_block_move (stack_area,
+				   validize_mem (args[i].save_area),
+				   GEN_INT (args[i].size.constant),
+				   PARM_BOUNDARY / BITS_PER_UNIT);
+		sibcall_failure = 1;
+	      }
-      highest_outgoing_arg_in_use = initial_highest_arg_in_use;
-      stack_usage_map = initial_stack_usage_map;
-    }
+	  highest_outgoing_arg_in_use = initial_highest_arg_in_use;
+	  stack_usage_map = initial_stack_usage_map;
+	}
 #endif
-  /* If this was alloca, record the new stack level for nonlocal gotos.
-     Check for the handler slots since we might not have a save area
-     for non-local gotos.  */
+      /* If this was alloca, record the new stack level for nonlocal gotos.
+	 Check for the handler slots since we might not have a save area
+	 for non-local gotos.  */
-  if (may_be_alloca && nonlocal_goto_handler_slots != 0)
-    emit_stack_save (SAVE_NONLOCAL, &nonlocal_goto_stack_level, NULL_RTX);
+      if (may_be_alloca && nonlocal_goto_handler_slots != 0)
+	emit_stack_save (SAVE_NONLOCAL, &nonlocal_goto_stack_level, NULL_RTX);
-  pop_temp_slots ();
+      pop_temp_slots ();
+
+      /* Free up storage we no longer need.  */
+      for (i = 0; i < num_actuals; ++i)
+	if (args[i].aligned_regs)
+	  free (args[i].aligned_regs);
+
+      insns = get_insns ();
+      end_sequence ();
+
+      if (pass == 0)
+	{
+	  tail_call_insns = insns;
+
+	  /* If the current function's argument block is not large enough
+	     to hold the outgoing arguments, or we encountered some other
+	     situation we couldn't handle, zero out the sequence.  */
+	  if (current_function_args_size < args_size.constant
+	      || sibcall_failure)
+	    tail_call_insns = NULL_RTX;
+
+	  /* Restore the pending stack adjustment now that we have
+	     finished generating the sibling call sequence.  */
+	  pending_stack_adjust = save_pending_stack_adjust;
+	}
+      else
+	normal_call_insns = insns;
+    }
+
+  /* The function optimize_sibling_and_tail_recursive_calls doesn't
+     handle CALL_PLACEHOLDERs inside other CALL_PLACEHOLDERs.  This
+     can happen if the arguments to this function call an inline
+     function whose expansion contains another CALL_PLACEHOLDER.
+
+     If there are any C_Ps in any of these sequences, replace them
+     with their normal call.
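+
+     For example (an illustrative sketch, not code from this change):
+
+	 int f (int x) { return g (h (x)); }
+
+     If h is expanded inline while the argument of g is being computed,
+     and h's expansion contains a CALL_PLACEHOLDER of its own, that
+     placeholder is buried inside the sequences recorded here; the loops
+     below resolve any such placeholder to its normal call.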
*/ + + for (insn = normal_call_insns; insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + replace_call_placeholder (insn, sibcall_use_normal); + + for (insn = tail_call_insns; insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + replace_call_placeholder (insn, sibcall_use_normal); + + for (insn = tail_recursion_insns; insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + replace_call_placeholder (insn, sibcall_use_normal); + + /* If this was a potential tail recursion site, then emit a + CALL_PLACEHOLDER with the normal and the tail recursion streams. + One of them will be selected later. */ + if (tail_recursion_insns || tail_call_insns) + { + /* The tail recursion label must be kept around. We could expose + its use in the CALL_PLACEHOLDER, but that creates unwanted edges + and makes determining true tail recursion sites difficult. + + So we set LABEL_PRESERVE_P here, then clear it when we select + one of the call sequences after rtl generation is complete. */ + if (tail_recursion_insns) + LABEL_PRESERVE_P (tail_recursion_label) = 1; + emit_call_insn (gen_rtx_CALL_PLACEHOLDER (VOIDmode, normal_call_insns, + tail_call_insns, + tail_recursion_insns, + tail_recursion_label)); + } + else + emit_insns (normal_call_insns); - /* Free up storage we no longer need. */ - for (i = 0; i < num_actuals; ++i) - if (args[i].aligned_regs) - free (args[i].aligned_regs); + currently_expanding_call--; return target; } @@ -3171,7 +3488,9 @@ emit_library_call VPARAMS((rtx orgfun, int no_queue, enum machine_mode outmode, original_args_size.constant, args_size.constant, 0, FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), outmode != VOIDmode ? hard_libcall_value (outmode) : NULL_RTX, - old_inhibit_defer_pop + 1, call_fusage, no_queue, nothrow); + old_inhibit_defer_pop + 1, call_fusage, + ((no_queue ? ECF_IS_CONST : 0) + | (nothrow ? ECF_NOTHROW : 0))); pop_temp_slots (); @@ -3772,7 +4091,9 @@ emit_library_call_value VPARAMS((rtx orgfun, rtx value, int no_queue, struct_value_size, FUNCTION_ARG (args_so_far, VOIDmode, void_type_node, 1), mem_value == 0 ? hard_libcall_value (outmode) : NULL_RTX, - old_inhibit_defer_pop + 1, call_fusage, is_const, nothrow); + old_inhibit_defer_pop + 1, call_fusage, + ((is_const ? ECF_IS_CONST : 0) + | (nothrow ? ECF_NOTHROW : 0))); /* Now restore inhibit_defer_pop to its actual original value. */ OK_DEFER_POP; diff --git a/gcc/final.c b/gcc/final.c index 01dd1ba6b830..90437e0eb020 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -4019,7 +4019,8 @@ leaf_function_p () return 0; if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE - && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == CALL_INSN) + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == CALL_INSN + && ! SIBLING_CALL_P (XVECEXP (PATTERN (insn), 0, 0))) return 0; } for (insn = current_function_epilogue_delay_list; insn; insn = XEXP (insn, 1)) @@ -4028,7 +4029,8 @@ leaf_function_p () return 0; if (GET_CODE (XEXP (insn, 0)) == INSN && GET_CODE (PATTERN (XEXP (insn, 0))) == SEQUENCE - && GET_CODE (XVECEXP (PATTERN (XEXP (insn, 0)), 0, 0)) == CALL_INSN) + && GET_CODE (XVECEXP (PATTERN (XEXP (insn, 0)), 0, 0)) == CALL_INSN + && ! 
SIBLING_CALL_P (XVECEXP (PATTERN (XEXP (insn, 0)), 0, 0))) return 0; } diff --git a/gcc/flow.c b/gcc/flow.c index 7030c1100622..14189b9953b7 100644 --- a/gcc/flow.c +++ b/gcc/flow.c @@ -154,10 +154,12 @@ Boston, MA 02111-1307, USA. */ #ifndef HAVE_epilogue #define HAVE_epilogue 0 #endif - #ifndef HAVE_prologue #define HAVE_prologue 0 #endif +#ifndef HAVE_sibcall_epilogue +#define HAVE_sibcall_epilogue 0 +#endif /* The contents of the current function definition are allocated in this obstack, and all are freed at the end of the function. @@ -592,7 +594,8 @@ find_basic_blocks_1 (f) does not imply an abnormal edge, it will be a bit before everything can be updated. So continue to emit a noop at the end of such a block. */ - if (GET_CODE (end) == CALL_INSN) + if (GET_CODE (end) == CALL_INSN + && ! SIBLING_CALL_P (end)) { rtx nop = gen_rtx_USE (VOIDmode, const0_rtx); end = emit_insn_after (nop, end); @@ -644,7 +647,8 @@ find_basic_blocks_1 (f) imply an abnormal edge, it will be a bit before everything can be updated. So continue to emit a noop at the end of such a block. */ - if (GET_CODE (end) == CALL_INSN) + if (GET_CODE (end) == CALL_INSN + && ! SIBLING_CALL_P (end)) { rtx nop = gen_rtx_USE (VOIDmode, const0_rtx); end = emit_insn_after (nop, end); @@ -973,6 +977,15 @@ make_edges (label_value_list) } } + /* If this is a sibling call insn, then this is in effect a + combined call and return, and so we need an edge to the + exit block. No need to worry about EH edges, since we + wouldn't have created the sibling call in the first place. */ + + if (code == CALL_INSN && SIBLING_CALL_P (insn)) + make_edge (edge_cache, bb, EXIT_BLOCK_PTR, 0); + else + /* If this is a CALL_INSN, then mark it as reaching the active EH handler for this CALL_INSN. If we're handling asynchronous exceptions then any insn can reach any of the active handlers. @@ -3249,8 +3262,10 @@ propagate_block (bb, old, significant, flags) instructions. Warn about probable compiler losage. */ if (insn_is_dead && reload_completed - && (HAVE_epilogue || HAVE_prologue) - && prologue_epilogue_contains (insn)) + && (((HAVE_epilogue || HAVE_prologue) + && prologue_epilogue_contains (insn)) + || (HAVE_sibcall_epilogue + && sibcall_epilogue_contains (insn)))) { if (flags & PROP_KILL_DEAD_CODE) { diff --git a/gcc/function.c b/gcc/function.c index e92b58154f2f..dfd87618396d 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -152,8 +152,12 @@ struct function *cfun = 0; struct function *all_functions = 0; /* These arrays record the INSN_UIDs of the prologue and epilogue insns. */ -static int *prologue; -static int *epilogue; +static varray_type prologue; +static varray_type epilogue; + +/* Array of INSN_UIDs to hold the INSN_UIDs for each sibcall epilogue + in this function. */ +static varray_type sibcall_epilogue; /* In order to evaluate some expressions, such as function calls returning structures in memory, we need to temporarily allocate stack locations. @@ -271,13 +275,15 @@ static void pad_below PARAMS ((struct args_size *, enum machine_mode, static tree round_down PARAMS ((tree, int)); #endif static rtx round_trampoline_addr PARAMS ((rtx)); +static tree *identify_blocks_1 PARAMS ((rtx, tree *, tree *, tree *)); +static void reorder_blocks_1 PARAMS ((rtx, tree, varray_type *)); static tree blocks_nreverse PARAMS ((tree)); static int all_blocks PARAMS ((tree, tree *)); static tree *get_block_vector PARAMS ((tree, int *)); /* We always define `record_insns' even if its not used so that we can always export `prologue_epilogue_contains'. 
*/ -static int *record_insns PARAMS ((rtx)) ATTRIBUTE_UNUSED; -static int contains PARAMS ((rtx, int *)); +static void record_insns PARAMS ((rtx, varray_type *)) ATTRIBUTE_UNUSED; +static int contains PARAMS ((rtx, varray_type)); #ifdef HAVE_return static void emit_return_into_block PARAMS ((basic_block)); #endif @@ -1507,6 +1513,7 @@ fixup_var_refs (var, promoted_mode, unsignedp, ht) rtx first_insn = get_insns (); struct sequence_stack *stack = seq_stack; tree rtl_exps = rtl_expr_chain; + rtx insn; /* Must scan all insns for stack-refs that exceed the limit. */ fixup_var_refs_insns (var, promoted_mode, unsignedp, first_insn, @@ -1545,6 +1552,31 @@ fixup_var_refs (var, promoted_mode, unsignedp, ht) fixup_var_refs_insns (var, promoted_mode, unsignedp, catch_clauses, 0, 0); end_sequence (); + + /* Scan sequences saved in CALL_PLACEHOLDERS too. */ + for (insn = first_insn; insn; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + int i; + + /* Look at the Normal call, sibling call and tail recursion + sequences attached to the CALL_PLACEHOLDER. */ + for (i = 0; i < 3; i++) + { + rtx seq = XEXP (PATTERN (insn), i); + if (seq) + { + push_to_sequence (seq); + fixup_var_refs_insns (var, promoted_mode, unsignedp, + seq, 0, 0); + XEXP (PATTERN (insn), i) = get_insns (); + end_sequence (); + } + } + } + } } /* REPLACEMENTS is a pointer to a list of the struct fixup_replacement and X is @@ -5494,11 +5526,8 @@ identify_blocks (block, insns) rtx insns; { int n_blocks; - tree *block_vector; + tree *block_vector, *last_block_vector; tree *block_stack; - int depth = 0; - int current_block_number = 1; - rtx insn; if (block == 0) return; @@ -5508,35 +5537,83 @@ identify_blocks (block, insns) block_vector = get_block_vector (block, &n_blocks); block_stack = (tree *) xmalloc (n_blocks * sizeof (tree)); + last_block_vector = identify_blocks_1 (insns, block_vector + 1, + block_vector + n_blocks, block_stack); + + /* If we didn't use all of the subblocks, we've misplaced block notes. */ + /* ??? This appears to happen all the time. Latent bugs elsewhere? */ + if (0 && last_block_vector != block_vector + n_blocks) + abort (); + + free (block_vector); + free (block_stack); +} + +/* Subroutine of identify_blocks. Do the block substitution on the + insn chain beginning with INSNS. Recurse for CALL_PLACEHOLDER chains. + + BLOCK_STACK is pushed and popped for each BLOCK_BEGIN/BLOCK_END pair. + BLOCK_VECTOR is incremented for each block seen. */ + +static tree * +identify_blocks_1 (insns, block_vector, end_block_vector, orig_block_stack) + rtx insns; + tree *block_vector; + tree *end_block_vector; + tree *orig_block_stack; +{ + rtx insn; + tree *block_stack = orig_block_stack; + for (insn = insns; insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == NOTE) - { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) - { - tree b; + { + if (GET_CODE (insn) == NOTE) + { + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) + { + tree b; - /* If there are more block notes than BLOCKs, something - is badly wrong. */ - if (current_block_number == n_blocks) - abort (); + /* If there are more block notes than BLOCKs, something + is badly wrong. */ + if (block_vector == end_block_vector) + abort (); - b = block_vector[current_block_number++]; - NOTE_BLOCK (insn) = b; - block_stack[depth++] = b; - } - else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) - { - if (depth == 0) - /* There are more NOTE_INSN_BLOCK_ENDs that - NOTE_INSN_BLOCK_BEGs. 
Something is badly wrong. */ - abort (); + b = *block_vector++; + NOTE_BLOCK (insn) = b; + *block_stack++ = b; + } + else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) + { + /* If there are more NOTE_INSN_BLOCK_ENDs than + NOTE_INSN_BLOCK_BEGs, something is badly wrong. */ + if (block_stack == orig_block_stack) + abort (); - NOTE_BLOCK (insn) = block_stack[--depth]; - } - } + NOTE_BLOCK (insn) = *--block_stack; + } + } + else if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + rtx cp = PATTERN (insn); + + block_vector = identify_blocks_1 (XEXP (cp, 0), block_vector, + end_block_vector, block_stack); + if (XEXP (cp, 1)) + block_vector = identify_blocks_1 (XEXP (cp, 1), block_vector, + end_block_vector, block_stack); + if (XEXP (cp, 2)) + block_vector = identify_blocks_1 (XEXP (cp, 2), block_vector, + end_block_vector, block_stack); + } + } - free (block_vector); - free (block_stack); + /* If there are more NOTE_INSN_BLOCK_BEGINs than NOTE_INSN_BLOCK_ENDs, + something is badly wrong. */ + if (block_stack != orig_block_stack) + abort (); + + return block_vector; } /* Given a revised instruction chain, rebuild the tree structure of @@ -5550,7 +5627,6 @@ reorder_blocks (block, insns) rtx insns; { tree current_block = block; - rtx insn; varray_type block_stack; if (block == NULL_TREE) @@ -5562,35 +5638,7 @@ reorder_blocks (block, insns) BLOCK_SUBBLOCKS (current_block) = 0; BLOCK_CHAIN (current_block) = 0; - for (insn = insns; insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == NOTE) - { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) - { - tree block = NOTE_BLOCK (insn); - /* If we have seen this block before, copy it. */ - if (TREE_ASM_WRITTEN (block)) - { - block = copy_node (block); - NOTE_BLOCK (insn) = block; - } - BLOCK_SUBBLOCKS (block) = 0; - TREE_ASM_WRITTEN (block) = 1; - BLOCK_SUPERCONTEXT (block) = current_block; - BLOCK_CHAIN (block) = BLOCK_SUBBLOCKS (current_block); - BLOCK_SUBBLOCKS (current_block) = block; - current_block = block; - VARRAY_PUSH_TREE (block_stack, block); - } - else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) - { - NOTE_BLOCK (insn) = VARRAY_TOP_TREE (block_stack); - VARRAY_POP (block_stack); - BLOCK_SUBBLOCKS (current_block) - = blocks_nreverse (BLOCK_SUBBLOCKS (current_block)); - current_block = BLOCK_SUPERCONTEXT (current_block); - } - } + reorder_blocks_1 (insns, current_block, &block_stack); BLOCK_SUBBLOCKS (current_block) = blocks_nreverse (BLOCK_SUBBLOCKS (current_block)); @@ -5600,6 +5648,60 @@ reorder_blocks (block, insns) return current_block; } +/* Helper function for reorder_blocks. Process the insn chain beginning + at INSNS. Recurse for CALL_PLACEHOLDER insns. */ + +static void +reorder_blocks_1 (insns, current_block, p_block_stack) + rtx insns; + tree current_block; + varray_type *p_block_stack; +{ + rtx insn; + + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == NOTE) + { + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) + { + tree block = NOTE_BLOCK (insn); + /* If we have seen this block before, copy it. 
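+
+		     (TREE_ASM_WRITTEN is set below once a block has been
+		     placed; seeing it already set means these insns were
+		     duplicated, e.g. among the sequences of a
+		     CALL_PLACEHOLDER, so each copy needs its own node.)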
*/ + if (TREE_ASM_WRITTEN (block)) + { + block = copy_node (block); + NOTE_BLOCK (insn) = block; + } + BLOCK_SUBBLOCKS (block) = 0; + TREE_ASM_WRITTEN (block) = 1; + BLOCK_SUPERCONTEXT (block) = current_block; + BLOCK_CHAIN (block) = BLOCK_SUBBLOCKS (current_block); + BLOCK_SUBBLOCKS (current_block) = block; + current_block = block; + VARRAY_PUSH_TREE (*p_block_stack, block); + } + else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) + { + NOTE_BLOCK (insn) = VARRAY_TOP_TREE (*p_block_stack); + VARRAY_POP (*p_block_stack); + BLOCK_SUBBLOCKS (current_block) + = blocks_nreverse (BLOCK_SUBBLOCKS (current_block)); + current_block = BLOCK_SUPERCONTEXT (current_block); + } + } + else if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + rtx cp = PATTERN (insn); + reorder_blocks_1 (XEXP (cp, 0), current_block, p_block_stack); + if (XEXP (cp, 1)) + reorder_blocks_1 (XEXP (cp, 1), current_block, p_block_stack); + if (XEXP (cp, 2)) + reorder_blocks_1 (XEXP (cp, 2), current_block, p_block_stack); + } + } +} + /* Reverse the order of elements in the chain T of blocks, and return the new head of the chain (old last element). */ @@ -5757,6 +5859,7 @@ prepare_function_start () cfun->preferred_stack_boundary = STACK_BOUNDARY; #else cfun->stack_alignment_needed = 0; + cfun->preferred_stack_boundary = 0; #endif /* Set if a call to setjmp is seen. */ @@ -5900,8 +6003,11 @@ void init_function_for_compilation () { reg_renumber = 0; + /* No prologue/epilogue insns yet. */ - prologue = epilogue = 0; + VARRAY_GROW (prologue, 0); + VARRAY_GROW (epilogue, 0); + VARRAY_GROW (sibcall_epilogue, 0); } /* Indicate that the current function uses extra args @@ -6586,30 +6692,32 @@ expand_function_end (filename, line, end_bindings) expand_fixups (get_insns ()); } -/* Create an array that records the INSN_UIDs of INSNS (either a sequence - or a single insn). */ +/* Extend a vector that records the INSN_UIDs of INSNS (either a + sequence or a single insn). */ -static int * -record_insns (insns) +static void +record_insns (insns, vecp) rtx insns; + varray_type *vecp; { - int *vec; - if (GET_CODE (insns) == SEQUENCE) { int len = XVECLEN (insns, 0); - vec = (int *) oballoc ((len + 1) * sizeof (int)); - vec[len] = 0; + int i = VARRAY_SIZE (*vecp); + + VARRAY_GROW (*vecp, i + len); while (--len >= 0) - vec[len] = INSN_UID (XVECEXP (insns, 0, len)); + { + VARRAY_INT (*vecp, i) = INSN_UID (XVECEXP (insns, 0, len)); + ++i; + } } else { - vec = (int *) oballoc (2 * sizeof (int)); - vec[0] = INSN_UID (insns); - vec[1] = 0; + int i = VARRAY_SIZE (*vecp); + VARRAY_GROW (*vecp, i + 1); + VARRAY_INT (*vecp, i) = INSN_UID (insns); } - return vec; } /* Determine how many INSN_UIDs in VEC are part of INSN. 
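
    (The result is a count rather than a flag: callers such as
    reposition_prologue_and_epilogue_notes subtract it from the number
    of prologue or epilogue insns still to be found.)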
*/ @@ -6617,7 +6725,7 @@ record_insns (insns) static int contains (insn, vec) rtx insn; - int *vec; + varray_type vec; { register int i, j; @@ -6626,15 +6734,15 @@ contains (insn, vec) { int count = 0; for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--) - for (j = 0; vec[j]; j++) - if (INSN_UID (XVECEXP (PATTERN (insn), 0, i)) == vec[j]) + for (j = VARRAY_SIZE (vec) - 1; j >= 0; --j) + if (INSN_UID (XVECEXP (PATTERN (insn), 0, i)) == VARRAY_INT (vec, j)) count++; return count; } else { - for (j = 0; vec[j]; j++) - if (INSN_UID (insn) == vec[j]) + for (j = VARRAY_SIZE (vec) - 1; j >= 0; --j) + if (INSN_UID (insn) == VARRAY_INT (vec, j)) return 1; } return 0; @@ -6644,13 +6752,22 @@ int prologue_epilogue_contains (insn) rtx insn; { - if (prologue && contains (insn, prologue)) + if (contains (insn, prologue)) return 1; - if (epilogue && contains (insn, epilogue)) + if (contains (insn, epilogue)) return 1; return 0; } +int +sibcall_epilogue_contains (insn) + rtx insn; +{ + if (sibcall_epilogue) + return contains (insn, sibcall_epilogue); + return 0; +} + #ifdef HAVE_return /* Insert gen_return at the end of block BB. This also means updating block_for_insn appropriately. */ @@ -6698,7 +6815,7 @@ thread_prologue_and_epilogue_insns (f) /* Retain a map of the prologue insns. */ if (GET_CODE (seq) != SEQUENCE) seq = get_insns (); - prologue = record_insns (seq); + record_insns (seq, &prologue); emit_note (NULL, NOTE_INSN_PROLOGUE_END); /* GDB handles `break f' by setting a breakpoint on the first @@ -6875,7 +6992,7 @@ thread_prologue_and_epilogue_insns (f) /* Retain a map of the epilogue insns. */ if (GET_CODE (seq) != SEQUENCE) seq = get_insns (); - epilogue = record_insns (seq); + record_insns (seq, &epilogue); seq = gen_sequence (); end_sequence(); @@ -6888,6 +7005,35 @@ epilogue_done: if (insertted) commit_edge_insertions (); + +#ifdef HAVE_sibcall_epilogue + /* Emit sibling epilogues before any sibling call sites. */ + for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) + { + basic_block bb = e->src; + rtx insn = bb->end; + rtx i; + + if (GET_CODE (insn) != CALL_INSN + || ! SIBLING_CALL_P (insn)) + continue; + + start_sequence (); + seq = gen_sibcall_epilogue (); + end_sequence (); + + i = PREV_INSN (insn); + emit_insn_before (seq, insn); + + /* Update the UID to basic block map. */ + for (i = NEXT_INSN (i); i != insn; i = NEXT_INSN (i)) + set_block_for_insn (i, bb); + + /* Retain a map of the epilogue insns. Used in life analysis to + avoid getting rid of sibcall epilogue insns. */ + record_insns (seq, &sibcall_epilogue); + } +#endif } /* Reposition the prologue-end and epilogue-begin notes after instruction @@ -6898,90 +7044,82 @@ reposition_prologue_and_epilogue_notes (f) rtx f ATTRIBUTE_UNUSED; { #if defined (HAVE_prologue) || defined (HAVE_epilogue) - /* Reposition the prologue and epilogue notes. */ - if (n_basic_blocks) + int len; + + if ((len = VARRAY_SIZE (prologue)) > 0) { - int len; + register rtx insn, note = 0; - if (prologue) + /* Scan from the beginning until we reach the last prologue insn. + We apparently can't depend on basic_block_{head,end} after + reorg has run. */ + for (insn = f; len && insn; insn = NEXT_INSN (insn)) { - register rtx insn, note = 0; - - /* Scan from the beginning until we reach the last prologue insn. - We apparently can't depend on basic_block_{head,end} after - reorg has run. 
*/ - for (len = 0; prologue[len]; len++) - ; - for (insn = f; len && insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == NOTE) { - if (GET_CODE (insn) == NOTE) + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PROLOGUE_END) + note = insn; + } + else if ((len -= contains (insn, prologue)) == 0) + { + rtx next; + /* Find the prologue-end note if we haven't already, and + move it to just after the last prologue insn. */ + if (note == 0) { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_PROLOGUE_END) - note = insn; + for (note = insn; (note = NEXT_INSN (note));) + if (GET_CODE (note) == NOTE + && NOTE_LINE_NUMBER (note) == NOTE_INSN_PROLOGUE_END) + break; } - else if ((len -= contains (insn, prologue)) == 0) - { - rtx next; - /* Find the prologue-end note if we haven't already, and - move it to just after the last prologue insn. */ - if (note == 0) - { - for (note = insn; (note = NEXT_INSN (note));) - if (GET_CODE (note) == NOTE - && NOTE_LINE_NUMBER (note) == NOTE_INSN_PROLOGUE_END) - break; - } - next = NEXT_INSN (note); + next = NEXT_INSN (note); - /* Whether or not we can depend on BLOCK_HEAD, - attempt to keep it up-to-date. */ - if (BLOCK_HEAD (0) == note) - BLOCK_HEAD (0) = next; + /* Whether or not we can depend on BLOCK_HEAD, + attempt to keep it up-to-date. */ + if (BLOCK_HEAD (0) == note) + BLOCK_HEAD (0) = next; - remove_insn (note); - add_insn_after (note, insn); - } + remove_insn (note); + add_insn_after (note, insn); } } + } + + if ((len = VARRAY_SIZE (epilogue)) > 0) + { + register rtx insn, note = 0; - if (epilogue) + /* Scan from the end until we reach the first epilogue insn. + We apparently can't depend on basic_block_{head,end} after + reorg has run. */ + for (insn = get_last_insn (); len && insn; insn = PREV_INSN (insn)) { - register rtx insn, note = 0; - - /* Scan from the end until we reach the first epilogue insn. - We apparently can't depend on basic_block_{head,end} after - reorg has run. */ - for (len = 0; epilogue[len]; len++) - ; - for (insn = get_last_insn (); len && insn; insn = PREV_INSN (insn)) + if (GET_CODE (insn) == NOTE) { - if (GET_CODE (insn) == NOTE) + if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EPILOGUE_BEG) + note = insn; + } + else if ((len -= contains (insn, epilogue)) == 0) + { + /* Find the epilogue-begin note if we haven't already, and + move it to just before the first epilogue insn. */ + if (note == 0) { - if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EPILOGUE_BEG) - note = insn; + for (note = insn; (note = PREV_INSN (note));) + if (GET_CODE (note) == NOTE + && NOTE_LINE_NUMBER (note) == NOTE_INSN_EPILOGUE_BEG) + break; } - else if ((len -= contains (insn, epilogue)) == 0) - { - /* Find the epilogue-begin note if we haven't already, and - move it to just before the first epilogue insn. */ - if (note == 0) - { - for (note = insn; (note = PREV_INSN (note));) - if (GET_CODE (note) == NOTE - && NOTE_LINE_NUMBER (note) == NOTE_INSN_EPILOGUE_BEG) - break; - } - /* Whether or not we can depend on BLOCK_HEAD, - attempt to keep it up-to-date. */ - if (n_basic_blocks - && BLOCK_HEAD (n_basic_blocks-1) == insn) - BLOCK_HEAD (n_basic_blocks-1) = note; + /* Whether or not we can depend on BLOCK_HEAD, + attempt to keep it up-to-date. 
*/ + if (n_basic_blocks + && BLOCK_HEAD (n_basic_blocks-1) == insn) + BLOCK_HEAD (n_basic_blocks-1) = note; - remove_insn (note); - add_insn_before (note, insn); - } + remove_insn (note); + add_insn_before (note, insn); } } } @@ -7095,4 +7233,8 @@ init_function_once () { ggc_add_root (&all_functions, 1, sizeof all_functions, mark_function_chain); + + VARRAY_INT_INIT (prologue, 0, "prologue"); + VARRAY_INT_INIT (epilogue, 0, "epilogue"); + VARRAY_INT_INIT (sibcall_epilogue, 0, "sibcall_epilogue"); } diff --git a/gcc/genflags.c b/gcc/genflags.c index d1f19e08b815..39b5354e7dc2 100644 --- a/gcc/genflags.c +++ b/gcc/genflags.c @@ -174,11 +174,15 @@ gen_insn (insn) call_value_pop) ignoring the extra arguments that are passed for some machines, so by default, turn off the prototype. */ - obstack_ptr = (name[0] == 'c' + obstack_ptr = ((name[0] == 'c' || name[0] == 's') && (!strcmp (name, "call") || !strcmp (name, "call_value") || !strcmp (name, "call_pop") - || !strcmp (name, "call_value_pop"))) + || !strcmp (name, "call_value_pop") + || !strcmp (name, "sibcall") + || !strcmp (name, "sibcall_value") + || !strcmp (name, "sibcall_pop") + || !strcmp (name, "sibcall_value_pop"))) ? &call_obstack : &normal_obstack; obstack_grow (obstack_ptr, &insn, sizeof (rtx)); diff --git a/gcc/integrate.c b/gcc/integrate.c index cf2200ec9599..e0374e1d9ff2 100644 --- a/gcc/integrate.c +++ b/gcc/integrate.c @@ -66,19 +66,22 @@ extern struct obstack *function_maybepermanent_obstack; static rtvec initialize_for_inline PARAMS ((tree)); static void note_modified_parmregs PARAMS ((rtx, rtx, void *)); static void integrate_parm_decls PARAMS ((tree, struct inline_remap *, - rtvec)); + rtvec)); static tree integrate_decl_tree PARAMS ((tree, - struct inline_remap *)); + struct inline_remap *)); static void subst_constants PARAMS ((rtx *, rtx, - struct inline_remap *, int)); + struct inline_remap *, int)); static void set_block_origin_self PARAMS ((tree)); static void set_decl_origin_self PARAMS ((tree)); static void set_block_abstract_flags PARAMS ((tree, int)); static void process_reg_param PARAMS ((struct inline_remap *, rtx, - rtx)); + rtx)); void set_decl_abstract_flags PARAMS ((tree, int)); static rtx expand_inline_function_eh_labelmap PARAMS ((rtx)); static void mark_stores PARAMS ((rtx, rtx, void *)); +static void save_parm_insns PARAMS ((rtx, rtx)); +static void copy_insn_list PARAMS ((rtx, struct inline_remap *, + rtx)); static int compare_blocks PARAMS ((const PTR, const PTR)); static int find_block PARAMS ((const PTR, const PTR)); @@ -423,16 +426,7 @@ save_for_inline_nocopy (fndecl) perform constant folding when its incoming value is constant). Otherwise, we have to copy its value into a new register and track the new register's life. */ - in_nonparm_insns = 0; - for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn)) - { - if (insn == first_nonparm_insn) - in_nonparm_insns = 1; - - if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') - /* Record what interesting things happen to our parameters. */ - note_stores (PATTERN (insn), note_modified_parmregs, NULL); - } + save_parm_insns (insn, first_nonparm_insn); /* We have now allocated all that needs to be allocated permanently on the rtx obstack. Set our high-water mark, so that we @@ -449,6 +443,48 @@ save_for_inline_nocopy (fndecl) /* Clean up. */ free (parmdecl_map); } + +/* Scan the chain of insns to see what happens to our PARM_DECLs. 
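(This scan was
+   broken out of save_for_inline_nocopy so that it can recurse into the
+   sequences attached to CALL_PLACEHOLDER insns.)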
If a + PARM_DECL is used but never modified, we can substitute its rtl directly + when expanding inline (and perform constant folding when its incoming + value is constant). Otherwise, we have to copy its value into a new + register and track the new register's life. */ + +static void +save_parm_insns (insn, first_nonparm_insn) + rtx insn; + rtx first_nonparm_insn; +{ + in_nonparm_insns = 0; + + if (insn == NULL_RTX) + return; + + for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn)) + { + if (insn == first_nonparm_insn) + in_nonparm_insns = 1; + + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + /* Record what interesting things happen to our parameters. */ + note_stores (PATTERN (insn), note_modified_parmregs, NULL); + + /* If this is a CALL_PLACEHOLDER insn then we need to look into the + three attached sequences: normal call, sibling call and tail + recursion. */ + if (GET_CODE (insn) == CALL_INSN + && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + int i; + + for (i = 0; i < 3; i++) + save_parm_insns (XEXP (PATTERN (insn), i), + first_nonparm_insn); + } + } + } +} /* Note whether a parameter is modified or not. */ @@ -577,13 +613,11 @@ expand_inline_function (fndecl, parms, target, ignore, type, : parm_insns); tree *arg_trees; rtx *arg_vals; - rtx insn; int max_regno; register int i; int min_labelno = inl_f->emit->x_first_label_num; int max_labelno = inl_f->inl_max_label_num; int nargs; - rtx local_return_label = 0; rtx loc; rtx stack_save = 0; rtx temp; @@ -1089,7 +1123,100 @@ expand_inline_function (fndecl, parms, target, ignore, type, if (inl_f->calls_alloca) emit_stack_save (SAVE_BLOCK, &stack_save, NULL_RTX); - /* Now copy the insns one by one. Do this in two passes, first the insns and + /* Now copy the insns one by one. */ + copy_insn_list (insns, map, static_chain_value); + + /* Restore the stack pointer if we saved it above. */ + if (inl_f->calls_alloca) + emit_stack_restore (SAVE_BLOCK, stack_save, NULL_RTX); + + if (! cfun->x_whole_function_mode_p) + /* In statement-at-a-time mode, we just tell the front-end to add + this block to the list of blocks at this binding level. We + can't do it the way it's done for function-at-a-time mode the + superblocks have not been created yet. */ + insert_block (block); + else + { + BLOCK_CHAIN (block) + = BLOCK_CHAIN (DECL_INITIAL (current_function_decl)); + BLOCK_CHAIN (DECL_INITIAL (current_function_decl)) = block; + } + + /* End the scope containing the copied formal parameter variables + and copied LABEL_DECLs. We pass NULL_TREE for the variables list + here so that expand_end_bindings will not check for unused + variables. That's already been checked for when the inlined + function was defined. */ + expand_end_bindings (NULL_TREE, 1, 1); + + /* Must mark the line number note after inlined functions as a repeat, so + that the test coverage code can avoid counting the call twice. This + just tells the code to ignore the immediately following line note, since + there already exists a copy of this note before the expanded inline call. + This line number note is still needed for debugging though, so we can't + delete it. */ + if (flag_test_coverage) + emit_note (0, NOTE_REPEATED_LINE_NUMBER); + + emit_line_note (input_filename, lineno); + + /* If the function returns a BLKmode object in a register, copy it + out of the temp register into a BLKmode memory object. */ + if (target + && TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))) == BLKmode + && ! 
aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl)))) + target = copy_blkmode_from_reg (0, target, TREE_TYPE (TREE_TYPE (fndecl))); + + if (structure_value_addr) + { + target = gen_rtx_MEM (TYPE_MODE (type), + memory_address (TYPE_MODE (type), + structure_value_addr)); + MEM_SET_IN_STRUCT_P (target, 1); + } + + /* Make sure we free the things we explicitly allocated with xmalloc. */ + if (real_label_map) + free (real_label_map); + VARRAY_FREE (map->const_equiv_varray); + free (map->reg_map); + VARRAY_FREE (map->block_map); + free (map->insn_map); + free (map); + free (arg_vals); + free (arg_trees); + + inlining = inlining_previous; + + return target; +} + +/* Make copies of each insn in the given list using the mapping + computed in expand_inline_function. This function may call itself for + insns containing sequences. + + Copying is done in two passes, first the insns and then their REG_NOTES, + just like save_for_inline. + + If static_chain_value is non-zero, it represents the context-pointer + register for the function. */ + +static void +copy_insn_list (insns, map, static_chain_value) + rtx insns; + struct inline_remap *map; + rtx static_chain_value; +{ + register int i; + rtx insn; + rtx temp; + rtx local_return_label = NULL_RTX; +#ifdef HAVE_cc0 + rtx cc0_insn = 0; +#endif + + /* Copy the insns one by one. Do this in two passes, first the insns and then their REG_NOTES, just like save_for_inline. */ /* This loop is very similar to the loop in copy_loop_body in unroll.c. */ @@ -1283,11 +1410,50 @@ expand_inline_function (fndecl, parms, target, ignore, type, break; case CALL_INSN: + /* If this is a CALL_PLACEHOLDER insn then we need to copy the + three attached sequences: normal call, sibling call and tail + recursion. */ + if (GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER) + { + rtx sequence[3]; + rtx tail_label; + + for (i = 0; i < 3; i++) + { + rtx seq; + + sequence[i] = NULL_RTX; + seq = XEXP (PATTERN (insn), i); + if (seq) + { + start_sequence (); + copy_insn_list (seq, map, static_chain_value); + sequence[i] = get_insns (); + end_sequence (); + } + } + + /* Find the new tail recursion label. + It will already be substituted into sequence[2]. */ + tail_label = copy_rtx_and_substitute (XEXP (PATTERN (insn), 3), + map, 0); + + copy = emit_call_insn (gen_rtx_CALL_PLACEHOLDER (VOIDmode, + sequence[0], + sequence[1], + sequence[2], + tail_label)); + break; + } + pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0); copy = emit_call_insn (pattern); + SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn); + /* Because the USAGE information potentially contains objects other than hard registers, we need to copy it. */ + CALL_INSN_FUNCTION_USAGE (copy) = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map, 0); @@ -1299,7 +1465,7 @@ expand_inline_function (fndecl, parms, target, ignore, type, #endif try_constants (copy, map); - /* Be lazy and assume CALL_INSNs clobber all hard registers. */ + /* Be lazy and assume CALL_INSNs clobber all hard registers. */ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0; break; @@ -1316,14 +1482,23 @@ expand_inline_function (fndecl, parms, target, ignore, type, break; case NOTE: - /* It is important to discard function-end and function-beg notes, - so we have only one of each in the current function. 
- Also, NOTE_INSN_DELETED notes aren't useful (save_for_inline + /* NOTE_INSN_FUNCTION_END and NOTE_INSN_FUNCTION_BEG are + discarded because it is important to have only one of + each in the current function. + + NOTE_INSN_DELETED notes aren't useful (save_for_inline deleted these in the copy used for continuing compilation, - not the copy used for inlining). */ + not the copy used for inlining). + + NOTE_INSN_BASIC_BLOCK is discarded because the saved bb + pointer (which will soon be dangling) confuses flow's + attempts to preserve bb structures during the compilation + of a function. */ + if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_FUNCTION_END && NOTE_LINE_NUMBER (insn) != NOTE_INSN_FUNCTION_BEG - && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED) + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK) { copy = emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn)); @@ -1403,71 +1578,6 @@ expand_inline_function (fndecl, parms, target, ignore, type, if (local_return_label) emit_label (local_return_label); - - /* Restore the stack pointer if we saved it above. */ - if (inl_f->calls_alloca) - emit_stack_restore (SAVE_BLOCK, stack_save, NULL_RTX); - - if (! cfun->x_whole_function_mode_p) - /* In statement-at-a-time mode, we just tell the front-end to add - this block to the list of blocks at this binding level. We - can't do it the way it's done for function-at-a-time mode the - superblocks have not been created yet. */ - insert_block (block); - else - { - BLOCK_CHAIN (block) - = BLOCK_CHAIN (DECL_INITIAL (current_function_decl)); - BLOCK_CHAIN (DECL_INITIAL (current_function_decl)) = block; - } - - /* End the scope containing the copied formal parameter variables - and copied LABEL_DECLs. We pass NULL_TREE for the variables list - here so that expand_end_bindings will not check for unused - variables. That's already been checked for when the inlined - function was defined. */ - expand_end_bindings (NULL_TREE, 1, 1); - - /* Must mark the line number note after inlined functions as a repeat, so - that the test coverage code can avoid counting the call twice. This - just tells the code to ignore the immediately following line note, since - there already exists a copy of this note before the expanded inline call. - This line number note is still needed for debugging though, so we can't - delete it. */ - if (flag_test_coverage) - emit_note (0, NOTE_REPEATED_LINE_NUMBER); - - emit_line_note (input_filename, lineno); - - /* If the function returns a BLKmode object in a register, copy it - out of the temp register into a BLKmode memory object. */ - if (target - && TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))) == BLKmode - && ! aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl)))) - target = copy_blkmode_from_reg (0, target, TREE_TYPE (TREE_TYPE (fndecl))); - - if (structure_value_addr) - { - target = gen_rtx_MEM (TYPE_MODE (type), - memory_address (TYPE_MODE (type), - structure_value_addr)); - MEM_SET_IN_STRUCT_P (target, 1); - } - - /* Make sure we free the things we explicitly allocated with xmalloc. 
*/ - if (real_label_map) - free (real_label_map); - VARRAY_FREE (map->const_equiv_varray); - free (map->reg_map); - VARRAY_FREE (map->block_map); - free (map->insn_map); - free (map); - free (arg_vals); - free (arg_trees); - - inlining = inlining_previous; - - return target; } /* Given a chain of PARM_DECLs, ARGS, copy each decl into a VAR_DECL, @@ -1790,6 +1900,13 @@ copy_rtx_and_substitute (orig, map, for_lhs) = LABEL_PRESERVE_P (orig); return get_label_from_map (map, CODE_LABEL_NUMBER (orig)); + /* We need to handle "deleted" labels that appear in the DECL_RTL + of a LABEL_DECL. */ + case NOTE: + if (NOTE_LINE_NUMBER (orig) == NOTE_INSN_DELETED_LABEL) + return map->insn_map[INSN_UID (orig)]; + break; + case LABEL_REF: copy = gen_rtx_LABEL_REF @@ -2348,6 +2465,7 @@ subst_constants (loc, insn, map, memonly) case 'i': case 's': case 'w': + case 'n': case 't': break; diff --git a/gcc/jump.c b/gcc/jump.c index fc6f7df17def..b5352f43542a 100644 --- a/gcc/jump.c +++ b/gcc/jump.c @@ -125,13 +125,13 @@ static void delete_from_jump_chain PARAMS ((rtx)); static int delete_labelref_insn PARAMS ((rtx, rtx, int)); static void mark_modified_reg PARAMS ((rtx, rtx, void *)); static void redirect_tablejump PARAMS ((rtx, rtx)); -static void jump_optimize_1 PARAMS ((rtx, int, int, int, int)); +static void jump_optimize_1 PARAMS ((rtx, int, int, int, int, int)); #if ! defined(HAVE_cc0) && ! defined(HAVE_conditional_arithmetic) static rtx find_insert_position PARAMS ((rtx, rtx)); #endif static int returnjump_p_1 PARAMS ((rtx *, void *)); static void delete_prior_computation PARAMS ((rtx, rtx)); - + /* Main external entry point into the jump optimizer. See comments before jump_optimize_1 for descriptions of the arguments. */ void @@ -141,7 +141,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan) int noop_moves; int after_regscan; { - jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, 0); + jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, 0, 0); } /* Alternate entry into the jump optimizer. This entry point only rebuilds @@ -151,9 +151,16 @@ void rebuild_jump_labels (f) rtx f; { - jump_optimize_1 (f, 0, 0, 0, 1); + jump_optimize_1 (f, 0, 0, 0, 1, 0); } +/* Alternate entry into the jump optimizer. Do only trivial optimizations. */ +void +jump_optimize_minimal (f) + rtx f; +{ + jump_optimize_1 (f, 0, 0, 0, 0, 1); +} /* Delete no-op jumps and optimize jumps to jumps and jumps around jumps. @@ -175,15 +182,29 @@ rebuild_jump_labels (f) just determine whether control drops off the end of the function. This case occurs when we have -W and not -O. It works because `delete_insn' checks the value of `optimize' - and refrains from actually deleting when that is 0. */ + and refrains from actually deleting when that is 0. + + If MINIMAL is nonzero, then we only perform trivial optimizations: + + * Removal of unreachable code after BARRIERs. + * Removal of unreferenced CODE_LABELs. + * Removal of a jump to the next instruction. + * Removal of a conditional jump followed by an unconditional jump + to the same target as the conditional jump. + * Simplify a conditional jump around an unconditional jump. + * Simplify a jump to a jump. + * Delete extraneous line number notes. 
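+
+     For instance, "simplify a conditional jump around an unconditional
+     jump" rewrites (sketch only)
+
+	 if (cc) goto L1;  goto L2;  L1: ...
+
+     into "if (!cc) goto L2" with L1 falling through.  The sibcall pass
+     uses this entry point to tidy the insn chain just enough to build
+     a reliable CFG without disturbing the code it is about to analyze.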
+ */ static void -jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) +jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, + mark_labels_only, minimal) rtx f; int cross_jump; int noop_moves; int after_regscan; int mark_labels_only; + int minimal; { register rtx insn, next; int changed; @@ -230,7 +251,8 @@ jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) if (mark_labels_only) goto end; - exception_optimize (); + if (! minimal) + exception_optimize (); last_insn = delete_unreferenced_labels (f); @@ -320,7 +342,7 @@ jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) if (nlabel != JUMP_LABEL (insn)) changed |= redirect_jump (insn, nlabel); - if (! optimize) + if (! optimize || ! minimal) continue; /* If a dispatch table always goes to the same place, @@ -2135,7 +2157,7 @@ jump_optimize_1 (f, cross_jump, noop_moves, after_regscan, mark_labels_only) not be cleared. This is especially true for the case where we delete the NOTE_FUNCTION_END note. CAN_REACH_END is cleared by the front-end before compiling each function. */ - if (calculate_can_reach_end (last_insn, optimize != 0)) + if (! minimal && calculate_can_reach_end (last_insn, optimize != 0)) can_reach_end = 1; end: diff --git a/gcc/rtl.c b/gcc/rtl.c index 7f7906e4f7d4..a8b1a9de63ac 100644 --- a/gcc/rtl.c +++ b/gcc/rtl.c @@ -406,14 +406,12 @@ copy_rtx (orig) walks over the RTL. */ copy->used = 0; - /* We do not copy JUMP, CALL, or FRAME_RELATED for INSNs. */ + /* We do not copy FRAME_RELATED for INSNs. */ if (GET_RTX_CLASS (code) == 'i') - { - copy->jump = 0; - copy->call = 0; - copy->frame_related = 0; - } - + copy->frame_related = 0; + copy->jump = orig->jump; + copy->call = orig->call; + format_ptr = GET_RTX_FORMAT (GET_CODE (copy)); for (i = 0; i < GET_RTX_LENGTH (GET_CODE (copy)); i++) diff --git a/gcc/rtl.def b/gcc/rtl.def index ba68ca0eba2c..570abdc1a971 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -1,7 +1,7 @@ /* This file contains the definitions and documentation for the Register Transfer Expressions (rtx's) that make up the Register Transfer Language (rtl) used in the Back End of the GNU compiler. - Copyright (C) 1987, 88, 92, 94, 95, 97, 98, 1999 + Copyright (C) 1987, 88, 92, 94, 95, 97, 98, 1999, 2000 Free Software Foundation, Inc. This file is part of GNU CC. @@ -880,7 +880,10 @@ DEF_RTL_EXPR(CONSTANT_P_RTX, "constant_p_rtx", "e", 'x') potential tail recursive calls were found. The tail recursion label is needed so that we can clear LABEL_PRESERVE_P - after we select a call method. */ + after we select a call method. + + This method of tail-call elimination is intended to be replaced by + tree-based optimizations once front-end conversions are complete. */ DEF_RTL_EXPR(CALL_PLACEHOLDER, "call_placeholder", "uuuu", 'x') /* The SSA phi operator. diff --git a/gcc/rtl.h b/gcc/rtl.h index 158ea7ead0fb..46750ac060dd 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -119,9 +119,12 @@ typedef struct rtx_def #else enum machine_mode mode : 8; #endif - /* LINK_COST_ZERO in an INSN_LIST. */ + /* 1 in an INSN if it can alter flow of control + within this function. + LINK_COST_ZERO in an INSN_LIST. */ unsigned int jump : 1; - /* LINK_COST_FREE in an INSN_LIST. */ + /* 1 in an INSN if it can call another function. + LINK_COST_FREE in an INSN_LIST. 
*/ unsigned int call : 1; /* 1 in a MEM or REG if value of this expression will never change during the current function, even though it is not @@ -380,6 +383,9 @@ extern void rtvec_check_failed_bounds PARAMS ((rtvec, int, /* 1 if insn is a call to a const function. */ #define CONST_CALL_P(INSN) ((INSN)->unchanging) +/* 1 if insn (assumed to be a CALL_INSN) is a sibling call. */ +#define SIBLING_CALL_P(INSN) ((INSN)->jump) + /* 1 if insn is a branch that should not unconditionally execute its delay slots, i.e., it is an annulled branch. */ #define INSN_ANNULLED_BRANCH_P(INSN) ((INSN)->unchanging) @@ -1416,6 +1422,7 @@ extern int rtx_renumbered_equal_p PARAMS ((rtx, rtx)); extern int true_regnum PARAMS ((rtx)); extern int redirect_jump PARAMS ((rtx, rtx)); extern void jump_optimize PARAMS ((rtx, int, int, int)); +extern void jump_optimize_minimal PARAMS ((rtx)); extern void rebuild_jump_labels PARAMS ((rtx)); extern void thread_jumps PARAMS ((rtx, int, int)); extern int redirect_exp PARAMS ((rtx *, rtx, rtx, rtx)); @@ -1513,6 +1520,7 @@ extern void record_excess_regs PARAMS ((rtx, rtx, rtx *)); extern void reposition_prologue_and_epilogue_notes PARAMS ((rtx)); extern void thread_prologue_and_epilogue_insns PARAMS ((rtx)); extern int prologue_epilogue_contains PARAMS ((rtx)); +extern int sibcall_epilogue_contains PARAMS ((rtx)); extern HOST_WIDE_INT get_frame_size PARAMS ((void)); extern void preserve_rtl_expr_result PARAMS ((rtx)); extern void mark_temp_addr_taken PARAMS ((rtx)); @@ -1713,6 +1721,16 @@ extern void record_base_value PARAMS ((int, rtx, int)); extern void record_alias_subset PARAMS ((int, int)); extern rtx addr_side_effect_eval PARAMS ((rtx, int, int)); +/* In sibcall.c */ +typedef enum { + sibcall_use_normal = 1, + sibcall_use_tail_recursion, + sibcall_use_sibcall +} sibcall_use_t; + +extern void optimize_sibling_and_tail_recursive_calls PARAMS ((void)); +extern void replace_call_placeholder PARAMS ((rtx, sibcall_use_t)); + #ifdef STACK_REGS extern int stack_regs_mentioned PARAMS ((rtx insn)); #endif diff --git a/gcc/sibcall.c b/gcc/sibcall.c new file mode 100644 index 000000000000..b399137a0bf0 --- /dev/null +++ b/gcc/sibcall.c @@ -0,0 +1,578 @@ +/* Generic sibling call optimization support + Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. 
*/ + +#include "config.h" +#include "system.h" + +#include "rtl.h" +#include "regs.h" +#include "function.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "insn-config.h" +#include "recog.h" +#include "basic-block.h" +#include "output.h" +#include "except.h" + +static int identify_call_return_value PARAMS ((rtx, rtx *, rtx *)); +static rtx skip_copy_to_return_value PARAMS ((rtx, rtx, rtx)); +static rtx skip_use_of_return_value PARAMS ((rtx, enum rtx_code)); +static rtx skip_stack_adjustment PARAMS ((rtx)); +static rtx skip_jump_insn PARAMS ((rtx)); +static int uses_addressof PARAMS ((rtx)); +static int sequence_uses_addressof PARAMS ((rtx)); +static void purge_reg_equiv_notes PARAMS ((void)); + +/* Examine a CALL_PLACEHOLDER pattern and determine where the call's + return value is located. P_HARD_RETURN receives the hard register + that the function used; P_SOFT_RETURN receives the pseudo register + that the sequence used. Return non-zero if the values were located. */ + +static int +identify_call_return_value (cp, p_hard_return, p_soft_return) + rtx cp; + rtx *p_hard_return, *p_soft_return; +{ + rtx insn, set, hard, soft; + + /* Search forward through the "normal" call sequence to the CALL insn. */ + insn = XEXP (cp, 0); + while (GET_CODE (insn) != CALL_INSN) + insn = NEXT_INSN (insn); + + /* Assume the pattern is (set (dest) (call ...)), or that the first + member of a parallel is. This is the hard return register used + by the function. */ + if (GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == CALL) + hard = SET_DEST (PATTERN (insn)); + else if (GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == SET + && GET_CODE (SET_SRC (XVECEXP (PATTERN (insn), 0, 0))) == CALL) + hard = SET_DEST (XVECEXP (PATTERN (insn), 0, 0)); + else + return 0; + + /* If we didn't get a single hard register (e.g. a parallel), give up. */ + if (GET_CODE (hard) != REG) + return 0; + + /* If there's nothing after, there's no soft return value. */ + insn = NEXT_INSN (insn); + if (! insn) + return 0; + + /* We're looking for a source of the hard return register. */ + set = single_set (insn); + if (! set || SET_SRC (set) != hard) + return 0; + + soft = SET_DEST (set); + insn = NEXT_INSN (insn); + + /* Allow this first destination to be copied to a second register, + as might happen if the first register wasn't the particular pseudo + we'd been expecting. */ + if (insn + && (set = single_set (insn)) != NULL_RTX + && SET_SRC (set) == soft) + { + soft = SET_DEST (set); + insn = NEXT_INSN (insn); + } + + /* Don't fool with anything but pseudo registers. */ + if (GET_CODE (soft) != REG || REGNO (soft) < FIRST_PSEUDO_REGISTER) + return 0; + + /* This value must not be modified before the end of the sequence. */ + if (reg_set_between_p (soft, insn, NULL_RTX)) + return 0; + + *p_hard_return = hard; + *p_soft_return = soft; + + return 1; +} + +/* If the first real insn after ORIG_INSN copies to this function's + return value from RETVAL, then return the insn which performs the + copy. Otherwise return ORIG_INSN. */ + +static rtx +skip_copy_to_return_value (orig_insn, hardret, softret) + rtx orig_insn; + rtx hardret, softret; +{ + rtx insn, set = NULL_RTX; + + insn = next_nonnote_insn (orig_insn); + if (! insn) + return orig_insn; + + set = single_set (insn); + if (! 
set)
+    return orig_insn;
+
+  /* The destination must be the same as the called function's return
+     value to ensure that any return value is put in the same place by the
+     current function and the function we're calling.
+
+     Further, the source must be the same as the pseudo into which the
+     called function's return value was copied.  Otherwise we're returning
+     some other value.  */
+
+  if (SET_DEST (set) == current_function_return_rtx
+      && REG_P (SET_DEST (set))
+      && REGNO (SET_DEST (set)) == REGNO (hardret)
+      && SET_SRC (set) == softret)
+    return insn;
+
+  /* It did not look like a copy of the return value, so return the
+     same insn we were passed.  */
+  return orig_insn;
+}
+
+/* If the first real insn after ORIG_INSN is a CODE (e.g. a USE or CLOBBER)
+   of this function's return value, return that insn.  Otherwise return
+   ORIG_INSN.  */
+
+static rtx
+skip_use_of_return_value (orig_insn, code)
+     rtx orig_insn;
+     enum rtx_code code;
+{
+  rtx insn;
+
+  insn = next_nonnote_insn (orig_insn);
+
+  if (insn
+      && GET_CODE (insn) == INSN
+      && GET_CODE (PATTERN (insn)) == code
+      && (XEXP (PATTERN (insn), 0) == current_function_return_rtx
+	  || XEXP (PATTERN (insn), 0) == const0_rtx))
+    return insn;
+
+  return orig_insn;
+}
+
+/* If the first real insn after ORIG_INSN adjusts the stack pointer
+   by a constant, return the insn with the stack pointer adjustment.
+   Otherwise return ORIG_INSN.  */
+
+static rtx
+skip_stack_adjustment (orig_insn)
+     rtx orig_insn;
+{
+  rtx insn, set = NULL_RTX;
+
+  insn = next_nonnote_insn (orig_insn);
+
+  if (insn)
+    set = single_set (insn);
+
+  /* We are looking for a single SET that adjusts the stack pointer by
+     a constant, i.e. (set (reg sp) (plus (reg sp) (const_int N))).  */
+  if (insn
+      && set
+      && GET_CODE (SET_SRC (set)) == PLUS
+      && XEXP (SET_SRC (set), 0) == stack_pointer_rtx
+      && GET_CODE (XEXP (SET_SRC (set), 1)) == CONST_INT
+      && SET_DEST (set) == stack_pointer_rtx)
+    return insn;
+
+  /* It did not look like a stack pointer adjustment, so return the
+     same insn we were passed.  */
+  return orig_insn;
+}
+
+/* If the first real insn after ORIG_INSN is a jump, return the JUMP_INSN.
+   Otherwise return ORIG_INSN.  */
+
+static rtx
+skip_jump_insn (orig_insn)
+     rtx orig_insn;
+{
+  rtx insn;
+
+  insn = next_nonnote_insn (orig_insn);
+
+  if (insn
+      && GET_CODE (insn) == JUMP_INSN
+      && simplejump_p (insn))
+    return insn;
+
+  return orig_insn;
+}
+
+/* Scan the rtx X for ADDRESSOF expressions.  Return nonzero if an ADDRESSOF
+   expression is found, else return zero.  */
+
+static int
+uses_addressof (x)
+     rtx x;
+{
+  RTX_CODE code;
+  int i, j;
+  const char *fmt;
+
+  if (x == NULL_RTX)
+    return 0;
+
+  code = GET_CODE (x);
+
+  if (code == ADDRESSOF)
+    return 1;
+
+  /* Scan all subexpressions.  */
+  fmt = GET_RTX_FORMAT (code);
+  for (i = 0; i < GET_RTX_LENGTH (code); i++, fmt++)
+    {
+      if (*fmt == 'e')
+	{
+	  if (uses_addressof (XEXP (x, i)))
+	    return 1;
+	}
+      else if (*fmt == 'E')
+	{
+	  for (j = 0; j < XVECLEN (x, i); j++)
+	    if (uses_addressof (XVECEXP (x, i, j)))
+	      return 1;
+	}
+    }
+  return 0;
+}
+
+/* Scan the sequence of insns in SEQ to see if any have an ADDRESSOF
+   rtl expression.  If an ADDRESSOF expression is found, return nonzero,
+   else return zero.
+
+   This function handles CALL_PLACEHOLDERs which contain multiple sequences
+   of insns.
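+
+   (An ADDRESSOF rtx stands for the address of a pseudo before reload
+   has decided where the pseudo will live; for instance the &i in
+
+	int f (void) { int i = 0; g (&i); return i; }
+
+   may still be an ADDRESSOF at this point, so the value may yet be
+   given a slot in the current frame.)  */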
+   */
+
+static int
+sequence_uses_addressof (seq)
+     rtx seq;
+{
+  rtx insn;
+
+  for (insn = seq; insn; insn = NEXT_INSN (insn))
+    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+      {
+        /* If this is a CALL_PLACEHOLDER, then recursively call ourselves
+           with each nonempty sequence attached to the CALL_PLACEHOLDER.  */
+        if (GET_CODE (insn) == CALL_INSN
+            && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER)
+          {
+            if (XEXP (PATTERN (insn), 0) != NULL_RTX
+                && sequence_uses_addressof (XEXP (PATTERN (insn), 0)))
+              return 1;
+            if (XEXP (PATTERN (insn), 1) != NULL_RTX
+                && sequence_uses_addressof (XEXP (PATTERN (insn), 1)))
+              return 1;
+            if (XEXP (PATTERN (insn), 2) != NULL_RTX
+                && sequence_uses_addressof (XEXP (PATTERN (insn), 2)))
+              return 1;
+          }
+        else if (uses_addressof (PATTERN (insn))
+                 || (REG_NOTES (insn) && uses_addressof (REG_NOTES (insn))))
+          return 1;
+      }
+  return 0;
+}
+
+/* Remove all REG_EQUIV notes found in the insn chain.  */
+
+static void
+purge_reg_equiv_notes ()
+{
+  rtx insn;
+
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      while (1)
+        {
+          rtx note = find_reg_note (insn, REG_EQUIV, 0);
+          if (note)
+            {
+              /* Remove the note and keep looking at the notes for
+                 this insn.  */
+              remove_note (insn, note);
+              continue;
+            }
+          break;
+        }
+    }
+}
+
+/* Replace the CALL_PLACEHOLDER with one of its children.  INSN should be
+   the CALL_PLACEHOLDER insn; USE tells which child to use.  */
+
+void
+replace_call_placeholder (insn, use)
+     rtx insn;
+     sibcall_use_t use;
+{
+  if (use == sibcall_use_tail_recursion)
+    emit_insns_before (XEXP (PATTERN (insn), 2), insn);
+  else if (use == sibcall_use_sibcall)
+    emit_insns_before (XEXP (PATTERN (insn), 1), insn);
+  else if (use == sibcall_use_normal)
+    emit_insns_before (XEXP (PATTERN (insn), 0), insn);
+  else
+    abort ();
+
+  /* Turn off LABEL_PRESERVE_P for the tail recursion label if it
+     exists.  We only had to set it long enough to keep the jump
+     pass above from deleting it as unused.  */
+  if (XEXP (PATTERN (insn), 3))
+    LABEL_PRESERVE_P (XEXP (PATTERN (insn), 3)) = 0;
+
+  /* "Delete" the placeholder insn.  */
+  PUT_CODE (insn, NOTE);
+  NOTE_SOURCE_FILE (insn) = 0;
+  NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+}
+
+
+/* Given a (possibly empty) set of potential sibling or tail recursion call
+   sites, determine if optimization is possible.
+
+   Potential sibling or tail recursion calls are marked with CALL_PLACEHOLDER
+   insns.  The CALL_PLACEHOLDER insn holds chains of insns to implement a
+   normal call, sibling call or tail recursive call.
+
+   Replace the CALL_PLACEHOLDER with an appropriate insn chain.  */
+
+void
+optimize_sibling_and_tail_recursive_calls ()
+{
+  rtx insn, insns;
+  basic_block alternate_exit = EXIT_BLOCK_PTR;
+  int current_function_uses_addressof;
+  int successful_sibling_call = 0;
+  int replaced_call_placeholder = 0;
+  edge e;
+
+  insns = get_insns ();
+
+  /* We do not perform these optimizations when flag_exceptions is true,
+     so this is probably a NOP at the current time.  However, we may want
+     to support sibling and tail recursion optimizations in the future,
+     so let's plan ahead and find all the EH labels.  */
+  find_exception_handler_labels ();
+
+  /* Run a jump optimization pass to clean up the CFG.  We primarily want
+     this to thread jumps so that it is obvious which blocks jump to the
+     epilogue.  */
+  jump_optimize_minimal (insns);
+
+  /* We need cfg information to determine which blocks are succeeded
+     only by the epilogue.
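+
+     For example, in
+
+        int bar (int x) { return foo (x + 1); }
+
+     the block containing the call to foo reaches only the epilogue,
+     so that call is a candidate for the optimizations below.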
+     */
+  find_basic_blocks (insns, max_reg_num (), 0);
+  cleanup_cfg (insns);
+
+  /* If there are no basic blocks, then there is nothing to do.  */
+  if (n_basic_blocks == 0)
+    return;
+
+  /* Find the exit block.
+
+     It is possible that we have blocks which can reach the exit block
+     directly.  However, most of the time a block will jump to (or fall
+     into) block N_BASIC_BLOCKS - 1, which in turn falls into the exit
+     block.  */
+  for (e = EXIT_BLOCK_PTR->pred;
+       e && alternate_exit == EXIT_BLOCK_PTR;
+       e = e->pred_next)
+    {
+      rtx insn;
+
+      if (e->dest != EXIT_BLOCK_PTR || e->succ_next != NULL)
+        continue;
+
+      /* Walk forwards through the last normal block and see if it
+         does nothing except fall into the exit block.  */
+      for (insn = BLOCK_HEAD (n_basic_blocks - 1);
+           insn;
+           insn = NEXT_INSN (insn))
+        {
+          /* This should only happen once, at the start of this block.  */
+          if (GET_CODE (insn) == CODE_LABEL)
+            continue;
+
+          if (GET_CODE (insn) == NOTE)
+            continue;
+
+          if (GET_CODE (insn) == INSN
+              && GET_CODE (PATTERN (insn)) == USE)
+            continue;
+
+          break;
+        }
+
+      /* If INSN is zero, then the search walked all the way through the
+         block without hitting anything interesting.  This block is a
+         valid alternate exit block.  */
+      if (insn == NULL)
+        alternate_exit = e->src;
+    }
+
+  /* If the function uses ADDRESSOF, we can't (easily) determine
+     at this point if the value will end up on the stack.  */
+  current_function_uses_addressof = sequence_uses_addressof (insns);
+
+  /* Walk the insn chain and find any CALL_PLACEHOLDER insns.  We need to
+     select one of the insn sequences attached to each CALL_PLACEHOLDER.
+
+     The different sequences represent different ways to implement the call,
+     i.e., tail recursion, sibling call or normal call.
+
+     Since we do not create nested CALL_PLACEHOLDERs, the scan
+     continues with the insn that was after a replaced CALL_PLACEHOLDER;
+     we don't rescan the replacement insns.  */
+  for (insn = insns; insn; insn = NEXT_INSN (insn))
+    {
+      if (GET_CODE (insn) == CALL_INSN
+          && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER)
+        {
+          int sibcall = (XEXP (PATTERN (insn), 1) != NULL_RTX);
+          int tailrecursion = (XEXP (PATTERN (insn), 2) != NULL_RTX);
+          basic_block succ_block, call_block;
+          rtx temp, hardret, softret;
+
+          /* We must be careful with stack slots which are live at
+             potential optimization sites.
+
+             ?!? This test is overly conservative and will be replaced.  */
+          if (frame_offset)
+            goto failure;
+
+          /* alloca (until we have stack slot life analysis) inhibits
+             sibling call optimizations, but not tail recursion.
+
+             Similarly if we have ADDRESSOF expressions.
+
+             Similarly if we use varargs or stdarg since they implicitly
+             may take the address of an argument.  */
+          if (current_function_calls_alloca || current_function_uses_addressof
+              || current_function_varargs || current_function_stdarg)
+            sibcall = 0;
+
+          call_block = BLOCK_FOR_INSN (insn);
+
+          /* If the block has more than one successor, then we can not
+             perform sibcall or tail recursion optimizations.  */
+          if (call_block->succ == NULL
+              || call_block->succ->succ_next != NULL)
+            goto failure;
+
+          /* If the single successor is not the exit block, then we can not
+             perform sibcall or tail recursion optimizations.
+
+             Note that this test combined with the previous is sufficient
+             to prevent tail call optimization in the presence of active
+             exception handlers.
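+
+             A call inside an active exception region would have an
+             additional outgoing edge to its handler, so its block could
+             not have the exit block as its only successor.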
+             */
+          succ_block = call_block->succ->dest;
+          if (succ_block != EXIT_BLOCK_PTR && succ_block != alternate_exit)
+            goto failure;
+
+          /* If the call was the end of the block, then we're OK.  */
+          temp = insn;
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over copying from the call's return value pseudo into
+             this function's hard return register.  */
+          if (identify_call_return_value (PATTERN (insn), &hardret, &softret))
+            {
+              temp = skip_copy_to_return_value (temp, hardret, softret);
+              if (temp == call_block->end)
+                goto success;
+            }
+
+          /* Skip any stack adjustment.  */
+          temp = skip_stack_adjustment (temp);
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over a CLOBBER of the return value (as a hard reg).  */
+          temp = skip_use_of_return_value (temp, CLOBBER);
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over a USE of the return value (as a hard reg).  */
+          temp = skip_use_of_return_value (temp, USE);
+          if (temp == call_block->end)
+            goto success;
+
+          /* Skip over the JUMP_INSN at the end of the block.  */
+          temp = skip_jump_insn (temp);
+          if (GET_CODE (temp) == NOTE)
+            temp = next_nonnote_insn (temp);
+          if (temp == call_block->end)
+            goto success;
+
+          /* There are operations at the end of the block which we must
+             execute after returning from the function call.  So this call
+             can not be optimized.  */
+        failure:
+          sibcall = 0, tailrecursion = 0;
+        success:
+
+          /* Select a set of insns to implement the call and emit them.
+             Tail recursion is the most efficient, so select it over
+             a sibling call.  */
+
+          if (sibcall)
+            successful_sibling_call = 1;
+          replaced_call_placeholder = 1;
+          replace_call_placeholder (insn,
+                                    tailrecursion != 0
+                                    ? sibcall_use_tail_recursion
+                                    : sibcall != 0
+                                    ? sibcall_use_sibcall
+                                    : sibcall_use_normal);
+        }
+    }
+
+  /* A sibling call sequence invalidates any REG_EQUIV notes made for
+     this function's incoming arguments.
+
+     At the start of RTL generation we know the only REG_EQUIV notes
+     in the rtl chain are those for incoming arguments, so we can safely
+     flush any REG_EQUIV note.
+
+     This is (slight) overkill.  We could keep track of the highest argument
+     we clobber and be more selective in removing notes, but it does not
+     seem to be worth the effort.  */
+  if (successful_sibling_call)
+    purge_reg_equiv_notes ();
+
+  /* There may have been NOTE_INSN_BLOCK_{BEGIN,END} notes in the
+     CALL_PLACEHOLDER alternatives that we didn't emit.  Rebuild the
+     lexical block tree to correspond to the notes that still exist.  */
+  if (replaced_call_placeholder)
+    unroll_block_trees ();
+
+  /* This information will be invalid after inline expansion.  Kill it now.  */
+  free_basic_block_vars (0);
+}
diff --git a/gcc/toplev.c b/gcc/toplev.c
index fa7494d0e21f..1e1a650139c5 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -2986,6 +2986,15 @@ rest_of_compilation (decl)
       goto exit_rest_of_compilation;
     }
 
+  /* We may have potential sibling or tail recursion sites.  Select one
+     (of possibly multiple) methods of performing the call.  */
+  init_EXPR_INSN_LIST_cache ();
+  if (optimize)
+    optimize_sibling_and_tail_recursive_calls ();
+
+  if (ggc_p)
+    ggc_collect ();
+
   /* Initialize some variables used by the optimizers.  */
   init_function_for_compilation ();
 
@@ -3030,8 +3039,6 @@ rest_of_compilation (decl)
 
   unshare_all_rtl (current_function_decl, insns);
 
-  init_EXPR_INSN_LIST_cache ();
-
 #ifdef SETJMP_VIA_SAVE_AREA
   /* This must be performed before virtual register instantiation.
      */
   if (current_function_calls_alloca)
diff --git a/gcc/tree.c b/gcc/tree.c
index 72d85ee0ba96..c6045fffffea 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -285,6 +285,9 @@ static void mark_type_hash PARAMS ((void *));
 void (*lang_unsave) PARAMS ((tree *));
 void (*lang_unsave_expr_now) PARAMS ((tree));
 
+/* If non-null, a language-specific version of safe_for_unsave.  */
+int (*lang_safe_for_unsave) PARAMS ((tree));
+
 /* The string used as a placeholder instead of a source file name for
    built-in tree nodes.  The variable, which is dynamically allocated,
    should be used; the macro is only used to initialize it.  */
@@ -2666,6 +2669,82 @@ unsave_expr_now (expr)
   return expr;
 }
 
+
+/* Return nonzero if it is safe to unsave EXPR, else return zero.
+   It is not safe to unsave EXPR if it contains any embedded RTL_EXPRs.  */
+
+int
+safe_for_unsave (expr)
+     tree expr;
+{
+  enum tree_code code;
+  register int i;
+  int first_rtl;
+
+  if (expr == NULL_TREE)
+    return 1;
+
+  code = TREE_CODE (expr);
+  first_rtl = first_rtl_op (code);
+  switch (code)
+    {
+    case RTL_EXPR:
+      return 0;
+
+    case CALL_EXPR:
+      if (TREE_OPERAND (expr, 1)
+          && TREE_CODE (TREE_OPERAND (expr, 1)) == TREE_LIST)
+        {
+          tree exp = TREE_OPERAND (expr, 1);
+          while (exp)
+            {
+              if (! safe_for_unsave (TREE_VALUE (exp)))
+                return 0;
+              exp = TREE_CHAIN (exp);
+            }
+        }
+      break;
+
+    default:
+      if (lang_safe_for_unsave)
+        switch ((*lang_safe_for_unsave) (expr))
+          {
+          case -1:
+            break;
+          case 0:
+            return 0;
+          case 1:
+            return 1;
+          default:
+            abort ();
+          }
+      break;
+    }
+
+  switch (TREE_CODE_CLASS (code))
+    {
+    case 'c':  /* a constant */
+    case 't':  /* a type node */
+    case 'x':  /* something random, like an identifier or an ERROR_MARK.  */
+    case 'd':  /* a decl node */
+    case 'b':  /* a block node */
+      return 1;
+
+    case 'e':  /* an expression */
+    case 'r':  /* a reference */
+    case 's':  /* an expression with side effects */
+    case '<':  /* a comparison expression */
+    case '2':  /* a binary arithmetic expression */
+    case '1':  /* a unary arithmetic expression */
+      for (i = first_rtl - 1; i >= 0; i--)
+        if (! safe_for_unsave (TREE_OPERAND (expr, i)))
+          return 0;
+      return 1;
+
+    default:
+      return 0;
+    }
+}
 
 /* Return 1 if EXP contains a PLACEHOLDER_EXPR; i.e., if it represents
    a size or offset that depends on a field within a record.
diff --git a/gcc/tree.h b/gcc/tree.h
index c8cfdf900b21..f9503ceca88e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1983,6 +1983,11 @@ extern int first_rtl_op PARAMS ((enum tree_code));
 
 extern tree unsave_expr PARAMS ((tree));
 
+/* safe_for_unsave (EXP) returns nonzero if it is possible to
+   expand EXP multiple times.  */
+
+extern int safe_for_unsave PARAMS ((tree));
+
 /* Reset EXP in place so that it can be expanded again.  Does not
    recurse into subtrees.  */
 
@@ -2000,6 +2005,9 @@ extern tree unsave_expr_now PARAMS ((tree));
 extern void (*lang_unsave) PARAMS ((tree *));
 extern void (*lang_unsave_expr_now) PARAMS ((tree));
 
+/* If non-null, a language-specific version of safe_for_unsave.  */
+extern int (*lang_safe_for_unsave) PARAMS ((tree));
+
 /* Return 1 if EXP contains a PLACEHOLDER_EXPR; i.e., if it represents
    a size or offset that depends on a field within a record.
-- 
2.43.5