[PATCH] Optimize stdarg functions (take 2)

Jakub Jelinek jakub@redhat.com
Tue Sep 28 18:36:00 GMT 2004


On Mon, Sep 27, 2004 at 04:46:11PM -0700, Richard Henderson wrote:
> Ah, I missed that.  Ok.  Might could do with a comment at the set
> for va_start_bb or something?

Done.

> > > The escape analysis and [fg]pr counting code is kinda messy.  Can
> > > you split that out into more functions?
> > 
> > Will try tomorrow.

Done below.
I have also added ppc32 bits (and updated to edge vector changes).

2004-09-28  Jakub Jelinek  <jakub@redhat.com>

	* tree.h (enum tree_index): Add TI_VA_LIST_GPR_COUNTER_NAME
	and TI_VA_LIST_FPR_COUNTER_NAME.
	(va_list_gpr_counter_name, va_list_fpr_counter_name): Define.
	* tree-pass.h (pass_stdarg): Add.
	* tree-optimize.c (init_tree_optimization_passes): Add pass_stdarg.
	* tree-stdarg.c: New file.
	* Makefile.in (OBJS-common): Add tree-stdarg.o.
	(tree-stdarg.o): Add dependencies.
	* function.h (struct function): Add va_list_gpr_size and
	va_list_fpr_size fields.
	* function.c (allocate_struct_function): Initialize them.

	* config/i386/i386.c (ix86_build_builtin_va_list): Initialize
	va_list_{g,f}pr_counter_name.
	(ix86_setup_incoming_varargs): Don't do anything if reg_save
	area will not be used.  Only save registers that tree-stdarg.c
	detected as needing to be saved.
	(ix86_va_start): Don't set up fields that won't be used.

	* config/rs6000/rs6000.c (rs6000_build_builtin_va_list): Initialize
	va_list_{g,f}pr_counter_name.
	(setup_incoming_varargs): Don't do anything if reg_save
	area will not be used.  Only save registers that tree-stdarg.c
	detected as needing to be saved.
	(rs6000_va_start): Don't set up fields that won't be used.

	* gcc.c-torture/execute/stdarg-1.c: New test.
	* gcc.c-torture/execute/stdarg-2.c: New test.
	* gcc.c-torture/execute/stdarg-3.c: New test.
	* gcc.dg/tree-ssa/stdarg-1.c: New test.
	* gcc.dg/tree-ssa/stdarg-2.c: New test.
	* gcc.dg/tree-ssa/stdarg-3.c: New test.
	* gcc.dg/tree-ssa/stdarg-4.c: New test.
	* gcc.dg/tree-ssa/stdarg-5.c: New test.

--- gcc/tree.h.jj	2004-09-27 08:27:16.000000000 +0200
+++ gcc/tree.h	2004-09-28 12:47:55.427898754 +0200
@@ -2568,6 +2568,8 @@ enum tree_index
   TI_PID_TYPE,
   TI_PTRDIFF_TYPE,
   TI_VA_LIST_TYPE,
+  TI_VA_LIST_GPR_COUNTER_NAME,
+  TI_VA_LIST_FPR_COUNTER_NAME,
   TI_BOOLEAN_TYPE,
   TI_FILEPTR_TYPE,
 
@@ -2634,6 +2636,8 @@ extern GTY(()) tree global_trees[TI_MAX]
 #define pid_type_node                   global_trees[TI_PID_TYPE]
 #define ptrdiff_type_node		global_trees[TI_PTRDIFF_TYPE]
 #define va_list_type_node		global_trees[TI_VA_LIST_TYPE]
+#define va_list_gpr_counter_name	global_trees[TI_VA_LIST_GPR_COUNTER_NAME]
+#define va_list_fpr_counter_name	global_trees[TI_VA_LIST_FPR_COUNTER_NAME]
 /* The C type `FILE *'.  */
 #define fileptr_type_node		global_trees[TI_FILEPTR_TYPE]
 
--- gcc/tree-pass.h.jj	2004-09-27 08:27:16.000000000 +0200
+++ gcc/tree-pass.h	2004-09-28 12:47:55.428898573 +0200
@@ -147,6 +147,7 @@ extern struct tree_opt_pass pass_profile
 extern struct tree_opt_pass pass_pre_expand;
 extern struct tree_opt_pass pass_lower_vector_ssa;
 extern struct tree_opt_pass pass_fold_builtins;
+extern struct tree_opt_pass pass_stdarg;
 extern struct tree_opt_pass pass_early_warn_uninitialized;
 extern struct tree_opt_pass pass_late_warn_uninitialized;
 extern struct tree_opt_pass pass_warn_function_return;
--- gcc/function.c.jj	2004-09-28 10:19:47.000000000 +0200
+++ gcc/function.c	2004-09-28 12:52:37.000000000 +0200
@@ -3798,6 +3798,10 @@ allocate_struct_function (tree fndecl)
        && TYPE_ARG_TYPES (fntype) != 0
        && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
 	   != void_type_node));
+
+  /* Assume all registers in stdarg functions need to be saved.  */
+  cfun->va_list_gpr_size = VA_LIST_MAX_GPR_SIZE;
+  cfun->va_list_fpr_size = VA_LIST_MAX_FPR_SIZE;
 }
 
 /* Reset cfun, and other non-struct-function variables to defaults as
--- gcc/config/rs6000/rs6000.c.jj	2004-09-28 10:19:50.000000000 +0200
+++ gcc/config/rs6000/rs6000.c	2004-09-28 15:35:16.503687197 +0200
@@ -5384,8 +5384,14 @@ setup_incoming_varargs (CUMULATIVE_ARGS 
     }
 
   set = get_varargs_alias_set ();
-  if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG)
+  if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
+      && cfun->va_list_gpr_size)
     {
+      int nregs = GP_ARG_NUM_REG - first_reg_offset;
+
+      if (nregs > cfun->va_list_gpr_size)
+        nregs = cfun->va_list_gpr_size;
+
       mem = gen_rtx_MEM (BLKmode,
 		         plus_constant (save_area,
 					first_reg_offset * reg_size)),
@@ -5393,16 +5399,17 @@ setup_incoming_varargs (CUMULATIVE_ARGS 
       set_mem_align (mem, BITS_PER_WORD);
 
       rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
-			          GP_ARG_NUM_REG - first_reg_offset);
+			          nregs);
     }
 
   /* Save FP registers if needed.  */
   if (DEFAULT_ABI == ABI_V4
       && TARGET_HARD_FLOAT && TARGET_FPRS
       && ! no_rtl
-      && next_cum.fregno <= FP_ARG_V4_MAX_REG)
+      && next_cum.fregno <= FP_ARG_V4_MAX_REG
+      && cfun->va_list_fpr_size)
     {
-      int fregno = next_cum.fregno;
+      int fregno = next_cum.fregno, nregs;
       rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
       rtx lab = gen_label_rtx ();
       int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG) * 8);
@@ -5415,13 +5422,14 @@ setup_incoming_varargs (CUMULATIVE_ARGS 
 					    gen_rtx_LABEL_REF (VOIDmode, lab),
 					    pc_rtx)));
 
-      while (fregno <= FP_ARG_V4_MAX_REG)
+      for (nregs = 0;
+           fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
+           fregno++, off += 8, nregs++)
 	{
-	  mem = gen_rtx_MEM (DFmode, plus_constant (save_area, off));
+	  mem = gen_rtx_MEM (DFmode,
+	                     plus_constant (save_area, off));
           set_mem_alias_set (mem, set);
 	  emit_move_insn (mem, gen_rtx_REG (DFmode, fregno));
-	  fregno++;
-	  off += 8;
 	}
 
       emit_label (lab);
@@ -5443,9 +5451,11 @@ rs6000_build_builtin_va_list (void)
   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
 
-  f_gpr = build_decl (FIELD_DECL, get_identifier ("gpr"),
+  va_list_gpr_counter_name = get_identifier ("gpr");
+  f_gpr = build_decl (FIELD_DECL, va_list_gpr_counter_name,
 		      unsigned_char_type_node);
-  f_fpr = build_decl (FIELD_DECL, get_identifier ("fpr"),
+  va_list_fpr_counter_name = get_identifier ("fpr");
+  f_fpr = build_decl (FIELD_DECL, va_list_fpr_counter_name,
 		      unsigned_char_type_node);
   /* Give the two bytes of padding a name, so that -Wpadded won't warn on
      every user file.  */
@@ -5514,15 +5524,28 @@ rs6000_va_start (tree valist, rtx nextar
 	     HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
 	     words, n_gpr, n_fpr);
 
-  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
-	     build_int_cst (NULL_TREE, n_gpr));
-  TREE_SIDE_EFFECTS (t) = 1;
-  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+  if (cfun->va_list_gpr_size)
+    {
+      t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
+                 build_int_cst (NULL_TREE, n_gpr));
+      TREE_SIDE_EFFECTS (t) = 1;
+      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+    }
 
-  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
-	     build_int_cst (NULL_TREE, n_fpr));
-  TREE_SIDE_EFFECTS (t) = 1;
-  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+  if (cfun->va_list_fpr_size)
+    {
+      t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
+	         build_int_cst (NULL_TREE, n_fpr));
+      TREE_SIDE_EFFECTS (t) = 1;
+      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+    }
+
+  /* If there were no va_arg invocations, don't set up anything.  */
+  if (!cfun->va_list_gpr_size
+      && !cfun->va_list_fpr_size
+      && n_gpr < GP_ARG_NUM_REG
+      && n_fpr < FP_ARG_V4_MAX_REG)
+    return;
 
   /* Find the overflow area.  */
   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
--- gcc/config/i386/i386.c.jj	2004-09-28 10:19:50.000000000 +0200
+++ gcc/config/i386/i386.c	2004-09-28 12:47:55.000000000 +0200
@@ -3088,9 +3088,11 @@ ix86_build_builtin_va_list (void)
   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
 
-  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
+  va_list_gpr_counter_name = get_identifier ("gp_offset");
+  f_gpr = build_decl (FIELD_DECL, va_list_gpr_counter_name,
 		      unsigned_type_node);
-  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
+  va_list_fpr_counter_name = get_identifier ("fp_offset");
+  f_fpr = build_decl (FIELD_DECL, va_list_fpr_counter_name,
 		      unsigned_type_node);
   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
 		      ptr_type_node);
@@ -3136,6 +3138,9 @@ ix86_setup_incoming_varargs (CUMULATIVE_
   if (!TARGET_64BIT)
     return;
 
+  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
+    return;
+
   /* Indicate to allocate space on the stack for varargs save area.  */
   ix86_save_varrargs_registers = 1;
 
@@ -3157,7 +3162,10 @@ ix86_setup_incoming_varargs (CUMULATIVE_
 
   set = get_varargs_alias_set ();
 
-  for (i = next_cum.regno; i < ix86_regparm; i++)
+  for (i = next_cum.regno;
+       i < ix86_regparm
+       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+       i++)
     {
       mem = gen_rtx_MEM (Pmode,
 			 plus_constant (save_area, i * UNITS_PER_WORD));
@@ -3166,7 +3174,7 @@ ix86_setup_incoming_varargs (CUMULATIVE_
 					x86_64_int_parameter_registers[i]));
     }
 
-  if (next_cum.sse_nregs)
+  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
     {
       /* Now emit code to save SSE registers.  The AX parameter contains number
 	 of SSE parameter registers used to call this function.  We use
@@ -3249,15 +3257,21 @@ ix86_va_start (tree valist, rtx nextarg)
     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
 	     (int) words, (int) n_gpr, (int) n_fpr);
 
-  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
-	     build_int_cst (NULL_TREE, n_gpr * 8));
-  TREE_SIDE_EFFECTS (t) = 1;
-  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+  if (cfun->va_list_gpr_size)
+    {
+      t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
+		 build_int_cst (NULL_TREE, n_gpr * 8));
+      TREE_SIDE_EFFECTS (t) = 1;
+      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+    }
 
-  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
-	     build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
-  TREE_SIDE_EFFECTS (t) = 1;
-  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+  if (cfun->va_list_fpr_size)
+    {
+      t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
+		 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
+      TREE_SIDE_EFFECTS (t) = 1;
+      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+    }
 
   /* Find the overflow area.  */
   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
@@ -3268,12 +3282,15 @@ ix86_va_start (tree valist, rtx nextarg)
   TREE_SIDE_EFFECTS (t) = 1;
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 
-  /* Find the register save area.
-     Prologue of the function save it right above stack frame.  */
-  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
-  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
-  TREE_SIDE_EFFECTS (t) = 1;
-  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+    {
+      /* Find the register save area.
+	 Prologue of the function save it right above stack frame.  */
+      t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
+      t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
+      TREE_SIDE_EFFECTS (t) = 1;
+      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+    }
 }
 
 /* Implement va_arg.  */
--- gcc/tree-optimize.c.jj	2004-09-27 08:27:16.000000000 +0200
+++ gcc/tree-optimize.c	2004-09-28 12:47:55.000000000 +0200
@@ -371,6 +371,7 @@ init_tree_optimization_passes (void)
   NEXT_PASS (pass_ccp);
   NEXT_PASS (pass_redundant_phi);
   NEXT_PASS (pass_fold_builtins);
+  NEXT_PASS (pass_stdarg);
   NEXT_PASS (pass_split_crit_edges);
   NEXT_PASS (pass_pre);
   NEXT_PASS (pass_loop);
--- gcc/tree-stdarg.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/tree-stdarg.c	2004-09-28 16:03:12.444765491 +0200
@@ -0,0 +1,471 @@
+/* Pass computing data for optimizing stdarg functions.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "function.h"
+#include "langhooks.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-pass.h"
+
+/* A simple pass that attempts to optimize stdarg functions on architectures
+   that need to save register arguments to stack on entry to stdarg functions.
+   If the function doesn't use any va_start macros, no registers need to
+   be saved.  If va_start macros are used and the va_list variables don't
+   escape the function, it is only necessary to save registers that will
+   be used in va_arg macros.  E.g. if va_arg is only used with integral types
+   in the function, floating point registers don't need to be saved, etc.  */
+
+struct stdarg_info
+{
+  bitmap va_list_vars;
+  basic_block va_start_bb, bb;
+  int compute_sizes, va_start_count;
+};
+
+/* Return true if basic block VA_ARG_BB is dominated by VA_START_BB and
+   is executed at most as many times as VA_START_BB.  */
+
+static bool
+reachable_at_most_once (basic_block va_arg_bb, basic_block va_start_bb)
+{
+  edge *stack, e;
+  edge_iterator ei;
+  int sp;
+  sbitmap visited;
+  bool ret;
+
+  if (va_arg_bb == va_start_bb)
+    return true;
+
+  if (! dominated_by_p (CDI_DOMINATORS, va_arg_bb, va_start_bb))
+    return false;
+
+  /* Each edge is pushed at most once, so bound the stack by edge count.  */
+  stack = xmalloc ((n_edges + 1) * sizeof (edge));
+  sp = 0;
+  visited = sbitmap_alloc (last_basic_block);
+  sbitmap_zero (visited);
+  ret = true;
+  /* Walk backwards from VA_ARG_BB over predecessor edges.  */
+  FOR_EACH_EDGE (e, ei, va_arg_bb->preds)
+    stack[sp++] = e;
+
+  while (sp)
+    {
+      basic_block src;
+
+      --sp;
+      e = stack[sp];
+      src = e->src;
+
+      if (e->flags & EDGE_COMPLEX)
+	{
+	  ret = false;
+	  break;
+	}
+      /* Do not walk past VA_START_BB.  */
+      if (src == va_start_bb)
+	continue;
+
+      /* va_arg_bb can be executed more times than va_start_bb.  */
+      if (src == va_arg_bb)
+	{
+	  ret = false;
+	  break;
+	}
+      /* VA_START_BB dominates VA_ARG_BB, so the entry is never reached.  */
+      gcc_assert (src != ENTRY_BLOCK_PTR);
+
+      if (! TEST_BIT (visited, src->index))
+	{
+	  SET_BIT (visited, src->index);
+	  FOR_EACH_EDGE (e, ei, src->preds)
+	    stack[sp++] = e;
+	}
+    }
+
+  free (stack);
+  sbitmap_free (visited);
+  return ret;
+}
+
+
+/* For statement COUNTER = RHS, if RHS is COUNTER + constant,
+   return constant, otherwise return 0.  */
+
+static unsigned HOST_WIDE_INT
+va_list_counter_bump (tree counter, tree rhs)
+{
+  tree plus_stmt = SSA_NAME_DEF_STMT (rhs);
+  tree rhs1, addend, load_stmt, counter1;
+
+  if (TREE_CODE (plus_stmt) != MODIFY_EXPR
+      || TREE_OPERAND (plus_stmt, 0) != rhs)
+    return 0;
+
+  rhs1 = TREE_OPERAND (plus_stmt, 1);
+
+  if (TREE_CODE (rhs1) != PLUS_EXPR
+      || TREE_CODE (TREE_OPERAND (rhs1, 0)) != SSA_NAME
+      || TREE_CODE (TREE_OPERAND (rhs1, 1)) != INTEGER_CST
+      || !host_integerp (TREE_OPERAND (rhs1, 1), 1))
+    return 0;
+
+  addend = TREE_OPERAND (rhs1, 0);
+  load_stmt = SSA_NAME_DEF_STMT (addend);
+
+  if (TREE_CODE (load_stmt) != MODIFY_EXPR
+      || TREE_OPERAND (load_stmt, 0) != addend)
+    return 0;
+
+  counter1 = TREE_OPERAND (load_stmt, 1);
+  if (TREE_CODE (counter) != TREE_CODE (counter1))
+    return 0;
+
+  if (TREE_CODE (counter) == COMPONENT_REF)
+    {
+      if (get_base_address (counter) != get_base_address (counter1)
+	  || TREE_CODE (TREE_OPERAND (counter1, 1)) != FIELD_DECL
+	  || DECL_NAME (TREE_OPERAND (counter, 1))
+	     != DECL_NAME (TREE_OPERAND (counter1, 1)))
+	return 0;
+    }
+  else
+    return 0;
+
+  return tree_low_cst (TREE_OPERAND (rhs1, 1), 1);
+}
+
+
+/* Called by walk_tree to look for references to va_list variables.  */
+
+static tree
+find_va_list_reference (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED,
+			void *data)
+{
+  bitmap va_list_vars = (bitmap) data;
+  tree var = *tp;
+
+  if (TREE_CODE (var) == SSA_NAME)
+    var = SSA_NAME_VAR (var);
+
+  if (TREE_CODE (var) == VAR_DECL
+      && bitmap_bit_p (va_list_vars, var_ann (var)->uid))
+    return var;
+
+  return NULL_TREE;
+}
+
+
+/* Helper function of va_list_counter_struct_op.  Compute
+   cfun->va_list_{g,f}pr_size.  AP is a va_list GPR/FPR counter; if
+   WRITE_P is true it was seen in an AP = VAR statement, otherwise in a
+   VAR = AP statement.  GPR_P is true if AP is a GPR counter, false if it
+   is a FPR counter.  */
+
+static void
+va_list_counter_op (struct stdarg_info *si, tree ap, tree var, bool gpr_p,
+		    bool write_p)
+{
+  unsigned HOST_WIDE_INT increment;
+
+  if (si->compute_sizes < 0)
+    {
+      si->compute_sizes = 0;
+      if (si->va_start_count == 1
+	  && reachable_at_most_once (si->bb, si->va_start_bb))
+	si->compute_sizes = 1;
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file,
+		 "bb%d will %sbe executed at most once for each va_start "
+		 "in bb%d\n", si->bb->index, si->compute_sizes ? "" : "not ",
+		 si->va_start_bb->index);
+    }
+
+  if (write_p
+      && si->compute_sizes
+      && (increment = va_list_counter_bump (ap, var)) != 0)
+    {
+      if (gpr_p && cfun->va_list_gpr_size + increment < VA_LIST_MAX_GPR_SIZE)
+	{
+	  cfun->va_list_gpr_size += increment;
+	  return;
+	}
+
+      if (!gpr_p && cfun->va_list_fpr_size + increment < VA_LIST_MAX_FPR_SIZE)
+	{
+	  cfun->va_list_fpr_size += increment;
+	  return;
+	}
+    }
+
+  if (write_p || !si->compute_sizes)
+    {
+      if (gpr_p)
+	cfun->va_list_gpr_size = VA_LIST_MAX_GPR_SIZE;
+      else
+	cfun->va_list_fpr_size = VA_LIST_MAX_FPR_SIZE;
+    }
+}
+
+
+/* If AP is a va_list GPR/FPR counter, compute cfun->va_list_{g,f}pr_size.
+   If WRITE_P is true, AP has been seen in AP = VAR assignment, if WRITE_P
+   is false, AP has been seen in VAR = AP assignment.
+   Return true if the AP = VAR (resp. VAR = AP) statement is a recognized
+   va_arg operation that doesn't cause the va_list variable to escape
+   current function.  */
+
+static bool
+va_list_counter_struct_op (struct stdarg_info *si, tree ap, tree var,
+			   bool write_p)
+{
+  tree base;
+
+  if (TREE_CODE (ap) != COMPONENT_REF
+      || TREE_CODE (TREE_OPERAND (ap, 1)) != FIELD_DECL)
+    return false;
+
+  if (TREE_CODE (var) != SSA_NAME
+      || bitmap_bit_p (si->va_list_vars, var_ann (SSA_NAME_VAR (var))->uid))
+    return false;
+
+  base = get_base_address (ap);
+  if (TREE_CODE (base) != VAR_DECL
+      || !bitmap_bit_p (si->va_list_vars, var_ann (base)->uid))
+    return false;
+
+  if (DECL_NAME (TREE_OPERAND (ap, 1)) == va_list_gpr_counter_name)
+    va_list_counter_op (si, ap, var, true, write_p);
+  else if (DECL_NAME (TREE_OPERAND (ap, 1)) == va_list_fpr_counter_name)
+    va_list_counter_op (si, ap, var, false, write_p);
+
+  return true;
+}
+
+
+/* Return true if this optimization pass should be done.
+   It only makes sense for stdarg functions.  */
+
+static bool
+gate_optimize_stdarg (void)
+{
+  /* This optimization is only for stdarg functions.  */
+  return current_function_stdarg != 0;
+}  
+
+
+/* Entry point to the stdarg optimization pass.  */
+
+static void
+execute_optimize_stdarg (void)
+{
+  basic_block bb;
+  bool va_list_escapes = false;
+  struct stdarg_info si;
+  const char *funcname = NULL;
+
+  cfun->va_list_gpr_size = 0;
+  cfun->va_list_fpr_size = 0;
+  memset (&si, 0, sizeof (si));
+  si.va_list_vars = BITMAP_XMALLOC ();
+
+  if (dump_file)
+    funcname = lang_hooks.decl_printable_name (current_function_decl, 2);
+
+  FOR_EACH_BB (bb)
+    {
+      block_stmt_iterator i;
+
+      for (i = bsi_start (bb); !bsi_end_p (i); bsi_next (&i))
+	{
+	  tree stmt = bsi_stmt (i);
+	  tree call = get_call_expr_in (stmt), callee;
+	  tree ap;
+
+	  if (!call)
+	    continue;
+
+	  callee = get_callee_fndecl (call);
+	  if (!callee
+	      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
+	      || DECL_FUNCTION_CODE (callee) != BUILT_IN_VA_START)
+	    continue;
+
+	  si.va_start_count++;
+	  ap = TREE_VALUE (TREE_OPERAND (call, 1));
+	  if (TREE_CODE (ap) != ADDR_EXPR
+	      || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_OPERAND (ap, 0)))
+		 != TYPE_MAIN_VARIANT (va_list_type_node)
+	      || TREE_CODE (TREE_OPERAND (ap, 0)) != VAR_DECL)
+	    {
+	      va_list_escapes = true;
+	      break;
+	    }
+
+	  ap = TREE_OPERAND (ap, 0);	    
+	  if (is_global_var (ap))
+	    {
+	      va_list_escapes = true;
+	      break;
+	    }
+
+	  bitmap_set_bit (si.va_list_vars, var_ann (ap)->uid);
+
+	  /* VA_START_BB will be only used if there is just one
+	     va_start in the function.  */
+	  si.va_start_bb = bb;
+	}
+
+      if (va_list_escapes)
+	break;
+    }
+
+  /* If there were no va_start uses in the function, there is no need to
+     save anything.  */
+  if (si.va_start_count == 0)
+    goto finish;
+
+  /* If some va_list arguments weren't local, we can't optimize.  */
+  if (va_list_escapes)
+    goto finish;
+
+  /* If the backend didn't tell us the field names, there is nothing
+     more we can do.  */
+  if (va_list_gpr_counter_name == NULL_TREE
+      && va_list_fpr_counter_name == NULL_TREE)
+    {
+      va_list_escapes = true;
+      goto finish;
+    }
+
+  FOR_EACH_BB (bb)
+    {
+      block_stmt_iterator i;
+
+      si.compute_sizes = -1;
+      si.bb = bb;
+      for (i = bsi_start (bb);
+	   !bsi_end_p (i) && !va_list_escapes;
+	   bsi_next (&i))
+	{
+	  tree stmt = bsi_stmt (i);
+	  tree call;
+
+	  /* Don't look at __builtin_va_{start,end}, they are ok.  */
+	  call = get_call_expr_in (stmt);
+	  if (call)
+	    {
+	      tree callee = get_callee_fndecl (call);
+
+	      if (callee
+		  && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL
+		  && (DECL_FUNCTION_CODE (callee) == BUILT_IN_VA_START
+		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_VA_END))
+		continue;
+	    }
+
+	  if (TREE_CODE (stmt) == MODIFY_EXPR)
+	    {
+	      tree lhs = TREE_OPERAND (stmt, 0);
+	      tree rhs = TREE_OPERAND (stmt, 1);
+
+	      if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
+		rhs = TREE_OPERAND (rhs, 0);
+
+	      /* Check for ap[0].field = temp.  */
+	      if (va_list_counter_struct_op (&si, lhs, rhs, true))
+		continue;
+
+	      /* Check for temp = ap[0].field.  */
+	      else if (va_list_counter_struct_op (&si, rhs, lhs, false))
+		continue;
+	    }
+
+	  /* All other uses of va_list are either va_copy (that is not handled
+	     in this optimization), taking address of va_list variable or
+	     passing va_list to other functions (in that case va_list might
+	     escape the function and therefore va_start needs to set it up
+	     fully), or some unexpected use of va_list.  None of these should
+	     happen in a gimplified VA_ARG_EXPR.  */
+	  if (walk_tree (&stmt, find_va_list_reference, si.va_list_vars, NULL))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		{
+		  fputs ("va_list escapes in ", dump_file);
+		  print_generic_expr (dump_file, stmt, dump_flags);
+		  fputc ('\n', dump_file);
+		}
+	      va_list_escapes = true;
+	    }
+	}
+
+      if (va_list_escapes)
+	break;
+    }
+
+finish:
+  if (va_list_escapes)
+    {
+      cfun->va_list_gpr_size = VA_LIST_MAX_GPR_SIZE;
+      cfun->va_list_fpr_size = VA_LIST_MAX_FPR_SIZE;
+    }
+  BITMAP_XFREE (si.va_list_vars);
+  if (dump_file)
+    {
+      fprintf (dump_file, "%s: va_list escapes %d, needs to save ",
+	       funcname, (int) va_list_escapes);
+      if (cfun->va_list_gpr_size >= VA_LIST_MAX_GPR_SIZE)
+	fputs ("all", dump_file);
+      else
+	fprintf (dump_file, "%d", cfun->va_list_gpr_size);
+      fputs (" GPR units and ", dump_file);
+      if (cfun->va_list_fpr_size >= VA_LIST_MAX_FPR_SIZE)
+	fputs ("all", dump_file);
+      else
+	fprintf (dump_file, "%d", cfun->va_list_fpr_size);
+      fputs (" FPR units.\n", dump_file);
+    }
+}
+
+
+struct tree_opt_pass pass_stdarg =
+{
+  "stdarg",				/* name */
+  gate_optimize_stdarg,			/* gate */
+  execute_optimize_stdarg,		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  0,					/* tv_id */
+  PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func,			/* todo_flags_finish */
+  0					/* letter */
+};
--- gcc/Makefile.in.jj	2004-09-28 10:19:43.000000000 +0200
+++ gcc/Makefile.in	2004-09-28 12:47:55.000000000 +0200
@@ -925,7 +925,7 @@ OBJS-common = \
  varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o	   \
  et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o	   \
  rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o          \
- lambda-trans.o	lambda-code.o tree-loop-linear.o
+ lambda-trans.o	lambda-code.o tree-loop-linear.o tree-stdarg.o
 
 OBJS-md = $(out_object_file)
 OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o		   \
@@ -1772,6 +1772,8 @@ tree-loop-linear.o: tree-loop-linear.c $
    errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
    $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
    $(TREE_DATA_REF_H) $(SCEV_H)
+tree-stdarg.o: tree-stdarg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(TREE_H) function.h diagnostic.h $(TREE_FLOW_H) tree-pass.h
 tree-gimple.o : tree-gimple.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(EXPR_H) \
 	$(RTL_H) $(TREE_GIMPLE_H) $(TM_H) coretypes.h bitmap.h $(GGC_H)
 tree-mudflap.o : $(CONFIG_H) errors.h $(SYSTEM_H) $(TREE_H) tree-inline.h \
--- gcc/function.h.jj	2004-09-22 09:42:44.000000000 +0200
+++ gcc/function.h	2004-09-28 12:50:27.000000000 +0200
@@ -428,8 +428,22 @@ struct function GTY(())
 
   /* Nonzero if code to initialize arg_pointer_save_area has been emitted.  */
   unsigned int arg_pointer_save_area_init : 1;
+
+  /* Number of units of general registers that need saving in stdarg
+     function.  The unit depends on the backend: it is either a number
+     of bytes or a number of registers.  */
+  unsigned int va_list_gpr_size : 8;
+
+  /* Number of units of floating point registers that need saving in stdarg
+     function.  */
+  unsigned int va_list_fpr_size : 8;
 };
 
+/* If va_list_[gf]pr_size is set to this, it means we don't know how
+   many units need to be saved.  */
+#define VA_LIST_MAX_GPR_SIZE	255
+#define VA_LIST_MAX_FPR_SIZE	255
+
 /* The function currently being compiled.  */
 extern GTY(()) struct function *cfun;
 
--- gcc/testsuite/gcc.c-torture/execute/stdarg-3.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/stdarg-3.c	2004-09-28 12:47:55.494886625 +0200
@@ -0,0 +1,166 @@
+#include <stdarg.h>
+
+extern void abort (void);
+
+int foo_arg, bar_arg;
+long x;
+double d;
+va_list gap;
+struct S1 { int i; double d; int j; double e; } s1;
+struct S2 { double d; long i; } s2;
+int y;
+
+void
+bar (int v)
+{
+  bar_arg = v;
+}
+
+void
+f1 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    x = va_arg (ap, long);
+  va_end (ap);
+}
+
+void
+f2 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    d = va_arg (ap, double);
+  va_end (ap);
+}
+
+void
+f3 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      x = va_arg (ap, long);
+      va_end (ap);
+      bar (x);
+    }
+}
+
+void
+f4 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      d = va_arg (ap, double);
+      va_end (ap);
+      bar (d + 4.0);
+    }
+}
+
+void
+f5 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    s1 = va_arg (ap, struct S1);
+  va_end (ap);
+}
+
+void
+f6 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    s2 = va_arg (ap, struct S2);
+  va_end (ap);
+}
+
+void
+f7 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      s1 = va_arg (ap, struct S1);
+      va_end (ap);
+      bar (s1.i);
+    }
+}
+
+void
+f8 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      s2 = va_arg (ap, struct S2);
+      y = va_arg (ap, int);
+      va_end (ap);
+      bar (s2.i);
+    }
+}
+
+int
+main (void)
+{
+  struct S1 a1, a3;
+  struct S2 a2, a4;
+
+  f1 (7, 1L, 2L, 3L, 5L, 7L, 9L, 11L, 13L);
+  if (x != 11L)
+    abort ();
+  f2 (6, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0);
+  if (d != 32.0)
+    abort ();
+  f3 (2, 1L, 3L);
+  if (bar_arg != 1L || x != 1L)
+    abort ();
+  f4 (2, 17.0, 19.0);
+  if (bar_arg != 21 || d != 17.0)
+    abort ();
+  a1.i = 131;
+  a1.j = 251;
+  a1.d = 15.0;
+  a1.e = 191.0;
+  a3 = a1;
+  a3.j = 254;
+  a3.e = 178.0;
+  f5 (2, a1, a3, a1);
+  if (s1.i != 131 || s1.j != 254 || s1.d != 15.0 || s1.e != 178.0)
+    abort ();
+  f5 (3, a1, a3, a1);
+  if (s1.i != 131 || s1.j != 251 || s1.d != 15.0 || s1.e != 191.0)
+    abort ();
+  a2.i = 138;
+  a2.d = 16.0;
+  a4.i = 257;
+  a4.d = 176.0;
+  f6 (2, a2, a4, a2);
+  if (s2.i != 257 || s2.d != 176.0)
+    abort ();
+  f6 (3, a2, a4, a2);
+  if (s2.i != 138 || s2.d != 16.0)
+    abort ();
+  f7 (2, a3, a1, a1);
+  if (s1.i != 131 || s1.j != 254 || s1.d != 15.0 || s1.e != 178.0)
+    abort ();
+  if (bar_arg != 131)
+    abort ();
+  f8 (3, a4, a2, a2);
+  if (s2.i != 257 || s2.d != 176.0)
+    abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/stdarg-2.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/stdarg-2.c	2004-09-28 12:47:55.495886444 +0200
@@ -0,0 +1,175 @@
+#include <stdarg.h>
+
+extern void abort (void);
+
+int foo_arg, bar_arg;
+long x;
+double d;
+va_list gap;
+
+void
+foo (int v, va_list ap)
+{
+  switch (v)
+    {
+    case 5:
+      foo_arg = va_arg (ap, int) + va_arg (ap, double);
+      foo_arg += va_arg (ap, long long);
+      break;
+    case 8:
+      foo_arg = va_arg (ap, long long) + va_arg (ap, double);
+      break;
+    case 11:
+      foo_arg = va_arg (ap, int) + va_arg (ap, long double);
+      break;
+    default:
+      abort ();
+    }
+}
+
+void
+bar (int v)
+{
+  if (v == 0x4002)
+    {
+      if (va_arg (gap, int) != 13 || va_arg (gap, double) != -14.0)
+	abort ();
+    }
+  bar_arg = v;
+}
+
+void
+f1 (int i, ...)
+{
+  va_start (gap, i);
+  x = va_arg (gap, long);
+  va_end (gap);
+}
+
+void
+f2 (int i, ...)
+{
+  va_start (gap, i);
+  bar (i);
+  va_end (gap);
+}
+
+void
+f3 (int i, ...)
+{
+  va_list aps[10];
+  va_start (aps[4], i);
+  x = va_arg (aps[4], long);
+  va_end (aps[4]);
+}
+
+void
+f4 (int i, ...)
+{
+  va_list aps[10];
+  va_start (aps[4], i);
+  bar (i);
+  va_end (aps[4]);
+}
+
+void
+f5 (int i, ...)
+{
+  va_list aps[10];
+  va_start (aps[4], i);
+  foo (i, aps[4]);
+  va_end (aps[4]);
+}
+
+struct A { int i; va_list g; va_list h[2]; };
+
+void
+f6 (int i, ...)
+{
+  struct A a;
+  va_start (a.g, i);
+  x = va_arg (a.g, long);
+  va_end (a.g);
+}
+
+void
+f7 (int i, ...)
+{
+  struct A a;
+  va_start (a.g, i);
+  bar (i);
+  va_end (a.g);
+}
+
+void
+f8 (int i, ...)
+{
+  struct A a;
+  va_start (a.g, i);
+  foo (i, a.g);
+  va_end (a.g);
+}
+
+void
+f10 (int i, ...)
+{
+  struct A a;
+  va_start (a.h[1], i);
+  x = va_arg (a.h[1], long);
+  va_end (a.h[1]);
+}
+
+void
+f11 (int i, ...)
+{
+  struct A a;
+  va_start (a.h[1], i);
+  bar (i);
+  va_end (a.h[1]);
+}
+
+void
+f12 (int i, ...)
+{
+  struct A a;
+  va_start (a.h[1], i);
+  foo (i, a.h[1]);
+  va_end (a.h[1]);
+}
+
+int
+main (void)
+{
+  f1 (1, 79L);	/* f1 reads a long, so pass a long.  */
+  if (x != 79)
+    abort ();
+  f2 (0x4002, 13, -14.0);
+  if (bar_arg != 0x4002)
+    abort ();
+  f3 (3, 2031L);
+  if (x != 2031)	/* f3 stores into x; bar_arg is untouched.  */
+    abort ();
+  f4 (4, 18);
+  f5 (5, 1, 19.0, 18LL);
+  if (foo_arg != 38)
+    abort ();
+  f6 (6, 18L);
+  if (x != 18L)
+    abort ();
+  f7 (7);
+  if (bar_arg != 7)
+    abort ();
+  f8 (8, 2031LL, 13.0);
+  if (foo_arg != 2044)
+    abort ();
+  f10 (9, 180L);
+  if (x != 180L)
+    abort ();
+  f11 (10);
+  if (bar_arg != 10)
+    abort ();
+  f12 (11, 2030, 12.0L);
+  if (foo_arg != 2042)
+    abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/stdarg-1.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/stdarg-1.c	2004-09-28 12:47:55.497886082 +0200
@@ -0,0 +1,156 @@
+#include <stdarg.h>
+
+extern void abort (void);
+
+int foo_arg, bar_arg;
+long x;
+double d;
+va_list gap;
+va_list *pap;
+
+void
+foo (int v, va_list ap)
+{
+  switch (v)
+    {
+    case 5: foo_arg = va_arg (ap, int); break;
+    default: abort ();
+    }
+}
+
+void
+bar (int v)
+{
+  if (v == 0x4006)  /* Selector passed by main to f5, which va_copy's into gap.  */
+    {
+      if (va_arg (gap, double) != 17.0
+	  || va_arg (gap, long) != 129L)
+	abort ();
+    }
+  else if (v == 0x4008)  /* Selector passed by main to f7 and f8; read via pap.  */
+    {
+      if (va_arg (*pap, long long) != 14LL
+	  || va_arg (*pap, long double) != 131.0L
+	  || va_arg (*pap, int) != 17)
+	abort ();
+    }
+  bar_arg = v;  /* Record the last value seen so main can check the call.  */
+}
+
+void
+f0 (int i, ...)
+{
+}
+
+void
+f1 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  va_end (ap);
+}
+
+void
+f2 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  bar (d);
+  x = va_arg (ap, long);
+  bar (x);
+  va_end (ap);
+}
+
+void
+f3 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  d = va_arg (ap, double);
+  va_end (ap);
+}
+
+void
+f4 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  x = va_arg (ap, double);
+  foo (i, ap);
+  va_end (ap);
+}
+
+void
+f5 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  va_copy (gap, ap);
+  bar (i);
+  va_end (ap);
+  va_end (gap);
+}
+
+void
+f6 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  bar (d);
+  va_arg (ap, long);
+  va_arg (ap, long);
+  x = va_arg (ap, long);
+  bar (x);
+  va_end (ap);
+}
+
+void
+f7 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  pap = &ap;  /* Publish the local va_list so bar can read it through *pap.  */
+  bar (i);
+  va_end (ap);
+}
+
+void
+f8 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  pap = &ap;  /* Publish the local va_list so bar can read it through *pap.  */
+  bar (i);
+  d = va_arg (ap, double);
+  va_end (ap);
+}
+
+int
+main (void)
+{
+  f0 (1);
+  f1 (2);
+  d = 31.0;
+  f2 (3, 28L);
+  if (bar_arg != 28 || x != 28)
+    abort ();
+  f3 (4, 131.0);
+  if (d != 131.0)
+    abort ();
+  f4 (5, 16.0, 128);
+  if (x != 16 || foo_arg != 128)
+    abort ();
+  f5 (0x4006, 17.0, 129L);
+  if (bar_arg != 0x4006)
+    abort ();
+  f6 (7, 12L, 14L, -31L);
+  if (bar_arg != -31)
+    abort ();
+  f7 (0x4008, 14LL, 131.0L, 17, 26.0);
+  if (bar_arg != 0x4008)
+    abort ();
+  f8 (0x4008, 14LL, 131.0L, 17, 27.0);
+  if (bar_arg != 0x4008 || d != 27.0)
+    abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c	2004-09-28 16:05:21.615726282 +0200
@@ -0,0 +1,67 @@
+/* This test is only for architectures that save general and floating point
+   registers separately in stdarg functions.  */
+/* { dg-do compile { target x86_64-*-* powerpc-*-* } } */
+/* { dg-options "-O2 -fdump-tree-stdarg" } */
+
+#include <stdarg.h>
+
+extern void foo (int, va_list);
+extern void bar (int);
+long x;
+double d;
+
+/* Here va_arg can be executed more than once for one va_start.  All GPR
+   registers need to be saved.  */
+void
+f1 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    x = va_arg (ap, long);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f2 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    d = va_arg (ap, double);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" } } */
+
+/* Here va_arg can be executed at most as many times as va_start.
+   Only one GPR needs to be saved.  */
+void
+f3 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      x = va_arg (ap, long);
+      va_end (ap);
+      bar (x);
+    }
+}
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f4 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      d = va_arg (ap, double);
+      va_end (ap);
+      bar (d + 2.5);
+    }
+}
+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c	2004-09-28 16:01:50.445391225 +0200
@@ -0,0 +1,124 @@
+/* This test is only for architectures that save general and floating point
+   registers separately in stdarg functions.  */
+/* { dg-do compile { target x86_64-*-* powerpc-*-* } } */
+/* { dg-options "-O2 -fdump-tree-stdarg" } */
+
+#include <stdarg.h>
+
+extern void foo (int, va_list);
+extern void bar (int);
+long x;
+va_list gap;
+
+/* If va_list is not a local variable, it escapes the function.  */
+void
+f1 (int i, ...)
+{
+  va_start (gap, i);
+  x = va_arg (gap, long);
+  va_end (gap);
+}
+/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f2 (int i, ...)
+{
+  va_start (gap, i);
+  bar (i);
+  va_end (gap);
+}
+/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+/* tree-stdarg.c only handles va_list variables, not arrays of them or
+   va_list fields embedded in structures.  */
+void
+f3 (int i, ...)
+{
+  va_list aps[10];
+  va_start (aps[4], i);
+  x = va_arg (aps[4], long);
+  va_end (aps[4]);
+}
+/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f4 (int i, ...)
+{
+  va_list aps[10];
+  va_start (aps[4], i);
+  bar (i);
+  va_end (aps[4]);
+}
+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f5 (int i, ...)
+{
+  va_list aps[10];
+  va_start (aps[4], i);
+  foo (i, aps[4]);
+  va_end (aps[4]);
+}
+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+struct A { int i; va_list g; va_list h[2]; };
+
+void
+f6 (int i, ...)
+{
+  struct A a;
+  va_start (a.g, i);
+  x = va_arg (a.g, long);
+  va_end (a.g);
+}
+/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f7 (int i, ...)
+{
+  struct A a;
+  va_start (a.g, i);
+  bar (i);
+  va_end (a.g);
+}
+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f8 (int i, ...)
+{
+  struct A a;
+  va_start (a.g, i);
+  foo (i, a.g);
+  va_end (a.g);
+}
+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f10 (int i, ...)
+{
+  struct A a;
+  va_start (a.h[1], i);
+  x = va_arg (a.h[1], long);
+  va_end (a.h[1]);
+}
+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f11 (int i, ...)
+{
+  struct A a;
+  va_start (a.h[1], i);
+  bar (i);
+  va_end (a.h[1]);
+}
+/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f12 (int i, ...)
+{
+  struct A a;
+  va_start (a.h[1], i);
+  foo (i, a.h[1]);
+  va_end (a.h[1]);
+}
+/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c	2004-09-28 16:02:31.874001839 +0200
@@ -0,0 +1,131 @@
+/* This test is only for architectures that save general and floating point
+   registers separately in stdarg functions.  */
+/* { dg-do compile { target x86_64-*-* powerpc-*-* } } */
+/* { dg-options "-O2 -fdump-tree-stdarg" } */
+
+#include <stdarg.h>
+
+extern void foo (int, va_list);
+extern void bar (int);
+long x;
+double d;
+va_list gap;
+va_list *pap;
+
+void
+f1 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f2 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  bar (d);
+  x = va_arg (ap, long);
+  bar (x);
+  va_end (ap);
+}
+/* The counter unit may be one register (1), or bytes on a 32-bit
+   architecture (4), or bytes on a 64-bit architecture (8).  */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f3 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  d = va_arg (ap, double);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" } } */
+
+void
+f4 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  x = va_arg (ap, double);
+  foo (i, ap);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f5 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  va_copy (gap, ap);
+  bar (i);
+  va_end (ap);
+  va_end (gap);
+}
+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f6 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  bar (d);
+  va_arg (ap, long);
+  va_arg (ap, long);
+  x = va_arg (ap, long);
+  bar (x);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f7 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  pap = &ap;  /* Storing the address in a global makes ap escape.  */
+  bar (6);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f8 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  pap = &ap;  /* Storing the address in a global makes ap escape.  */
+  bar (d);
+  x = va_arg (ap, long);
+  bar (x);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f9 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  __asm __volatile ("" : "=r" (pap) : "0" (&ap));
+  bar (6);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+void
+f10 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  __asm __volatile ("" : "=r" (pap) : "0" (&ap));
+  bar (d);
+  x = va_arg (ap, long);
+  bar (x);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/stdarg-1.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/stdarg-1.c	2004-09-28 16:01:50.445391225 +0200
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-stdarg" } */
+
+#include <stdarg.h>
+
+/* This can be handled on all arches.  If there is no va_start, registers don't need
+   to be saved.  */
+void
+f1 (int i, ...)
+{
+}
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" } } */
--- gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c.jj	2004-09-28 12:47:55.000000000 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c	2004-09-28 16:01:50.446391047 +0200
@@ -0,0 +1,66 @@
+/* This test is specific to x86-64 function passing.  */
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options "-O2 -fdump-tree-stdarg" } */
+
+#include <stdarg.h>
+
+extern void foo (int, va_list);
+extern void bar (int);
+struct S1 { int i; double d; int j; double e; } s1;
+struct S2 { double d; long i; } s2;
+int y;
+
+/* Here va_arg can be executed more than once for one va_start.  */
+void
+f1 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    s1 = va_arg (ap, struct S1);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f2 (int i, ...)
+{
+  va_list ap;
+  va_start (ap, i);
+  while (i-- > 0)
+    s2 = va_arg (ap, struct S2);
+  va_end (ap);
+}
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" } } */
+
+/* Here va_arg can be executed at most as many times as va_start.  */
+void
+f3 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      s1 = va_arg (ap, struct S1);
+      va_end (ap);
+      bar (s1.i);
+    }
+}
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" } } */
+
+void
+f4 (int i, ...)
+{
+  va_list ap;
+  int j = i;
+  while (j-- > 0)
+    {
+      va_start (ap, i);
+      s2 = va_arg (ap, struct S2);
+      y = va_arg (ap, int);
+      va_end (ap);
+      bar (s2.i);
+    }
+}
+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" } } */


	Jakub



More information about the Gcc-patches mailing list