This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] callgraph and unit at time compilation


Hi,
this patch is my first try to implement some callgraph based
optimization.  At the moment it contains just code to build callgraph,
modifies c frontend to defer compilation of all functions and do
out-of-order inlining and in next pass inlining of static function
called just once with updating of the structure.

I hope to have some more exact numbers soon, but I've measured about
3-5% speedup for gzip, vpr, vortex and gap from spec2000 testsuite and
regression for parser for some reason. There are also slight speedups in
parser and mcf (my numbers come from different versions of compiler with
other flags, so I will send full spec run once I will have free machine
for it).  The compile time cost is about 7%, code growth caused by more
inlining opurtunities 5%.

My question is whether this approach looks sensible.  If so, I would
like to hear some comments from people more experienced with tree
inlining than I am.

If it looks sane, I would like to polish the patch futher for BIB branch
(try to get it working for C++) and implement flag to allow use of
specialized calling conventions for static functions that are never
called indirectly - that should help a bit to i386 especially in PIC
mode.

Comments?

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/egcs/gcc/Makefile.in,v
retrieving revision 1.939.2.13
diff -c -3 -p -r1.939.2.13 Makefile.in
*** Makefile.in	5 Nov 2002 19:11:50 -0000	1.939.2.13
--- Makefile.in	15 Nov 2002 22:04:00 -0000
*************** OBJS = alias.o bb-reorder.o bitmap.o bui
*** 754,760 ****
   sibcall.o simplify-rtx.o ssa.o ssa-ccp.o ssa-dce.o stmt.o		   \
   stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
   tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
!  et-forest.o $(GGC) $(out_object_file) $(EXTRA_OBJS)
  
  BACKEND = main.o libbackend.a
  
--- 754,760 ----
   sibcall.o simplify-rtx.o ssa.o ssa-ccp.o ssa-dce.o stmt.o		   \
   stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \
   tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \
!  et-forest.o callgraph.o $(GGC) $(out_object_file) $(EXTRA_OBJS)
  
  BACKEND = main.o libbackend.a
  
Index: c-decl.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/c-decl.c,v
retrieving revision 1.349.2.3
diff -c -3 -p -r1.349.2.3 c-decl.c
*** c-decl.c	21 Oct 2002 17:51:52 -0000	1.349.2.3
--- c-decl.c	15 Nov 2002 22:04:13 -0000
*************** c_expand_deferred_function (fndecl)
*** 6404,6415 ****
  {
    /* DECL_INLINE or DECL_RESULT might got cleared after the inline
       function was deferred, e.g. in duplicate_decls.  */
!   if (DECL_INLINE (fndecl) && DECL_RESULT (fndecl))
      {
        c_expand_body (fndecl, 0, 0);
        current_function_decl = NULL;
      }
  }
  
  /* Generate the RTL for the body of FNDECL.  If NESTED_P is nonzero,
     then we are already in the process of generating RTL for another
--- 6404,6422 ----
  {
    /* DECL_INLINE or DECL_RESULT might got cleared after the inline
       function was deferred, e.g. in duplicate_decls.  */
!   if (/*DECL_INLINE (fndecl) && DECL_RESULT (fndecl)*/1)
      {
        c_expand_body (fndecl, 0, 0);
        current_function_decl = NULL;
      }
  }
+ void
+ expand_deferred_function (fndecl)
+      tree fndecl;
+ {
+   c_expand_body (fndecl, 0, 0);
+   current_function_decl = NULL;
+ }
  
  /* Generate the RTL for the body of FNDECL.  If NESTED_P is nonzero,
     then we are already in the process of generating RTL for another
*************** c_expand_body (fndecl, nested_p, can_def
*** 6428,6433 ****
--- 6435,6442 ----
    if (flag_syntax_only)
      return;
  
+   create_cgraph_edges (fndecl, DECL_SAVED_TREE (fndecl));
+ 
    if (flag_inline_trees)
      {
        /* First, cache whether the current function is inlinable.  Some
*************** c_expand_body (fndecl, nested_p, can_def
*** 6436,6446 ****
        timevar_push (TV_INTEGRATION);
        uninlinable = ! tree_inlinable_function_p (fndecl);
        
!       if (! uninlinable && can_defer_p
! 	  /* Save function tree for inlining.  Should return 0 if the
!              language does not support function deferring or the
!              function could not be deferred.  */
! 	  && defer_fn (fndecl))
  	{
  	  /* Let the back-end know that this function exists.  */
  	  (*debug_hooks->deferred_inline_function) (fndecl);
--- 6445,6457 ----
        timevar_push (TV_INTEGRATION);
        uninlinable = ! tree_inlinable_function_p (fndecl);
        
!       if (can_defer_p &&
! 	  (flag_unit_at_time
! 	   || (! uninlinable && can_defer_p
! 	       /* Save function tree for inlining.  Should return 0 if the
! 		  language does not support function deferring or the
! 		  function could not be deferred.  */
! 	       && defer_fn (fndecl))))
  	{
  	  /* Let the back-end know that this function exists.  */
  	  (*debug_hooks->deferred_inline_function) (fndecl);
Index: c-objc-common.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/c-objc-common.c,v
retrieving revision 1.15.4.3
diff -c -3 -p -r1.15.4.3 c-objc-common.c
*** c-objc-common.c	28 Oct 2002 19:47:00 -0000	1.15.4.3
--- c-objc-common.c	15 Nov 2002 22:04:13 -0000
*************** expand_deferred_fns ()
*** 290,295 ****
--- 290,298 ----
  {
    unsigned int i;
  
+   if (flag_unit_at_time)
+     cgraph_optimize ();
+ 
    for (i = 0; i < VARRAY_ACTIVE_SIZE (deferred_fns); i++)
      {
        tree decl = VARRAY_TREE (deferred_fns, i);
Index: callgraph.c
===================================================================
RCS file: callgraph.c
diff -N callgraph.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- callgraph.c	15 Nov 2002 22:04:13 -0000
***************
*** 0 ****
--- 1,322 ----
+ /* Callgraph handling code.
+    Copyright (C) 2002 Free Software Foundation, Inc.
+    Contributed by Jan Hubicka
+ 
+ This file is part of GCC.
+ 
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+ 
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ #include "config.h"
+ #include "system.h"
+ #include "tree.h"
+ #include <hashtab.h>
+ #include "tree-inline.h"
+ 
+ /* The cgraph data strutcture.
+    Each function decl has assigned cgraph_node listing calees and callers.  */
+ 
+ struct cgraph_node
+ {
+   tree decl;
+   struct cgraph_edge *callees;
+   struct cgraph_edge *callers;
+   struct cgraph_node *next;
+   void *aux;
+ };
+ 
+ struct cgraph_edge
+ {
+   struct cgraph_node *caller, *callee;
+   struct cgraph_edge *next_caller;
+   struct cgraph_edge *next_callee;
+ };
+ 
+ static htab_t cgraph_hash = 0;
+ static struct cgraph_node *cgraph_nodes;
+ 
+ static struct cgraph_node *cgraph_node PARAMS ((tree decl));
+ static struct cgraph_edge *create_edge PARAMS ((struct cgraph_node *, 
+ 						struct cgraph_node *));
+ static void remove_edge PARAMS ((struct cgraph_node *, 
+ 				 struct cgraph_node *));
+ static struct cgraph_edge *record_call PARAMS ((tree, tree));
+ static tree record_call_1 PARAMS ((tree *, int *, void *));
+ static hashval_t hash_node PARAMS ((const PTR));
+ static int eq_node PARAMS ((const PTR, const PTR));
+ static struct cgraph_node *cgraph_node PARAMS ((tree));
+ static tree fixup_calls PARAMS ((tree *, int *, void *));
+ 
+ /* Returns a hash code for P.  */
+ 
+ static hashval_t
+ hash_node (p)
+      const PTR p;
+ {
+   return (hashval_t)
+     htab_hash_pointer (DECL_NAME (((struct cgraph_node *) p)->decl));
+ }
+ 
+ /* Returns non-zero if P1 and P2 are equal.  */
+ 
+ static int
+ eq_node (p1, p2)
+      const PTR p1;
+      const PTR p2;
+ {
+   return (DECL_NAME (((struct cgraph_node *) p1)->decl)) ==
+     DECL_NAME ((tree) p2);
+ }
+ 
+ /* Return cgraph node assigned to DECL.  Create new one when needed.  */
+ static struct cgraph_node *
+ cgraph_node (decl)
+      tree decl;
+ {
+   struct cgraph_node *node;
+   struct cgraph_node **slot;
+   if (!cgraph_hash)
+     {
+       cgraph_hash = htab_create (10, hash_node, eq_node, NULL);
+     }
+ 
+   slot =
+     (struct cgraph_node **) htab_find_slot_with_hash (cgraph_hash, decl,
+ 							 htab_hash_pointer
+ 							 (DECL_NAME (decl)),
+ 							 1);
+   if (*slot)
+     return *slot;
+   node = xmalloc (sizeof (*node));
+   node->decl = decl;
+   node->callees = NULL;
+   node->callers = NULL;
+   node->next = cgraph_nodes;
+   cgraph_nodes = node;
+   *slot = node;
+   return node;
+ }
+ 
+ /* Create edge from CALLER to CALLEE in the cgraph.  */
+ 
+ static struct cgraph_edge *
+ create_edge (caller, callee)
+      struct cgraph_node *caller, *callee;
+ {
+   struct cgraph_edge *edge = xmalloc (sizeof (struct cgraph_edge));
+   edge->caller = caller;
+   edge->callee = callee;
+   edge->next_caller = callee->callers;
+   edge->next_callee = caller->callees;
+   caller->callees = edge;
+   callee->callers = edge;
+   return edge;
+ }
+ 
+ /* Create edge from CALLER to CALLEE in the cgraph.  */
+ 
+ static void
+ remove_edge (caller, callee)
+      struct cgraph_node *caller, *callee;
+ {
+   struct cgraph_edge **edge, **edge2;
+   for (edge = &callee->callers; *edge && (*edge)->caller != caller;
+        edge = &((*edge)->next_caller))
+     ;
+   if (!*edge)
+     abort ();
+   *edge = (*edge)->next_caller;
+   for (edge2 = &caller->callees; *edge2 && (*edge2)->callee != callee;
+        edge2 = &(*edge2)->next_callee)
+     ;
+   if (!*edge2)
+     abort ();
+   *edge2 = (*edge2)->next_callee;
+ }
+ 
+ /* Record call from CALLER to CALLEE  */
+ static struct cgraph_edge *
+ record_call (caller, callee)
+      tree caller, callee;
+ {
+   return create_edge (cgraph_node (caller), cgraph_node (callee));
+ }
+ 
+ void
+ cgraph_remove_call (caller, callee)
+      tree caller, callee;
+ {
+   remove_edge (cgraph_node (caller), cgraph_node (callee));
+ }
+ 
+ /* Return true when CALLER_DECL calls CALLEE_DECL.  */
+ bool
+ cgraph_calls_p (caller_decl, callee_decl)
+      tree caller_decl, callee_decl;
+ {
+   struct cgraph_node *caller = cgraph_node (caller_decl);
+   struct cgraph_node *callee = cgraph_node (callee_decl);
+   struct cgraph_edge *edge;
+   for (edge = callee->callers; edge && (edge)->caller != caller;
+        edge = (edge->next_caller))
+     ;
+   return edge != NULL;
+ }
+ 
+ /* Walk tree and record all calls.  Called via walk_tree.  */
+ static tree
+ record_call_1 (tp, walk_subtrees, data)
+      tree *tp;
+      int *walk_subtrees ATTRIBUTE_UNUSED;
+      void *data;
+ {
+   if (TREE_CODE (*tp) == CALL_EXPR)
+     {
+       tree decl = TREE_OPERAND (*tp, 0);
+       if (TREE_CODE (decl) == ADDR_EXPR)
+ 	decl = TREE_OPERAND (decl, 0);
+       if (TREE_CODE (decl) == FUNCTION_DECL)
+ 	record_call (data, decl);
+     }
+   return NULL;
+ }
+ 
+ /* Create cgraph edges for function calles via BODY.  */
+ void
+ create_cgraph_edges (decl, body)
+      tree decl;
+      tree body;
+ {
+   struct cgraph_node *node = cgraph_node (decl);
+   node->decl = decl;
+   walk_tree (&body, record_call_1, decl, NULL);
+ }
+ 
+ /* Dump the callgraph.  */
+ void
+ dump_cgraph (f)
+      FILE *f;
+ {
+   struct cgraph_node *node;
+   fprintf (f, "Callgraph:\n\n");
+   for (node = cgraph_nodes; node; node = node->next)
+     {
+       struct cgraph_edge *edge;
+       fprintf (f, "%s\n  called by :",
+ 	       IDENTIFIER_POINTER (DECL_NAME (node->decl)));
+       for (edge = node->callers; edge; edge = edge->next_caller)
+ 	fprintf (f, "%s ",
+ 		 IDENTIFIER_POINTER (DECL_NAME (edge->caller->decl)));
+       fprintf (f, "\n  calls: ");
+       for (edge = node->callees; edge; edge = edge->next_callee)
+ 	fprintf (f, "%s ",
+ 		 IDENTIFIER_POINTER (DECL_NAME (edge->callee->decl)));
+       fprintf (f, "\n");
+     }
+ }
+ 
+ /* Replace forward declaration by real ones allowing reverse inlining.  */
+ static tree
+ fixup_calls (tp, walk_subtrees, data)
+      tree *tp;
+      int *walk_subtrees ATTRIBUTE_UNUSED;
+      void *data ATTRIBUTE_UNUSED;
+ {
+   if (TREE_CODE (*tp) == CALL_EXPR)
+     {
+       tree *decl = &TREE_OPERAND (*tp, 0);
+       if (TREE_CODE (*decl) == ADDR_EXPR)
+ 	decl = &TREE_OPERAND (*decl, 0);
+       if (TREE_CODE (*decl) == FUNCTION_DECL)
+ 	*decl = cgraph_node (*decl)->decl;
+     }
+   return NULL;
+ }
+ 
+ /* Perform simple optimizations based on callgraph.  */
+ void
+ cgraph_optimize ()
+ {
+   struct cgraph_node *node;
+   bool marked = 0;
+ 
+   /* First kill forward declaration so reverse inling works properly.  */
+   for (node = cgraph_nodes; node; node = node->next)
+     {
+       tree decl = node->decl;
+       if (DECL_SAVED_TREE (decl))
+ 	walk_tree (&DECL_SAVED_TREE (decl), fixup_calls, NULL, NULL);
+     }
+   /* Now inline functions marked for inlining.  */
+   for (node = cgraph_nodes; node; node = node->next)
+     {
+       tree decl = node->decl;
+       if (DECL_SAVED_TREE (decl)
+ 	  && (TREE_PUBLIC (decl)
+ 	      || !tree_inlinable_function_p (decl))
+ 	  && !TREE_ASM_WRITTEN (decl))
+ 	optimize_inline_calls (decl);
+     }
+   /* Now look for function called only once and mark them to inline.  From this
+      point number of calls to given function won't grow.  */
+   for (node = cgraph_nodes; node; node = node->next)
+     {
+       tree decl = node->decl;
+       if (node->callers && !node->callers->next_caller && !TREE_PUBLIC (decl))
+ 	{
+ 	  DECL_ATTRIBUTES (decl) =
+ 	    chainon (DECL_ATTRIBUTES (decl),
+ 		     build_tree_list (get_identifier ("always_inline"),
+ 				      NULL_TREE));
+ 	  DECL_INLINE (decl) = 1;
+ 	  DECL_UNINLINABLE (decl) = 0;
+ 	  node->aux = (void *) 1;
+ 	  marked = 1;
+ 	}
+       else
+ 	node->aux = NULL;
+     }
+   /* Inline function we marked.  */
+   if (marked)
+     for (node = cgraph_nodes; node; node = node->next)
+       {
+ 	tree decl = node->decl;
+ 	if (DECL_SAVED_TREE (decl)
+ 	    && (TREE_PUBLIC (decl)
+ 		|| !tree_inlinable_function_p (decl))
+ 	    && !TREE_ASM_WRITTEN (decl))
+ 	  {
+ 	    struct cgraph_edge *e;
+ 	    for (e = node->callees; e && !e->callee->aux ; e = e->next_callee)
+ 	      ;
+ 	    if (e)
+ 	      optimize_inline_calls (decl);
+ 	  }
+       }
+ #if 0
+   /* Output everything.  */
+   for (node = cgraph_nodes; node; node = node->next)
+     {
+       tree decl = node->decl;
+       if (DECL_SAVED_TREE (decl))
+ 	{
+ 	  if (!TREE_ASM_WRITTEN (decl))
+ 	    {
+ 	      expand_deferred_function (decl);
+ 	      current_function_decl = NULL;
+ 	    }
+ 	}
+     }
+ #endif
+ }
Index: flags.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/flags.h,v
retrieving revision 1.88.4.3
diff -c -3 -p -r1.88.4.3 flags.h
*** flags.h	21 Oct 2002 17:51:56 -0000	1.88.4.3
--- flags.h	15 Nov 2002 22:04:14 -0000
*************** extern int flag_zero_initialized_in_bss;
*** 659,664 ****
--- 659,666 ----
  /* Nonzero means disable transformations observable by signaling NaNs.  */
  extern int flag_signaling_nans;
  
+ extern int flag_unit_at_time;
+ 
  /* True if the given mode has a NaN representation and the treatment of
     NaN operands is important.  Certain optimizations, such as folding
     x * 0 into x, are not correct for NaN operands, and are normally
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/toplev.c,v
retrieving revision 1.668.4.10
diff -c -3 -p -r1.668.4.10 toplev.c
*** toplev.c	5 Nov 2002 19:11:55 -0000	1.668.4.10
--- toplev.c	15 Nov 2002 22:04:18 -0000
*************** int flag_new_regalloc = 0;
*** 883,888 ****
--- 883,892 ----
  
  int flag_tracer = 0;
  
+ /* Nonzero if we porform compilation unit at time compilation.  */
+ 
+ int flag_unit_at_time = 0;
+ 
  /* Values of the -falign-* flags: how much to align labels in code.
     0 means `use default', 1 means `don't align'.
     For each variable, there is an _log variant which is the power
*************** static const lang_independent_options f_
*** 995,1000 ****
--- 999,1006 ----
     N_("Optimize sibling and tail recursive calls") },
    {"tracer", &flag_tracer, 1,
     N_("Perform superblock formation via tail duplication") },
+   {"unit-at-time", &flag_unit_at_time, 1,
+    N_("Compile whole compilation unit at time") },
    {"cse-follow-jumps", &flag_cse_follow_jumps, 1,
     N_("When running CSE, follow jumps to their targets") },
    {"cse-skip-blocks", &flag_cse_skip_blocks, 1,
*************** parse_options_and_default_flags (argc, a
*** 4848,4853 ****
--- 4854,4860 ----
        flag_delete_null_pointer_checks = 1;
        flag_reorder_blocks = 1;
        flag_reorder_functions = 1;
+       flag_unit_at_time = 1;
      }
  
    if (optimize >= 3)
Index: tree-inline.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/tree-inline.c,v
retrieving revision 1.28.4.5
diff -c -3 -p -r1.28.4.5 tree-inline.c
*** tree-inline.c	28 Oct 2002 19:47:09 -0000	1.28.4.5
--- tree-inline.c	15 Nov 2002 22:04:19 -0000
*************** typedef struct inline_data
*** 103,108 ****
--- 103,110 ----
    /* Hash table used to prevent walk_tree from visiting the same node
       umpteen million times.  */
    htab_t tree_pruner;
+   /* Decl of function we are inlining into.  */
+   tree decl;
  } inline_data;
  
  /* Prototypes.  */
*************** expand_call_inline (tp, walk_subtrees, d
*** 1299,1304 ****
--- 1301,1312 ----
  
    (*lang_hooks.tree_inlining.end_inlining) (fn);
  
+   /* Update callgraph if needed.  */
+   if (id->decl && cgraph_calls_p (id->decl, fn))
+     {
+       cgraph_remove_call (id->decl, fn);
+       create_cgraph_edges (id->decl, *tp);
+     }
    /* Keep iterating.  */
    return NULL_TREE;
  }
*************** optimize_inline_calls (fn)
*** 1331,1336 ****
--- 1339,1345 ----
    /* Clear out ID.  */
    memset (&id, 0, sizeof (id));
  
+   id.decl = fn;
    /* Don't allow recursion into FN.  */
    VARRAY_TREE_INIT (id.fns, 32, "fns");
    VARRAY_PUSH_TREE (id.fns, fn);
Index: tree.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/tree.h,v
retrieving revision 1.349.4.11
diff -c -3 -p -r1.349.4.11 tree.h
*** tree.h	28 Oct 2002 19:47:10 -0000	1.349.4.11
--- tree.h	15 Nov 2002 22:04:20 -0000
*************** extern void dump_node			PARAMS ((tree, i
*** 3141,3146 ****
--- 3141,3154 ----
  extern int dump_switch_p                PARAMS ((const char *));
  extern const char *dump_flag_name	PARAMS ((enum tree_dump_index));
  
+ void expand_deferred_function PARAMS ((tree));
+ /* In callgraph.c  */
+ void create_cgraph_edges		PARAMS ((tree, tree));
+ void dump_cgraph			PARAMS ((FILE *));
+ void cgraph_output_all_functions	PARAMS ((void));
+ void cgraph_remove_call			PARAMS ((tree, tree));
+ bool cgraph_calls_p			PARAMS ((tree, tree));
+ 
  
  /* Redefine abort to report an internal error w/o coredump, and
     reporting the location of the error in the source file.  This logic


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]