PATCH: Profile driven specialization of indirect/virtual calls

Tomas Bily tbily@suse.cz
Tue Dec 19 00:15:00 GMT 2006


Hi,

this is update to current profiler histogram infrastructure.

Bootstraped & tested on x86-linux, x86_64-linux.
OK?

 Tomas
-------------- next part --------------
Index: gcc/cgraph.c
===================================================================
*** gcc/cgraph.c	(revision 119879)
--- gcc/cgraph.c	(working copy)
*************** int cgraph_n_nodes;
*** 116,121 ****
--- 116,124 ----
  /* Maximal uid used in cgraph nodes.  */
  int cgraph_max_uid;
  
+ /* Maximal pid used for profiling */
+ int cgraph_max_pid;
+ 
  /* Set when whole unit has been analyzed so we can access global info.  */
  bool cgraph_global_info_ready = false;
  
*************** cgraph_create_node (void)
*** 164,169 ****
--- 167,173 ----
    node = GGC_CNEW (struct cgraph_node);
    node->next = cgraph_nodes;
    node->uid = cgraph_max_uid++;
+   node->pid = -1;
    node->order = cgraph_order++;
    if (cgraph_nodes)
      cgraph_nodes->previous = node;
*************** void
*** 638,644 ****
  dump_cgraph_node (FILE *f, struct cgraph_node *node)
  {
    struct cgraph_edge *edge;
!   fprintf (f, "%s/%i:", cgraph_node_name (node), node->uid);
    if (node->global.inlined_to)
      fprintf (f, " (inline copy in %s/%i)",
  	     cgraph_node_name (node->global.inlined_to),
--- 642,648 ----
  dump_cgraph_node (FILE *f, struct cgraph_node *node)
  {
    struct cgraph_edge *edge;
!   fprintf (f, "%s/%i(%i):", cgraph_node_name (node), node->uid, node->pid);
    if (node->global.inlined_to)
      fprintf (f, " (inline copy in %s/%i)",
  	     cgraph_node_name (node->global.inlined_to),
Index: gcc/cgraph.h
===================================================================
*** gcc/cgraph.h	(revision 119879)
--- gcc/cgraph.h	(working copy)
*************** struct cgraph_node GTY((chain_next ("%h.
*** 180,185 ****
--- 180,189 ----
       into clone before compiling so the function in original form can be
       inlined later.  This pointer points to the clone.  */
    tree inline_decl;
+ 
+   /* unique id for profiling. pid is not suitable because of different
+      number of cfg nodes with -fprofile-generate and -fprofile-use */
+   int pid;
  };
  
  struct cgraph_edge GTY((chain_next ("%h.next_caller"), chain_prev ("%h.prev_caller")))
*************** struct cgraph_asm_node GTY(())
*** 253,258 ****
--- 257,263 ----
  extern GTY(()) struct cgraph_node *cgraph_nodes;
  extern GTY(()) int cgraph_n_nodes;
  extern GTY(()) int cgraph_max_uid;
+ extern GTY(()) int cgraph_max_pid;
  extern bool cgraph_global_info_ready;
  extern bool cgraph_function_flags_ready;
  extern GTY(()) struct cgraph_node *cgraph_nodes_queue;
Index: gcc/value-prof.c
===================================================================
*** gcc/value-prof.c	(revision 119879)
--- gcc/value-prof.c	(working copy)
*************** Software Foundation, 51 Franklin Street,
*** 40,45 ****
--- 40,46 ----
  #include "coverage.h"
  #include "tree.h"
  #include "gcov-io.h"
+ #include "cgraph.h"
  #include "timevar.h"
  #include "tree-pass.h"
  #include "toplev.h"
*************** static struct value_prof_hooks *value_pr
*** 60,65 ****
--- 61,71 ----
        FIXME: This transformation was removed together with RTL based value
        profiling.
  
+    3) Indirect/virtual call specialization. If we can determine most
+       common function callee in indirect/virtual call. We can use this
+       information to improve code effectivity (espetialy info for
+       inliner).
+ 
     Every such optimization should add its requirements for profiled values to
     insn_values_to_profile function.  This function is called from branch_prob
     in profile.c and the requested values are instrumented by it in the first
*************** static bool tree_divmod_fixed_value_tran
*** 81,86 ****
--- 87,93 ----
  static bool tree_mod_pow2_value_transform (tree);
  static bool tree_mod_subtract_transform (tree);
  static bool tree_stringops_transform (block_stmt_iterator *);
+ static bool tree_ic_transform (tree);
  
  /* Allocate histogram value.  */
  
*************** dump_histogram_value (FILE *dump_file, h
*** 254,259 ****
--- 261,279 ----
  	}
        fprintf (dump_file, ".\n");
        break;
+     case HIST_TYPE_INDIR_CALL:
+       fprintf (dump_file, "Indirect call ");
+       if (hist->hvalue.counters)
+ 	{
+ 	   fprintf (dump_file, "value:"HOST_WIDEST_INT_PRINT_DEC
+ 		    " match:"HOST_WIDEST_INT_PRINT_DEC
+ 		    " wrong:"HOST_WIDEST_INT_PRINT_DEC,
+ 		    (HOST_WIDEST_INT) hist->hvalue.counters[0],
+ 		    (HOST_WIDEST_INT) hist->hvalue.counters[1],
+ 		    (HOST_WIDEST_INT) hist->hvalue.counters[2]);
+ 	}
+       fprintf (dump_file, ".\n");
+       break;
     }
  }
  
*************** tree_value_profile_transformations (void
*** 432,438 ****
  	      && (tree_mod_subtract_transform (stmt)
  		  || tree_divmod_fixed_value_transform (stmt)
  		  || tree_mod_pow2_value_transform (stmt)
! 		  || tree_stringops_transform (&bsi)))
  	    {
  	      stmt = bsi_stmt (bsi);
  	      changed = true;
--- 452,459 ----
  	      && (tree_mod_subtract_transform (stmt)
  		  || tree_divmod_fixed_value_transform (stmt)
  		  || tree_mod_pow2_value_transform (stmt)
! 		  || tree_stringops_transform (&bsi)
! 		  || tree_ic_transform (stmt)))
  	    {
  	      stmt = bsi_stmt (bsi);
  	      changed = true;
*************** tree_mod_subtract_transform (tree stmt)
*** 967,972 ****
--- 988,1178 ----
    return true;
  }
  
+ static struct cgraph_node** pid_map = NULL;
+ 
+ /* Initialize map of pids (pid -> cgraph node) */
+ 
+ static void 
+ init_pid_map (void)
+ {
+   struct cgraph_node *n;
+ 
+   if (pid_map != NULL)
+     return;
+ 
+   pid_map = (struct cgraph_node**) xmalloc (sizeof (struct cgraph_node*) * cgraph_max_pid);
+ 
+   for (n = cgraph_nodes; n; n = n->next)
+     {
+       if (n->pid != -1)
+ 	pid_map [n->pid] = n;
+     }
+ }
+ 
+ /* Return cgraph node for function with pid */
+ 
+ static inline struct cgraph_node*
+ find_func_by_pid (int	pid)
+ {
+   init_pid_map ();
+ 
+   return pid_map [pid];
+ }
+ 
+ /* Do transformation
+ 
+   if (actual_callee_addres == addres_of_most_common_function/method)
+     do direct call
+   else
+     old call
+  */
+ 
+ static tree
+ tree_ic (tree stmt, tree call, struct cgraph_node* direct_call, 
+ 	 int prob, gcov_type count, gcov_type all)
+ {
+   tree stmt1, stmt2, stmt3;
+   tree tmp1, tmpv;
+   tree label_decl1 = create_artificial_label ();
+   tree label_decl2 = create_artificial_label ();
+   tree label1, label2;
+   tree bb1end, bb2end, bb3end;
+   tree new_call;
+   basic_block bb, bb2, bb3, bb4;
+   tree optype = build_pointer_type (void_type_node);
+   edge e12, e13, e23, e24, e34;
+   block_stmt_iterator bsi;
+   int region;
+ 
+   bb = bb_for_stmt (stmt);
+   bsi = bsi_for_stmt (stmt);
+   
+   tmpv = create_tmp_var (optype, "PROF");
+   tmp1 = create_tmp_var (optype, "PROF");
+   stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmpv, unshare_expr (TREE_OPERAND (call, 0)));
+   stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, fold_convert (optype, build_addr (direct_call->decl, current_function_decl)));
+   stmt3 = build3 (COND_EXPR, void_type_node,
+ 	    build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
+ 	    build1 (GOTO_EXPR, void_type_node, label_decl2),
+ 	    build1 (GOTO_EXPR, void_type_node, label_decl1));
+   bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
+   bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
+   bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
+   bb1end = stmt3;
+ 
+   label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
+   stmt1 = unshare_expr (stmt);
+   new_call = get_call_expr_in (stmt1);
+   TREE_OPERAND (new_call, 0) = build_addr (direct_call->decl, current_function_decl);
+   bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
+   bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
+   bb2end = stmt1;
+ 
+   label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
+   bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
+   bb3end = stmt;
+ 
+   /* Fix eh edges */
+   region = lookup_stmt_eh_region (stmt1);
+   if (region >= 0 && !TREE_NOTHROW (new_call))
+     {
+       add_stmt_to_eh_region (stmt1, region);
+       make_eh_edges (stmt1);
+     }
+ 
+   /* Fix CFG. */
+   /* Edge e23 connects bb2 to bb3, etc. */
+   e12 = split_block (bb, bb1end);
+   bb2 = e12->dest;
+   bb2->count = count;
+   e23 = split_block (bb2, bb2end);
+   bb3 = e23->dest;
+   bb3->count = all - count;
+   e34 = split_block (bb3, bb3end);
+   bb4 = e34->dest;
+   bb4->count = all;
+ 
+   e12->flags &= ~EDGE_FALLTHRU;
+   e12->flags |= EDGE_FALSE_VALUE;
+   e12->probability = prob;
+   e12->count = count;
+ 
+   e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
+   e13->probability = REG_BR_PROB_BASE - prob;
+   e13->count = all - count;
+ 
+   remove_edge (e23);
+   
+   e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
+   e24->probability = REG_BR_PROB_BASE;
+   e24->count = count;
+ 
+   e34->probability = REG_BR_PROB_BASE;
+   e34->count = all - count;  
+ 
+   return stmt1;
+ }
+ 
+ /*
+   For every checked indirect/virtual call determine if most common pid of
+   function/class method has probability more than 50%. If yes modify code of
+   this call to:
+  */
+ 
+ static bool
+ tree_ic_transform (tree stmt)
+ {
+   histogram_value histogram;
+   gcov_type val, count, all;
+   int prob;
+   tree call, callee, modify;
+   struct cgraph_node *direct_call;
+   
+   call = get_call_expr_in (stmt);
+ 
+   if (!call || TREE_CODE (call) != CALL_EXPR)
+     return false;
+ 
+   callee = TREE_OPERAND (call, 0);
+ 
+   if (TREE_CODE (callee) == ADDR_EXPR)
+     return false;
+ 
+   histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL);
+   if (!histogram)
+     return false;
+ 
+   val = histogram->hvalue.counters [0];
+   count = histogram->hvalue.counters [1];
+   all = histogram->hvalue.counters [2];
+   gimple_remove_histogram_value (cfun, stmt, histogram);
+ 
+   if (4 * count <= 3 * all)
+     return false;
+ 
+   prob = (count * REG_BR_PROB_BASE + all / 2) / all;
+   direct_call = find_func_by_pid ((int)val);
+ 
+   if (direct_call == NULL)
+     return false;
+ 
+   modify = tree_ic (stmt, call, direct_call, prob, count, all);
+ 
+   if (dump_file)
+     {
+       fprintf (dump_file, "Indirect call -> direct call ");
+       print_generic_expr (dump_file, call, TDF_SLIM);
+       fprintf (dump_file, "=> ");
+       print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
+       fprintf (dump_file, " transformation on insn ");
+       print_generic_stmt (dump_file, stmt, TDF_SLIM);
+       fprintf (dump_file, " to ");
+       print_generic_stmt (dump_file, modify, TDF_SLIM);
+     }
+ 
+   return true;
+ }
+ 
  /* Return true if the stringop FNDECL with ARGLIST shall be profiled.  */
  static bool
  interesting_stringop_to_profile_p (tree fndecl, tree arglist)
*************** tree_divmod_values_to_profile (tree stmt
*** 1260,1265 ****
--- 1466,1498 ----
      }
  }
  
+ /* Find calls inside STMT for that we want to measure histograms for 
+    indirect/virtual call optimization. */ 
+ static void
+ tree_indirect_call_to_profile (tree stmt, histogram_values *values)
+ {
+   tree			call;
+   tree			callee;
+ 
+   call = get_call_expr_in (stmt);
+ 
+   if (!call || TREE_CODE (call) != CALL_EXPR)
+     return;
+ 
+   callee = TREE_OPERAND (call, 0);
+   
+   if (TREE_CODE (callee) == ADDR_EXPR)
+     return;
+ 
+   VEC_reserve (histogram_value, heap, *values, 3);
+ 
+   VEC_quick_push (histogram_value, *values, 
+ 		  gimple_alloc_histogram_value (cfun, HIST_TYPE_INDIR_CALL,
+ 						stmt, callee));
+ 
+   return;
+ }
+ 
  /* Find values inside STMT for that we want to measure histograms for
     string operations.  */
  static void
*************** tree_values_to_profile (tree stmt, histo
*** 1303,1308 ****
--- 1536,1542 ----
      {
        tree_divmod_values_to_profile (stmt, values);
        tree_stringops_values_to_profile (stmt, values);
+       tree_indirect_call_to_profile (stmt, values);
      }
  }
  
*************** tree_find_values_to_profile (histogram_v
*** 1339,1344 ****
--- 1573,1582 ----
  	  hist->n_counters = 4;
  	  break;
  
+  	case HIST_TYPE_INDIR_CALL:
+  	  hist->n_counters = 3;
+ 	  break;
+ 
  	default:
  	  gcc_unreachable ();
  	}
Index: gcc/value-prof.h
===================================================================
*** gcc/value-prof.h	(revision 119879)
--- gcc/value-prof.h	(working copy)
*************** enum hist_type
*** 29,36 ****
    HIST_TYPE_POW2,	/* Histogram of power of 2 values.  */
    HIST_TYPE_SINGLE_VALUE, /* Tries to identify the value that is (almost)
  			   always constant.  */
!   HIST_TYPE_CONST_DELTA	/* Tries to identify the (almost) always constant
  			   difference between two evaluations of a value.  */
  };
  
  #define COUNTER_FOR_HIST_TYPE(TYPE) ((int) (TYPE) + GCOV_FIRST_VALUE_COUNTER)
--- 29,38 ----
    HIST_TYPE_POW2,	/* Histogram of power of 2 values.  */
    HIST_TYPE_SINGLE_VALUE, /* Tries to identify the value that is (almost)
  			   always constant.  */
!   HIST_TYPE_CONST_DELTA, /* Tries to identify the (almost) always constant
  			   difference between two evaluations of a value.  */
+   HIST_TYPE_INDIR_CALL   /* Tries to identify the function that is (almost) 
+ 			    called in indirect call */
  };
  
  #define COUNTER_FOR_HIST_TYPE(TYPE) ((int) (TYPE) + GCOV_FIRST_VALUE_COUNTER)
*************** struct profile_hooks {
*** 94,99 ****
--- 96,104 ----
    /* Insert code to find the most common value of a difference between two
       evaluations of an expression.  */
    void (*gen_const_delta_profiler) (histogram_value, unsigned, unsigned);
+ 
+   /* Insert code to find the most common indirect call */
+   void (*gen_ic_profiler) (histogram_value, unsigned, unsigned);
  };
  
  histogram_value gimple_histogram_value (struct function *, tree);
Index: gcc/cgraphunit.c
===================================================================
*** gcc/cgraphunit.c	(revision 119879)
--- gcc/cgraphunit.c	(working copy)
*************** cgraph_finalize_function (tree decl, boo
*** 385,390 ****
--- 385,391 ----
    if (node->local.finalized)
      cgraph_reset_node (node);
  
+   node->pid = cgraph_max_pid ++;
    notice_global_symbol (decl);
    node->decl = decl;
    node->local.finalized = true;
Index: gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c	(revision 0)
--- gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c	(revision 0)
***************
*** 0 ****
--- 1,45 ----
+ /* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-tree_profile" } */
+ 
+ static int a1 (void)
+ {
+     return 10;
+ }
+ 
+ static int a2 (void)
+ {
+     return 0;
+ }
+ 
+ typedef int (*tp) (void);
+ 
+ static tp aa [] = {a2, a1, a1, a1, a1};
+ 
+ void setp (int (**pp) (void), int i)
+ {
+   if (!i)
+     *pp = aa [i];
+   else
+     *pp = aa [(i & 2) + 1];
+ }
+ 
+ int
+ main (void)
+ {
+   int (*p) (void);
+   int  i;
+ 
+   for (i = 0; i < 10; i ++)
+     {
+ 	setp (&p, i);
+ 	p ();
+     }
+   
+   return 0;
+ }
+ 
+ /* { dg-final-use { scan-tree-dump "Indirect call -> direct call.* a1 transformation on insn" "tree_profile"} } */
+ /* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */                                                                                
+ /* { dg-final-use { cleanup-tree-dump "optimized" } } */                                                                                              
+ /* { dg-final-use { cleanup-tree-dump "tree_profile" } } */
+                                                                                            
+ 
Index: gcc/testsuite/g++.dg/dg.exp
===================================================================
*** gcc/testsuite/g++.dg/dg.exp	(revision 119879)
--- gcc/testsuite/g++.dg/dg.exp	(working copy)
*************** set tests [prune $tests $srcdir/$subdir/
*** 41,46 ****
--- 41,47 ----
  set tests [prune $tests $srcdir/$subdir/tls/*]
  set tests [prune $tests $srcdir/$subdir/vect/*]
  set tests [prune $tests $srcdir/$subdir/gomp/*]
+ set tests [prune $tests $srcdir/$subdir/tree-prof/*]
  
  # Main loop.
  dg-runtest $tests "" $DEFAULT_CXXFLAGS
Index: gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
===================================================================
*** gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C	(revision 0)
--- gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C	(revision 0)
***************
*** 0 ****
--- 1,39 ----
+ /* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-tree_profile" } */
+ 
+ struct A {
+   A () {}
+ 
+   virtual int AA (void)
+   { return 0; }
+ 
+ };
+ 
+ struct B : public A {
+   B () {}
+ 
+   virtual int AA (void)
+   { return 1; }
+ };
+ 
+ int
+ main (void)
+ {
+   A a;
+   B b;
+   
+   A* p;
+ 
+   p = &a;
+   p->AA ();
+ 
+   p = &b;
+   p->AA ();
+   
+   return 0;
+ }
+ 
+ /* { dg-final-use { scan-tree-dump "Indirect call -> direct call.* AA transformation on insn" "tree_profile"} } */
+ /* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */                                                                                
+ /* { dg-final-use { cleanup-tree-dump "optimized" } } */                                                                                              
+ /* { dg-final-use { cleanup-tree-dump "tree_profile" } } */                                                                                           
+ 
Index: gcc/testsuite/g++.dg/tree-prof/tree-prof.exp
===================================================================
*** gcc/testsuite/g++.dg/tree-prof/tree-prof.exp	(revision 0)
--- gcc/testsuite/g++.dg/tree-prof/tree-prof.exp	(revision 0)
***************
*** 0 ****
--- 1,53 ----
+ #   Copyright (C) 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+ 
+ # This program is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  
+ 
+ # Test the functionality of programs compiled with profile-directed block
+ # ordering using -fprofile-generate followed by -fbranch-use.
+ 
+ load_lib target-supports.exp
+ 
+ # Some targets don't support tree profiling.
+ if { ![check_profiling_available ""] } {
+     return
+ }
+ 
+ # The procedures in profopt.exp need these parameters.
+ set tool g++
+ set prof_ext "gcda gcno"
+ 
+ # Override the list defined in profopt.exp.
+ set PROFOPT_OPTIONS [list {}]
+ 
+ if $tracelevel then {
+     strace $tracelevel
+ }
+ 
+ # Load support procs.
+ load_lib profopt.exp
+ 
+ # These are globals used by profopt-execute.  The first is options
+ # needed to generate profile data, the second is options to use the
+ # profile data.
+ set profile_option "-fprofile-generate"
+ set feedback_option "-fprofile-use"
+ 
+ foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.C]] {
+     # If we're only testing specific files and this isn't one of them, skip it.
+     if ![runtest_file_p $runtests $src] then {
+         continue
+     }
+     profopt-execute $src
+ }
Index: gcc/gcov-io.h
===================================================================
*** gcc/gcov-io.h	(revision 119879)
--- gcc/gcov-io.h	(working copy)
*************** typedef HOST_WIDEST_INT gcov_type;
*** 327,349 ****
  #define GCOV_COUNTER_V_SINGLE	3  /* The most common value of expression.  */
  #define GCOV_COUNTER_V_DELTA	4  /* The most common difference between
  				      consecutive values of expression.  */
! #define GCOV_LAST_VALUE_COUNTER 4  /* The last of counters used for value
  				      profiling.  */
! #define GCOV_COUNTERS		5
  
  /* Number of counters used for value profiling.  */
  #define GCOV_N_VALUE_COUNTERS \
    (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1)
    
    /* A list of human readable names of the counters */
! #define GCOV_COUNTER_NAMES	{"arcs", "interval", "pow2", "single", "delta"}
    
    /* Names of merge functions for counters.  */
  #define GCOV_MERGE_FUNCTIONS	{"__gcov_merge_add",	\
  				 "__gcov_merge_add",	\
  				 "__gcov_merge_add",	\
  				 "__gcov_merge_single",	\
! 				 "__gcov_merge_delta"}
    
  /* Convert a counter index to a tag.  */
  #define GCOV_TAG_FOR_COUNTER(COUNT)				\
--- 327,352 ----
  #define GCOV_COUNTER_V_SINGLE	3  /* The most common value of expression.  */
  #define GCOV_COUNTER_V_DELTA	4  /* The most common difference between
  				      consecutive values of expression.  */
! 
! #define GCOV_COUNTER_V_INDIR	5  /* The most common indirect address */
! #define GCOV_LAST_VALUE_COUNTER 5  /* The last of counters used for value
  				      profiling.  */
! #define GCOV_COUNTERS		6
  
  /* Number of counters used for value profiling.  */
  #define GCOV_N_VALUE_COUNTERS \
    (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1)
    
    /* A list of human readable names of the counters */
! #define GCOV_COUNTER_NAMES	{"arcs", "interval", "pow2", "single", "delta", "indirect_call"}
    
    /* Names of merge functions for counters.  */
  #define GCOV_MERGE_FUNCTIONS	{"__gcov_merge_add",	\
  				 "__gcov_merge_add",	\
  				 "__gcov_merge_add",	\
  				 "__gcov_merge_single",	\
! 				 "__gcov_merge_delta",  \
! 				 "__gcov_merge_single" }
    
  /* Convert a counter index to a tag.  */
  #define GCOV_TAG_FOR_COUNTER(COUNT)				\
Index: gcc/profile.c
===================================================================
*** gcc/profile.c	(revision 119879)
--- gcc/profile.c	(working copy)
*************** instrument_values (histogram_values valu
*** 192,197 ****
--- 192,201 ----
  	  t = GCOV_COUNTER_V_DELTA;
  	  break;
  
+  	case HIST_TYPE_INDIR_CALL:
+  	  t = GCOV_COUNTER_V_INDIR;
+  	  break;
+ 
  	default:
  	  gcc_unreachable ();
  	}
*************** instrument_values (histogram_values valu
*** 216,221 ****
--- 220,229 ----
  	  (profile_hooks->gen_const_delta_profiler) (hist, t, 0);
  	  break;
  
+  	case HIST_TYPE_INDIR_CALL:
+  	  (profile_hooks->gen_ic_profiler) (hist, t, 0);
+   	  break;
+ 
  	default:
  	  gcc_unreachable ();
  	}
Index: gcc/tree-profile.c
===================================================================
*** gcc/tree-profile.c	(revision 119879)
--- gcc/tree-profile.c	(working copy)
*************** Software Foundation, 51 Franklin Street,
*** 45,59 ****
--- 45,92 ----
  #include "timevar.h"
  #include "value-prof.h"
  #include "ggc.h"
+ #include "cgraph.h"
  
  static GTY(()) tree gcov_type_node;
  static GTY(()) tree tree_interval_profiler_fn;
  static GTY(()) tree tree_pow2_profiler_fn;
  static GTY(()) tree tree_one_value_profiler_fn;
+ static GTY(()) tree tree_indirect_call_profiler_fn;
  
  
+ static GTY(()) tree ic_void_ptr_var;
+ static GTY(()) tree ic_gcov_type_ptr_var;
+ static GTY(()) tree ptr_void;
+ 
  /* Do initialization work for the edge profiler.  */
  
+ /* Add code:
+    static gcov*	__gcov_indirect_call_counters; // pointer to actual counter
+    static void*	__gcov_indirect_call_callee; // actual callie addres
+ */
+ static void
+ tree_init_ic_make_global_vars (void)
+ {
+   tree  gcov_type_ptr;
+ 
+   ptr_void = build_pointer_type (void_type_node);
+   
+   ic_void_ptr_var = build_decl (VAR_DECL, get_identifier ("__gcov_indirect_call_callee"), ptr_void);
+   TREE_STATIC (ic_void_ptr_var) = 1;
+   TREE_PUBLIC (ic_void_ptr_var) = 0;
+   DECL_ARTIFICIAL (ic_void_ptr_var) = 1;
+   DECL_INITIAL (ic_void_ptr_var) = NULL;
+   assemble_variable (ic_void_ptr_var, 0, 0, 0);
+ 
+   gcov_type_ptr = build_pointer_type (get_gcov_type ());
+   ic_gcov_type_ptr_var = build_decl (VAR_DECL, get_identifier ("__gcov_indirect_call_counters"), gcov_type_ptr);
+   TREE_STATIC (ic_gcov_type_ptr_var) = 1;
+   TREE_PUBLIC (ic_gcov_type_ptr_var) = 0;
+   DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1;
+   DECL_INITIAL (ic_gcov_type_ptr_var) = NULL;
+   assemble_variable (ic_gcov_type_ptr_var, 0, 0, 0);
+ }
+ 
  static void
  tree_init_edge_profiler (void)
  {
*************** tree_init_edge_profiler (void)
*** 61,66 ****
--- 94,100 ----
    tree pow2_profiler_fn_type;
    tree one_value_profiler_fn_type;
    tree gcov_type_ptr;
+   tree ic_profiler_fn_type;
  
    if (!gcov_type_node)
      {
*************** tree_init_edge_profiler (void)
*** 93,98 ****
--- 127,144 ----
        tree_one_value_profiler_fn
  	      = build_fn_decl ("__gcov_one_value_profiler",
  				     one_value_profiler_fn_type);
+ 
+       tree_init_ic_make_global_vars ();
+       
+       /* void (*) (gcov_type *, gcov_type, void *, void *)  */
+       ic_profiler_fn_type
+ 	       = build_function_type_list (void_type_node,
+ 					  gcov_type_ptr, gcov_type_node,
+ 					  ptr_void,
+ 					  ptr_void, NULL_TREE);
+       tree_indirect_call_profiler_fn
+ 	      = build_fn_decl ("__gcov_indirect_call_profiler",
+ 				     ic_profiler_fn_type);
      }
  }
  
*************** tree_gen_one_value_profiler (histogram_v
*** 201,206 ****
--- 247,332 ----
    bsi_insert_before (&bsi, call, BSI_SAME_STMT);
  }
  
+ 
+ /* Output instructions as GIMPLE trees for code to find the most
+    common called function in indirect call.  
+    VALUE is the call expression whose indirect callie is profiled.
+    TAG is the tag of the section for counters, BASE is offset of the
+    counter position.  */
+ 
+ static void
+ tree_gen_ic_profiler (histogram_value value, unsigned tag, unsigned base)
+ {
+   tree tmp1, stmt1, stmt2, stmt3;
+   tree stmt = value->hvalue.stmt;
+   block_stmt_iterator bsi = bsi_for_stmt (stmt);
+   tree ref = tree_coverage_counter_ref (tag, base), ref_ptr;
+ 
+   ref_ptr = force_gimple_operand_bsi (&bsi,
+ 				      build_addr (ref, current_function_decl),
+ 				      true, NULL_TREE);
+ 
+   /* Insert code:
+     
+     __gcov_indirect_call_counters = get_relevant_counter_ptr (); 
+     __gcov_indirect_call_callee = (void *) indirect call argument;
+    */
+ 
+   tmp1 = create_tmp_var (ptr_void, "PROF");
+   stmt1 = build2 (GIMPLE_MODIFY_STMT, build_pointer_type (get_gcov_type ()), ic_gcov_type_ptr_var, ref_ptr);
+   stmt2 = build2 (GIMPLE_MODIFY_STMT, ptr_void, tmp1, unshare_expr (value->hvalue.value));
+   stmt3 = build2 (GIMPLE_MODIFY_STMT, ptr_void, ic_void_ptr_var, tmp1);
+ 
+   bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
+   bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
+   bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
+ }
+ 
+ 
+ /* Output instructions as GIMPLE trees for code to find the most
+    common called function in indirect call. Insert instructions at the
+    begining of every possible called function.
+   */
+ 
+ static void
+ tree_gen_ic_func_profiler (void)
+ {
+   struct cgraph_node * c_node = cgraph_node (current_function_decl);
+   block_stmt_iterator bsi;
+   edge e;
+   basic_block bb;
+   edge_iterator ei;
+   tree stmt1;
+   tree args, tree_uid, cur_func;
+ 
+   if (flag_unit_at_a_time)
+     {
+       if (!c_node->needed)
+ 	return;
+     }
+   else
+     c_node->needed = true;
+ 
+   tree_init_edge_profiler ();
+   
+   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
+     {
+       bb = split_edge (e);
+       bsi = bsi_start (bb);
+       cur_func = force_gimple_operand_bsi (&bsi,
+ 					   build_addr (current_function_decl, current_function_decl),
+ 					   true, NULL_TREE);
+       tree_uid = build_int_cst (gcov_type_node, c_node->pid);
+       args = tree_cons (NULL_TREE, ic_gcov_type_ptr_var,
+ 			tree_cons (NULL_TREE, tree_uid,
+ 				   tree_cons (NULL_TREE, cur_func,
+ 					      tree_cons (NULL_TREE, ic_void_ptr_var,
+ 							 NULL_TREE))));
+       stmt1 = build_function_call_expr (tree_indirect_call_profiler_fn, args);
+       bsi_insert_after (&bsi, stmt1, BSI_SAME_STMT);
+     }
+ }
+ 
  /* Output instructions as GIMPLE trees for code to find the most common value 
     of a difference between two evaluations of an expression.
     VALUE is the expression whose value is profiled.  TAG is the tag of the
*************** static unsigned int
*** 238,243 ****
--- 364,374 ----
  tree_profiling (void)
  {
    branch_prob ();
+ 
+   if (! flag_branch_probabilities 
+       && flag_profile_values)
+     tree_gen_ic_func_profiler ();
+ 
    if (flag_branch_probabilities
        && flag_profile_values
        && flag_value_profile_transformations)
*************** struct profile_hooks tree_profile_hooks 
*** 301,307 ****
    tree_gen_interval_profiler,   /* gen_interval_profiler */
    tree_gen_pow2_profiler,       /* gen_pow2_profiler */
    tree_gen_one_value_profiler,  /* gen_one_value_profiler */
!   tree_gen_const_delta_profiler /* gen_const_delta_profiler */
  };
  
  #include "gt-tree-profile.h"
--- 432,439 ----
    tree_gen_interval_profiler,   /* gen_interval_profiler */
    tree_gen_pow2_profiler,       /* gen_pow2_profiler */
    tree_gen_one_value_profiler,  /* gen_one_value_profiler */
!   tree_gen_const_delta_profiler,/* gen_const_delta_profiler */
!   tree_gen_ic_profiler,		/* gen_ic_profiler */
  };
  
  #include "gt-tree-profile.h"
Index: gcc/Makefile.in
===================================================================
*** gcc/Makefile.in	(revision 119879)
--- gcc/Makefile.in	(working copy)
*************** LIB2FUNCS_ST = _eprintf __gcc_bcmp
*** 1058,1064 ****
  LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \
      _gcov_fork _gcov_execl _gcov_execlp _gcov_execle \
      _gcov_execv _gcov_execvp _gcov_execve \
!     _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler
  
  FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \
      _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \
--- 1058,1065 ----
  LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \
      _gcov_fork _gcov_execl _gcov_execlp _gcov_execle \
      _gcov_execv _gcov_execvp _gcov_execve \
!     _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \
!     _gcov_indirect_call_profiler
  
  FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \
      _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \
*************** profile.o : profile.c $(CONFIG_H) $(SYST
*** 2420,2426 ****
  tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
     $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \
     $(FUNCTION_H) toplev.h $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \
!    tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) $(GGC_H) gt-tree-profile.h
  value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \
     $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H) $(DIAGNOSTIC_H) \
--- 2421,2427 ----
  tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
     $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \
     $(FUNCTION_H) toplev.h $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \
!    tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) $(GGC_H) gt-tree-profile.h $(CGRAPH_H)
  value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \
     $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H) $(DIAGNOSTIC_H) \
Index: gcc/libgcov.c
===================================================================
*** gcc/libgcov.c	(revision 119879)
--- gcc/libgcov.c	(working copy)
*************** __gcov_one_value_profiler (gcov_type *co
*** 753,758 ****
--- 753,768 ----
  }
  #endif
  
+ #ifdef L_gcov_indirect_call_profiler
+ /* Tries to determine the most common value among its inputs. */
+ void
+ __gcov_indirect_call_profiler (gcov_type* counter, gcov_type value, void* cur_func, void* callee_func)
+ {
+   if (cur_func == callee_func)
+     __gcov_one_value_profiler (counter, value);
+ }
+ #endif
+ 
  #ifdef L_gcov_fork
  /* A wrapper for the fork function.  Flushes the accumulated profiling data, so
     that they are not counted twice.  */


More information about the Gcc-patches mailing list