Index: gcc/cgraphbuild.c =================================================================== --- gcc/cgraphbuild.c (revision 191813) +++ gcc/cgraphbuild.c (working copy) @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see #include "except.h" #include "l-ipo.h" #include "ipa-inline.h" +#include "auto-profile.h" /* Context of record_reference. */ struct record_reference_ctx @@ -497,6 +498,9 @@ build_cgraph_edges (void) tree decl; unsigned ix; + if (flag_auto_profile) + afdo_set_current_function_count (); + /* Create the callgraph edges and record the nodes referenced by the function. body. */ FOR_EACH_BB (bb) @@ -607,8 +611,9 @@ rebuild_cgraph_edges (void) cgraph_node_remove_callees (node); ipa_remove_all_references (&node->ref_list); - node->count = ENTRY_BLOCK_PTR->count; - node->max_bb_count = 0; + if (!flag_auto_profile) + node->count = ENTRY_BLOCK_PTR->count; + node->max_bb_count = node->count; FOR_EACH_BB (bb) { Index: gcc/cgraph.c =================================================================== --- gcc/cgraph.c (revision 191813) +++ gcc/cgraph.c (working copy) @@ -101,6 +101,7 @@ The callgraph: #include "l-ipo.h" #include "ipa-inline.h" #include "opts.h" +#include "auto-profile.h" const char * const ld_plugin_symbol_resolution_names[]= { @@ -2183,10 +2184,17 @@ cgraph_clone_node (struct cgraph_node *n, tree dec new_node->count = count; new_node->max_bb_count = count; if (n->count) - new_node->max_bb_count = ((n->max_bb_count + n->count / 2) - / n->count) * count; + { + new_node->max_bb_count = ((n->max_bb_count + n->count / 2) + / n->count) * count; + new_node->total_count = ((n->total_count + n->count / 2) + / n->count) * count; + } new_node->is_versioned_clone = n->is_versioned_clone; new_node->frequency = n->frequency; + if (flag_auto_profile && count > 0 + && new_node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) + new_node->frequency = NODE_FREQUENCY_NORMAL; new_node->clone = n->clone; new_node->clone.tree_map = 0; if (n->count) @@ -2268,6 +2276,9 @@ clone_function_name (tree decl, const char *suffix prefix[len] = '_'; #endif ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++); + if (flag_auto_profile) + afdo_add_bfd_name_mapping (xstrdup (tmp_name), + xstrdup (lang_hooks.dwarf_name (decl, 0))); return get_identifier (tmp_name); } Index: gcc/cgraph.h =================================================================== --- gcc/cgraph.h (revision 191813) +++ gcc/cgraph.h (working copy) @@ -198,6 +198,8 @@ struct GTY((chain_next ("%h.next"), chain_prev ("% /* Expected number of executions: calculated in profile.c. */ gcov_type count; + /* Total sampled count of the function. */ + gcov_type total_count; /* Maximum count of any basic block in the function. */ gcov_type max_bb_count; /* How to scale counts at materialization time; used to merge Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 191813) +++ gcc/tree-pass.h (working copy) @@ -465,6 +465,7 @@ extern struct gimple_opt_pass pass_tree_convert_bu extern struct simple_ipa_opt_pass pass_ipa_lower_emutls; extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility; extern struct simple_ipa_opt_pass pass_ipa_tree_profile; +extern struct simple_ipa_opt_pass pass_ipa_auto_profile; extern struct simple_ipa_opt_pass pass_early_local_passes; Index: gcc/cfghooks.c =================================================================== --- gcc/cfghooks.c (revision 191813) +++ gcc/cfghooks.c (working copy) @@ -775,6 +775,19 @@ make_forwarder_block (basic_block bb, bool (*redir } } + if (flag_auto_profile) + { + dummy->frequency = 0; + dummy->count = 0; + for (ei = ei_start (dummy->preds); (e = ei_safe_edge (ei)); ei_next (&ei)) + { + dummy->frequency += EDGE_FREQUENCY (e); + dummy->count += e->count; + } + if (dummy->frequency > REG_BR_PROB_BASE) + dummy->frequency = REG_BR_PROB_BASE; + } + if (dom_info_available_p (CDI_DOMINATORS)) { VEC (basic_block, heap) *doms_to_fix = VEC_alloc (basic_block, heap, 2); Index: gcc/ipa-inline-transform.c =================================================================== --- gcc/ipa-inline-transform.c (revision 191813) +++ gcc/ipa-inline-transform.c (working copy) @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-inline.h" #include "tree-pass.h" #include "l-ipo.h" +#include "auto-profile.h" #include "diagnostic-core.h" int ncalls_inlined; @@ -165,6 +166,16 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d *overall_size -= inline_summary (e->callee)->size; nfunctions_inlined++; } + if (flag_auto_profile) + { + e->callee->total_count = afdo_get_callsite_count (e); + /* For functions that does not exist in the profile-collection + build, we need to add the scale factor to the copy_scale + table. Because the cloned callee only has as scaled portion + of the counts from the actual callee. */ + if (e->callee->total_count == 0) + afdo_add_copy_scale (e); + } duplicate = false; e->callee->local.externally_visible = false; update_noncloned_frequencies (e->callee, e->frequency); @@ -172,11 +183,46 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d else { struct cgraph_node *n; + gcov_type count = e->count; + gcov_type callsite_total_count = 0; + if (flag_auto_profile) + { + callsite_total_count = afdo_get_callsite_count (e); + /* If the callsite is inlined in the profile-collection build, + i.e. the cloned callee has its separate profile, we will use + this separate profile to annotate the callee, and the real + callee body will not be affected. Thus here we need to disable + update_original. */ + if (callsite_total_count > 0) + update_original = false; + } n = cgraph_clone_node (e->callee, e->callee->decl, - e->count, e->frequency, + count, e->frequency, update_original, NULL, true); + if (flag_auto_profile) + { + n->total_count = callsite_total_count; + if (callsite_total_count == 0) + afdo_add_copy_scale (e); + } cgraph_redirect_edge_callee (e, n); } + if (flag_auto_profile && e->callee->total_count > 0) + { + /* The callee's total count will be non-zero if the callsite + was inlined in the profile-collection build, In this case, + the original callee may be label unlikely_executed, which + may prevent its callees being inlined. Thus we need to reset + its frequency to normal. */ + if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) + e->callee->frequency = NODE_FREQUENCY_NORMAL; + /* we do not have enough information to calculate the node count + and max_bb_count. Thus we set them to the same value to make + other optimizations aware that they are from cloned inline + instances. */ + e->callee->count = e->callee->total_count; + e->callee->max_bb_count = e->callee->total_count; + } } if (e->caller->global.inlined_to) Index: gcc/toplev.c =================================================================== --- gcc/toplev.c (revision 191813) +++ gcc/toplev.c (working copy) @@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-alias.h" #include "plugin.h" #include "tree-threadsafe-analyze.h" +#include "auto-profile.h" #if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO) #include "dwarf2out.h" @@ -700,6 +701,10 @@ compile_file (void) } #endif + /* Auto profile finalization. */ + if (flag_auto_profile) + end_auto_profile (); + /* Invoke registered plugin callbacks. */ invoke_plugin_callbacks (PLUGIN_FINISH_UNIT, NULL); @@ -1494,6 +1499,9 @@ process_options (void) error ("target system does not support the \"%s\" debug format", debug_type_names[write_symbols]); + if (flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE) + debug_hooks = &auto_profile_debug_hooks; + /* We know which debug output will be used so we can set flag_var_tracking and flag_var_tracking_uninit if the user has not specified them. */ if (debug_info_level < DINFO_LEVEL_NORMAL Index: gcc/debug.h =================================================================== --- gcc/debug.h (revision 191813) +++ gcc/debug.h (working copy) @@ -171,6 +171,7 @@ extern const struct gcc_debug_hooks sdb_debug_hook extern const struct gcc_debug_hooks xcoff_debug_hooks; extern const struct gcc_debug_hooks dwarf2_debug_hooks; extern const struct gcc_debug_hooks vmsdbg_debug_hooks; +extern const struct gcc_debug_hooks auto_profile_debug_hooks; /* Dwarf2 frame information. */ Index: gcc/cgraphunit.c =================================================================== --- gcc/cgraphunit.c (revision 191813) +++ gcc/cgraphunit.c (working copy) @@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-utils.h" #include "lto-streamer.h" #include "l-ipo.h" +#include "auto-profile.h" static void cgraph_expand_all_functions (void); static void cgraph_mark_functions_to_output (void); @@ -1346,6 +1347,11 @@ cgraph_finalize_compilation_unit (void) backend_entered_p = true; + /* Before compilation, auto profile will process the profile to build the + hash tables for later optimizations. */ + if (flag_auto_profile) + process_auto_profile (); + /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ if (flag_lto) lto_streamer_hooks_init (); @@ -2517,6 +2523,7 @@ cgraph_copy_node_for_versioning (struct cgraph_nod new_version->rtl = old_version->rtl; new_version->reachable = true; new_version->count = old_version->count; + new_version->total_count = old_version->total_count; new_version->max_bb_count = old_version->max_bb_count; new_version->is_versioned_clone = true; Index: gcc/regs.h =================================================================== --- gcc/regs.h (revision 191813) +++ gcc/regs.h (working copy) @@ -136,6 +136,7 @@ extern size_t reg_info_p_size; frequency. */ #define REG_FREQ_FROM_BB(bb) (optimize_size \ || (flag_branch_probabilities \ + && !flag_auto_profile \ && !ENTRY_BLOCK_PTR->count) \ ? REG_FREQ_MAX \ : ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\ Index: gcc/gcov-io.h =================================================================== --- gcc/gcov-io.h (revision 191813) +++ gcc/gcov-io.h (working copy) @@ -456,6 +456,9 @@ typedef unsigned HOST_WIDEST_INT gcov_type_unsigne #define GCOV_TAG_PMU_BRANCH_MISPREDICT_LENGTH (8) #define GCOV_TAG_PMU_TOOL_HEADER ((gcov_unsigned_t)0xa9000000) #define GCOV_TAG_MODULE_INFO ((gcov_unsigned_t)0xab000000) +#define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000) +#define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000) +#define GCOV_TAG_AFDO_MODULE_GROUPING ((gcov_unsigned_t)0xae000000) #define GCOV_TAG_PMU_STRING_TABLE_ENTRY ((gcov_unsigned_t)0xad000000) #define GCOV_TAG_PMU_STRING_TABLE_ENTRY_LENGTH(filename) \ (gcov_string_length (filename) + 1) Index: gcc/ira-int.h =================================================================== --- gcc/ira-int.h (revision 191813) +++ gcc/ira-int.h (working copy) @@ -44,7 +44,8 @@ along with GCC; see the file COPYING3. If not see executed, frequency is always equivalent. Otherwise rescale the edge frequency. */ #define REG_FREQ_FROM_EDGE_FREQ(freq) \ - (optimize_size || (flag_branch_probabilities && !ENTRY_BLOCK_PTR->count) \ + (optimize_size || (flag_branch_probabilities \ + && !flag_auto_profile && !ENTRY_BLOCK_PTR->count) \ ? REG_FREQ_MAX : (freq * REG_FREQ_MAX / BB_FREQ_MAX) \ ? (freq * REG_FREQ_MAX / BB_FREQ_MAX) : 1) Index: gcc/ipa-inline.c =================================================================== --- gcc/ipa-inline.c (revision 191813) +++ gcc/ipa-inline.c (working copy) @@ -119,10 +119,12 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "ipa-inline.h" #include "ipa-utils.h" +#include "auto-profile.h" /* Statistics we collect about inlining algorithm. */ static int overall_size; static gcov_type max_count; +static gcov_type max_total_count; /* Global variable to denote if it is in ipa-inline pass. */ bool is_in_ipa_inline = false; @@ -478,6 +480,9 @@ edge_hot_enough_p (struct cgraph_edge *edge) { if (cgraph_maybe_hot_edge_p (edge)) return true; + if (flag_auto_profile + && maybe_hot_count_p (afdo_get_callsite_count (edge))) + return true; if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER) && maybe_hot_count_p (edge->caller->max_bb_count)) return true; @@ -860,6 +865,16 @@ edge_badness (struct cgraph_edge *edge, bool dump) ((int) ((double) edge->count * INT_MIN / 2 / max_count / 512) * relative_time_benefit (callee_info, edge, time_growth)) / growth; + if (flag_auto_profile && max_total_count > 0) + { + gcov_type afdo_badness = + ((int) + ((double) afdo_get_callsite_count (edge) * INT_MIN / 2 / + max_total_count / 512) * + relative_time_benefit (callee_info, edge, time_growth)) / growth; + if (afdo_badness < badness) + badness = afdo_badness; + } /* Be sure that insanity of the profile won't lead to increasing counts in the scalling and thus to overflow in the computation above. */ @@ -1445,6 +1460,7 @@ inline_small_functions (void) metrics. */ max_count = 0; + max_total_count = 0; initialize_growth_caches (); FOR_EACH_DEFINED_FUNCTION (node) @@ -1459,6 +1475,9 @@ inline_small_functions (void) initial_size += info->size; } + if (max_total_count < node->total_count) + max_total_count = node->total_count; + for (edge = node->callers; edge; edge = edge->next_caller) if (max_count < edge->count) max_count = edge->count; @@ -1493,6 +1512,7 @@ inline_small_functions (void) } gcc_assert (in_lto_p + || flag_auto_profile || !max_count || (profile_info && flag_branch_probabilities)); @@ -1526,7 +1546,7 @@ inline_small_functions (void) of date value on it, we re-insert it now. */ current_badness = edge_badness (edge, false); gcc_assert (cached_badness == current_badness); - gcc_assert (current_badness >= badness); + gcc_assert (flag_auto_profile || current_badness >= badness); if (current_badness != badness) { edge->aux = fibheap_insert (heap, current_badness, edge); @@ -1646,7 +1666,7 @@ inline_small_functions (void) is propagated from caller. We don't track when this happen and thus we need to recompute everything all the time. Once this is solved, "|| 1" should go away. */ - if (callee->global.inlined_to || 1) + if (callee->global.inlined_to)// || 1) update_all_callee_keys (heap, callee, updated_nodes); else update_callee_keys (heap, edge->callee, updated_nodes); Index: gcc/dwarf2out.c =================================================================== --- gcc/dwarf2out.c (revision 191813) +++ gcc/dwarf2out.c (working copy) @@ -2722,6 +2722,41 @@ const struct gcc_debug_hooks dwarf2_debug_hooks = 1, /* start_end_main_source_file */ TYPE_SYMTAB_IS_DIE /* tree_type_symtab_field */ }; + +const struct gcc_debug_hooks auto_profile_debug_hooks = +{ + debug_nothing_charstar, + debug_nothing_charstar, + debug_nothing_void, + debug_nothing_int_charstar, + debug_nothing_int_charstar, + debug_nothing_int_charstar, + debug_nothing_int, + debug_nothing_int_int, /* begin_block */ + debug_nothing_int_int, /* end_block */ + dwarf2out_ignore_block, /* ignore_block */ + debug_nothing_int_charstar_int_bool, /* source_line */ + debug_nothing_int_charstar, /* begin_prologue */ + debug_nothing_int_charstar, /* end_prologue */ + debug_nothing_int_charstar, /* begin_epilogue */ + debug_nothing_int_charstar, /* end_epilogue */ + debug_nothing_tree, /* begin_function */ + debug_nothing_int, /* end_function */ + debug_nothing_tree, /* function_decl */ + debug_nothing_tree, /* global_decl */ + debug_nothing_tree_int, /* type_decl */ + debug_nothing_tree_tree_tree_bool, /* imported_module_or_decl */ + debug_nothing_tree, /* deferred_inline_function */ + debug_nothing_tree, /* outlining_inline_function */ + debug_nothing_rtx, /* label */ + debug_nothing_int, /* handle_pch */ + debug_nothing_rtx, /* var_location */ + debug_nothing_void, /* switch_text_section */ + debug_nothing_tree_tree, /* set_name */ + 0, /* start_end_main_source_file */ + TYPE_SYMTAB_IS_ADDRESS /* tree_type_symtab_field */ +}; + /* NOTE: In the comments in this file, many references are made to "Debugging Information Entries". This term is abbreviated as `DIE' Index: gcc/opts.c =================================================================== --- gcc/opts.c (revision 191813) +++ gcc/opts.c (working copy) @@ -1653,6 +1653,33 @@ common_handle_option (struct gcc_options *opts, opts->x_flag_gcse_after_reload = value; break; + case OPT_fauto_profile_: + auto_profile_file = xstrdup (arg); + opts->x_flag_auto_profile = true; + value = true; + /* No break here - do -fauto-profile processing. */ + case OPT_fauto_profile: + if (!opts_set->x_flag_branch_probabilities) + opts->x_flag_branch_probabilities = value; + if (!opts_set->x_flag_unroll_loops) + opts->x_flag_unroll_loops = value; + if (!opts_set->x_flag_peel_loops) + opts->x_flag_peel_loops = value; + if (!opts_set->x_flag_inline_functions) + opts->x_flag_inline_functions = value; + if (!opts_set->x_flag_ipa_cp) + opts->x_flag_ipa_cp = value; + if (!opts_set->x_flag_ipa_cp_clone + && value && opts->x_flag_ipa_cp) + opts->x_flag_ipa_cp_clone = value; + if (!opts_set->x_flag_predictive_commoning) + opts->x_flag_predictive_commoning = value; + if (!opts_set->x_flag_unswitch_loops) + opts->x_flag_unswitch_loops = value; + if (!opts_set->x_flag_gcse_after_reload) + opts->x_flag_gcse_after_reload = value; + break; + case OPT_fprofile_generate_: opts->x_profile_data_prefix = xstrdup (arg); value = true; Index: gcc/timevar.def =================================================================== --- gcc/timevar.def (revision 191813) +++ gcc/timevar.def (working copy) @@ -81,6 +81,7 @@ DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltra DEFTIMEVAR (TV_WHOPR_WPA_LTRANS_EXEC , "whopr wpa->ltrans") DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference") DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile") +DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile") DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") DEFTIMEVAR (TV_IPA_PTA , "ipa points-to") DEFTIMEVAR (TV_IPA_SRA , "ipa SRA") Index: gcc/predict.c =================================================================== --- gcc/predict.c (revision 191813) +++ gcc/predict.c (working copy) @@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "cfgloop.h" #include "pointer-set.h" +#include "auto-profile.h" /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */ @@ -2749,7 +2750,8 @@ compute_function_frequency (void) if (DECL_STATIC_DESTRUCTOR (current_function_decl)) node->only_called_at_exit = true; - if (!profile_info || !flag_branch_probabilities) + if (!profile_info || !flag_branch_probabilities + || (flag_auto_profile && profile_status == PROFILE_GUESSED)) { int flags = flags_from_decl_or_type (current_function_decl); if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)) @@ -2851,16 +2853,30 @@ rebuild_frequencies (void) timevar_push (TV_REBUILD_FREQUENCIES); if (profile_status == PROFILE_GUESSED) { - loop_optimizer_init (0); - add_noreturn_fake_exit_edges (); - mark_irreducible_loops (); - connect_infinite_loops_to_exit (); - estimate_bb_frequencies (); - remove_fake_exit_edges (); - loop_optimizer_finalize (); + if (flag_auto_profile && counts_to_freqs ()) + { + afdo_calculate_branch_prob (); + counts_to_freqs(); + profile_status = PROFILE_READ; + compute_function_frequency (); + } + else + { + loop_optimizer_init (0); + add_noreturn_fake_exit_edges (); + mark_irreducible_loops (); + connect_infinite_loops_to_exit (); + estimate_bb_frequencies (); + remove_fake_exit_edges (); + loop_optimizer_finalize (); + } } else if (profile_status == PROFILE_READ) - counts_to_freqs (); + { + if (flag_auto_profile) + afdo_calculate_branch_prob (); + counts_to_freqs (); + } else gcc_unreachable (); timevar_pop (TV_REBUILD_FREQUENCIES); Index: gcc/auto-profile.c =================================================================== --- gcc/auto-profile.c (revision 0) +++ gcc/auto-profile.c (revision 0) @@ -0,0 +1,1580 @@ +/* Calculate branch probabilities, and basic block execution counts. + Copyright (C) 2011. Free Software Foundation, Inc. + Contributed by Dehao Chen (dehao@google.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Read and annotate call graph profile from the auto profile data + file. */ + +#include + +#include "config.h" +#include "system.h" +#include "flags.h" /* for auto_profile_file. */ +#include "basic-block.h" /* for gcov_type. */ +#include "diagnostic-core.h" /* for inform(). */ +#include "gcov-io.h" /* for gcov_read_unsigned(). */ +#include "input.h" /* for expanded_location. */ +#include "profile.h" /* for profile_info. */ +#include "langhooks.h" /* for langhooks. */ +#include "opts.h" /* for in_fnames. */ +#include "tree-pass.h" /* for ipa pass. */ +#include "cfgloop.h" /* for loop_optimizer_init. */ +#include "gimple.h" +#include "cgraph.h" +#include "tree-flow.h" +#include "auto-profile.h" + +/* The following routines implements AutoFDO optimization. + + This optimization uses sampling profiles to annotate basic block counts + and uses heuristics to estimate branch probabilities. + + There are three phases in AutoFDO: + + Phase 1: Read profile from the profile data file. + The following info is read from the profile datafile: + * Function names and file names. + * Source level profile, which is a mapping from inline stack to + its sample counts. + * Module profile: Module to aux-modules mapping + Phase 1 just reads in data without processing it. It is invoked + before tree parsing because LIPO needs module profile before tree + parsing. (read_aux_modules) + + Phase 2: Process profile to build internal data structure (hashmap). + This is done after tree parsing, because the processing requires the map + from function name to its debug name (bfd_name). The following hashmaps + is used to store profile. + * function_htab: map from function_name to its entry_bb count + * stack_htab: map from inline stack to its sample count + * bfd_name_htab: map from function name to its debug name (bfd_name) + * module_htab: map from module name to its aux-module names + + Phase 3: Annotate control flow graph. + AutoFDO invokes a separate pass over the control flow graph to: + * Annotate basic block count + * Estimate branch probability + + After the above 3 phases, all profile is readily annotated on the GCC IR. + AutoFDO tries to reuse all FDO infrastructure as much as possible to make + use of the profile. E.g. it uses existing mechanism to calculate the basic + block/edge frequency, as well as the cgraph node/edge count. + + However, AutoFDO still differs from FDO in the following aspects: + + * Profile is not accurate, because AutoFDO uses sampling to collect + profile, and uses debug info to represent the profile. As a result, + some hot basic blocks may have zero sample count. Because of this, + some optimization needs to be adjusted (e.g. loop peeling/unrolling). + * Each cloned context has its own profile, but these contexts may + not even exist when doing annotation. This provides more context- + sensitive profiles, but at the same time, adds complexity to the + implementation. Because of this, additional profile annotation is + needed for each function after the inline pass, and count scaling + is tricky in the second annotation. +*/ + +#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" +#define SP_HTAB_INIT_SIZE 2000 + +/* GCOV data structures to represent profile stored in the .afdo file. */ + +struct gcov_callsite_pos +{ + const char *file; + const char *func; + gcov_unsigned_t line; + gcov_unsigned_t discr; +}; + +struct gcov_callsite +{ + struct gcov_callsite_pos pos; + const char *func; + gcov_type count; +}; + +struct gcov_stack +{ + const char *func_name; + const char *callee_name; + struct gcov_callsite_pos *stack; + gcov_unsigned_t size; + gcov_type num_inst; + gcov_type count; +}; + +struct gcov_function +{ + const char *name; + const char *file; + gcov_type total_count; + gcov_type entry_count; + gcov_type max_count; + /* Number of call stacks in the function. */ + gcov_unsigned_t stack_num; + /* All the call stacks in the function. */ + struct gcov_stack *stacks; +}; + +struct afdo_bfd_name +{ + const char *assembler_name; + /* bfd_name is the name that debugger used for function name matching. + Different assembler names could map to the same bfd_name. */ + const char *bfd_name; +}; + +struct afdo_module +{ + const char *name; + int ident; + unsigned exported; + unsigned has_asm; + unsigned num_aux_modules; + unsigned num_quote_paths; + unsigned num_bracket_paths; + unsigned num_cpp_defines; + unsigned num_cpp_includes; + unsigned num_cl_args; + const char **strings; +}; + +/* Store the file name strings read from the profile data file. */ +static const char **file_names; + +/* gcov_ctr_summary structure to store the profile_info. */ +static struct gcov_ctr_summary *afdo_profile_info; + +/* Hash table to hold function information. */ +static htab_t function_htab; + +/* Hash table to hold stack information. */ +static htab_t stack_htab; + +/* Hash table to hold inline scale information. */ +static htab_t stack_scale_htab; + +/* Hash table to hold assembler name to bfd name mapping. */ +static htab_t bfd_name_htab; + +/* Hash table to hold module informaition. */ +static htab_t module_htab; + +/* Store the module hash table contents. */ +static struct afdo_module *modules; + +/* File static variables, which is used to pass information between + init_auto_profile and process_auto_profile. */ +static gcov_unsigned_t function_num; +static gcov_unsigned_t total_module_num; +static struct gcov_function *gcov_functions; + +/* Check if PATH_NAME is absolute path, if yes, strip the directory part + of the PATH_NAME, return the file name. */ + +static const char * +afdo_get_filename (const char *path_name) +{ + const char* last; + return path_name; + if (path_name == NULL) + return NULL; + last = strrchr(path_name, '/'); + return ((last == 0) ? path_name : last + 1); +} + +/* Given an assembler function NAME, return its original name. strip the + suffix at the end of the function name, added by optimizations such as + constant propagation etc. */ + +static gcov_unsigned_t +afdo_get_original_name_size (const char *name) +{ + const char *ret; + if (!name) + return 0; + ret = strchr (name, '.'); + if (!ret) + return strlen(name); + else + return ret - name; +} + +/* Given an asssembler function NAME, return its corresponding bfd name. + If the mapping cannot be found, it means that the assembler function + name is not used/emitted in the current module(s). */ + +static const char * +afdo_get_bfd_name (const char *name) +{ + struct afdo_bfd_name bfd, *bfd_entry; + gcov_unsigned_t size = afdo_get_original_name_size (name); + /* If the function name is cloned, we want to find its original name. */ + char *buf = (char *) alloca (size + 1); + strncpy (buf, name, size); + buf[size] = 0; + bfd.assembler_name = buf; + bfd_entry = (struct afdo_bfd_name *) htab_find (bfd_name_htab, &bfd); + if (!bfd_entry) + return name; + return bfd_entry->bfd_name; +} + +/* Traverse the cgraph, add each function's name to to bfd_name mapping. */ + +static void +afdo_read_bfd_names (void) +{ + struct cgraph_node *node; + + for (node = cgraph_nodes; node; node = node->next) + { + const char *bfd_name; + if (lang_hooks.dwarf_name (node->decl, 0) == NULL) + continue; + bfd_name = xstrdup (lang_hooks.dwarf_name (node->decl, 0)); + afdo_add_bfd_name_mapping + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)), bfd_name); + } +} + +/* Hash function for struct afdo_stack. */ + +static hashval_t +afdo_stack_hash (const void *stack) +{ + gcov_unsigned_t i; + /* An arbitrary initial value borrowed from hashtab.c. */ + hashval_t h = 0x9e3779b9; + const struct gcov_stack *s = (const struct gcov_stack *) stack; + if (s->callee_name) + h = iterative_hash (afdo_get_bfd_name (s->callee_name), + strlen (afdo_get_bfd_name (s->callee_name)), h); + if (s->func_name) + h = iterative_hash (s->func_name, + afdo_get_original_name_size (s->func_name), h); + for (i = 0; i < s->size; i++) { + const struct gcov_callsite_pos *p = s->stack + i; + const char *file = afdo_get_filename (p->file); + h = iterative_hash (file, strlen (file), h); + h = iterative_hash (&p->line, sizeof (p->line), h); + if (i == 0) + h = iterative_hash (&p->discr, sizeof (p->discr), h); + } + return h; +} + +/* Check if two afdo_stack P and Q are identical. */ + +static int +afdo_stack_eq (const void *p, const void *q) +{ + const struct gcov_stack *s1 = (const struct gcov_stack *) p; + const struct gcov_stack *s2 = (const struct gcov_stack *) q; + + gcov_unsigned_t i; + if (s1->func_name == NULL || s2->func_name == NULL) + return 0; + + if (s1->callee_name == NULL) + { + if (s2->callee_name != NULL) + return 0; + } + else if (s2->callee_name != NULL + && strcmp (afdo_get_bfd_name (s1->callee_name), + afdo_get_bfd_name (s2->callee_name))) + return 0; + + i = afdo_get_original_name_size (s1->func_name); + if (i != afdo_get_original_name_size (s2->func_name)) + return 0; + + if (strncmp (s1->func_name, s2->func_name, i)) + return 0; + + if (s1->size != s2->size) + return 0; + for (i = 0; i < s1->size; i++) + { + const struct gcov_callsite_pos *p1 = s1->stack + i; + const struct gcov_callsite_pos *p2 = s2->stack + i; + if (strcmp (afdo_get_filename(p1->file), afdo_get_filename(p2->file)) + || p1->line != p2->line || (i== 0 && p1->discr != p2->discr)) + return 0; + } + return 1; +} + +/* Hash function for struct afdo_function. */ + +static hashval_t +afdo_function_hash (const void *func) +{ + /* An arbitrary initial value borrowed from hashtab.c. */ + hashval_t h = 0x9e3779b9; + const struct gcov_function *f = (const struct gcov_function *) func; + + if (f->name) + h = iterative_hash (f->name, afdo_get_original_name_size (f->name), h); + return h; +} + +/* Check if two afdo_function P and Q are identical. */ + +static int +afdo_function_eq (const void *p, const void *q) +{ + const struct gcov_function *f1 = (const struct gcov_function *) p; + const struct gcov_function *f2 = (const struct gcov_function *) q; + gcov_unsigned_t i; + + if (f1->name == NULL || f2->name == NULL) + return 0; + + i = afdo_get_original_name_size (f1->name); + if (i != afdo_get_original_name_size (f2->name)) + return 0; + + return !strncmp (f1->name, f2->name, i); +} + +/* Hash function for struct afdo_bfd_name. */ + +static hashval_t +afdo_bfd_name_hash (const void *func) +{ + hashval_t h = 0x9e3779b9; + const struct afdo_bfd_name *f = (const struct afdo_bfd_name *) func; + + if (f->assembler_name) + h = iterative_hash (f->assembler_name, strlen (f->assembler_name), h); + return h; +} + +/* Check if two struct afdo_bfd_name P and Q are identical. */ + +static int +afdo_bfd_name_eq (const void *p, const void *q) +{ + const struct afdo_bfd_name *b1 = (const struct afdo_bfd_name *) p; + const struct afdo_bfd_name *b2 = (const struct afdo_bfd_name *) q; + + if (b1->assembler_name == NULL || b2->assembler_name == NULL) + return 0; + + return !strcmp (b1->assembler_name, b2->assembler_name); +} + +/* Free the hash table entry P. */ + +static void +afdo_bfd_name_del (void *p) +{ + free (p); +} + +/* Hash Function for struct afdo_module. */ + +static hashval_t +afdo_module_hash (const void *module) +{ + hashval_t h = 0x9e3779b9; + const struct afdo_module *m = (const struct afdo_module *)module; + + if (m->name) + h = iterative_hash (m->name, strlen (m->name), h); + + return h; +} + +/* Check if two struct afdo_module P and Q are identical. */ + +static int +afdo_module_eq (const void *p, const void *q) +{ + const struct afdo_module *m1 = (const struct afdo_module *)p; + const struct afdo_module *m2 = (const struct afdo_module *)q; + + if (m1->name == NULL || m2->name == NULL) + return 0; + + return !strcmp (m1->name, m2->name); +} + +/* Return the total number of emitted string for MODULE. */ + +static unsigned long long +afdo_module_num_strings (const struct afdo_module *module) +{ + return module->num_quote_paths + + module->num_bracket_paths + + module->num_cpp_defines + + module->num_cpp_includes + + module->num_cl_args; +} + +/* Add a module (specified in MODULE) into gcov_module_info format in + MODULE_INFO, which is used by LIPO to import auxiliary modules. + Set the is_primary flag if IS_PRIMARY is set. */ + +static void +afdo_add_module (struct gcov_module_info **module_info, + const struct afdo_module *module, + gcov_unsigned_t is_primary) +{ + unsigned i; + size_t info_sz; + + info_sz = sizeof (struct gcov_module_info) + + sizeof (void *) * afdo_module_num_strings (module); + *module_info = XCNEWVAR (struct gcov_module_info, info_sz); + (*module_info)->ident = module->ident; + (*module_info)->is_primary = is_primary; + (*module_info)->is_exported = is_primary ? module->exported : 1; + (*module_info)->source_filename = (char *) module->name; + (*module_info)->num_quote_paths = module->num_quote_paths; + (*module_info)->num_bracket_paths = module->num_bracket_paths; + (*module_info)->num_cpp_defines = module->num_cpp_defines; + (*module_info)->num_cpp_includes = module->num_cpp_includes; + (*module_info)->num_cl_args = module->num_cl_args; + for (i = 0; i < afdo_module_num_strings (module); i++) + (*module_info)->string_array[i] = (char *) + module->strings[module->num_aux_modules + i]; +} + +/* Read in the auxiliary modules for the current primary module. */ + +static void +read_aux_modules (void) +{ + unsigned i, curr_module = 1; + struct afdo_module module, *entry; + + module.name = in_fnames[0]; + entry = (struct afdo_module *) htab_find (module_htab, &module); + if (!entry) + { + inform (0, "primary module %s cannot be found.", in_fnames[0]); + return; + } + module_infos = XCNEWVEC (struct gcov_module_info *, + entry->num_aux_modules + 1); + afdo_add_module (module_infos, entry, true); + primary_module_id = entry->ident; + for (i = 0; i < entry->num_aux_modules; i++) + { + struct afdo_module *aux_entry; + module.name = entry->strings[i]; + if (!strcmp (module.name, in_fnames[0])) + continue; + aux_entry = (struct afdo_module *) htab_find (module_htab, &module); + if (!aux_entry) + { + inform (0, "aux module %s cannot be found.", module.name); + continue; + } + afdo_add_module (&module_infos[curr_module++], aux_entry, false); + add_input_filename (module.name); + } +} + +/* Return the size of the inline stack of the STMT. */ + +static int +get_inline_stack_size_by_stmt (gimple stmt) +{ + tree block; + int size = 1; + + if (!stmt) + return 0; + block = gimple_block (stmt); + if (!block || TREE_CODE (block) != BLOCK || !gimple_location (stmt)) + return 0; + + for ( block = BLOCK_SUPERCONTEXT (block); + block && (TREE_CODE (block) == BLOCK); + block = BLOCK_SUPERCONTEXT (block)) { + /* Traverse the nesting blocks. If the block contains the source + location info, save the source location info to the inline stack. */ + if (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION) + continue; + size ++; + } + return size; +} + +/* Return the size of the inline stack of the EDGE. All inlined callsites + along he inline chain are recorded. */ + +static int +get_inline_stack_size_by_edge (struct cgraph_edge *edge) +{ + struct cgraph_edge *e; + int size = 0; + for (e= edge; e; e = e->caller->callers) + { + gimple stmt = e->call_stmt; + if (!stmt) + break; + size += get_inline_stack_size_by_stmt (stmt); + if (!e->caller->global.inlined_to) + break; + } + return size; +} + +/* Return the function name of a given lexical BLOCK. */ + +static const char * +get_function_name_from_block (tree block) +{ + tree decl; + for (decl = BLOCK_ABSTRACT_ORIGIN (block); + decl && (TREE_CODE (decl) == BLOCK); + decl = BLOCK_ABSTRACT_ORIGIN (decl)) + if (TREE_CODE (decl) == FUNCTION_DECL) + break; + return decl ? IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) : NULL; +} + +/* Store the inline stack of STMT to POS_STACK, return the size of the + stack. Set the discriminator of the inline stack if DISCR is TRUE. */ + +static int +get_inline_stack_by_stmt (gimple stmt, tree decl, + struct gcov_callsite_pos *pos_stack, bool discr) +{ + tree block; + int idx = 0; + source_location loc; + + if (!stmt) + return 0; + block = gimple_block (stmt); + if (!block || TREE_CODE (block) != BLOCK || !gimple_location (stmt)) + return 0; + + loc = gimple_location (stmt); + pos_stack[idx].file = expand_location(loc).file; + pos_stack[idx].line = expand_location(loc).line; + if (discr) + pos_stack[idx].discr = get_discriminator_from_locus (loc); + else + pos_stack[idx].discr = 0; + idx++; + for (block = BLOCK_SUPERCONTEXT (block); + block && (TREE_CODE (block) == BLOCK); + block = BLOCK_SUPERCONTEXT (block)) + { + if (! BLOCK_SOURCE_LOCATION (block) > 0) + continue; + loc = BLOCK_SOURCE_LOCATION (block); + pos_stack[idx].file = expand_location (loc).file; + pos_stack[idx].line = expand_location (loc).line; + pos_stack[idx - 1].func = get_function_name_from_block (block); + pos_stack[idx++].discr = 0; + } + if (decl) + pos_stack[idx - 1].func = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + return idx; +} + +/* Store the inline stack of EDGE to POS_STACK, return the size of the + stack. All inlined callsites along the inline stack are recorded. */ + +static int +get_inline_stack_by_edge (struct cgraph_edge *edge, + struct gcov_callsite_pos *pos_stack) +{ + struct cgraph_edge *e; + int size = 0; + + for (e = edge; e; e = e->caller->callers) + { + gimple stmt = e->call_stmt; + if (!stmt) + break; + size += get_inline_stack_by_stmt (stmt, e->caller->decl, + pos_stack + size, false); + if (!e->caller->global.inlined_to) + break; + } + return size; +} + +/* Read sample count info of the function with DECL, and save them + to ENTRY_COUNT and TOTAL_COUNT respectively. */ + +static void +afdo_get_function_count (tree decl, + gcov_type *entry_count, + gcov_type *total_count) +{ + struct gcov_function func; + const struct gcov_function *func_entry; + + *entry_count = 0; + *total_count = 0; + func.name = + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + func.file = DECL_SOURCE_FILE (decl); + func_entry = (const struct gcov_function *) + htab_find (function_htab, &func); + if (func_entry) + { + (*total_count) += func_entry->total_count; + (*entry_count) += func_entry->entry_count; + return; + } + func.name = afdo_get_bfd_name (func.name); + func_entry = (const struct gcov_function *) + htab_find (function_htab, &func); + if (func_entry) + { + (*total_count) += func_entry->total_count; + (*entry_count) += func_entry->entry_count; + } +} + +/* Set the node count of the current function, and update the entry_bb + count. */ + +void +afdo_set_current_function_count (void) +{ + gcov_type entry_count; + gcov_type total_count; + struct cgraph_node *node = cgraph_get_create_node (current_function_decl); + + afdo_get_function_count (current_function_decl, &entry_count, &total_count); + node->total_count += total_count; + node->count += entry_count; + ENTRY_BLOCK_PTR->count = node->count; +} + +/* Add the AS_NAME->BFD_NAME to the assembler_name to bfd_name mapping. */ + +void +afdo_add_bfd_name_mapping (const char *as_name, const char *bfd_name) +{ + struct afdo_bfd_name **slot; + struct afdo_bfd_name *entry = (struct afdo_bfd_name *) + xmalloc (sizeof (struct afdo_bfd_name)); + + entry->assembler_name = as_name; + entry->bfd_name = bfd_name; + slot = (struct afdo_bfd_name **) + htab_find_slot (bfd_name_htab, entry, INSERT); + if (!*slot) + *slot = entry; + else + free (entry); +} + +/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count) + is recorded in a hash map. */ + +void +afdo_add_copy_scale (struct cgraph_edge *edge) +{ + struct gcov_stack *stack; + struct gcov_stack **stack_slot; + int scale = edge->caller->count ? + (double) REG_BR_PROB_BASE * edge->count / edge->caller->count + : REG_BR_PROB_BASE; + int size = get_inline_stack_size_by_edge (edge); + + if (size == 0) + return; + stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack)); + stack->func_name + = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME ( + edge->caller->global.inlined_to ? + edge->caller->global.inlined_to->decl : edge->caller->decl)); + stack->callee_name + = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)); + stack->size = size; + stack->stack = (struct gcov_callsite_pos *) + xmalloc (sizeof (struct gcov_callsite_pos) * size); + stack->count = scale; + + get_inline_stack_by_edge (edge, stack->stack); + + stack_slot = (struct gcov_stack **) + htab_find_slot (stack_scale_htab, stack, INSERT); + if (!*stack_slot) + *stack_slot = stack; + else + (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count); +} + +/* For a given POS_STACK with SIZE, get the COUNT/NUM_INST info for the + inline stack. If CALLEE_NAME is non-null, the COUNT represents the + total count in the inline stack. Otherwise, the COUNT represents the + count of an ordinary statement. Return FALSE if profile is not found + for the given POS_STACK. */ + +static bool +get_stack_count (struct gcov_callsite_pos *pos_stack, + const char *callee_name, + int size, + gcov_type *count, gcov_type *num_inst) +{ + int i; + + for (i = 0; i < size; i++) + { + struct gcov_stack stack, *entry; + stack.func_name = pos_stack[size - i - 1].func; + stack.callee_name = callee_name; + stack.stack = pos_stack; + stack.size = size - i; + entry = (struct gcov_stack *) htab_find (stack_htab, &stack); + if (entry) + { + if (i == 0) + { + *count = entry->count; + *num_inst = entry->num_inst; + return true; + } + else + { + struct gcov_stack scale_stack, *scale_entry; + scale_stack.stack = pos_stack + size - i; + scale_stack.size = i; + scale_stack.func_name = pos_stack[size - 1].func; + scale_stack.callee_name = stack.func_name; + scale_entry = (struct gcov_stack *) + htab_find (stack_scale_htab, &scale_stack); + if (scale_entry) + { + *count = entry->count * scale_entry->count + / REG_BR_PROB_BASE; + *num_inst = entry->num_inst; + return true; + } + } + } + } + *count = 0; + *num_inst = 0; + return false; +} + +/* For a given STMT, get the COUNT and NUM_INST from its profile. + Return FALSE if profile is not found for STMT. */ + +static bool +get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst) +{ + struct gcov_callsite_pos *pos_stack; + int size; + + if (!stmt) + return false; + size = get_inline_stack_size_by_stmt (stmt); + if (size == 0) + return false; + + pos_stack = (struct gcov_callsite_pos *) + alloca (sizeof (struct gcov_callsite_pos) * size); + + get_inline_stack_by_stmt (stmt, current_function_decl, pos_stack, true); + + return get_stack_count (pos_stack, NULL, size, count, num_inst); +} + +/* For a given EDGE, return the total count of the inlined callsite. */ + +gcov_type +afdo_get_callsite_count (struct cgraph_edge *edge) +{ + struct gcov_callsite_pos *pos_stack; + gcov_type count, num_inst; + const char *callee_name = + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)); + int size = get_inline_stack_size_by_edge (edge); + + if (size == 0) + return 0; + pos_stack = (struct gcov_callsite_pos *) + alloca (sizeof (struct gcov_callsite_pos) * size); + + get_inline_stack_by_edge (edge, pos_stack); + + if (get_stack_count (pos_stack, callee_name, size, &count, &num_inst)) + return count; + else + return 0; +} + +/* For a given BB, return its execution count. */ + +gcov_type +afdo_get_bb_count (basic_block bb) +{ + gimple_stmt_iterator gsi; + gcov_type max_count = 0; + bool has_annotated = false; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gcov_type count, num_inst; + gimple stmt = gsi_stmt (gsi); + if (get_stmt_count (stmt, &count, &num_inst)) + { + if (count > max_count) + max_count = count; + has_annotated = true; + } + } + if (has_annotated) + { + bb->flags |= BB_ANNOTATED; + return max_count; + } + else + return 0; +} + +/* Annotate auto profile to the control flow graph. */ + +static void +afdo_annotate_cfg (void) +{ + basic_block bb; + gcov_type max_count = 0; + + FOR_EACH_BB (bb) + { + bb->count = afdo_get_bb_count (bb); + if (bb->count > max_count) + max_count = bb->count; + } + if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count) + ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count; + if (max_count > 0) + { + counts_to_freqs (); + afdo_calculate_branch_prob (); + profile_status = PROFILE_READ; + } +} + +/* Read profile from profile data file. Write to the module hashmap. */ + +static void +read_profile (void) +{ + gcov_unsigned_t i, j, k, file_name_num; + + if (gcov_open (auto_profile_file, 1) == -1) + { + inform (0, "Cannot open profile file %s.", auto_profile_file); + flag_auto_profile = 0; + return; + } + + if (gcov_read_unsigned () != GCOV_DATA_MAGIC) + { + inform (0, "Magic number does not mathch."); + flag_auto_profile = 0; + return; + } + + if (gcov_read_unsigned () != GCOV_VERSION) + { +/* inform (0, "Vesion number does not mathch."); + flag_auto_profile = 0; + return;*/; + } + + /* Skip the empty integer. */ + gcov_read_unsigned (); + gcc_assert (gcov_read_unsigned () == GCOV_TAG_AFDO_FILE_NAMES); + + /* Skip the length of the section. */ + gcov_read_unsigned (); + + /* Read in the file name table. */ + file_name_num = gcov_read_unsigned (); + file_names = (const char **) + xmalloc (sizeof (const char *) * file_name_num); + for (i = 0; i < file_name_num; i++) + file_names[i] = xstrdup (gcov_read_string ()); + + if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) + { + inform (0, "Not expected TAG."); + return; + } + + /* Skip the length of the section. */ + gcov_read_unsigned (); + + /* Read in the function/callsite profile, and store it in local + data structure. */ + function_num = gcov_read_unsigned (); + gcov_functions = (struct gcov_function *) + xmalloc (function_num * sizeof (struct gcov_function)); + for (i = 0; i < function_num; i++) + { + gcov_functions[i].name = xstrdup (gcov_read_string ()); + gcov_functions[i].file = file_names[gcov_read_unsigned ()]; + gcov_functions[i].total_count = gcov_read_counter (); + gcov_functions[i].entry_count = gcov_read_counter (); + gcov_functions[i].max_count = 0; + gcov_functions[i].stack_num = gcov_read_unsigned (); + gcov_functions[i].stacks = (struct gcov_stack *) + xmalloc (gcov_functions[i].stack_num * sizeof (struct gcov_stack)); + for (j = 0; j < gcov_functions[i].stack_num; j++) + { + gcov_functions[i].stacks[j].func_name = gcov_functions[i].name; + gcov_functions[i].stacks[j].callee_name = NULL; + gcov_functions[i].stacks[j].size = gcov_read_unsigned (); + gcov_functions[i].stacks[j].stack = (struct gcov_callsite_pos *) + xmalloc (gcov_functions[i].stacks[j].size + * sizeof (struct gcov_callsite_pos)); + for (k = 0; k < gcov_functions[i].stacks[j].size; k++) + { + gcov_functions[i].stacks[j].stack[k].func = + file_names[gcov_read_unsigned ()]; + gcov_functions[i].stacks[j].stack[k].file = + file_names[gcov_read_unsigned ()]; + gcov_functions[i].stacks[j].stack[k].line = + gcov_read_unsigned (); + gcov_functions[i].stacks[j].stack[k].discr = + gcov_read_unsigned (); + } + gcov_functions[i].stacks[j].count = gcov_read_counter (); + gcov_functions[i].stacks[j].num_inst = gcov_read_counter (); + } + } + + /* Read in the module info. */ + if (gcov_read_unsigned () != GCOV_TAG_AFDO_MODULE_GROUPING) + { + inform (0, "Not expected TAG."); + return; + } + /* Skip the length of the section. */ + gcov_read_unsigned (); + + /* Read in the file name table. */ + total_module_num = gcov_read_unsigned (); + modules = (struct afdo_module *) + xmalloc (total_module_num * sizeof (struct afdo_module)); + for (i = 0; i < total_module_num; i++) + { + unsigned num_strings; + struct afdo_module **slot; + modules[i].name = xstrdup (gcov_read_string()); + modules[i].ident = i + 1; + /* exported flag. */ + modules[i].exported = gcov_read_unsigned(); + /* has_asm flag. */ + modules[i].has_asm = gcov_read_unsigned(); + /* aux_module and 5 options. */ + modules[i].num_aux_modules = gcov_read_unsigned(); + modules[i].num_quote_paths = gcov_read_unsigned(); + modules[i].num_bracket_paths = gcov_read_unsigned(); + modules[i].num_cpp_defines = gcov_read_unsigned(); + modules[i].num_cpp_includes = gcov_read_unsigned(); + modules[i].num_cl_args = gcov_read_unsigned(); + num_strings = modules[i].num_aux_modules + + modules[i].num_quote_paths + + modules[i].num_bracket_paths + + modules[i].num_cpp_defines + + modules[i].num_cpp_includes + + modules[i].num_cl_args; + modules[i].strings = (const char **) + xmalloc (num_strings * sizeof (char *)); + for (j = 0; j < num_strings; j++) + modules[i].strings[j] = xstrdup (gcov_read_string()); + slot = (struct afdo_module **) + htab_find_slot (module_htab, &modules[i], INSERT); + if (!*slot) + *slot = &modules[i]; + else + gcc_unreachable (); + } +} + +/* Process the profile data and build the function/callsite/callstack + hash maps. */ + +void +process_auto_profile (void) +{ + unsigned i; + + afdo_read_bfd_names (); + for (i = 0; i < function_num; i++) + { + struct gcov_function **func_slot = (struct gcov_function **) + htab_find_slot (function_htab, gcov_functions + i, INSERT); + if (!*func_slot) + *func_slot = gcov_functions + i; + } + + for (i = 0; i < function_num; i++) + { + unsigned j; + struct gcov_function *func = gcov_functions + i; + for (j = 0; j < func->stack_num; j++) + { + unsigned k; + unsigned stack_size = func->stacks[j].size; + gcov_type count = func->stacks[j].count; + struct gcov_stack **stack_slot = (struct gcov_stack **) + htab_find_slot (stack_htab, func->stacks + j, INSERT); + if (func->stacks[j].num_inst && count > afdo_profile_info->sum_max) + afdo_profile_info->sum_max = count / func->stacks[j].num_inst; + if (*stack_slot) + { + (*stack_slot)->count += count; + if ((*stack_slot)->num_inst < func->stacks[j].num_inst) + (*stack_slot)->num_inst = func->stacks[j].num_inst; + } + else + *stack_slot = func->stacks + j; + for (k = 1; k < stack_size; k++) + { + struct gcov_stack *new_stack = (struct gcov_stack *) + xmalloc (sizeof (struct gcov_stack)); + new_stack->func_name = func->stacks[j].func_name; + new_stack->callee_name = + func->stacks[j].stack[stack_size - k - 1].func; + new_stack->stack = func->stacks[j].stack + stack_size - k; + new_stack->size = k; + new_stack->num_inst = 0; + new_stack->count = 0; + stack_slot = (struct gcov_stack **) + htab_find_slot (stack_htab, new_stack, INSERT); + if (*stack_slot) + { + (*stack_slot)->count += count; + free (new_stack); + } + else + *stack_slot = new_stack; + } + } + } +} + +/* Create the hash tables, and read the profile from the profile data + file. */ + +void +init_auto_profile (void) +{ + if (auto_profile_file == NULL) + auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; + + /* Initialize the function hash table. */ + function_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, + afdo_function_hash, + afdo_function_eq, + 0, + xcalloc, + free); + /* Initialize the stack hash table. */ + stack_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, + afdo_stack_hash, + afdo_stack_eq, + 0, + xcalloc, + free); + /* Initialize the stack scale hash table. */ + stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, + afdo_stack_hash, + afdo_stack_eq, + 0, + xcalloc, + free); + /* Initialize the bfd name mapping table. */ + bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, + afdo_bfd_name_hash, + afdo_bfd_name_eq, + afdo_bfd_name_del, + xcalloc, + free); + /* Initialize the module hash table. */ + module_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, + afdo_module_hash, + afdo_module_eq, + 0, + xcalloc, + free); + + afdo_profile_info = (struct gcov_ctr_summary *) + xcalloc (1, sizeof (struct gcov_ctr_summary)); + afdo_profile_info->runs = 1; + afdo_profile_info->sum_max = 0; + + /* Read the profile from the profile file. */ + read_profile (); + + if (flag_dyn_ipa) + read_aux_modules(); +} + +/* Free the resources. */ + +void +end_auto_profile (void) +{ + unsigned i, j; + + for (i = 0; i < function_num; i++) + { + for (j = 0; j < gcov_functions[i].stack_num; ++j) + free (gcov_functions[i].stacks[j].stack); + free (gcov_functions[i].stacks); + } + free (gcov_functions); + + for (i = 0; i < total_module_num; i++) + free (modules[i].strings); + free (modules); + free (afdo_profile_info); + free (file_names); + htab_delete (function_htab); + htab_delete (stack_htab); + htab_delete (stack_scale_htab); + htab_delete (bfd_name_htab); + htab_delete (module_htab); + profile_info = NULL; +} + +/* BB1 and BB2 are in an equivalent class iff: + 1. BB1 dominates BB2. + 2. BB2 post-dominates BB1. + 3. BB1 and BB2 are in the same loop nest. + This function finds the equivalent class for each basic block, and + stores a pointer to the first BB in its equivalent class. Meanwhile, + set bb counts for the same equivalent class to be idenical. */ + +static void +afdo_find_equiv_class (void) +{ + basic_block bb; + + FOR_ALL_BB (bb) + bb->aux = NULL; + + FOR_ALL_BB (bb) + { + VEC (basic_block, heap) *dom_bbs; + basic_block bb1; + int i; + + if (bb->aux != NULL) + continue; + bb->aux = bb; + dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); + FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb1) + if (bb1->aux == NULL + && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) + && bb1->loop_father == bb->loop_father) + { + bb1->aux = bb; + if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0) + { + bb->count = MAX (bb->count, bb1->count); + bb->flags |= BB_ANNOTATED; + } + } + dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb); + FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb1) + if (bb1->aux == NULL + && dominated_by_p (CDI_DOMINATORS, bb, bb1) + && bb1->loop_father == bb->loop_father) + { + bb1->aux = bb; + if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0) + { + bb->count = MAX (bb->count, bb1->count); + bb->flags |= BB_ANNOTATED; + } + } + } +} + +/* If a baisk block only has one in/out edge, then the bb count and he + edge count should be the same. + IS_SUCC is true if the out edge of the basic block is examined. + Return TRUE if any basic block/edge count is changed. */ + +static bool +afdo_propagate_single_edge (bool is_succ) +{ + basic_block bb; + bool changed = false; + + FOR_EACH_BB (bb) + if (is_succ ? single_succ_p (bb) : single_pred_p (bb)) + { + edge e = is_succ ? single_succ_edge (bb) : single_pred_edge (bb); + if (((e->flags & EDGE_ANNOTATED) == 0) + && ((bb->flags & BB_ANNOTATED) != 0)) + { + e->count = bb->count; + e->flags |= EDGE_ANNOTATED; + changed = true; + } + else if (((e->flags & EDGE_ANNOTATED) != 0) + && ((bb->flags & BB_ANNOTATED) == 0)) + { + bb->count = e->count; + bb->flags |= BB_ANNOTATED; + changed = true; + } + else if (bb->count != e->count) + { + e->count = bb->count = MAX (bb->count, e->count); + changed = true; + } + } + return changed; +} + +/* If a basic block's count is known, and only one of its in/out edges' count + is unknown, its count can be calculated. + Meanwhile, if all of the in/out edges' counts are known, then the basic + block's unknown count can also be calculated. + IS_SUCC is true if out edges of a basic blocks are examined. + Return TRUE if any basic block/edge count is changed. */ + +static bool +afdo_propagate_multi_edge (bool is_succ) +{ + basic_block bb; + bool changed = false; + + FOR_EACH_BB (bb) + { + edge e, unknown_edge = NULL; + edge_iterator ei; + int num_unknown_edge = 0; + gcov_type total_known_count = 0; + + if (is_succ) + { + FOR_EACH_EDGE (e, ei, bb->succs) + if ((e->flags & EDGE_ANNOTATED) == 0) + num_unknown_edge ++, unknown_edge = e; + else + total_known_count += e->count; + } + else + { + FOR_EACH_EDGE (e, ei, bb->preds) + if ((e->flags & EDGE_ANNOTATED) == 0) + num_unknown_edge ++, unknown_edge = e; + else + total_known_count += e->count; + } + + if (num_unknown_edge == 0) + { + if (total_known_count > bb->count) + { + bb->count = total_known_count; + changed = true; + } + if ((bb->flags & BB_ANNOTATED) == 0) + { + bb->flags |= BB_ANNOTATED; + changed = true; + } + } + else if (num_unknown_edge == 1 + && (bb->flags & BB_ANNOTATED) != 0) + { + if (bb->count >= total_known_count) + { + unknown_edge->count = bb->count - total_known_count; + unknown_edge->flags |= EDGE_ANNOTATED; + changed = true; + } + } + } + return changed; +} + +/* Special propagation for circuit expressions. Because GCC translates + control flow into data flow for circuit expressions. E.g. + BB1: + if (a && b) + BB2 + else + BB3 + + will be translated into: + + BB1: + if (a) + goto BB.t1 + else + goto BB.t3 + BB.t1: + if (b) + goto BB.t2 + else + goto BB.t3 + BB.t2: + goto BB.t3 + BB.t3: + tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) + if (tmp) + goto BB2 + else + goto BB3 + + In this case, we need to propagate through PHI to determine the edge + count of BB1->BB.t1, BB.t1->BB.t2. */ + +static void +afdo_propagate_circuit (void) +{ + basic_block bb; + FOR_ALL_BB (bb) + { + gimple phi_stmt; + tree cmp_rhs, cmp_lhs; + gimple cmp_stmt = last_stmt (bb); + edge e; + edge_iterator ei; + + if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) + continue; + cmp_rhs = gimple_cond_rhs (cmp_stmt); + cmp_lhs = gimple_cond_lhs (cmp_stmt); + if (!TREE_CONSTANT (cmp_rhs) + || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) + continue; + if (TREE_CODE (cmp_lhs) != SSA_NAME) + continue; + if ((bb->flags & BB_ANNOTATED) == 0) + continue; + phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs); + while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN + && gimple_assign_single_p (phi_stmt) + && TREE_CODE(gimple_assign_rhs1 (phi_stmt)) == SSA_NAME) + phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt)); + if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI) + continue; + FOR_EACH_EDGE (e, ei, bb->succs) + { + unsigned i, total = 0; + edge only_one; + bool check_value_one = (((integer_onep (cmp_rhs)) + ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) + ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); + if ((e->flags & EDGE_ANNOTATED) == 0) + continue; + for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) + { + tree val = gimple_phi_arg_def (phi_stmt, i); + edge ep = gimple_phi_arg_edge (phi_stmt, i); + + if (!TREE_CONSTANT (val) || !(integer_zerop (val) + || integer_onep (val))) + continue; + if (check_value_one ^ integer_onep (val)) + continue; + total++; + only_one = ep; + if (e->probability == 0 && (e->flags & EDGE_ANNOTATED) == 0) + { + ep->probability = 0; + ep->count = 0; + ep->flags |= EDGE_ANNOTATED; + } + } + if (total == 1 && (only_one->flags & EDGE_ANNOTATED) == 0) + { + only_one->probability = e->probability; + only_one->count = e->count; + only_one->flags |= EDGE_ANNOTATED; + } + } + } +} + +/* Propagate the basic block count and edge count on the control flow + graph. We do the propagation iteratively until stablize. */ + +static void +afdo_propagate (void) +{ + basic_block bb; + bool changed = true; + + FOR_ALL_BB (bb) + { + bb->count = ((basic_block) bb->aux)->count; + if ((((basic_block) bb->aux)->flags & BB_ANNOTATED) != 0) + bb->flags |= BB_ANNOTATED; + } + + while (changed) + { + changed = false; + + if (afdo_propagate_single_edge (true)) + changed = true; + if (afdo_propagate_single_edge (false)) + changed = true; + if (afdo_propagate_multi_edge (true)) + changed = true; + if (afdo_propagate_multi_edge (false)) + changed = true; + afdo_propagate_circuit (); + } +} + +/* Propagate counts on control flow graph and calculate branch + probabilities. */ + +void +afdo_calculate_branch_prob (void) +{ + basic_block bb; + bool has_sample = false; + + FOR_EACH_BB (bb) + if (bb->count > 0) + has_sample = true; + + if (!has_sample) + return; + + calculate_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (0); + + afdo_find_equiv_class (); + afdo_propagate (); + + FOR_EACH_BB (bb) + { + edge e; + edge_iterator ei; + int num_unknown_succ = 0; + gcov_type total_count = 0; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + if ((e->flags & EDGE_ANNOTATED) == 0) + num_unknown_succ ++; + else + total_count += e->count; + } + if (num_unknown_succ == 0 && total_count > 0) + { + FOR_EACH_EDGE (e, ei, bb->succs) + e->probability = + (double) e->count * REG_BR_PROB_BASE / total_count; + } + } + FOR_ALL_BB (bb) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) + e->count = + (double) bb->count * e->probability / REG_BR_PROB_BASE; + bb->aux = NULL; + } + + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); +} + +/* Use AutoFDO profile to annoate the control flow graph. */ + +static unsigned int +auto_profile (void) +{ + struct cgraph_node *node; + + if (cgraph_state == CGRAPH_STATE_FINISHED) + return 0; + + init_node_map(); + profile_info = afdo_profile_info; + + for (node = cgraph_nodes; node; node = node->next) + { + if (!node->analyzed + || !gimple_has_body_p (node->decl) + || !(!node->clone_of || node->decl != node->clone_of->decl)) + continue; + + /* Don't profile functions produced for builtin stuff. */ + if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION + || DECL_STRUCT_FUNCTION (node->decl)->after_tree_profile) + continue; + + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + current_function_decl = node->decl; + + afdo_annotate_cfg (); + compute_function_frequency (); + + current_function_decl = NULL; + pop_cfun (); + } + + return TODO_rebuild_cgraph_edges; +} + +static bool +gate_auto_profile_ipa (void) +{ + return flag_auto_profile; +} + +struct simple_ipa_opt_pass pass_ipa_auto_profile = +{ + { + SIMPLE_IPA_PASS, + "afdo", /* name */ + gate_auto_profile_ipa, /* gate */ + auto_profile, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_IPA_AUTOFDO, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func /* todo_flags_finish */ + } +}; Index: gcc/auto-profile.h =================================================================== --- gcc/auto-profile.h (revision 0) +++ gcc/auto-profile.h (revision 0) @@ -0,0 +1,46 @@ +/* coverage.h - Defines data exported from coverage.c + Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2008 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef AUTO_PROFILE_H +#define AUTO_PROFILE_H + +/* Read, process, finalize AutoFDO data structures. */ +extern void init_auto_profile (void); +extern void end_auto_profile (void); +extern void process_auto_profile (void); + +/* Annotate function's count and total count. */ +extern void afdo_set_current_function_count (void); + +/* Add the assembly_name to bfd_name mapping. */ +extern void afdo_add_bfd_name_mapping (const char *, const char *); + +/* Add copy scale for an inlined edge to stack_scale_map. */ +extern void afdo_add_copy_scale (struct cgraph_edge *); + +/* Calculate branch probability in both AutoFDO pass and after inlining. */ +extern void afdo_calculate_branch_prob (void); + +/* Calculate total sample count of an inlined callsite. */ +extern gcov_type afdo_get_callsite_count (struct cgraph_edge *); + +/* Calculate basic block count. */ +extern gcov_type afdo_get_bb_count (basic_block); +#endif /* AUTO_PROFILE_H */ Index: gcc/profile.c =================================================================== --- gcc/profile.c (revision 191813) +++ gcc/profile.c (working copy) @@ -662,6 +662,7 @@ compute_branch_probabilities (unsigned cfg_checksu /* Very simple sanity checks so we catch bugs in our profiling code. */ if (!profile_info) return; + if (profile_info->run_max * profile_info->runs < profile_info->sum_max) { error ("corrupted profile info: run_max * runs < sum_max"); @@ -1447,7 +1448,7 @@ branch_prob (void) if (flag_profile_values) gimple_find_values_to_profile (&values); - if (flag_branch_probabilities) + if (flag_branch_probabilities && !flag_auto_profile) { compute_branch_probabilities (cfg_checksum, lineno_checksum); if (flag_profile_values) Index: gcc/loop-unroll.c =================================================================== --- gcc/loop-unroll.c (revision 191813) +++ gcc/loop-unroll.c (working copy) @@ -1154,6 +1154,10 @@ decide_unroll_runtime_iterations (struct loop *loo return; } + if (flag_auto_profile + && expected_loop_iterations (loop) > loop->header->count) + return; + /* Success; now force nunroll to be power of 2, as we are unable to cope with overflows in computation of number of iterations. */ for (i = 1; 2 * i <= nunroll; i *= 2) Index: gcc/coverage.c =================================================================== --- gcc/coverage.c (revision 191813) +++ gcc/coverage.c (working copy) @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see #include "filenames.h" #include "dwarf2asm.h" #include "target.h" +#include "auto-profile.h" #include "gcov-io.h" #include "gcov-io.c" @@ -274,7 +275,6 @@ static struct opt_desc force_matching_cg_opts[] = { { "-fexceptions", "-fno-exceptions", true }, { "-fsized-delete", "-fno-sized-delete", false }, - { "-frtti", "-fno-rtti", true }, { NULL, NULL, false } }; @@ -2296,6 +2296,8 @@ coverage_init (const char *filename, const char* s init_pmu_profiling (); tree_init_instrumentation_sampling (); } + if (flag_auto_profile) + init_auto_profile (); } /* Return True if any type of profiling is enabled which requires linking Index: gcc/tree-ssa-live.c =================================================================== --- gcc/tree-ssa-live.c (revision 191813) +++ gcc/tree-ssa-live.c (working copy) @@ -546,7 +546,7 @@ remove_unused_scope_block_p (tree scope) else if (!nsubblocks) ; /* For terse debug info we can eliminate info on unused variables. */ - else if (!generate_debug_line_table + else if (!flag_auto_profile && !generate_debug_line_table && (debug_info_level == DINFO_LEVEL_NONE || debug_info_level == DINFO_LEVEL_TERSE)) { Index: gcc/common.opt =================================================================== --- gcc/common.opt (revision 191813) +++ gcc/common.opt (working copy) @@ -913,6 +913,16 @@ fauto-inc-dec Common Report Var(flag_auto_inc_dec) Init(1) Generate auto-inc/dec instructions +fauto-profile +Common Report Var(flag_auto_profile) Optimization +Use sample profile information for call graph node weights. The default +profile file is fbdata.afdo in 'pwd'. + +fauto-profile= +Common Joined RejectNegative Var(auto_profile_file) +Use sample profile information for call graph node weights. The profile +file is specified in the argument. + ; -fcheck-bounds causes gcc to generate array bounds checks. ; For C, C++ and ObjC: defaults off. ; For Java: defaults to on. Index: gcc/tree-inline.c =================================================================== --- gcc/tree-inline.c (revision 191813) +++ gcc/tree-inline.c (working copy) @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see #include "integrate.h" #include "langhooks.h" #include "l-ipo.h" +#include "auto-profile.h" #include "rtl.h" /* FIXME: For asm_str_count. */ @@ -1824,6 +1825,15 @@ copy_bb (copy_body_data *id, basic_block bb, int f copy_gsi = gsi_last_bb (copy_basic_block); } + if (flag_auto_profile && profile_info) + { + gcov_type count = afdo_get_bb_count (copy_basic_block); + if (copy_basic_block->flags & BB_ANNOTATED) + copy_basic_block->count = count; + else if (bb->flags & BB_ANNOTATED) + copy_basic_block->flags |= BB_ANNOTATED; + } + return copy_basic_block; } @@ -1918,9 +1928,10 @@ copy_edges_for_bb (basic_block bb, gcov_type count edge new_edge; flags = old_edge->flags; + flags &= (~EDGE_ANNOTATED); /* Return edges do get a FALLTHRU flag when the get inlined. */ - if (old_edge->dest->index == EXIT_BLOCK && !old_edge->flags + if (old_edge->dest->index == EXIT_BLOCK && !flags && old_edge->dest->aux != EXIT_BLOCK_PTR) flags |= EDGE_FALLTHRU; new_edge = make_edge (new_bb, (basic_block) old_edge->dest->aux, flags); @@ -2253,6 +2264,8 @@ copy_cfg_body (copy_body_data * id, gcov_type coun if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count) count_scale = (REG_BR_PROB_BASE * (double)count / ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count); + else if (flag_auto_profile && count == 0) + count_scale = 0; else count_scale = REG_BR_PROB_BASE; Index: gcc/tree-profile.c =================================================================== --- gcc/tree-profile.c (revision 191813) +++ gcc/tree-profile.c (working copy) @@ -1642,7 +1642,7 @@ direct_call_profiling (void) static bool gate_tree_profile_ipa (void) { - return (!in_lto_p + return (!in_lto_p && !flag_auto_profile && (flag_branch_probabilities || flag_test_coverage || profile_arc_flag || flag_profile_reusedist || flag_optimize_locality)); Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (revision 191813) +++ gcc/Makefile.in (working copy) @@ -874,6 +874,7 @@ GIMPLE_H = gimple.h gimple.def gsstruct.def pointe TRANS_MEM_H = trans-mem.h GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h COVERAGE_H = coverage.h $(GCOV_IO_H) +AUTO_PROFILE_H = auto-profile.h DEMANGLE_H = $(srcdir)/../include/demangle.h RECOG_H = recog.h ALIAS_H = alias.h coretypes.h @@ -1161,6 +1162,7 @@ OBJS = \ alias.o \ alloc-pool.o \ auto-inc-dec.o \ + auto-profile.o \ bb-reorder.o \ bitmap.o \ bt-load.o \ @@ -3034,6 +3036,10 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $ $(HASHTAB_H) tree-iterator.h $(CGRAPH_H) $(TREE_PASS_H) gcov-io.c $(TM_P_H) \ opts.h $(TREE_FLOW_H) $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h l-ipo.h dwarf2asm.h \ $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H) +auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \ + $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \ + $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) \ + $(AUTO_PROFILE_H) cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \ $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) output.h $(FUNCTION_H) $(TREE_PASS_H) \ Index: gcc/basic-block.h =================================================================== --- gcc/basic-block.h (revision 191813) +++ gcc/basic-block.h (working copy) @@ -89,8 +89,9 @@ DEF_VEC_ALLOC_P(edge,heap); and cold sections, when we do partitioning. */ #define EDGE_PRESERVE 0x4000 /* Never merge blocks via this edge. */ -#define EDGE_ALL_FLAGS 0x7fff - +#define EDGE_ANNOTATED 0x8000 /* Edge has been annotated by AutoFDO + profile. */ +#define EDGE_ALL_FLAGS 0xffff #define EDGE_COMPLEX \ (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_EH | EDGE_PRESERVE) @@ -268,7 +269,11 @@ enum bb_flags /* Set on blocks that are in a transaction. This is calculated on demand, and is available after calling compute_transaction_bits(). */ - BB_IN_TRANSACTION = 1 << 13 + BB_IN_TRANSACTION = 1 << 13, + + /* Set on blocks that has been annotated by AutoFDO profile. This is + calculated while propagating profiles on control flow graph. */ + BB_ANNOTATED = 1 << 14 }; /* Dummy flag for convenience in the hot/cold partitioning code. */ Index: gcc/tree-cfg.c =================================================================== --- gcc/tree-cfg.c (revision 191813) +++ gcc/tree-cfg.c (working copy) @@ -1806,6 +1806,14 @@ gimple_merge_blocks (basic_block a, basic_block b) } } + /* When merging two BBs, if their counts are different, the larger + count is selected as the new bb count. */ + if (flag_auto_profile && a->count != b->count) + { + a->count = MAX (a->count, b->count); + a->frequency = MAX (a->frequency, b->frequency); + } + /* Merge the sequences. */ last = gsi_last_bb (a); gsi_insert_seq_after (&last, bb_seq (b), GSI_NEW_STMT); Index: gcc/passes.c =================================================================== --- gcc/passes.c (revision 191813) +++ gcc/passes.c (working copy) @@ -1251,6 +1251,7 @@ init_optimization_passes (void) NEXT_PASS (pass_rebuild_cgraph_edges); NEXT_PASS (pass_inline_parameters); } + NEXT_PASS (pass_ipa_auto_profile); NEXT_PASS (pass_ipa_tree_profile); { struct opt_pass **p = &pass_ipa_tree_profile.pass.sub;