This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [GOOGLE] Refactor AutoFDO
- From: Xinliang David Li <davidxl at google dot com>
- To: Dehao Chen <dehao at google dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 7 Aug 2013 10:46:22 -0700
- Subject: Re: [GOOGLE] Refactor AutoFDO
- References: <CAO2gOZV1c6SXj20M6Ss-m7oWMeYLhoowPodjNw8jb5w9An9AdA at mail dot gmail dot com> <CAAkRFZKPi1SibiUcer_NNd+8HF+c38uYm6LjguggvZSt_wcJPw at mail dot gmail dot com> <CAO2gOZWin9FkgNJGn_F6W9-0RFOLHVvAmsDrkTvjShK+9gZ6Zg at mail dot gmail dot com> <CAAkRFZKi0zMTc-gMFwBRTnaaFhTM3d54QnxiEDqzk-yesAajBA at mail dot gmail dot com> <CAO2gOZVCr_DUrTy67k4rbmXmsmRTf+JK+WtFy-vFzKCKoaXa+Q at mail dot gmail dot com> <CAAkRFZ+dyN3FL+0OBQjpjr58Z8Yu=gOtnSoEm84t4XLMz1mixg at mail dot gmail dot com> <CAO2gOZX7rfd+-UEcn1zFzATN1gwzoZP8juVZ2-z32CTSJOCbRQ at mail dot gmail dot com> <CAAkRFZKxiVRvsqRJ0wDYAiB3Ure36TYZeYRM_k-LYffn4zH2OQ at mail dot gmail dot com> <CAO2gOZXoRFa+yQWD=9COrza5i7OU_tjDD59PJH=U5GUXpchJFA at mail dot gmail dot com>
ok for google branch.
David
On Tue, Aug 6, 2013 at 3:41 PM, Dehao Chen <dehao@google.com> wrote:
> Patch updated.
>
> http://codereview.appspot.com/12079043
>
> Thanks,
> Dehao
>
>
> On Fri, Aug 2, 2013 at 11:21 AM, Xinliang David Li <davidxl@google.com> wrote:
>> More to follow.
>>
>> David
>>
>>>>static void
>>>> read_profile (void)
>>>> {
>>>> if (gcov_open (auto_profile_file, 1) == 0)
>>>> {
>>>> inform (0, "Cannot open profile file %s.", auto_profile_file);
>>
>>
>> Should be at least warning instead -- I think error is probably more
>> appropriate -- this is different from regular FDO, where one missing
>> gcda might be ok.
>>
>>>> flag_auto_profile = 0;
>>>> return;
>>>> }
>>>>
>>>> if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
>>>> {
>>>> inform (0, "Magic number does not mathch.");
>>
>> Should be an error.
>>
>>>> flag_auto_profile = 0;
>>>> return;
>>>> }
>>>>
>>>>
>>>> /* function_name_map. */
>>>> afdo_function_name_map = function_name_map::create ();
>>>> if (afdo_function_name_map == NULL)
>>>> {
>>>> inform (0, "Cannot read string table.");
>>
>> Should be an error unless there is a way to recover. Falling back to
>> non-fdo is not the solution.
>>
>>>> flag_auto_profile = 0;
>>>> return;
>>>> }
>>>>
>>>> /* autofdo_source_profile. */
>>>> afdo_source_profile = autofdo_source_profile::create ();
>>>> if (afdo_source_profile == NULL)
>>>> {
>>>> inform (0, "Cannot read function profile.");
>>
>> An error.
>>
>>>> flag_auto_profile = 0;
>>>> return;
>>>> }
>>>>
>>>> /* autofdo_module_profile. */
>>>> afdo_module_profile = autofdo_module_profile::create ();
>>>> if (afdo_module_profile == NULL)
>>>> {
>>>> inform (0, "Cannot read module profile.");
>>
>> Should be an error.
>>
>>>> flag_auto_profile = 0;
>>>> return;
>>>> }
>>>>
>>>> /* Read in the working set. */
>>>> if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
>>>> {
>>>> inform (0, "Not expected TAG.");
>>
>> Error.
>>
>>>> unsigned curr_module = 1, max_group = PARAM_VALUE (PARAM_MAX_LIPO_GROUP);
>>>> for (string_vector::const_iterator iter = aux_modules->begin();
>>>> iter != aux_modules->end(); ++iter)
>>>> {
>>>> gcov_module_info *aux_module = afdo_module_profile->get_module (*iter);
>>>> if (aux_module == module)
>>>> continue;
>>>> if (aux_module == NULL)
>>>> {
>>>> inform (0, "aux module %s cannot be found.", *iter);
>>>> continue;
>>>> }
>>>> if ((aux_module->lang & GCOV_MODULE_LANG_MASK) !=
>>>> (module->lang & GCOV_MODULE_LANG_MASK))
>>>> {
>>>> inform (0, "Not importing %s: source language"
>>>> " different from primary module's source language", *iter);
>>
>>
>> Should be guarded with -fopt-info -- see similar code in coverage.c.
>>
>>>> continue;
>>>> }
>>>> if ((aux_module->lang & GCOV_MODULE_ASM_STMTS)
>>>> && flag_ripa_disallow_asm_modules)
>>>> {
>>>> if (flag_opt_info)
>>>> inform (0, "Not importing %s: contains "
>>>> "assembler statements", *iter);
>>
>> Use -fopt-info
>>
>>>> continue;
>>>> }
>>>> if (max_group != 0 && curr_module == max_group)
>>>> {
>>>> if (flag_opt_info)
>>>> inform (0, "Not importing %s: maximum group size reached", *iter);
>>>> }
>>>> if (incompatible_cl_args (module, aux_module))
>>>> {
>>>> if (flag_opt_info)
>>>> inform (0, "Not importing %s: command-line"
>>>> " arguments not compatible with primary module", *iter);
>>>> continue;
>>
>> Use -fopt-info.
>>
>>
>>>> }
>>>> module_infos[curr_module++] = aux_module;
>>>> add_input_filename (*iter);
>>>> }
>>>> }
>>>>
>>>> /* From AutoFDO profiles, find values inside STMT for that we want to measure
>>>> histograms for indirect-call optimization. */
>>>>
>>>> {
>>>> hist->hvalue.counters[3] = 0;
>>>> hist->hvalue.counters[4] = 0;
>>>> }
>>
>> It might be better to create a commmon API to create/update histogram
>> entry instead of peeking into the details of the data structure -- to
>> avoid future breaks. Such a change can be done as a follow up and
>> needs to be in trunk.
>
> Will update this in a follow-up patch.
>
>>
>>>> }
>>>>
>>>> /* From AutoFDO profiles, find values inside STMT for that we want to measure
>>>> histograms and adds them to list VALUES. */
>>>>
>>>> static void
>>>> afdo_vpt (gimple stmt, const icall_target_map &map)
>>>> {
>>>> afdo_indirect_call (stmt, map);
>>>> }
>>>>
>>>> /* For a given BB, return its execution count, and annotate value profile
>>>> on statements. */
>>>>
>>>> static gcov_type
>>>> afdo_get_bb_count (basic_block bb)
>>>> {
>>>> gimple_stmt_iterator gsi;
>>>> gcov_type max_count = 0;
>>>> bool has_annotated = false;
>>>>
>>>> for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>> {
>>>> count_info info;
>>>> gimple stmt = gsi_stmt (gsi);
>>>> if (afdo_source_profile->get_count_info (stmt, &info))
>>>> {
>>
>> indentation.
>
> the tab is not well displayed in the patch...
>
>>
>>>> if (info.first > max_count)
>>>> max_count = info.first;
>>>> has_annotated = true;
>>>> if (info.second.size() > 0)
>>>> afdo_vpt (stmt, info.second);
>>>> }
>>>> }
>>>> if (has_annotated)
>>>> {
>>>> bb->flags |= BB_ANNOTATED;
>>>> return max_count;
>>>> }
>>>> else
>>>> return 0;
>>>> }
>>>>
>>>>
>>>> static void
>>>> afdo_annotate_cfg (void)
>>>> {
>>>> basic_block bb;
>>>> const function_instance *s =
>>>> afdo_source_profile->get_function_instance_by_decl (
>>>> current_function_decl);
>>
>> need a new line after decls.
>>
>> On Thu, Aug 1, 2013 at 2:49 PM, Dehao Chen <dehao@google.com> wrote:
>>> On Wed, Jul 31, 2013 at 10:14 AM, Xinliang David Li <davidxl@google.com> wrote:
>>>> Another round of documentation and naming (not for coding style, but
>>>> for clearer semantics) related comments:
>>>>
>>>> David
>>>>
>>>>>
>>>>> namespace autofdo {
>>>>>
>>>>> /* Represent a source location: (function_decl, lineno). */
>>>>> typedef std::pair<tree, unsigned> decl_lineno;
>>>>> /* Represent an inline stack. vector[0] is the leaf node. */
>>>>> typedef std::vector<decl_lineno> inline_stack;
>>>>
>>>> leaf or root?
>>>
>>> leaf
>>>
>>>>
>>>>> /* String array. */
>>>>> typedef std::vector<const char *> string_vector;
>>>>
>>>> Module name, function name strings or a generic string table??
>>>
>>> done
>>>>
>>>>> /* Map from index in string_map to profile count. */
>>>>> typedef std::map<unsigned, gcov_type> target_map;
>>>>
>>>> What index? function index? Is profile count function profile count
>>>> or target call count?
>>>
>>> done
>>>>
>>>> Should it renamed to icall_target_map to be clearer?
>>>
>>> done
>>>>
>>>>> /* Represent profile count: (execution_count, value_profile_histogram. */
>>>>> typedef std::pair<gcov_type, target_map> count_info;
>>>>>
>>>>
>>>> The executation count is for what entity?
>>>
>>> done
>>>>
>>>>> struct string_compare
>>>>> {
>>>>> bool operator() (const char *a, const char *b) const
>>>>> { return strcmp (a, b) < 0; }
>>>>> };
>>>>>
>>>>> /* Store a string array, indexed by string position in the array. */
>>>>> class string_map {
>>>>
>>>> Is it better to rename it to function_name_map ? string_map is too general.
>>>
>>> done
>>>>
>>>>
>>>>> public:
>>>>> static string_map *create ();
>>>>>
>>>>> /* For a given string, returns its index. */
>>>>> int get_index (const char *name) const;
>>>>> /* For a given decl, returns the index of the decl name. */
>>>>> int get_index_by_decl (tree decl) const;
>>>>> /* For a given index, returns the string. */
>>>>> const char *get_name (int index) const;
>>>>>
>>>>> private:
>>>>> string_map () {}
>>>>> bool read ();
>>>>>
>>>>> typedef std::map<const char *, unsigned, string_compare> string_index_map;
>>>>> string_vector vector_;
>>>>> string_index_map map_;
>>>>> };
>>>>>
>>>>> /* Profile of a function copy:
>>>>> 1. total_count of the copy.
>>>>> 2. head_count of the copy (only valid when the copy is a top-level
>>>>> symbol, i.e. it is the original copy instead of the inlined copy).
>>>>> 3. map from source location (decl_lineno) to profile (count_info).
>>>>
>>>> Source location of the inlined callsite?
>>>
>>> done
>>>>
>>>>> 4. map from callsite (64 bit integer, higher 32 bit is source location
>>>>> (decl_lineno), lower 32 bit is callee function name index in
>>>>> string_map) to callee symbol. */
>>>>> class symbol {
>>>>
>>>>
>>>> May be rename it to " function_instance" ?
>>>
>>> done
>>>>
>>>>> public:
>>>>> typedef std::vector<symbol *> symbol_stack;
>>>>>
>>>>> /* Read the profile and create a symbol with head count as HEAD_COUNT.
>>>>> Recursively read callsites to create nested symbols too. STACK is used
>>>>> to track the recursive creation process. */
>>>>> static const symbol *read_symbol (symbol_stack *stack, gcov_type head_count);
>>>>>
>>>>> /* Recursively deallocate all callsites (nested symbols). */
>>>>> ~symbol ();
>>>>>
>>>>> /* Accessors. */
>>>>> unsigned name () const { return name_; }
>>>>> gcov_type total_count () const { return total_count_; }
>>>>> gcov_type head_count () const { return head_count_; }
>>>>>
>>>>> /* Recursively traverse STACK starting from LEVEL to find the corresponding
>>>>> symbol. */
>>>>> const symbol *get_symbol (const inline_stack &stack, unsigned level) const;
>>>>>
>>>>> /* Return the profile info for LOC. */
>>>>> bool get_count_info (location_t loc, count_info *info) const;
>>>>>
>>>>> private:
>>>>> symbol (unsigned name, gcov_type head_count)
>>>>> : name_(name), total_count_(0), head_count_(head_count) {}
>>>>> const symbol *get_symbol_by_decl (unsigned lineno, tree decl) const;
>>>>>
>>>>> /* Map from callsite (64 bit integer, higher 32 bit is source location
>>>>> (decl_lineno), lower 32 bit is callee function name index in
>>>>> string_map) to callee symbol. */
>>>>> typedef std::map<gcov_type, const symbol *> callsite_map;
>>>>
>>>> Why not making a pair and use it as the key?
>>>
>>> done
>>>>
>>>>> /* Map from source location (decl_lineno) to profile (count_info). */
>>>>> typedef std::map<unsigned, count_info> position_count_map;
>>>>>
>>>>> /* symbol name index in the string map. */
>>>>
>>>>
>>>> Index in string_vector or string_map?
>>>
>>> done
>>>>
>>>>> unsigned name_;
>>>>> /* The total sampled count. */
>>>>> gcov_type total_count_;
>>>>> /* The total sampled count in the head bb. */
>>>>> gcov_type head_count_;
>>>>> /* Map from callsite location to callee symbol. */
>>>>> callsite_map callsites;
>>>>> /* Map from source location to count and instruction number. */
>>>>> position_count_map pos_counts;
>>>>> };
>>>>>
>>>>> /* Profile for all functions. */
>>>>> class symbol_map {
>>>>
>>>>
>>>> symbol_map --> program_profiles or afdo_profile ?
>>>
>>> done
>>>>
>>>>> public:
>>>>> static symbol_map *create ()
>>>>> {
>>>>> symbol_map *map = new symbol_map ();
>>>>> if (map->read ())
>>>>> return map;
>>>>> delete map;
>>>>> return NULL;
>>>>> }
>>>>> ~symbol_map ();
>>>>> /* For a given DECL, returns the top-level symbol. */
>>>>> const symbol *get_symbol_by_decl (tree decl) const;
>>>>> /* Find profile info for a given gimple STMT. If found, store the profile
>>>>> info in INFO, and return true; otherwise return false. */
>>>>> bool get_count_info (gimple stmt, count_info *info) const;
>>>>> /* Find total count of the callee of EDGE. */
>>>>> gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
>>>>>
>>>>> private:
>>>>> /* Map from symbol name index (in string_map) to symbol. */
>>>>> typedef std::map<unsigned, const symbol *> Namesymbol_map;
>>>>>
>>>>> symbol_map () {}
>>>>> bool read ();
>>>>> /* Return the symbol in the profile that correspond to the inline STACK. */
>>>>> const symbol *get_symbol_by_inline_stack (const inline_stack &stack) const;
>>>>>
>>>>> Namesymbol_map map_;
>>>>> };
>>>>>
>>>>> /* Module profile. */
>>>>> class module_map {
>>>>
>>>> afdo_module_profile ?
>>>
>>> done
>>>>
>>>>> public:
>>>>> static module_map *create ()
>>>>> {
>>>>> module_map *map = new module_map ();
>>>>> if (map->read ())
>>>>> return map;
>>>>> delete map;
>>>>> return NULL;
>>>>> }
>>>>>
>>>>> /* For a given module NAME, returns this module's gcov_m
>>>>
>>>> On Tue, Jul 30, 2013 at 4:48 PM, Dehao Chen <dehao@google.com> wrote:
>>>>> Patch updated to fix the code style and documentation.
>>>>>
>>>>> Thanks,
>>>>> Dehao
>>>>>
>>>>> On Tue, Jul 30, 2013 at 2:24 PM, Xinliang David Li <davidxl@google.com> wrote:
>>>>>> I have some preliminary comments. Mostly just related to code style
>>>>>> and missing documentation.
>>>>>>
>>>>>> David
>>>>>>
>>>>>>>
>>>>>>> #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
>>>>>>>
>>>>>>> struct SourceLocation
>>>>>>
>>>>>> Is using Upper case in struct names allowed?
>>>>>>
>>>>>>> {
>>>>>>> tree func_decl;
>>>>>>> unsigned lineno;
>>>>>>> };
>>>>>>>
>>>>>>> typedef std::vector<const char *> StringVector;
>>>>>>> typedef std::vector<SourceLocation> InlineStack;
>>>>>>> typedef std::map<unsigned, gcov_type> TargetMap;
>>>>>>>
>>>>>>
>>>>>> Add short description of each new types.
>>>>>>
>>>>>>> struct ProfileInfo
>>>>>>> {
>>>>>>> gcov_type count;
>>>>>>> TargetMap target_map;
>>>>>>> };
>>>>>>>
>>>>>>> struct StringCompare
>>>>>>> {
>>>>>>> bool operator() (const char* a, const char* b) const
>>>>>>
>>>>>> '*' should bind to name.
>>>>>>
>>>>>>> { return strcmp (a, b) < 0; }
>>>>>>> };
>>>>>>>
>>>>>>
>>>>>>> class StringMap {
>>>>>>> public:
>>>>>>> static StringMap *Create();
>>>>>>> int GetIndex (const char *name) const;
>>>>>>> int GetIndexByDecl (tree decl) const;
>>>>>>> const char *GetName (int index) const;
>>>>>>>
>>>>>>> private:
>>>>>>> StringMap () {}
>>>>>>> bool Read ();
>>>>>>>
>>>>>>> typedef std::map<const char *, unsigned, StringCompare> StringIndexMap;
>>>>>>> StringVector vector_;
>>>>>>> StringIndexMap map_;
>>>>>>> };
>>>>>>
>>>>>> Add some documentation on the new type, indicating what is 'index'.
>>>>>>
>>>>>>>
>>>>>>> class Symbol {
>>>>>>
>>>>>> The name 'Symbol' is too generic -- can cause conflicts in the future
>>>>>> unless namespace is used. ALso missing documentation.
>>>>>>
>>>>>>> public:
>>>>>>> typedef std::vector<Symbol *> SymbolStack;
>>>>>>
>>>>>> Fix indentation problems.
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> /* Read the profile and create a symbol with head count as HEAD_COUNT.
>>>>>>> Recursively read callsites to create nested symbols too. STACK is used
>>>>>>> to track the recursive creation process. */
>>>>>>> static const Symbol *ReadSymbol (SymbolStack *stack, gcov_type head_count);
>>>>>>>
>>>>>>> /* Recursively deallocate all callsites (nested symbols). */
>>>>>>> ~Symbol ();
>>>>>>>
>>>>>>> /* Accessors. */
>>>>>>> unsigned name () const { return name_; }
>>>>>>> gcov_type total_count () const { return total_count_; }
>>>>>>> gcov_type head_count () const { return head_count_; }
>>>>>>>
>>>>>>> /* Recursively traverse STACK starting from LEVEL to find the corresponding
>>>>>>> symbol. */
>>>>>>> const Symbol *GetSymbol (const InlineStack &stack, unsigned level) const;
>>>>>>>
>>>>>>> /* Return the profile info for LOC. */
>>>>>>> bool GetProfileInfo (location_t loc, ProfileInfo *info) const;
>>>>>>>
>>>>>>> private:
>>>>>>> Symbol (unsigned name, gcov_type head_count)
>>>>>>> : name_(name), total_count_(0), head_count_(head_count) {}
>>>>>>> const Symbol *GetSymbolByDecl (unsigned lineno, tree decl) const;
>>>>>>>
>>>>>>> typedef std::map<gcov_type, const Symbol *> CallsiteMap;
>>>>>>
>>>>>> Need documentation for this map.
>>>>>>
>>>>>>> typedef std::map<unsigned, ProfileInfo> PositionCountMap;
>>>>>>
>>>>>> Need documentation.
>>>>>>
>>>>>>>
>>>>>>> /* Symbol name index in the string map. */
>>>>>>> unsigned name_;
>>>>>>> /* The total sampled count. */
>>>>>>> gcov_type total_count_;
>>>>>>> /* The total sampled count in the head bb. */
>>>>>>> gcov_type head_count_;
>>>>>>> /* Map from callsite location to callee symbol. */
>>>>>>> CallsiteMap callsites;
>>>>>>> /* Map from source location to count and instruction number. */
>>>>>>> PositionCountMap pos_counts;
>>>>>>> };
>>>>>>>
>>>>>>> class SymbolMap {
>>>>>>
>>>>>> Need documentation.
>>>>>>
>>>>>>> public:
>>>>>>> static SymbolMap *Create ()
>>>>>>> {
>>>>>>> SymbolMap *map = new SymbolMap ();
>>>>>>> if (map->Read ())
>>>>>>> return map;
>>>>>>> delete map;
>>>>>>> return NULL;
>>>>>>> }
>>>>>>> ~SymbolMap ();
>>>>>>> const Symbol *GetSymbolByDecl (tree decl) const;
>>>>>>> bool GetProfileInfo (gimple stmt, ProfileInfo *info) const;
>>>>>>> gcov_type GetCallsiteTotalCount (struct cgraph_edge *edge) const;
>>>>>>
>>>>>> Missing documentation for the interfaces
>>>>>>>
>>>>>>> private:
>>>>>>
>>>>>>> typedef std::map<unsigned, const Symbol *> NameSymbolMap;
>>>>>>
>>>>>> map from what to symbol?
>>>>>>
>>>>>>>
>>>>>>> SymbolMap () {}
>>>>>>> bool Read ();
>>>>>>> const Symbol *GetSymbolByInlineStack (const InlineStack &stack) const;
>>>>>>
>>>>>> Missing documentation for the interfaces
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> NameSymbolMap map_;
>>>>>>> };
>>>>>>>
>>>>>>> class ModuleMap {
>>>>>>
>>>>>> Need documentation.
>>>>>>
>>>>>> On Tue, Jul 30, 2013 at 11:03 AM, Dehao Chen <dehao@google.com> wrote:
>>>>>>> I just rebased the CL to head and updated the patch.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Dehao
>>>>>>>
>>>>>>> On Tue, Jul 30, 2013 at 10:09 AM, Xinliang David Li <davidxl@google.com> wrote:
>>>>>>>> I can not apply the patch cleanly in my v17 gcc client -- there is
>>>>>>>> some problem in auto-profile.c.
>>>>>>>>
>>>>>>>> David
>>>>>>>>
>>>>>>>> On Mon, Jul 29, 2013 at 7:52 PM, Dehao Chen <dehao@google.com> wrote:
>>>>>>>>> This patch refactors AutoFDO to use:
>>>>>>>>>
>>>>>>>>> 1. C++ to rewrite the whole thing.
>>>>>>>>> 2. Use tree instead of hashtable to represent the profile.
>>>>>>>>> 3. Make AutoFDO standalone: keep changes to other modules minimum.
>>>>>>>>>
>>>>>>>>> Bootstrapped and passed regression test and benchmark test.
>>>>>>>>>
>>>>>>>>> OK for google-4_8 branch?
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>> Dehao
>>>>>>>>>
>>>>>>>>> http://codereview.appspot.com/12079043