Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 141911)
+++ doc/invoke.texi	(working copy)
@@ -332,7 +332,8 @@ Objective-C and Objective-C++ Dialects}.
 -fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
 -fearly-inlining -fexpensive-optimizations -ffast-math @gol
 -ffinite-math-only -ffloat-store -fforward-propagate @gol
--ffunction-sections -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm @gol
+-ffunction-sections -fpartition-functions-into-sections=@var{n} @gol
+-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm @gol
 -fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
 -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
 -finline-small-functions -fipa-cp -fipa-cp-clone -fipa-matrix-reorg -fipa-pta @gol
@@ -6353,6 +6354,16 @@ Also profile feedback must be available
 Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
 
+@item -fpartition-functions-into-sections=@var{n}
+@opindex fpartition-functions-into-sections
+Partition functions into sections according to a threshold @var{n} which
+indicates the maximum number of bytes each section may contain.  This
+transformation is aimed at targets with a limited local store which may
+not suffice to hold large functions.  To overcome this, the function is
+partitioned into several sections which can be swapped in and out while
+running.  This may result in undefined behavior when an @code{asm}
+statement is used.  @option{-ffunction-sections} is implied when this
+flag is set.
+
 @item -fstrict-aliasing
 @opindex fstrict-aliasing
 Allows the compiler to assume the strictest aliasing rules applicable to
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 141911)
+++ tree-pass.h	(working copy)
@@ -226,6 +226,7 @@ struct dump_file_info
 #define TODO_remove_functions		(1 << 8)
 #define TODO_rebuild_frequencies	(1 << 9)
 #define TODO_verify_rtl_sharing	(1 << 10)
+#define TODO_check_sections		(1 << 11)
 
 /* To-do flags for calls to update_ssa. */
@@ -458,7 +459,8 @@ extern struct rtl_opt_pass pass_stack_pt
 extern struct rtl_opt_pass pass_initialize_regs;
 extern struct rtl_opt_pass pass_combine;
 extern struct rtl_opt_pass pass_if_after_combine;
-extern struct rtl_opt_pass pass_partition_blocks;
+extern struct rtl_opt_pass pass_partition_blocks_hot_cold;
+extern struct rtl_opt_pass pass_partition_blocks_size;
 extern struct rtl_opt_pass pass_match_asm_constraints;
 extern struct rtl_opt_pass pass_regmove;
 extern struct rtl_opt_pass pass_split_all_insns;
Index: target.h
===================================================================
--- target.h	(revision 141911)
+++ target.h	(working copy)
@@ -751,6 +751,31 @@ struct gcc_target
      delayed-branch scheduling. */
   void (* machine_dependent_reorg) (void);
 
+  /* Functions relating to basic-block partitioning into sections. */
+  struct bb_partitioning_into_sections
+  {
+    /* Estimate the number of extra instructions that will be added for
+       each section.  Called when partitioning a function into
+       sections. */
+    unsigned HOST_WIDE_INT (* estimate_section_overhead) (void);
+
+    /* Return the size of the instruction in bytes.  Take into account
+       the size of the extra machine-dependent instructions that can be
+       added as a result of the insn (like branch hints for branch
+       instructions).  Called when partitioning a function into
+       sections. */
+    unsigned HOST_WIDE_INT (* estimate_instruction_size) (rtx);
+
+    /* Return true if the basic-block with index BB_INDEX should be put in
+       a new section.
+       The following are all taken into account in the decision:
+
+       BB_SIZE - The size of the basic-block,
+       ESTIMATE_MAX_SECTION_SIZE - The maximum size of a section,
+       LAST_SECTION_SIZE - The size of the last section. */
+    bool (* start_new_section) (int bb_index, unsigned HOST_WIDE_INT bb_size,
+				unsigned HOST_WIDE_INT estimate_max_section_size,
+				unsigned HOST_WIDE_INT last_section_size);
+  } bb_partitioning;
+
   /* Create the __builtin_va_list type. */
   tree (* build_builtin_va_list) (void);
Index: ddg.c
===================================================================
--- ddg.c	(revision 141911)
+++ ddg.c	(working copy)
@@ -81,7 +81,7 @@ mark_mem_use_1 (rtx *x, void *data)
 }
 
 /* Returns nonzero if INSN reads from memory. */
-static bool
+bool
 mem_read_insn_p (rtx insn)
 {
   mem_ref_p = false;
@@ -97,7 +97,7 @@ mark_mem_store (rtx loc, const_rtx sette
 }
 
 /* Returns nonzero if INSN writes to memory. */
-static bool
+bool
 mem_write_insn_p (rtx insn)
 {
   mem_ref_p = false;
Index: ddg.h
===================================================================
--- ddg.h	(revision 141911)
+++ ddg.h	(working copy)
@@ -183,4 +183,7 @@ void free_ddg_all_sccs (ddg_all_sccs_ptr
 int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
 int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
 
+bool mem_read_insn_p (rtx);
+bool mem_write_insn_p (rtx);
+
 #endif /* GCC_DDG_H */
Index: final.c
===================================================================
--- final.c	(revision 141911)
+++ final.c	(working copy)
@@ -1822,7 +1822,10 @@ final_scan_insn (rtx insn, FILE *file, i
 #endif
 	  (*debug_hooks->switch_text_section) ();
 
-	  switch_to_section (current_function_section ());
+	  if (flag_partition_functions_into_sections)
+	    switch_to_section (text_part_section (NOTE_TEXT_SECTION (insn)));
+	  else
+	    switch_to_section (current_function_section ());
 	  break;
 
 	case NOTE_INSN_BASIC_BLOCK:
@@ -4218,7 +4221,7 @@ struct rtl_opt_pass pass_final =
   0,                                    /* properties_required */
   0,                                    /* properties_provided */
   0,                                    /* properties_destroyed */
-  0,                                    /* todo_flags_start */
+  TODO_check_sections,                  /* todo_flags_start */
   TODO_ggc_collect                      /* todo_flags_finish */
  }
 };
Index: toplev.c
===================================================================
--- toplev.c	(revision 141911)
+++ toplev.c	(working copy)
@@ -1894,6 +1894,12 @@ process_options (void)
 	  warning (0, "-fdata-sections not supported for this target");
 	  flag_data_sections = 0;
 	}
+      if (flag_partition_functions_into_sections)
+	{
+	  warning (0, "-fpartition-functions-into-sections disabled; "
+		   "-ffunction-sections not supported for this target");
+	  flag_partition_functions_into_sections = 0;
+	}
     }
 
   if (flag_function_sections && profile_flag)
@@ -1901,7 +1907,13 @@ process_options (void)
       warning (0, "-ffunction-sections disabled; it makes profiling impossible");
       flag_function_sections = 0;
     }
-
+  if (flag_partition_functions_into_sections && profile_flag)
+    {
+      warning (0, "-fpartition-functions-into-sections disabled; "
+	       "it makes profiling impossible");
+      flag_partition_functions_into_sections = 0;
+    }
+
 #ifndef HAVE_prefetch
   if (flag_prefetch_loop_arrays)
     {
@@ -1924,6 +1936,15 @@ process_options (void)
       flag_prefetch_loop_arrays = 0;
     }
 
+  /* TODO: Support debugging for -fpartition-functions-into-sections.
 */
+  if (flag_partition_functions_into_sections
+      && ((write_symbols != NO_DEBUG) || flag_unwind_tables))
+    {
+      warning (0, "-fpartition-functions-into-sections disabled; "
+	       "it does not work with debugging or unwind tables");
+      flag_partition_functions_into_sections = 0;
+    }
+
   /* The presence of IEEE signaling NaNs, implies all math can trap. */
   if (flag_signaling_nans)
     flag_trapping_math = 1;
Index: opts.c
===================================================================
--- opts.c	(revision 141911)
+++ opts.c	(working copy)
@@ -1066,6 +1066,14 @@ decode_options (unsigned int argc, const
       flag_reorder_blocks = 1;
     }
 
+  if (flag_exceptions && flag_partition_functions_into_sections)
+    {
+      inform
+	(input_location, "-fpartition-functions-into-sections does not "
+	 "work with exceptions");
+      flag_partition_functions_into_sections = 0;
+    }
+
   /* If user requested unwind info, then turn off the partitioning
      optimization. */
 
@@ -1077,6 +1085,14 @@ decode_options (unsigned int argc, const
       flag_reorder_blocks = 1;
     }
 
+  if (flag_unwind_tables && ! targetm.unwind_tables_default
+      && flag_partition_functions_into_sections)
+    {
+      inform (input_location, "-fpartition-functions-into-sections "
+	      "does not support unwind info");
+      flag_partition_functions_into_sections = 0;
+    }
+
   /* If the target requested unwind info, then turn off the partitioning
      optimization with a different message.  Likewise, if the target does not
      support named sections. */
@@ -1091,6 +1107,24 @@ decode_options (unsigned int argc, const
       flag_reorder_blocks = 1;
     }
 
+  if (flag_partition_functions_into_sections
+      && (!targetm.have_named_sections
+	  || (flag_unwind_tables && targetm.unwind_tables_default)))
+    {
+      inform
+	(input_location, "-fpartition-functions-into-sections does not work "
+	 "on this architecture");
+      flag_partition_functions_into_sections = 0;
+    }
+  if (flag_partition_functions_into_sections
+      && (!HAS_LONG_COND_BRANCH || !HAS_LONG_UNCOND_BRANCH))
+    {
+      warning
+	(0, "-fpartition-functions-into-sections does not work on this "
+	 "architecture");
+      flag_partition_functions_into_sections = 0;
+    }
+
   /* Pipelining of outer loops is only possible when general pipelining
      capabilities are requested. */
   if (!flag_sel_sched_pipelining)
@@ -1902,6 +1936,16 @@ common_handle_option (size_t scode, cons
       flag_sched_stalled_insns = -1;
       break;
 
+    case OPT_fpartition_functions_into_sections_:
+      flag_partition_functions_into_sections = value;
+      if (flag_partition_functions_into_sections != 0)
+	{
+	  flag_function_sections = 1;
+	  /* TODO: Use asm directives to handle multiple sections.
 */
+	  flag_dwarf2_cfi_asm = 0;
+	}
+      break;
+
     case OPT_fsched_stalled_insns_dep_:
       flag_sched_stalled_insns_dep = value;
       break;
Index: timevar.def
===================================================================
--- timevar.def	(revision 141911)
+++ timevar.def	(working copy)
@@ -158,6 +158,7 @@ DEFTIMEVAR (TV_DCE                   , "
 DEFTIMEVAR (TV_DSE1                  , "dead store elim1")
 DEFTIMEVAR (TV_DSE2                  , "dead store elim2")
 DEFTIMEVAR (TV_LOOP                  , "loop analysis")
+DEFTIMEVAR (TV_PARTITION_BLOCKS_SIZE , "partition functions")
 DEFTIMEVAR (TV_GCSE                  , "global CSE")
 DEFTIMEVAR (TV_CPROP1                , "CPROP 1")
 DEFTIMEVAR (TV_PRE                   , "PRE")
Index: function.h
===================================================================
--- function.h	(revision 141911)
+++ function.h	(working copy)
@@ -236,6 +236,10 @@ struct incoming_args GTY(())
   rtx internal_arg_pointer;
 };
 
+typedef const char *const_str;	/* For DEF_VEC_P. */
+DEF_VEC_P(const_str);
+DEF_VEC_ALLOC_P(const_str, gc);
+
 /* Data for function partitioning. */
 struct function_subsections GTY(())
 {
@@ -252,6 +256,32 @@ struct function_subsections GTY(())
      targetm.asm_out.named_section. */
   const char *unlikely_text_section_name;
 
+
+  /* Arrays to hold assembly labels for the text sections, to
+     be used by debugger functions for determining the size of text
+     sections. */
+
+  VEC(const_str,gc) *section_start_labels;
+  VEC(const_str,gc) *section_end_labels;
+
+  /* String to be used for names of the arbitrary cold text section
+     parts, via targetm.asm_out.named_section. */
+
+  const char *unlikely_part_text_section_name;
+
+  /* String to be used for names of the arbitrary hot text section
+     parts, via targetm.asm_out.named_section. */
+
+  const char *part_text_section_name;
+
+  /* A variable to hold the number of sections in the function. */
+
+  unsigned int number_of_sections;
+
+  /* The first section with a different hotness attribute. */
+
+  unsigned int first_text_section_part_changed;
+
 };
 
 /* Datastructures maintained for currently processed function in RTL form. */
Index: print-rtl.c
===================================================================
--- print-rtl.c	(revision 141996)
+++ print-rtl.c	(working copy)
@@ -312,7 +312,9 @@ print_rtx (const_rtx in_rtx)
 	{
 #ifndef GENERATOR_FILE
 	  basic_block bb = NOTE_BASIC_BLOCK (in_rtx);
-	  if (bb != 0)
+	  /* When partitioning into multiple sections,
+	     NOTE_BASIC_BLOCK holds the section id. */
+	  if (bb != 0 && !flag_partition_functions_into_sections)
 	    fprintf (outfile, " [bb %d]", bb->index);
 #endif
 	  break;
Index: cfglayout.c
===================================================================
--- cfglayout.c	(revision 141911)
+++ cfglayout.c	(working copy)
@@ -676,6 +676,7 @@ relink_block_chain (bool stay_in_cfglayo
     {
       bb->aux = NULL;
       bb->il.rtl->visited = 0;
+      bb->il.rtl->skip = 0;
       if (!stay_in_cfglayout_mode)
	bb->il.rtl->header = bb->il.rtl->footer = NULL;
     }
Index: common.opt
===================================================================
--- common.opt	(revision 141911)
+++ common.opt	(working copy)
@@ -938,6 +938,16 @@ freorder-functions
 Common Report Var(flag_reorder_functions) Optimization
 Reorder functions to improve code placement
 
+fpartition-functions-into-sections
+Common Report Var(flag_partition_functions_into_sections)
+Partition functions into sections
+
+fpartition-functions-into-sections=
+Common RejectNegative Joined UInteger
+-fpartition-functions-into-sections=
+Partition functions into sections.  The number indicates the maximum
+number of bytes each section can contain
+
 frerun-cse-after-loop
 Common Report Var(flag_rerun_cse_after_loop) Init(2) Optimization
 Add a common subexpression elimination pass after loop optimizations
Index: varasm.c
===================================================================
--- varasm.c	(revision 141911)
+++ varasm.c	(working copy)
@@ -71,6 +71,10 @@ struct addr_const;
 struct constant_descriptor_rtx;
 struct rtx_constant_pool;
 
+/* If the function is spread over multiple sections, this variable saves
+   the name of the last seen section. */
+const char *last_part_text_section_name;
+
 #define n_deferred_constants (crtl->varasm.deferred_constants)
 
 /* Number for making the label on the next
@@ -168,6 +172,10 @@ section *in_section;
    at the cold section. */
 bool in_cold_section_p;
 
+/* True if the section last switched to is one of the function's
+   section parts. */
+bool in_part_section_p;
+
 /* A linked list of all the unnamed sections. */
 static GTY(()) section *unnamed_sections;
 
@@ -684,6 +690,91 @@ unlikely_text_section (void)
   return get_named_section (NULL, UNLIKELY_EXECUTED_TEXT_SECTION_NAME, 0);
 }
 
+/* Initialize the string which will be part of the section name.
+   It should contain the function name.  UNLIKELY_P is true if the
+   section is cold, otherwise it is false. */
+
+static void
+initialize_part_section_name (bool unlikely_p)
+{
+  const char *stripped_name;
+  char *name;
+  char *buffer;
+  tree dsn;
+
+  gcc_assert (cfun && current_function_decl);
+
+  if (crtl->subsections.unlikely_text_section_name
+      && crtl->subsections.unlikely_part_text_section_name)
+    return;
+
+  dsn = DECL_SECTION_NAME (current_function_decl);
+
+  /* DECL_SECTION_NAME usually holds the function name.  However, if
+     the -fprofile-use flag is used then DECL_SECTION_NAME would hold
+     .text.hot or .text.unlikely.  The function name is used to
+     differentiate between sections in different functions which have
+     the same section id, so we need to get it. */
+  if (dsn && !flag_profile_use)
+    {
+      name = (char *) alloca (TREE_STRING_LENGTH (dsn) + 1);
+      memcpy (name, TREE_STRING_POINTER (dsn), TREE_STRING_LENGTH (dsn) + 1);
+      stripped_name = targetm.strip_name_encoding (name);
+    }
+  else
+    stripped_name = current_function_name ();
+
+  if (unlikely_p)
+    {
+      buffer = ACONCAT ((stripped_name, "_unlikely.part", NULL));
+      crtl->subsections.unlikely_part_text_section_name = ggc_strdup (buffer);
+    }
+  else
+    {
+      buffer = ACONCAT ((stripped_name, ".part", NULL));
+      crtl->subsections.part_text_section_name = ggc_strdup (buffer);
+    }
+}
+
+/* Tell the assembler to switch to a new section with SECTION_ID.
 */
+
+section *
+text_part_section (int section_id)
+{
+  char section_id_str[2 + 2 * HOST_BITS_PER_INT / 4 + 1];
+  char *section_name;
+  const char *prefix_str;
+  bool unlikely_p = false;
+
+  in_part_section_p = true;
+  gcc_assert (flag_function_sections && cfun);
+
+  if (first_function_block_is_cold
+      && (unsigned) section_id <
+	 crtl->subsections.first_text_section_part_changed)
+    unlikely_p = true;
+  else if (!first_function_block_is_cold
+	   && (unsigned) section_id >=
+	      crtl->subsections.first_text_section_part_changed)
+    unlikely_p = true;
+
+  if (!unlikely_p && !crtl->subsections.part_text_section_name)
+    initialize_part_section_name (false);
+  if (unlikely_p && !crtl->subsections.unlikely_part_text_section_name)
+    initialize_part_section_name (true);
+
+  snprintf (section_id_str, sizeof (section_id_str), "%d", section_id);
+  if (unlikely_p)
+    prefix_str = crtl->subsections.unlikely_part_text_section_name;
+  else
+    prefix_str = crtl->subsections.part_text_section_name;
+
+  section_name = ACONCAT ((prefix_str, section_id_str, NULL));
+  last_part_text_section_name = ggc_strdup (section_name);
+  return get_named_section (NULL, last_part_text_section_name, 0);
+}
+
+
 /* When called within a function context, return true if the function
    has been assigned a cold text section and if SECT is that section.
    When called outside a function context, return true if SECT is the
@@ -834,6 +925,9 @@ function_section (tree decl)
   else
     return targetm.asm_out.select_section (decl, reloc, DECL_ALIGN (decl));
 #else
+  if (flag_partition_functions_into_sections
+      && crtl->subsections.number_of_sections)
+    return text_part_section (0);
   return reloc ? unlikely_text_section () : hot_function_section (decl);
 #endif
 }
@@ -852,6 +946,8 @@ current_function_section (void)
 				 in_cold_section_p,
 				 DECL_ALIGN (current_function_decl));
 #else
+  if (in_part_section_p)
+    return get_named_section (NULL, last_part_text_section_name, 0);
   return (in_cold_section_p
	  ? unlikely_text_section ()
	  : hot_function_section (current_function_decl));
@@ -1626,6 +1722,31 @@ notice_global_symbol (tree decl)
     }
 }
 
+/* If the text section is partitioned, output for each section (but the
+   first) the label which marks the beginning of its address range, and
+   make sure all the sections are properly aligned. */
+
+static void
+output_sections (void)
+{
+  unsigned int i;
+  const char *tmp;
+
+  gcc_assert (cfun && flag_partition_functions_into_sections);
+
+  /* Output for each one of the sections the label which marks the
+     beginning of its address range. */
+  for (i = 1;
+       VEC_iterate (const_str, crtl->subsections.section_start_labels, i,
+		    tmp);
+       i++)
+    {
+      switch_to_section (text_part_section (i));
+      assemble_align (FUNCTION_BOUNDARY);
+      if (crtl->subsections.section_start_labels != NULL)
+	ASM_OUTPUT_LABEL (asm_out_file, tmp);
+    }
+}
+
 /* Output assembler code for the constant pool of a function and
    associated with defining the name of the function.  DECL describes
    the function.  NAME is the function's name.
For the constant pool, we use the current @@ -1641,7 +1762,30 @@ assemble_start_function (tree decl, cons crtl->subsections.unlikely_text_section_name = NULL; first_function_block_is_cold = false; - if (flag_reorder_blocks_and_partition) + if (flag_partition_functions_into_sections + && cfun + && (crtl->subsections.number_of_sections > 0)) + { + unsigned i; + int length = crtl->subsections.number_of_sections; + + crtl->subsections.section_start_labels = + VEC_alloc (const_str, gc, length); + crtl->subsections.section_end_labels = + VEC_alloc (const_str, gc, length); + + for (i = 0; i < crtl->subsections.number_of_sections; i++) + { + ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LSECTIONB", const_labelno); + VEC_safe_push (const_str, gc, crtl->subsections.section_start_labels, + ggc_strdup (tmp_label)); + ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LSECTIONE", const_labelno); + VEC_safe_push (const_str, gc, crtl->subsections.section_end_labels, + ggc_strdup (tmp_label)); + const_labelno++; + } + } + else if (flag_reorder_blocks_and_partition) { ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LHOTB", const_labelno); crtl->subsections.hot_section_label = ggc_strdup (tmp_label); @@ -1675,7 +1819,8 @@ assemble_start_function (tree decl, cons has both hot and cold sections, because we don't want to re-set the alignment when the section switch happens mid-function. */ - if (flag_reorder_blocks_and_partition) + if (flag_reorder_blocks_and_partition + && (flag_partition_functions_into_sections == 0)) { switch_to_section (unlikely_text_section ()); assemble_align (DECL_ALIGN (decl)); @@ -1694,7 +1839,8 @@ assemble_start_function (tree decl, cons first_function_block_is_cold = true; } } - else if (DECL_SECTION_NAME (decl)) + else if (DECL_SECTION_NAME (decl) + && (flag_partition_functions_into_sections == 0)) { /* Calls to function_section rely on first_function_block_is_cold being accurate. The first block may be cold even if we aren't @@ -1708,6 +1854,19 @@ assemble_start_function (tree decl, cons crtl->subsections.unlikely_text_section_name) == 0) first_function_block_is_cold = true; } + if (flag_partition_functions_into_sections) + { + if (!crtl->is_thunk + && BB_PARTITION (ENTRY_BLOCK_PTR->next_bb) == BB_COLD_PARTITION) + { + first_function_block_is_cold = true; + } + if (DECL_SECTION_NAME (decl) + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + UNLIKELY_EXECUTED_TEXT_SECTION_NAME) == 0) + first_function_block_is_cold = true; + output_sections (); + } in_cold_section_p = first_function_block_is_cold; @@ -1715,8 +1874,18 @@ assemble_start_function (tree decl, cons switch_to_section (function_section (decl)); if (flag_reorder_blocks_and_partition + && (flag_partition_functions_into_sections == 0) && !hot_label_written) ASM_OUTPUT_LABEL (asm_out_file, crtl->subsections.hot_section_label); + else if (flag_partition_functions_into_sections + && crtl->subsections.section_start_labels != 0 + && crtl->subsections.number_of_sections > 0) + { + const char *tmp; + + tmp = VEC_index (const_str, crtl->subsections.section_start_labels, 0); + ASM_OUTPUT_LABEL (asm_out_file, tmp); + } /* Tell assembler to move to target machine's alignment for functions. 
*/ align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT); @@ -1802,6 +1971,25 @@ assemble_end_function (tree decl, const ASM_OUTPUT_LABEL (asm_out_file, crtl->subsections.hot_section_end_label); switch_to_section (save_text_section); } + else if (flag_partition_functions_into_sections + && crtl->subsections.section_end_labels != NULL) + { + unsigned int i; + const char *tmp; + + /* Output for each one of the sections the label which marks the + end of its address range. */ + for (i = 0; + VEC_iterate (const_str, crtl->subsections.section_end_labels, i, + tmp); i++) + { + switch_to_section (text_part_section (i)); + tmp = VEC_index (const_str, crtl->subsections.section_end_labels, i); + ASM_OUTPUT_LABEL (asm_out_file, tmp); + } + + } + } /* Assemble code to leave SIZE bytes of zeros. */ @@ -5651,6 +5839,15 @@ default_section_type_flags (tree decl, c flags = SECTION_CODE; else if (decl && decl_readonly_section (decl, reloc)) flags = 0; + else if (flag_partition_functions_into_sections + && crtl->subsections.unlikely_part_text_section_name + && strstr (name, + crtl->subsections.unlikely_part_text_section_name) != 0) + flags = SECTION_CODE; + else if (flag_partition_functions_into_sections + && crtl->subsections.part_text_section_name + && strstr (name, crtl->subsections.part_text_section_name) != 0) + flags = SECTION_CODE; else if (current_function_decl && cfun && crtl->subsections.unlikely_text_section_name Index: target-def.h =================================================================== --- target-def.h (revision 141911) +++ target-def.h (working copy) @@ -433,6 +433,18 @@ #define TARGET_SECTION_TYPE_FLAGS default_section_type_flags #endif +#define TARGET_ESTIMATE_SECTION_OVERHEAD 0 +#define TARGET_ESTIMATE_INSTRUCTION_SIZE 0 +#define TARGET_START_NEW_SECTION 0 + +#define TARGET_BB_PARTITIONING \ + { \ + TARGET_ESTIMATE_SECTION_OVERHEAD, \ + TARGET_ESTIMATE_INSTRUCTION_SIZE, \ + TARGET_START_NEW_SECTION \ + } + + #ifndef TARGET_STRIP_NAME_ENCODING #define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding #endif @@ -926,6 +938,7 @@ TARGET_FIXED_CONDITION_CODE_REGS, \ TARGET_CC_MODES_COMPATIBLE, \ TARGET_MACHINE_DEPENDENT_REORG, \ + TARGET_BB_PARTITIONING, \ TARGET_BUILD_BUILTIN_VA_LIST, \ TARGET_FN_ABI_VA_LIST, \ TARGET_CANONICAL_VA_LIST_TYPE, \ Index: rtl.h =================================================================== --- rtl.h (revision 141911) +++ rtl.h (working copy) @@ -862,6 +862,7 @@ extern const char * const reg_note_name[ #define NOTE_DELETED_LABEL_NAME(INSN) XCSTR (INSN, 4, NOTE) #define SET_INSN_DELETED(INSN) set_insn_deleted (INSN); #define NOTE_BLOCK(INSN) XCTREE (INSN, 4, NOTE) +#define NOTE_TEXT_SECTION(INSN) XCINT (INSN, 4, NOTE) #define NOTE_EH_HANDLER(INSN) XCINT (INSN, 4, NOTE) #define NOTE_BASIC_BLOCK(INSN) XCBBDEF (INSN, 4, NOTE) #define NOTE_VAR_LOCATION(INSN) XCEXP (INSN, 4, NOTE) Index: output.h =================================================================== --- output.h (revision 141911) +++ output.h (working copy) @@ -567,6 +567,7 @@ extern section *mergeable_constant_secti unsigned int); extern section *function_section (tree); extern section *unlikely_text_section (void); +extern section *text_part_section (int); extern section *current_function_section (void); /* Return the numbered .ctors.N (if CONSTRUCTOR_P) or .dtors.N (if Index: bb-reorder.c =================================================================== --- bb-reorder.c (revision 141911) +++ bb-reorder.c (working copy) @@ -85,6 +85,10 @@ #include "toplev.h" #include "tree-pass.h" 
 #include "df.h"
+#include "cfgloop.h"
+#include "langhooks.h"
+#include "hashtab.h"
+#include "vec.h"
 
 #ifndef HAVE_conditional_execution
 #define HAVE_conditional_execution 0
@@ -179,13 +183,11 @@ static void connect_traces (int, struct
 static bool copy_bb_p (const_basic_block, int);
 static int get_uncond_jump_length (void);
 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
-static void find_rarely_executed_basic_blocks_and_crossing_edges (edge **,
-								  int *,
-								  int *);
-static void add_labels_and_missing_jumps (edge *, int);
+static void mark_hot_cold_blocks (void);
+static void add_labels_and_missing_jumps (VEC (edge, heap) *);
 static void add_reg_crossing_jump_notes (void);
 static void fix_up_fall_thru_edges (void);
-static void fix_edges_for_rarely_executed_code (edge *, int);
+static void fix_edges_for_rarely_executed_code (VEC (edge, heap) *);
 static void fix_crossing_conditional_branches (void);
 static void fix_crossing_unconditional_branches (void);
@@ -1214,60 +1216,586 @@ get_uncond_jump_length (void)
   return length;
 }
 
-/* Find the basic blocks that are rarely executed and need to be moved to
-   a separate section of the .o file (to cut down on paging and improve
-   cache locality). */
+/* This structure holds callback functions which are used when
+   partitioning blocks into sections. */
+struct partition_callbacks
+{
+  /* A callback to initialize the partitioning. */
+  void (*init) (void);
+
+  /* A callback to indicate whether an edge crosses sections. */
+  bool (*crossing_edge_p) (edge);
+
+  /* A callback to mark section boundaries. */
+  void (*insert_section_boundary_note) (void);
+
+  /* A callback to partition blocks into sections. */
+  void (*partition) (void);
+
+  /* A callback to finalize the partitioning. */
+  void (*finalize) (void);
+};
 
-static void
-find_rarely_executed_basic_blocks_and_crossing_edges (edge **crossing_edges,
-						      int *n_crossing_edges,
-						      int *max_idx)
+
+/* This is a callback function, called from the mark_crossing_edges
+   function.  Return true if E is an edge which crosses between the hot
+   and cold sections. */
+static bool
+hot_cold_crossing_edge_p (edge e)
+{
+  return (e->src != ENTRY_BLOCK_PTR
+	  && e->dest != EXIT_BLOCK_PTR
+	  && BB_PARTITION (e->src) != BB_PARTITION (e->dest));
+}
+
+/* Mark edges that cross between sections with EDGE_CROSSING; store them
+   in a vector and return the vector.  Use CALLBACKS in the decision. */
+static VEC (edge, heap) *
+mark_crossing_edges (struct partition_callbacks *callbacks)
 {
   basic_block bb;
   edge e;
-  int i;
   edge_iterator ei;
+  VEC (edge, heap) *crossing_edges = NULL;
+
+  /* Mark every edge that crosses between sections. */
+  FOR_EACH_BB (bb)
+    FOR_EACH_EDGE (e, ei, bb->succs)
+      {
+	if ((*callbacks->crossing_edge_p) (e))
+	  {
+	    e->flags |= EDGE_CROSSING;
+	    VEC_safe_push (edge, heap, crossing_edges, e);
+	  }
+	else
+	  e->flags &= ~EDGE_CROSSING;
+      }
 
-  /* Mark which partition (hot/cold) each basic block belongs in. */
+  return crossing_edges;
+}
 
-  FOR_EACH_BB (bb)
+/* If any destination of a crossing edge does not have a label, add label;
+   Convert any fall-through crossing edges (for blocks that do not contain
+   a jump) to unconditional jumps. */
+
+/* Structure to hold needed information for each loop. */
+struct loop_info_def
+{
+  /* The index of the loop. */
+  unsigned HOST_WIDE_INT index;
+
+  /* The size (in bytes) of the loop layout. */
+  unsigned HOST_WIDE_INT size;
+
+  /* The following fields are the first and last basic-blocks in the
+     current loop layout.  They are used to calculate the loop's size.
 */
+  basic_block first;
+  basic_block last;
+};
+
+typedef struct loop_info_def *loop_info;
+DEF_VEC_P(loop_info);
+DEF_VEC_ALLOC_P(loop_info,heap);
+
+/* Structure to hold needed information for each basic block. */
+typedef struct funcpart_basic_block_data_def
+{
+  /* Section id which is attached to each section. */
+  int section_id;
+
+  /* The list of loops which begin at this basic-block. */
+  VEC(loop_info,heap) *loops_info_list;
+
+  /* The size in bytes of the basic-block. */
+  unsigned HOST_WIDE_INT bb_size;
+} funcpart_basic_block_data;
+
+/* The current size (in elements) of the following dynamic array. */
+static unsigned HOST_WIDE_INT fbb_data_size = 0;
+
+/* The array which holds needed information for basic blocks. */
+static funcpart_basic_block_data *fbb_data;
+
+/* Information regarding an insn. */
+struct insn_aux
+{
+  /* The insn. */
+  rtx insn;
+
+  /* The estimated size in bytes of the insn. */
+  unsigned HOST_WIDE_INT size;
+};
+
+/* A vector of insns in the function which holds their size (in bytes). */
+static struct insn_aux *insns_aux;
+
+/* A variable to hold the overhead introduced by splitting a function
+   into sections.  This overhead consists of the extra branch instructions
+   between sections, which might affect code size. */
+static unsigned HOST_WIDE_INT estimate_section_overhead = 0;
+
+/* Estimate of the maximum size (in bytes) of one section. */
+static unsigned HOST_WIDE_INT estimate_max_section_size = 0;
+
+/* Given a basic-block with index BB_INDEX, validate that the fbb_data
+   array has a corresponding element for it. */
+
+static void
+validate_fbb_data_element (unsigned HOST_WIDE_INT bb_index)
+{
+  unsigned HOST_WIDE_INT prev_size = fbb_data_size;
+  unsigned HOST_WIDE_INT i;
+
+  if (bb_index < fbb_data_size)
+    return;
+
+  /* Resize the array, if necessary. */
+  fbb_data_size *= 2;
+  fbb_data = (funcpart_basic_block_data *)
+    xrealloc (fbb_data, (fbb_data_size * sizeof (funcpart_basic_block_data)));
+  for (i = prev_size; i < fbb_data_size; i++)
     {
-      if (probably_never_executed_bb_p (bb))
-	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
-      else
-	BB_SET_PARTITION (bb, BB_HOT_PARTITION);
+      fbb_data[i].section_id = -1;
+      fbb_data[i].loops_info_list = NULL;
+      fbb_data[i].bb_size = 0;
     }
+}
 
-  /* Mark every edge that crosses between sections. */
+/* Given basic block BB return its size in bytes. */
+
+static unsigned HOST_WIDE_INT
+estimate_size_of_insns_in_bb (basic_block bb)
+{
+  unsigned HOST_WIDE_INT size = 0;
+  rtx insn;
+
+  /* Loop through all of the basic-block's insns. */
+  FOR_BB_INSNS (bb, insn)
+    if (INSN_P (insn))
+      {
+	/* Retrieve the size of the instruction. */
+	if (insns_aux [INSN_UID (insn)].insn)
+	  size += insns_aux [INSN_UID (insn)].size;
+	else
+	  size += get_attr_min_length (insn);
+      }
+
+#ifdef ENABLE_CHECKING
+  /* If we already calculated the size of this basic-block we can use it
+     for checking. */
+  if (fbb_data[bb->index].bb_size != 0)
+    gcc_assert (fbb_data[bb->index].bb_size == size);
+#endif
+
+  return size;
+}
+
+/* Split basic-block BB such that the first partition will not exceed the
+   maximum size of FIRST_PARTITION_SIZE bytes.  Update the actual size
+   of the first partition in FIRST_PARTITION_ACTUAL_SIZE.  Return the
+   new basic-block if it can be created.  Else return NULL.
 */
+
+static basic_block
+split_bb (basic_block bb, unsigned HOST_WIDE_INT first_partition_size,
+	  unsigned HOST_WIDE_INT *first_partition_actual_size)
+{
+  rtx insn;
+  unsigned HOST_WIDE_INT size = 0;
+  unsigned HOST_WIDE_INT prev_size = 0;
+  edge e;
+  rtx prev_insn = NULL;
+
+  /* Loop through all of the basic-block's insns. */
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (INSN_P (insn))
+	{
+	  prev_size = size;
+
+	  /* Retrieve the size of the instruction. */
+	  if (insns_aux[INSN_UID (insn)].insn)
+	    size += insns_aux[INSN_UID (insn)].size;
+	  else
+	    size += get_attr_min_length (insn);
+
+	  if (size > first_partition_size)
+	    {
+	      if (prev_insn == NULL)
+		return NULL;
+	      break;
+	    }
+	  prev_insn = insn;
+	}
+    }
+
+#ifdef ENABLE_CHECKING
+  gcc_assert (size > first_partition_size);
+#endif
+
+  e = split_block (bb, prev_insn);
+  e->dest->aux = bb->aux;
+  bb->aux = e->dest;
+
+  if (dump_file)
+    fprintf (dump_file,
+	     "\tSplit bb %d; create new bb: %d\n", bb->index, e->dest->index);
+  *first_partition_actual_size = prev_size;
+  return e->dest;
+}
+
+/* Insert a NOTE_INSN_SWITCH_TEXT_SECTIONS note before basic-blocks that
+   are marked as BB_FIRST_AFTER_SECTION_SWITCH. */
 
-  i = 0;
+static void
+size_insert_section_boundary_note (void)
+{
+  basic_block bb;
+  rtx new_note;
+  unsigned number_of_sections = 1;
+  bool first_switch_p = true;
+  bool only_one_section_p = true;
+
+  gcc_assert (cfun && flag_partition_functions_into_sections);
+
   FOR_EACH_BB (bb)
-    FOR_EACH_EDGE (e, ei, bb->succs)
     {
-      if (e->src != ENTRY_BLOCK_PTR
-	  && e->dest != EXIT_BLOCK_PTR
-	  && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
+      if (bb->flags & BB_FIRST_AFTER_SECTION_SWITCH)
+	{
+	  new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
+				       BB_HEAD (bb));
+	  /* ??? This kind of note always lives between basic blocks,
+	     but add_insn_before will set BLOCK_FOR_INSN anyway. */
+	  BLOCK_FOR_INSN (new_note) = NULL;
+	  NOTE_TEXT_SECTION (new_note) = number_of_sections++;
+	  if (bb->prev_bb && (BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb))
+	      && first_switch_p && flag_reorder_blocks_and_partition)
+	    {
+	      crtl->subsections.first_text_section_part_changed =
+		number_of_sections - 1;
+	      first_switch_p = false;
+	      only_one_section_p = false;
+	    }
+	}
+    }
+
+  if (only_one_section_p)
+    crtl->subsections.first_text_section_part_changed = number_of_sections;
+
+  crtl->subsections.number_of_sections = number_of_sections;
+}
+
+/* Start a new section at the beginning of basic-block BB with
+   SECTION_ID. */
+
+static void
+start_new_section (int *section_id, basic_block bb)
+{
+  bb->flags |= BB_FIRST_AFTER_SECTION_SWITCH;
+  *section_id = *section_id + 1;
+  if (dump_file)
+    fprintf (dump_file, ";; Start new section %d\n", *section_id);
+}
+
+/* Return true if the given basic-block BB starts a loop that cannot
+   fully be inserted into the last section and the loop size is less
+   than the maximum section size threshold.  Use LAST_SECTION_SIZE in
+   the calculation. */
+
+static bool
+start_new_section_for_loop (basic_block bb,
+			    unsigned HOST_WIDE_INT last_section_size)
+{
+  struct loop_info_def *cur_loop;
+  unsigned i;
+
+  if (fbb_data[bb->index].loops_info_list == NULL)
+    return false;
+
+  if (last_section_size == 0)
+    return false;
+
+  /* The loops are sorted in loops_info_list according to the loop size
+     (in bytes), where the loop with the smallest layout appears first.
+   */
+  for (i = 0;
+       VEC_iterate (loop_info, fbb_data[bb->index].loops_info_list, i,
+		    cur_loop);
+       i++)
+    {
+      if ((last_section_size + cur_loop->size >
+	   (unsigned) estimate_max_section_size)
+	  && cur_loop->size <= (unsigned) estimate_max_section_size)
	{
-	  e->flags |= EDGE_CROSSING;
-	  if (i == *max_idx)
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "\n\tbb %d starts a loop (size " HOST_WIDE_INT_PRINT_DEC
+		     "B); "
+		     "recommend starting a new section to not break it\n",
+		     bb->index, cur_loop->size);
+
+	  return true;
+	}
+    }
+  return false;
+}
+
+/* Create sections for the current function.
+
+   The partitioning is done by traversing the basic-blocks according to
+   their order in the code layout.  When a new basic-block is encountered
+   a decision is made whether to add it to the last section, split it,
+   or add it to a new section.
+
+   Here is a short description of the decision process:
+
+   Start a new section if one of the following conditions holds:
+
+   a) adding the basic-block to the last section would cause the last
+      section to exceed the max section size, and the block's size is
+      less than the max section size.
+   b) a loop starts at this basic-block, the loop cannot fully be
+      inserted into the last section, and its size is less than the
+      max section size.
+   c) the basic-block's hotness property is different from its
+      previous basic-block's hotness property.
+   d) there are machine-specific reasons (e.g., the basic-block starts
+      a sequence that should reside in a single section, and its size
+      is less than the section size threshold).
+
+   If the above conditions do not hold, split the basic-block if adding
+   it to the last section would cause the last section to exceed the max
+   section size.  Otherwise add it to the last section. */
+
+static void
+create_sections (void)
+{
+  basic_block bb, new_bb;
+  unsigned HOST_WIDE_INT last_section_size = 0;
+  int current_section_id = 0;
+  unsigned HOST_WIDE_INT first_partition_actual_size;
+
+  /* Mark which section each basic block belongs in. */
+  if (dump_file)
+    fprintf (dump_file,
+	     "\n\n--- partition functions into sections --- (%dB)\n\n"
+	     ";; section overhead size %dB\n" ";; max section size %dB\n\n",
+	     flag_partition_functions_into_sections,
+	     (int) estimate_section_overhead,
+	     (int) estimate_max_section_size);
+
+  FOR_EACH_BB (bb)
+    {
+      unsigned HOST_WIDE_INT bb_size;
+      bool start_new_section_for_loop_p = false;
+      bool start_new_section_due_to_hotness_prop_p = false;
+      bool start_new_section_md_p = false;
+
+      /* Validate that the fbb_data array has a corresponding element for
+	 this bb, so we can access it directly. */
+      validate_fbb_data_element (bb->index);
+
+      bb_size = estimate_size_of_insns_in_bb (bb);
+      if (dump_file)
+	fprintf (dump_file,
+		 ";; Trying to add bb %d (" HOST_WIDE_INT_PRINT_DEC
+		 "B) to section %d", bb->index, bb_size, current_section_id);
+
+      if (bb->il.rtl->skip)
+	{
+	  /* The skip field is used to mark if this bb should be added
+	     to the current section. */
+	  last_section_size += bb_size;
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "\n;; Basic-block %d should be a part of section %d.\n"
+		     ";; current section size: " HOST_WIDE_INT_PRINT_DEC
+		     "B\n", bb->index, current_section_id, last_section_size);
+
+	  /* Update section id. */
+	  fbb_data[bb->index].section_id = current_section_id;
+	  continue;
+	}
+
+      /* Check if a loop starts in this bb and we should start a new
+	 section for it.
 */
+      start_new_section_for_loop_p =
+	start_new_section_for_loop (bb, last_section_size);
+
+      /* Check if the hotness property has been changed. */
+      start_new_section_due_to_hotness_prop_p =
+	((bb->prev_bb != NULL) && (last_section_size != 0)
+	 && BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb)
+	 && flag_reorder_blocks_and_partition);
+
+      if (targetm.bb_partitioning.start_new_section != 0)
+	start_new_section_md_p =
+	  targetm.bb_partitioning.start_new_section (bb->index, bb_size,
+						     estimate_max_section_size,
+						     last_section_size);
+
+      if (((last_section_size + bb_size) >
+	   estimate_max_section_size) || start_new_section_for_loop_p
+	  || start_new_section_due_to_hotness_prop_p || start_new_section_md_p)
+	{
+	  /* Do not add the basic-block to the last section.  This is due
+	     to one of the following possible reasons:
+	     1) the addition of the basic-block to the last section will
+	     cause the last section to exceed the max section size.
+	     2) the basic-block starts a loop that cannot fully be
+	     inserted into the last section without exceeding the max
+	     section size, and its size is less than the max section size.
+	     3) the hotness property of this basic-block is different
+	     from its previous basic-block's.
+	     4) there are machine-specific reasons. */
+	  if (dump_file)
+	    {
+	      if (start_new_section_for_loop_p)
+		fprintf (dump_file,
+			 " ...FAIL\n\tbb %d begins a loop that cannot fully"
+			 " be inserted into the last section\n", bb->index);
+	      else if (start_new_section_due_to_hotness_prop_p)
+		fprintf (dump_file,
+			 " ...FAIL\n\tbb %d hotness property is different"
+			 " from its previous basic-block\n", bb->index);
+	      else if (((last_section_size + bb_size) >
+			estimate_max_section_size))
+		fprintf (dump_file,
+			 " ...FAIL\n\tSize of section ("
+			 HOST_WIDE_INT_PRINT_DEC
+			 "B) + bb %d (" HOST_WIDE_INT_PRINT_DEC
+			 "B) exceeds threshold \n", last_section_size,
+			 bb->index, bb_size);
+	    }
+
+	  /* Start a new section if one of the following conditions
+	     is fulfilled:
+	     1) the last section is not empty and the bb size is less than
+	     the section size threshold.
+	     2) the basic-block starts a loop whose size is less than
+	     the section size threshold.
+	     3) the hotness property of this basic-block is different from
+	     the previous one's.
+	     4) there are machine-specific reasons. */
+	  if ((last_section_size != 0
+	       && bb_size <= estimate_max_section_size)
+	      || start_new_section_for_loop_p
+	      || start_new_section_due_to_hotness_prop_p
+	      || start_new_section_md_p)
+	    {
+	      if (dump_file)
+		fprintf (dump_file,
+			 ";; Close section %d (" HOST_WIDE_INT_PRINT_DEC
+			 "B)\n", current_section_id, last_section_size);
+	      start_new_section (&current_section_id, bb);
+	      last_section_size = 0;
+	      bb = bb->prev_bb;
+	      continue;
+	    }
+	  if (bb->il.rtl->skip)
+	    {
+	      /* The basic-block should be skipped. */
+	      last_section_size += bb_size;
+	      if (dump_file)
+		fprintf (dump_file, "\n\tbb %d should be skipped. "
+			 "Add it to section %d (" HOST_WIDE_INT_PRINT_DEC
+			 "B)\n",
+			 bb->index, current_section_id, last_section_size);
+	      /* Update section id. */
+	      fbb_data[bb->index].section_id = current_section_id;
+	      continue;
+	    }
+
+	  /* Split the basic-block.  Try to insert its first partition
+	     into the last section such that the section size will not exceed
+	     the section size threshold. */
+	  new_bb =
+	    split_bb (bb,
+		      estimate_max_section_size - last_section_size,
+		      &first_partition_actual_size);
+
+	  if (new_bb != NULL)
+	    {
+	      /* Success.  The new basic-block was created and was added
+		 to the last section.  Close this section and open a
+		 new one.
 */
+	      if (dump_file)
+		{
+		  fprintf (dump_file,
+			   "\tNew edge: (idx %d , size "
+			   HOST_WIDE_INT_PRINT_DEC
+			   "B) -> BB (idx %d size " HOST_WIDE_INT_PRINT_DEC
+			   "B)\n", bb->index, first_partition_actual_size,
+			   new_bb->index,
+			   bb_size - first_partition_actual_size);
+
+		  fprintf (dump_file,
+			   "\tAdd (idx %d; size " HOST_WIDE_INT_PRINT_DEC
+			   "B) to section %d\n", bb->index,
+			   first_partition_actual_size, current_section_id);
+
+		  fprintf (dump_file,
+			   ";; Close section %d (" HOST_WIDE_INT_PRINT_DEC
+			   "B)\n", current_section_id,
+			   last_section_size + first_partition_actual_size);
+		}
+	      /* Update section id. */
+	      fbb_data[bb->index].section_id = current_section_id;
+	      /* Start a new section. */
+	      start_new_section (&current_section_id, new_bb);
+	      last_section_size = 0;
+	    }
+	  else
 	    {
-	      *max_idx *= 2;
-	      *crossing_edges = XRESIZEVEC (edge, *crossing_edges, *max_idx);
+	      /* Failed to create the new basic-block.  Close this section
+		 with the current bb inside and open a new one. */
+	      last_section_size += bb_size;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "\n;; Failed to split bb.  Close section %d ("
+			 HOST_WIDE_INT_PRINT_DEC "B)\n", current_section_id,
+			 last_section_size);
+	      if (bb->next_bb)
+		start_new_section (&current_section_id, bb->next_bb);
+	      last_section_size = 0;
+	      continue;
 	    }
-	  (*crossing_edges)[i++] = e;
 	}
       else
-	e->flags &= ~EDGE_CROSSING;
+	{
+	  /* The basic-block can be fully added to the last section. */
+	  last_section_size += bb_size;
+	  if (dump_file)
+	    fprintf (dump_file,
+		     " ...OK\n\tSection new size: " HOST_WIDE_INT_PRINT_DEC
+		     "B\n", last_section_size);
+	  /* Update section id. */
+	  fbb_data[bb->index].section_id = current_section_id;
+	}
     }
-  *n_crossing_edges = i;
+  if (dump_file)
+    fprintf (dump_file, "\n\n");
 }
 
-/* If any destination of a crossing edge does not have a label, add label;
-   Convert any fall-through crossing edges (for blocks that do not contain
-   a jump) to unconditional jumps. */
+/* This is a callback function, called from the mark_crossing_edges
+   function.  Return true if E is an edge which crosses between
+   different sections. */
+static bool
+size_crossing_edge_p (edge e)
+{
+  return (e->src != ENTRY_BLOCK_PTR
+	  && e->dest != EXIT_BLOCK_PTR
+	  && (fbb_data[(e->src)->index].section_id != -1)
+	  && (fbb_data[(e->dest)->index].section_id != -1)
+	  && (fbb_data[(e->src)->index].section_id !=
+	      fbb_data[(e->dest)->index].section_id));
+}
 
 static void
-add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
+add_labels_and_missing_jumps (VEC (edge, heap) *crossing_edges)
 {
   int i;
   basic_block src;
@@ -1275,49 +1803,47 @@ add_labels_and_missing_jumps (edge *cros
   rtx label;
   rtx barrier;
   rtx new_jump;
+  edge e;
 
-  for (i=0; i < n_crossing_edges; i++)
+  for (i = 0; VEC_iterate (edge, crossing_edges, i, e); i++)
     {
-      if (crossing_edges[i])
-	{
-	  src = crossing_edges[i]->src;
-	  dest = crossing_edges[i]->dest;
+      src = e->src;
+      dest = e->dest;
 
-	  /* Make sure dest has a label. */
+      /* Make sure dest has a label. */
 
-	  if (dest && (dest != EXIT_BLOCK_PTR))
-	    {
-	      label = block_label (dest);
+      if (dest && (dest != EXIT_BLOCK_PTR))
+	{
+	  label = block_label (dest);
 
-	      /* Make sure source block ends with a jump.  If the
-		 source block does not end with a jump it might end
-		 with a call_insn; this case will be handled in
-		 fix_up_fall_thru_edges function. */
+	  /* Make sure source block ends with a jump.  If the
+	     source block does not end with a jump it might end
+	     with a call_insn; this case will be handled in
+	     fix_up_fall_thru_edges function.
*/ - if (src && (src != ENTRY_BLOCK_PTR)) + if (src && (src != ENTRY_BLOCK_PTR)) + { + if (!JUMP_P (BB_END (src)) && !block_ends_with_call_p (src)) + /* bb just falls through. */ { - if (!JUMP_P (BB_END (src)) && !block_ends_with_call_p (src)) - /* bb just falls through. */ - { - /* make sure there's only one successor */ - gcc_assert (single_succ_p (src)); + /* make sure there's only one successor */ + gcc_assert (single_succ_p (src)); - /* Find label in dest block. */ - label = block_label (dest); + /* Find label in dest block. */ + label = block_label (dest); - new_jump = emit_jump_insn_after (gen_jump (label), - BB_END (src)); - barrier = emit_barrier_after (new_jump); - JUMP_LABEL (new_jump) = label; - LABEL_NUSES (label) += 1; - src->il.rtl->footer = unlink_insn_chain (barrier, barrier); - /* Mark edge as non-fallthru. */ - crossing_edges[i]->flags &= ~EDGE_FALLTHRU; - } /* end: 'if (GET_CODE ... ' */ - } /* end: 'if (src && src->index...' */ - } /* end: 'if (dest && dest->index...' */ - } /* end: 'if (crossing_edges[i]...' */ - } /* end for loop */ + new_jump = emit_jump_insn_after (gen_jump (label), + BB_END (src)); + barrier = emit_barrier_after (new_jump); + JUMP_LABEL (new_jump) = label; + LABEL_NUSES (label) += 1; + src->il.rtl->footer = unlink_insn_chain (barrier, barrier); + /* Mark edge as non-fallthru. */ + e->flags &= ~EDGE_FALLTHRU; + }/* end: 'if (GET_CODE ... ' */ + }/* end: 'if (src && src->index...' */ + }/* end: 'if (dest && dest->index...' */ + }/* end for loop */ } /* Find any bb's where the fall-through edge is a crossing edge (note that @@ -1436,8 +1962,8 @@ fix_up_fall_thru_edges (void) } } } - if (cond_jump_crosses || !invert_worked) + { /* This is the case where both edges out of the basic block are crossing edges. Here we will fix up the @@ -1821,13 +2347,12 @@ add_reg_crossing_jump_notes (void) indirect jumps. */ static void -fix_edges_for_rarely_executed_code (edge *crossing_edges, - int n_crossing_edges) +fix_edges_for_rarely_executed_code (VEC (edge, heap) *crossing_edges) { /* Make sure the source of any crossing edge ends in a jump and the destination of any crossing edge has a label. */ - add_labels_and_missing_jumps (crossing_edges, n_crossing_edges); + add_labels_and_missing_jumps (crossing_edges); /* Convert all crossing fall_thru edges to non-crossing fall thrus to unconditional jumps (that jump to the original fall @@ -1957,7 +2482,10 @@ insert_section_boundary_note (void) rtx new_note; int first_partition = 0; - if (flag_reorder_blocks_and_partition) + /* If -fpartition-functions-into-sections flag is set then + NOTE_INSN_SWITCH_TEXT_SECTIONS note will be inserted later. */ + if (flag_reorder_blocks_and_partition + && (flag_partition_functions_into_sections == 0)) FOR_EACH_BB (bb) { if (!first_partition) @@ -2115,6 +2643,51 @@ struct rtl_opt_pass pass_duplicate_compu } }; +/* Partition basic-blocks into sections. The criterion for partitioning + is determined by the callback functions in CALLBACKS. 
 */
+static void
+partition_basic_blocks (struct partition_callbacks *callbacks)
+{
+  VEC (edge, heap) *crossing_edges = NULL;
+
+  (*callbacks->init) ();
+  (*callbacks->partition) ();
+  crossing_edges = mark_crossing_edges (callbacks);
+
+  if (VEC_length (edge, crossing_edges) > 0)
+    fix_edges_for_rarely_executed_code (crossing_edges);
+
+  VEC_free (edge, heap, crossing_edges);
+  (*callbacks->finalize) ();
+  if ((*callbacks->insert_section_boundary_note))
+    (*callbacks->insert_section_boundary_note) ();
+}
+
+
+/* This is a callback function to initialize the basic-blocks partitioning
+   into hot and cold sections, called from the partition_basic_blocks
+   function. */
+static void
+partition_hot_cold_init (void)
+{
+  basic_block cur_bb;
+
+  cfg_layout_initialize (0);
+
+  FOR_EACH_BB (cur_bb)
+    if (cur_bb->index >= NUM_FIXED_BLOCKS
+	&& cur_bb->next_bb->index >= NUM_FIXED_BLOCKS)
+      cur_bb->aux = cur_bb->next_bb;
+}
+
+/* This is a callback function to finalize the basic-blocks partitioning
+   into hot and cold sections, called from the partition_basic_blocks
+   function. */
+static void
+partition_hot_cold_finalize (void)
+{
+  cfg_layout_finalize ();
+}
 
 /* This function is the main 'entrance' for the optimization
    that partitions hot and cold basic blocks into separate sections of the
@@ -2174,38 +2747,401 @@ struct rtl_opt_pass pass_duplicate_compu
    (through registers) requires that this optimization
    be performed before register allocation. */
 
-static void
-partition_hot_cold_basic_blocks (void)
+static unsigned int
+rest_of_handle_partition_blocks_hot_cold (void)
 {
-  basic_block cur_bb;
-  edge *crossing_edges;
-  int n_crossing_edges;
-  int max_edges = 2 * last_basic_block;
+  struct partition_callbacks callbacks;
 
   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
-    return;
+    return 0;
+
+  /* Setup callbacks for the partitioning. */
+  callbacks.init = partition_hot_cold_init;
+  callbacks.finalize = partition_hot_cold_finalize;
+  callbacks.crossing_edge_p = hot_cold_crossing_edge_p;
+  callbacks.insert_section_boundary_note = 0;
+  callbacks.partition = mark_hot_cold_blocks;
+
+  partition_basic_blocks (&callbacks);
+  return 0;
+}
+
+/* Find the basic blocks that are rarely executed and need to be moved to
+   a separate section of the .o file (to cut down on paging and improve
+   cache locality). */
+static void
+mark_hot_cold_blocks (void)
+{
+  basic_block bb;
+
+  /* Mark which partition (hot/cold) each basic block belongs in. */
+
+  FOR_EACH_BB (bb)
+    {
+      if (probably_never_executed_bb_p (bb))
+	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
+      else
+	BB_SET_PARTITION (bb, BB_HOT_PARTITION);
+    }
+}
+
+
+/* For each loop, record in LOOPS_AUX the first and last basic-block
+   that appear in its layout. */
+static void
+calculate_loop_boundary (struct loop_info_def *loops_aux)
+{
+  struct loop *loop;
+  basic_block bb;
+
+  if (!loops_aux)
+    return;
+
+  FOR_EACH_BB (bb)
+    for (loop = bb->loop_father; loop_outer (loop); loop = loop_outer (loop))
+      {
+	loops_aux[loop->num].last = bb;
+	if (!loops_aux[loop->num].first)
+	  loops_aux[loop->num].first = bb;
+      }
+}
 
-  crossing_edges = XCNEWVEC (edge, max_edges);
+/* Return true if loop_info A has a smaller size than loop_info B, in
+   order to give them an ordering. */
+static bool
+loop_size_is_less (const loop_info a, const loop_info b)
+{
+  if (a->size < b->size)
+    return true;
+  return false;
+}
 
-  cfg_layout_initialize (0);
+/* Cache the size (in bytes) of the instructions in the function
+   to avoid re-calculation.
 */
+static void
+record_insns_size_estimation (void)
+{
+  basic_block bb;
+  rtx insn;
 
-  FOR_EACH_BB (cur_bb)
-    if (cur_bb->index >= NUM_FIXED_BLOCKS
-	&& cur_bb->next_bb->index >= NUM_FIXED_BLOCKS)
-      cur_bb->aux = cur_bb->next_bb;
+  FOR_EACH_BB (bb)
+    {
+      unsigned HOST_WIDE_INT size;
+      unsigned HOST_WIDE_INT bb_size = 0;
+
+      /* Loop through all of the basic-block's insns. */
+      FOR_BB_INSNS (bb, insn)
+	if (INSN_P (insn) || NOTE_P (insn))
+	  {
+	    if (targetm.bb_partitioning.estimate_instruction_size != 0)
+	      size = targetm.bb_partitioning.estimate_instruction_size (insn);
+	    else
+	      size = get_attr_min_length (insn);
+
+	    if (NOTE_P (insn))
+	      continue;
+	    bb_size += size;
+	    /* Cache the insn's size for later re-use. */
+	    insns_aux [INSN_UID (insn)].insn = insn;
+	    insns_aux [INSN_UID (insn)].size = size;
+	  }
+      /* Store the size of the basic-block for later validation. */
+      validate_fbb_data_element (bb->index);
+      fbb_data[bb->index].bb_size = bb_size;
+    }
+}
+
+/* Calculate the size (in bytes) of each loop.
+   This information will be used later in the code partitioning
+   phase. */
+static void
+record_loops_boundaries (void)
+{
+  unsigned i;
+  basic_block bb;
+  loop_iterator li;
+  struct loop *loop;
+  unsigned HOST_WIDE_INT total_loop_size;
+  struct loop_info_def *cur_loop;
+  struct loop_info_def *loops_aux = NULL;
+  unsigned int place;
+
+  loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_RECORDED_EXITS);
+
+  /* Calculate insns size in bytes. */
+  record_insns_size_estimation ();
+
+  if (number_of_loops () <= 1)
+    {
+      loop_optimizer_finalize ();
+      return;	/* There are no loops. */
+    }
+
+  loops_aux = (struct loop_info_def *) xcalloc (number_of_loops (),
+						sizeof (struct
+							loop_info_def));
+  /* Get the start and end insns of each loop. */
+  calculate_loop_boundary (loops_aux);
+
+  /* We assume that if two loops are disjoint they cannot interleave,
+     and if they are not disjoint one is completely contained in the
+     other.  This assumption helps us avoid the case of interleaved
+     loop intervals (which is somewhat unlikely to happen in practice).
+     TODO: calculate all the loops' sizes in one pass. */
+  FOR_EACH_LOOP (li, loop, 0)
+    {
+      rtx insn;
+      rtx start, end;
+
+      if (!loop)
+	continue;
+
+      /* Calculate the size of the loop. */
+      total_loop_size = 0;
+      start = BB_HEAD (loops_aux[loop->num].first);
+      end = BB_END (loops_aux[loop->num].last);
+
+      for (insn = start; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
+	{
+	  if (INSN_P (insn))
+	    {
+	      /* Retrieve the size of the instruction. */
+	      if (insns_aux[INSN_UID (insn)].insn)
+		total_loop_size += insns_aux[INSN_UID (insn)].size;
+	      else
+		total_loop_size += get_attr_min_length (insn);
+	    }
+	}
+      /* Create the loop_info variable, used to hold information about
+	 the loop. */
+      cur_loop =
+	(struct loop_info_def *) xcalloc (1, sizeof (struct loop_info_def));
+
+      /* Store the information. */
+      cur_loop->size = total_loop_size;
+      cur_loop->index = loop->num;
+
+      validate_fbb_data_element (loops_aux[loop->num].first->index);
+      /* Insert the loop information in loop->first basic-block. */
+      place =
+	VEC_lower_bound (loop_info,
+			 fbb_data[loops_aux[loop->num].first->index].
+			 loops_info_list, cur_loop, loop_size_is_less);
+      VEC_safe_insert (loop_info, heap,
+		       fbb_data[loops_aux[loop->num].first->index].
+		       loops_info_list, place, cur_loop);
+    }
 
-  find_rarely_executed_basic_blocks_and_crossing_edges (&crossing_edges,
-							&n_crossing_edges,
-							&max_edges);
+  /* Dump the information that was collected.
 */
+  if (dump_file)
+    FOR_EACH_BB (bb)
+      {
+	validate_fbb_data_element (bb->index);
+
+	if (fbb_data[bb->index].loops_info_list == NULL)
+	  continue;
+
+	for (i = 0;
+	     VEC_iterate (loop_info, fbb_data[bb->index].loops_info_list, i,
+			  cur_loop);
+	     i++)
+	  {
+	    fprintf (dump_file,
+		     "\n;; bb %d is the first basic-block in the loop "
+		     HOST_WIDE_INT_PRINT_DEC " layout (total loop size "
+		     HOST_WIDE_INT_PRINT_DEC "B)", bb->index, cur_loop->index,
+		     cur_loop->size);
+	  }
+      }
+  free (loops_aux);
+  loop_optimizer_finalize ();
+}
 
-  if (n_crossing_edges > 0)
-    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);
+static void
+free_fbb_data (void)
+{
+  unsigned HOST_WIDE_INT i, j;
+  struct loop_info_def *cur_loop;
 
-  free (crossing_edges);
+  for (i = 0; i < fbb_data_size; i++)
+    {
+      if (fbb_data[i].loops_info_list != NULL)
+	{
+	  for (j = 0;
+	       VEC_iterate (loop_info, fbb_data[i].loops_info_list, j,
+			    cur_loop);
+	       j++)
+	    free (cur_loop);
+
+	  VEC_free (loop_info, heap, fbb_data[i].loops_info_list);
+	}
+    }
+  free (fbb_data);
+}
+
+/* Given RTX_FIRST, the first instruction we start from, check that
+   there are no unexpected insns outside basic-blocks that could
+   affect the size of the sections, e.g. unexpected read-only code. */
+static void
+check_unexpected_insns (rtx rtx_first)
+{
+  rtx tmp_rtx;
+  VEC (rtx, heap) *jump_tables_list;
+  enum bb_state { NOT_IN_BB, IN_ONE_BB, IN_MULTIPLE_BB };
+  int max_uid = get_max_uid ();
+  basic_block *start =
+    (basic_block *) xcalloc (max_uid, sizeof (basic_block));
+  basic_block *end = (basic_block *) xcalloc (max_uid, sizeof (basic_block));
+  enum bb_state *in_bb_p =
+    (enum bb_state *) xcalloc (max_uid, sizeof (enum bb_state));
+  basic_block bb;
+  rtx insn;
+
+  jump_tables_list = VEC_alloc (rtx, heap, 1);
+
+  if (rtx_first == 0)
+    return;
+
+  FOR_EACH_BB_REVERSE (bb)
+    {
+      rtx x, table, label;
+
+      start[INSN_UID (BB_HEAD (bb))] = bb;
+      end[INSN_UID (BB_END (bb))] = bb;
+      for (x = BB_HEAD (bb); x != NULL_RTX; x = NEXT_INSN (x))
+	{
+	  enum bb_state state = IN_MULTIPLE_BB;
+
+	  if (in_bb_p[INSN_UID (x)] == NOT_IN_BB)
+	    state = IN_ONE_BB;
+	  in_bb_p[INSN_UID (x)] = state;
+
+	  if (INSN_P (x) && tablejump_p (x, &label, &table))
+	    {
+	      /* Record the information regarding the jump table instruction
+		 which we expect to appear outside of a basic-block. */
+	      VEC_safe_push (rtx, heap, jump_tables_list, table);
+	      VEC_safe_push (rtx, heap, jump_tables_list, label);
+	    }
+
+	  if (x == BB_END (bb))
+	    break;
+	}
+    }
+  for (tmp_rtx = rtx_first; NULL != tmp_rtx; tmp_rtx = NEXT_INSN (tmp_rtx))
+    {
+      if (in_bb_p[INSN_UID (tmp_rtx)] == NOT_IN_BB
+	  && !NOTE_P (tmp_rtx) && !BARRIER_P (tmp_rtx))
+	{
+	  int i;
+	  bool found_p = false;
+	  rtx label;
+
+	  for (i = 0; VEC_iterate (rtx, jump_tables_list, i, insn); i++)
+	    {
+	      /* We expect that the table itself would appear outside
+		 of a basic-block. */
+	      if (rtx_equal_p (tmp_rtx, insn))
+		{
+		  found_p = true;
+		  break;
+		}
+	    }
+	  if (!found_p)
+	    error ("Unexpected insns outside basic-blocks;"
+		   " -fpartition-functions-into-sections may be affected.");
+	  else
+	    {
+	      /* Validate that the table and the code that reads the table
+		 will appear in the same section.
+              if (LABEL_P (tmp_rtx))
+                {
+                  if (tablejump_p (prev_active_insn (tmp_rtx), &label, NULL))
+                    {
+                      if (!rtx_equal_p (tmp_rtx, label))
+                        {
+                          error ("unexpected insns outside basic-blocks;"
+                                 " -fpartition-functions-into-sections"
+                                 " may be affected");
+                          goto cleanup;
+                        }
+                    }
+                  else
+                    {
+                      error ("unexpected insns outside basic-blocks;"
+                             " -fpartition-functions-into-sections"
+                             " may be affected");
+                      goto cleanup;
+                    }
+                }
+            }
+        }
+    }
+
+ cleanup:
+  free (start);
+  free (end);
+  free (in_bb_p);
+  VEC_free (rtx, heap, jump_tables_list);
+}
+
+/* This is a callback function that initializes the partitioning of
+   basic-blocks into sections of maximum size; called from the
+   partition_basic_blocks function.  */
+static void
+partition_size_init (void)
+{
+  basic_block cur_bb;
+  unsigned HOST_WIDE_INT i;
+
+  /* Over-allocate, leaving room for basic-blocks that are created
+     later, while partitioning.  */
+  fbb_data_size = 10 * last_basic_block;
+
+  /* Allocate the initial hunk of the fbb_data.  */
+  fbb_data =
+    (funcpart_basic_block_data *) xcalloc (fbb_data_size,
+                                           sizeof (funcpart_basic_block_data));
+  for (i = 0; i < fbb_data_size; i++)
+    fbb_data[i].section_id = -1;
+
+  insns_aux =
+    (struct insn_aux *) xcalloc (get_max_uid (), sizeof (struct insn_aux));
+
+  check_unexpected_insns (get_insns ());
+
+  /* Update the maximum section size (in bytes).  */
+  estimate_max_section_size =
+    flag_partition_functions_into_sections - estimate_section_overhead;
+
+  /* Initialize structures for layout changes.  */
+  cfg_layout_initialize (0);
+
+  /* Record the start and size of each loop in the fbb_data array.  */
+  record_loops_boundaries ();
+
+  FOR_EACH_BB (cur_bb)
+    {
+      cur_bb->flags &= ~BB_FIRST_AFTER_SECTION_SWITCH;
+
+      if (cur_bb->next_bb != EXIT_BLOCK_PTR)
+        cur_bb->aux = cur_bb->next_bb;
+    }
+}
+
+/* This is a callback function that finalizes the partitioning of
+   basic-blocks into sections of maximum size; called from the
+   partition_basic_blocks function.  */
+static void
+partition_size_finalize (void)
+{
+  /* Free data.  */
+  free_fbb_data ();
+
   /* Finalize layout changes.  */
   cfg_layout_finalize ();
+
+  free_dominance_info (CDI_DOMINATORS);
+  free (insns_aux);
 }
-
+
 static bool
 gate_handle_reorder_blocks (void)
 {
@@ -2269,7 +3205,7 @@ struct rtl_opt_pass pass_reorder_blocks
 };
 
 static bool
-gate_handle_partition_blocks (void)
+gate_handle_partition_blocks_hot_cold (void)
 {
   /* The optimization to partition hot/cold basic blocks into separate
      sections of the .o file does not work well with linkonce or with
@@ -2281,31 +3217,214 @@ gate_handle_partition_blocks (void)
 	  && !user_defined_section_attribute);
 }
 
-/* Partition hot and cold basic blocks.  */
-static unsigned int
-rest_of_handle_partition_blocks (void)
+struct rtl_opt_pass pass_partition_blocks_hot_cold =
+{
+ {
+  RTL_PASS,
+  "bbpart",                             /* name */
+  gate_handle_partition_blocks_hot_cold, /* gate */
+  rest_of_handle_partition_blocks_hot_cold, /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_REORDER_BLOCKS,                    /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func | TODO_verify_rtl_sharing /* todo_flags_finish */
+ }
+};
+
+/* Estimate the overhead (in bytes) of creating a new section.  The
+   unconditional branch instruction that is added between sections,
+   and any extra machine-dependent instructions it entails, must be
+   taken into account.  */
+static void
+get_estimate_section_overhead (void)
+{
+  gcc_assert (flag_partition_functions_into_sections != 0);
+
+  estimate_section_overhead = 0;
+  estimate_max_section_size = 0;
+
+  if (uncond_jump_length == 0)
+    uncond_jump_length = get_uncond_jump_length ();
+
+  if (targetm.bb_partitioning.estimate_section_overhead != 0)
+    {
+      /* The machine-dependent pass could add extra instructions
+         as a result of the new branches.  */
+      estimate_section_overhead =
+        targetm.bb_partitioning.estimate_section_overhead ();
+    }
+  else
+    /* Add the size of the new branch that will be created for each
+       section.  */
+    estimate_section_overhead += uncond_jump_length;
+}
+
+/* Return true if some instruction, by itself, exceeds the available
+   section size.  Otherwise return false.  */
+static bool
+instruction_size_exceeds_threshold (void)
 {
-  partition_hot_cold_basic_blocks ();
+  basic_block bb;
+  rtx insn;
+  unsigned HOST_WIDE_INT size = 0;
+
+  FOR_EACH_BB (bb)
+    {
+      FOR_BB_INSNS (bb, insn)
+        {
+          if (INSN_P (insn) || NOTE_P (insn))
+            {
+              if (targetm.bb_partitioning.estimate_instruction_size != 0)
+                size =
+                  targetm.bb_partitioning.estimate_instruction_size (insn);
+              else if (INSN_P (insn))
+                size = get_attr_min_length (insn);
+              else
+                size = 0;
+
+              if (size > (flag_partition_functions_into_sections
+                          - estimate_section_overhead))
+                {
+                  warning (0, "section size threshold is too small");
+                  return true;
+                }
+            }
+        }
+    }
+  return false;
+}
+
+static bool
+gate_handle_partition_blocks_size (void)
+{
+  if (flag_partition_functions_into_sections == 0
+      || DECL_ONE_ONLY (current_function_decl)
+      || user_defined_section_attribute)
+    return false;
+
+  /* Estimate the overhead of creating a new section, in terms of the
+     extra instructions that are needed to support it.  */
+  get_estimate_section_overhead ();
+
+  /* Make sure there is no instruction whose size exceeds the
+     estimated section size.  */
+  return !instruction_size_exceeds_threshold ();
+}
+
+/* Main entry point for partitioning functions into sections in order
+   to make them fit a small local store.
+
+   The motivation:
+
+   The local store of embedded processors usually imposes a strict
+   threshold on the code size of programs.  Limited memory (and the
+   lack of a hardware i-cache) creates cases where a program's code
+   cannot fit into it.  To overcome this problem, the programmer can
+   partition the code into multiple pieces and manually swap them in
+   and out of the local store.  To simplify this task, an overlay
+   technique can be used, which handles the memory management tasks
+   automatically and relieves the programmer from managing the local
+   store manually.
+
+   Preparing the code for the overlay technique consists of a static
+   preprocessing stage performed by the compiler and linker.  The
+   compiler first partitions the code into sections, which the linker
+   later constructs into an overlaid program.  At execution time the
+   overlay manager automatically swaps pieces of code in and out of
+   the local store.
+
+   The ability to place each function in a different section already
+   exists in GCC (-ffunction-sections).  However, the granularity of
+   a single function is not sufficient when a single function's body
+   cannot fully fit in the local store.  This new pass overcomes that
+   problem by breaking up large functions.  It can also break a
+   function into fixed-size sections that could be used to implement
+   a software i-cache scheme, as an alternative to the overlay
+   technique.  */
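(For concreteness: the usable payload of each section is the threshold
minus the per-section overhead estimated above.  Taking the SPU hook
defined later in this patch as an example, with branch hints enabled the
overhead works out to (1 + 5) * 4 + 16 = 40 bytes, that is, one branch
plus five worst-case hint/nop slots of 4 bytes each, plus the default
16-byte linker stub.  A hypothetical invocation such as

  spu-gcc -O2 -fpartition-functions-into-sections=2048 -c foo.c

would therefore pack basic-blocks into sections holding at most
2048 - 40 = 2008 bytes of code.  The 2048 value and the spu-gcc driver
name are illustrative only.)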
+static unsigned int
+rest_of_handle_partition_blocks_size (void)
+{
+  struct partition_callbacks callbacks;
+
+  if (n_basic_blocks < 1)
+    return 0;
+
+  /* Set up the callbacks for the partitioning.  */
+  callbacks.init = partition_size_init;
+  callbacks.finalize = partition_size_finalize;
+  callbacks.crossing_edge_p = size_crossing_edge_p;
+  callbacks.insert_section_boundary_note = size_insert_section_boundary_note;
+  callbacks.partition = create_sections;
+
+  partition_basic_blocks (&callbacks);
   return 0;
 }
 
-struct rtl_opt_pass pass_partition_blocks =
+struct rtl_opt_pass pass_partition_blocks_size =
 {
- {
-  RTL_PASS,
-  "bbpart",                             /* name */
-  gate_handle_partition_blocks,         /* gate */
-  rest_of_handle_partition_blocks,      /* execute */
-  NULL,                                 /* sub */
-  NULL,                                 /* next */
-  0,                                    /* static_pass_number */
-  TV_REORDER_BLOCKS,                    /* tv_id */
-  0,                                    /* properties_required */
-  0,                                    /* properties_provided */
-  0,                                    /* properties_destroyed */
-  0,                                    /* todo_flags_start */
-  TODO_dump_func | TODO_verify_rtl_sharing/* todo_flags_finish */
- }
+ {
+  RTL_PASS,
+  "fpart",                              /* name */
+  gate_handle_partition_blocks_size,    /* gate */
+  rest_of_handle_partition_blocks_size, /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_PARTITION_BLOCKS_SIZE,             /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  TODO_dump_func,                       /* todo_flags_start */
+  TODO_dump_func                        /* todo_flags_finish */
+ }
 };
 
+/* Validate that the size (in bytes) of each section does not exceed
+   the maximum section size defined by the user.  */
+
+void
+check_sections (void)
+{
+  rtx insn;
+  unsigned HOST_WIDE_INT section_size = 0;
+  int number_of_sections = 1;
+
+  if (flag_partition_functions_into_sections == 0)
+    return;
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      if (INSN_P (insn))
+        section_size += get_attr_min_length (insn);
+      else if (NOTE_P (insn)
+               && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+        {
+          number_of_sections++;
+          if (section_size
+              > (unsigned) flag_partition_functions_into_sections)
+            warning (0,
+                     "section %d (" HOST_WIDE_INT_PRINT_DEC
+                     "B) exceeds section threshold %dB",
+                     NOTE_TEXT_SECTION (insn) - 1, section_size,
+                     flag_partition_functions_into_sections);
+          /* Start accounting for the next section.  */
+          section_size = 0;
+        }
+    }
+
+  /* Check the last section.  */
+  if (section_size > (unsigned) flag_partition_functions_into_sections)
+    {
+      warning (0,
+               "section %d (" HOST_WIDE_INT_PRINT_DEC
+               "B) exceeds section threshold %dB",
+               number_of_sections, section_size,
+               flag_partition_functions_into_sections);
+    }
+}
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 141911)
+++ Makefile.in	(working copy)
@@ -3039,7 +3039,8 @@ lists.o: lists.c $(CONFIG_H) $(SYSTEM_H)
 bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(FLAGS_H) $(TIMEVAR_H) output.h $(CFGLAYOUT_H) $(FIBHEAP_H) \
    $(TARGET_H) $(FUNCTION_H) $(TM_P_H) $(OBSTACK_H) $(EXPR_H) $(REGS_H) \
-   $(PARAMS_H) $(TOPLEV_H) tree-pass.h $(DF_H)
+   $(PARAMS_H) $(TOPLEV_H) tree-pass.h $(DF_H) $(CFGLOOP_H) \
+   $(LANGHOOKS_DEF_H) $(HASHTAB_H) vec.h
 tracer.o : tracer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(TREE_H) $(BASIC_BLOCK_H) hard-reg-set.h output.h $(CFGLAYOUT_H) \
    $(FLAGS_H) $(TIMEVAR_H) $(PARAMS_H) $(COVERAGE_H) $(FIBHEAP_H) \
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 141911)
+++ basic-block.h	(working copy)
@@ -265,6 +265,9 @@ struct rtl_bb_info GTY(())
 
   /* This field is used by the bb-reorder and tracer passes.  */
   int visited;
+
+  /* This field is used by the bb-partitioning pass.  */
+  int skip;
 };
 
 struct gimple_bb_info GTY(())
@@ -333,7 +336,11 @@ enum bb_flags
 
   /* Set on blocks that cannot be threaded through.
     Only used in cfgcleanup.c.  */
-  BB_NONTHREADABLE_BLOCK = 1 << 11
+  BB_NONTHREADABLE_BLOCK = 1 << 11,
+
+  /* Set on blocks that are first in a section.  */
+  BB_FIRST_AFTER_SECTION_SWITCH = 1 << 12
 };
 
 /* Dummy flag for convenience in the hot/cold partitioning code.  */
@@ -946,6 +953,7 @@ extern basic_block next_dom_son (enum cd
 unsigned bb_dom_dfs_in (enum cdi_direction, basic_block);
 unsigned bb_dom_dfs_out (enum cdi_direction, basic_block);
 
+extern void check_sections (void);
 extern edge try_redirect_by_replacing_jump (edge, basic_block, bool);
 extern void break_superblocks (void);
 extern void relink_block_chain (bool);
Index: passes.c
===================================================================
--- passes.c	(revision 141911)
+++ passes.c	(working copy)
@@ -756,7 +756,7 @@ init_optimization_passes (void)
       NEXT_PASS (pass_ud_rtl_dce);
       NEXT_PASS (pass_combine);
       NEXT_PASS (pass_if_after_combine);
-      NEXT_PASS (pass_partition_blocks);
+      NEXT_PASS (pass_partition_blocks_hot_cold);
       NEXT_PASS (pass_regmove);
       NEXT_PASS (pass_split_all_insns);
       NEXT_PASS (pass_lower_subreg2);
@@ -802,6 +802,7 @@ init_optimization_passes (void)
       NEXT_PASS (pass_compute_alignments);
       NEXT_PASS (pass_duplicate_computed_gotos);
       NEXT_PASS (pass_variable_tracking);
+      NEXT_PASS (pass_partition_blocks_size);
       NEXT_PASS (pass_free_cfg);
       NEXT_PASS (pass_machine_reorg);
       NEXT_PASS (pass_cleanup_barriers);
@@ -993,6 +994,9 @@ execute_function_todo (void *data)
       else
 	gcc_unreachable ();
     }
+  if ((flags & TODO_check_sections)
+      && flag_partition_functions_into_sections != 0)
+    check_sections ();
 
 #if defined ENABLE_CHECKING
   if (flags & TODO_verify_ssa)
Index: config/spu/spu.opt
===================================================================
--- config/spu/spu.opt	(revision 141911)
+++ config/spu/spu.opt	(working copy)
@@ -43,6 +43,11 @@ mdual-nops=
 Target RejectNegative Joined UInteger Var(spu_dual_nops)
 Insert nops when it might improve performance by allowing dual issue (default)
 
+mstub-size=
+Target RejectNegative Joined UInteger Var(spu_stub_size) Init(16)
+Specify the size of the stub inserted by the linker when an overlay
+manager is used (default 16 bytes)
+
 mstdmain
 Target Report Mask(STD_MAIN)
 Use standard main function as entry for startup
Index: config/spu/spu.c
===================================================================
--- config/spu/spu.c	(revision 141911)
+++ config/spu/spu.c	(working copy)
@@ -57,6 +57,7 @@
 #include "sbitmap.h"
 #include "timevar.h"
 #include "df.h"
+#include "vec.h"
 
 /* Builtin types, data and prototypes. */
 struct spu_builtin_range
@@ -87,6 +88,18 @@ static struct spu_builtin_range spu_buil
 
 /* Target specific attribute specifications.  */
 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
+
+/* The following are the types of critical sections.  Critical
+   sections are instruction sequences that must reside in a single
+   section.  */
+enum critical_section_type
+{
+  /* Indicates a jump-table.  */
+  JUMP_TABLE = 0,
+  /* DMA sequence (wrch 16-21).  */
+  CRITICAL_DMA_SEQ,
+  /* Event mask read/write/ack sequence (rd/wrch 0-2).  */
+  CRITICAL_EVENT_MASK
+};
 
 /* Prototypes and external defs.  */
 static void spu_init_builtins (void);
 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
@@ -147,6 +160,15 @@ static bool spu_vector_alignment_reachab
 static tree spu_builtin_vec_perm (tree, tree *);
 static int spu_sms_res_mii (struct ddg *g);
 static void asm_file_start (void);
+static unsigned HOST_WIDE_INT spu_estimate_section_overhead (void);
+static unsigned HOST_WIDE_INT spu_estimate_instruction_size (rtx);
+static bool begin_critical_section (rtx, enum critical_section_type *);
+static bool end_critical_section (rtx, enum critical_section_type *);
+static unsigned HOST_WIDE_INT record_jump_table (rtx);
+static bool spu_start_new_section (int, unsigned HOST_WIDE_INT,
+				   unsigned HOST_WIDE_INT,
+				   unsigned HOST_WIDE_INT);
+
 extern const char *reg_names[];
 rtx spu_compare_op0, spu_compare_op1;
@@ -364,6 +386,15 @@ const struct attribute_spec spu_attribut
 #undef TARGET_ASM_FILE_START
 #define TARGET_ASM_FILE_START asm_file_start
 
+#undef TARGET_ESTIMATE_SECTION_OVERHEAD
+#define TARGET_ESTIMATE_SECTION_OVERHEAD spu_estimate_section_overhead
+
+#undef TARGET_ESTIMATE_INSTRUCTION_SIZE
+#define TARGET_ESTIMATE_INSTRUCTION_SIZE spu_estimate_instruction_size
+
+#undef TARGET_START_NEW_SECTION
+#define TARGET_START_NEW_SECTION spu_start_new_section
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 void
@@ -403,6 +434,9 @@ spu_override_options (void)
   if (spu_fixed_range_string)
     fix_range (spu_fixed_range_string);
 
+  if (flag_partition_functions_into_sections)
+    fix_range ("75-79");
+
   /* Determine processor architectural level.  */
   if (spu_arch_string)
     {
@@ -2023,6 +2057,565 @@ spu_return_addr (int count, rtx frame AT
      it's inefficient.  */
   return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
 }
+
+/* Return the size of the stub that the linker will insert for INSN,
+   if INSN is a branch or call to another section.  */
+static unsigned HOST_WIDE_INT
+get_stub_size (rtx insn)
+{
+  unsigned HOST_WIDE_INT stub_size = 0;
+
+  if (!JUMP_P (insn) && !CALL_P (insn))
+    return 0;
+
+  if (JUMP_P (insn))
+    {
+      rtx set, src;
+
+      stub_size = spu_stub_size;
+
+      /* If the branch instruction and the branch target are in the
+         same basic-block they will probably be in the same section
+         as well.  Do not add the stub size in this case.  */
+      if (!tablejump_p (insn, NULL, NULL)
+          && JUMP_LABEL (insn)
+          && BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn))
+        stub_size = 0;
+
+      /* For indirect branches, including jump-tables (though not their
+         case labels), and for indirect function calls, the stubs are
+         created in the non-overlay local store, so their stub size is
+         not added to the section calculation.  */
+
+      /* A return statement.  */
+      if (GET_CODE (PATTERN (insn)) == RETURN)
+        stub_size = 0;
+
+      /* A jump table.  */
+      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
+          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+        stub_size = 0;
+
+      set = single_set (insn);
+      if (set)
+        {
+          src = SET_SRC (set);
+          if (GET_CODE (SET_DEST (set)) != PC)
+            abort ();
+
+          if (GET_CODE (src) == IF_THEN_ELSE)
+            {
+              rtx lab = 0;
+
+              if (GET_CODE (XEXP (src, 1)) != PC)
+                lab = XEXP (src, 1);
+              else if (GET_CODE (XEXP (src, 2)) != PC)
+                lab = XEXP (src, 2);
+
+              /* An indirect conditional branch also goes through the
+                 non-overlay stub area.  */
+              if (lab && REG_P (lab))
+                stub_size = 0;
+            }
+        }
+    }
+  else if (CALL_P (insn))
+    {
+      rtx call;
+
+      stub_size = spu_stub_size;
+      /* All of our call patterns are in a PARALLEL and the CALL is
+         the first pattern in the PARALLEL.  */
+      if (GET_CODE (PATTERN (insn)) != PARALLEL)
+        abort ();
+      call = XVECEXP (PATTERN (insn), 0, 0);
+      if (GET_CODE (call) == SET)
+        call = SET_SRC (call);
+      if (GET_CODE (call) != CALL)
+        abort ();
+      /* An indirect call; its stub lives in the non-overlay area.  */
+      if (REG_P (XEXP (XEXP (call, 0), 0)))
+        stub_size = 0;
+    }
+
+  return stub_size;
+}
+
+/* Hbrp instructions are inserted when the compiler sees many loads
+   and stores in a row.  The worst case is 1 for every 8 load/store
+   instructions.  */
+static int mem_ref_counter = 0;
+
+/* The compiler inserts 1 hbrp instruction for every 16 instructions
+   in the worst case.  */
+static int safe_hints_counter = 0;
+
+/* Extra nops are added for 2 reasons: dual issue, and to make room
+   for hints.  For dual issue the worst case is 1 nop for every 3
+   instructions.  */
+static int spu_dual_nops_counter = 0;
+
+/* Return the size of instruction INSN in bytes.  Take into account
+   the size of the extra machine-dependent instructions that can be
+   added as a result of INSN (like branch hints for branch
+   instructions).  Called when partitioning a function into
+   sections.  */
+static unsigned HOST_WIDE_INT
+spu_estimate_instruction_size (rtx insn)
+{
+  unsigned HOST_WIDE_INT size;
+  rtx table;
+  /* For dual issue the worst case is 1 nop for every 3
+     instructions.  */
+  int dual_nops = spu_dual_nops > 0 ? 3 : 0;
+
+  if (NOTE_P (insn))
+    {
+      /* Reset the counters if needed.  */
+      if (NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG)
+        {
+          mem_ref_counter = 0;
+          safe_hints_counter = 0;
+          spu_dual_nops_counter = 0;
+        }
+      if (NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
+        mem_ref_counter = 0;
+      return 0;
+    }
+
+  size = get_attr_min_length (insn);
+  if (tablejump_p (insn, NULL, &table))
+    {
+      rtvec vec;
+
+      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
+        vec = XVEC (PATTERN (table), 0);
+      else
+        vec = XVEC (PATTERN (table), 1);
+
+      /* Add the size of the table to the insn size.  */
+      size += (GET_NUM_ELEM (vec) * 4);
+    }
+  safe_hints_counter += 1;
+  spu_dual_nops_counter += 1;
+
+  /* The compiler inserts 1 hbrp instruction for every 16 instructions
+     in the worst case.  */
+  if (TARGET_SAFE_HINTS && safe_hints_counter >= 16)
+    {
+      size += 4;
+      safe_hints_counter = 0;
+      spu_dual_nops_counter += 1;
+    }
+
+  size += get_stub_size (insn);
+
+  if (!TARGET_BRANCH_HINTS || optimize == 0)
+    return size;
+
+  /* Account for the nops and branch hint that are added for each
+     branch.  For hints the worst case is 4 nops per branch.  */
+  if ((JUMP_P (insn) || CALL_P (insn))
+      && !tablejump_p (insn, NULL, NULL))
+    {
+      /* 4 nops + 1 hint.  */
+      size += 20;
+      safe_hints_counter += 5;
+      spu_dual_nops_counter += 5;
+    }
+
+  if (mem_read_insn_p (insn) || mem_write_insn_p (insn))
+    {
+      mem_ref_counter++;
+      if (mem_ref_counter > 8)
+        {
+          mem_ref_counter = 0;
+          /* Add the hbrp instruction.  */
+          size += 4;
+          safe_hints_counter += 1;
+          spu_dual_nops_counter += 1;
+        }
+    }
+  else
+    mem_ref_counter = 0;
+
+  /* For dual issue the worst case is 1 nop for every 3
+     instructions.  */
+  if (dual_nops && spu_dual_nops_counter >= dual_nops)
+    {
+      size += 4;
+      spu_dual_nops_counter = 0;
+      safe_hints_counter += 1;
+    }
+  return size;
+}
+
+/* Estimate the size in bytes of the extra instructions that will be
+   generated for each section as a result of creating a new branch
+   for that section.  Called when partitioning a function into
+   sections.  */
+static unsigned HOST_WIDE_INT
+spu_estimate_section_overhead (void)
+{
+  int extra_branch_insns = 0;
+
+  if (TARGET_BRANCH_HINTS && optimize != 0)
+    {
+      /* Account for the nops and branch hint that are added for each
+         branch.  For hints the worst case is 4 nops per branch.  */
+      extra_branch_insns = 5;
+    }
+  /* Take into account the new branch instruction, the extra
+     instructions that may be created for it, and the size of the
+     stub that the linker will add.  */
+  return ((1 + extra_branch_insns) * 4 + spu_stub_size);
+}
+
+/* An auxiliary structure for recognizing critical sections.  */
+typedef struct critical_sections
+{
+  /* The type of the critical section.  */
+  enum critical_section_type type;
+
+  /* The size of the critical section.  */
+  unsigned HOST_WIDE_INT size;
+
+  /* True if the end of the critical section was recognized.  */
+  bool closed_p;
+} critical_sections_t;
+
+DEF_VEC_O(critical_sections_t);
+DEF_VEC_ALLOC_O(critical_sections_t,heap);
+
+/* Return true if INSN begins a critical section.
+   Record its type in TYPE.  */
+static bool
+begin_critical_section (rtx insn, enum critical_section_type *type)
+{
+  rtx body, set;
+
+  if (!INSN_P (insn))
+    return false;
+
+  if (GET_CODE (PATTERN (insn)) == PARALLEL)
+    return false;
+
+  set = single_set (insn);
+
+  if (set != 0)
+    {
+      /* We are looking for this type of instruction:
+
+         (insn (set (reg)
+                    (unspec_volatile [(const_int)])) {spu_rdch_noclobber})
+      */
+      body = SET_SRC (set);
+      if (GET_CODE (body) == UNSPEC_VOLATILE
+          && XINT (body, 1) == UNSPEC_RDCH)
+        {
+          rtx tmp = XVECEXP (body, 0, 0);
+
+          /* Event sequence start (rdch 0).  */
+          if (tmp && GET_CODE (tmp) == CONST_INT && INTVAL (tmp) == 0)
+            {
+              *type = CRITICAL_EVENT_MASK;
+              return true;
+            }
+        }
+
+      /* Loading a label's address marks the start of a jump-table
+         sequence.  */
+      if (GET_CODE (body) == LABEL_REF)
+        {
+          *type = JUMP_TABLE;
+          return true;
+        }
+    }
+  else
+    {
+      /* We are looking for this type of instruction:
+
+         (insn (unspec_volatile [(const_int) (subreg)])
+               {spu_wrch_noclobber})
+      */
+      body = PATTERN (insn);
+
+      if (GET_CODE (body) == UNSPEC_VOLATILE
+          && XINT (body, 1) == UNSPEC_WRCH)
+        {
+          rtx tmp = XVECEXP (body, 0, 0);
+
+          /* MFC_LSA 16.  */
+          if (tmp && GET_CODE (tmp) == CONST_INT && INTVAL (tmp) == 16)
+            {
+              *type = CRITICAL_DMA_SEQ;
+              return true;
+            }
+        }
+    }
+  return false;
+}
+
+/* Return true if INSN ends a critical section.
+   Record its type in TYPE.  */
+static bool
+end_critical_section (rtx insn, enum critical_section_type *type)
+{
+  rtx body;
+
+  if (!INSN_P (insn))
+    return false;
+
+  body = PATTERN (insn);
+
+  /* We are looking for the following type of instruction:
+
+     (insn (parallel [
+               (unspec_volatile [(const_int) (subreg)])
+               (clobber (mem))]) {spu_wrch_clobber})
+  */
+  if (GET_CODE (body) == PARALLEL)
+    {
+      rtx tmp = XVECEXP (body, 0, 0);
+
+      if (TARGET_SAFE_DMA
+          && GET_CODE (tmp) == UNSPEC_VOLATILE
+          && XINT (tmp, 1) == UNSPEC_WRCH)
+        {
+          rtx const_val = XVECEXP (tmp, 0, 0);
+
+          /* MFC_Cmd 21.  */
+          if (GET_CODE (const_val) == CONST_INT
+              && INTVAL (const_val) == 21)
+            {
+              *type = CRITICAL_DMA_SEQ;
+              return true;
+            }
+        }
+    }
+  else
+    {
+      /* We are looking for one of the following instructions:
+
+         (insn (unspec_volatile [(const_int 2) (subreg)])
+               {spu_wrch_noclobber})
+         or
+         (insn (unspec_volatile [(const_int 21) (subreg)])
+               {spu_wrch_noclobber})
+      */
+      if (GET_CODE (body) == UNSPEC_VOLATILE
+          && XINT (body, 1) == UNSPEC_WRCH)
+        {
+          rtx tmp = XVECEXP (body, 0, 0);
+
+          /* SPU_WrEventAck 2.  */
+          if (tmp && GET_CODE (tmp) == CONST_INT && INTVAL (tmp) == 2)
+            {
+              *type = CRITICAL_EVENT_MASK;
+              return true;
+            }
+          /* MFC_Cmd 21.  */
+          else if (!TARGET_SAFE_DMA
+                   && tmp
+                   && GET_CODE (tmp) == CONST_INT
+                   && INTVAL (tmp) == 21)
+            {
+              *type = CRITICAL_DMA_SEQ;
+              return true;
+            }
+        }
+    }
+  return false;
+}
+
+/* Record the critical section that is part of a jump-table.  The
+   critical section should contain the jump-table and the code that
+   reads the table, to make sure they will appear in the same
+   section.  We are looking for the following sequence of
+   instructions:
+
+     ila  reg, label
+     .
+     .
+     .
+     jump_insn (use label)
+     label:
+     table
+     .
+     .
+     .
+     end of table
+*/
+static unsigned HOST_WIDE_INT
+record_jump_table (rtx ila_insn)
+{
+  rtx insn, set;
+  basic_block bb;
+  rtx label, label1;
+  bool *bb_aux;
+  bool found_jump_table = false;
+  unsigned HOST_WIDE_INT size = 0;
+
+  bb_aux = (bool *) xcalloc (n_basic_blocks, sizeof (bool));
+
+  set = single_set (ila_insn);
+
+  gcc_assert (set);
+
+  label = XEXP (SET_SRC (set), 0);
+
+  /* Reset counters.  */
+  mem_ref_counter = 0;
+  safe_hints_counter = 0;
+  spu_dual_nops_counter = 0;
+
+  for (insn = ila_insn; insn != NULL; insn = NEXT_INSN (insn))
+    {
+      /* Notes and barriers may lie outside any basic-block, so check
+         this before looking at BLOCK_FOR_INSN.  */
+      if (!INSN_P (insn))
+        continue;
+
+      bb = BLOCK_FOR_INSN (insn);
+      if (bb)
+        bb_aux[bb->index] = true;
+
+      size += spu_estimate_instruction_size (insn);
+      if (!tablejump_p (insn, &label1, NULL))
+        continue;
+
+      if (rtx_equal_p (label, label1))
+        {
+          found_jump_table = true;
+          break;
+        }
+    }
+
+  if (found_jump_table)
+    {
+      int i;
+
+      /* Mark the bb's between the jump-table and the code that
+         reads the table, so they reside in the same section.  */
+      for (i = 0; i < n_basic_blocks; i++)
+        if (bb_aux[i])
+          BASIC_BLOCK (i)->il.rtl->skip = 1;
+    }
+  else
+    size = 0;
+
+  free (bb_aux);
+
+  return size;
+}
+
+/* Given a basic-block BB that contains an instruction ending a
+   critical section, close an open critical section of type TYPE
+   recorded in START_SEQUENCE that appears in the same basic-block.  */
+static void
+close_critical_sections (VEC (critical_sections_t, heap) *start_sequence,
+                         basic_block bb, enum critical_section_type type)
+{
+  unsigned int i;
+  bool bb_close_critical_section_p = false;
+  critical_sections_t *crit;
+
+  /* Loop through all of the critical sections.  */
+  for (i = 0; VEC_iterate (critical_sections_t, start_sequence, i, crit);
+       i++)
+    {
+      /* First check that this bb closes a critical section of the
+         right type.  */
+      if (crit->type == type && !crit->closed_p)
+        {
+          /* Mark this section as closed.  */
+          crit->closed_p = true;
+
+          bb_close_critical_section_p = true;
+          break;
+        }
+    }
+  if (!bb_close_critical_section_p)
+    warning (0, "unexpected end of critical section in basic-block %d",
+             bb->index);
+}
+
+/* Return true if the basic-block with index BB_INDEX should be put in
+   a new section.
+   The following are all taken into account in the decision:
+
+   BB_SIZE - The size of the basic-block,
+   ESTIMATE_MAX_SECTION_SIZE - The maximum size of a section,
+   LAST_SECTION_SIZE - The size of the last section.  */
+static bool
+spu_start_new_section (int bb_index, unsigned HOST_WIDE_INT bb_size,
+                       unsigned HOST_WIDE_INT estimate_max_section_size,
+                       unsigned HOST_WIDE_INT last_section_size)
+{
+  rtx insn;
+  VEC (critical_sections_t, heap) *start_sequence = NULL;
+  int i;
+  enum critical_section_type type;
+  basic_block bb = BASIC_BLOCK (bb_index);
+  bool start_new_section_p = false;
+  unsigned HOST_WIDE_INT size;
+  critical_sections_t *crit1;
+
+  /* Loop through all of the basic-block's insns.  */
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (tablejump_p (insn, NULL, NULL) && !bb->il.rtl->skip)
+        warning (0, "unexpected jump-table in basic-block %d", bb->index);
+
+      /* Check whether this insn can begin a critical section.  */
+      if (begin_critical_section (insn, &type))
+        {
+          critical_sections_t crit;
+
+          size = bb_size;
+
+          if (type == JUMP_TABLE)
+            {
+              size = record_jump_table (insn);
+              if (size > estimate_max_section_size)
+                warning (0, "jump-table in basic-block %d exceeds the"
+                         " maximum section size", bb->index);
+            }
+
+          crit.size = size;
+          crit.type = type;
+          crit.closed_p = false;
+          /* Record the start of the critical section.  */
+          VEC_safe_push (critical_sections_t, heap, start_sequence, &crit);
+        }
+
+      /* Check whether this insn can end a critical section.  */
+      if (end_critical_section (insn, &type))
+        {
+          /* Close any critical section that this instruction may
+             end.  */
+          close_critical_sections (start_sequence, bb, type);
+        }
+    }
+
+  if (dump_file && VEC_length (critical_sections_t, start_sequence))
+    fprintf (dump_file, "\n;; bb %d starts a critical section", bb->index);
+
+  for (i = 0; VEC_iterate (critical_sections_t, start_sequence, i, crit1);
+       i++)
+    {
+      if (!crit1->closed_p && crit1->type != JUMP_TABLE)
+        error ("unexpected start of critical section in basic-block %d",
+               bb->index);
+      /* Start a new section if the critical section does not fit in
+         the current one but would fit in an empty section.  */
+      if (crit1->size + last_section_size > estimate_max_section_size
+          && crit1->size < estimate_max_section_size)
+        start_new_section_p = true;
+    }
+  /* Never start a new section while the current one is still empty.  */
+  if (last_section_size == 0)
+    start_new_section_p = false;
+
+  VEC_free (critical_sections_t, heap, start_sequence);
+
+  return start_new_section_p;
+}
+
 /* Given VAL, generate a constant appropriate for MODE.
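(As a reading aid, here is a minimal sketch of what the start_new_section
hook could look like for a hypothetical target with no critical-section
constraints; it is not part of this patch.  A new section is started
whenever appending the basic-block would overflow the current, non-empty
section:

  /* Hypothetical target hook: start a new section whenever appending
     the basic-block would overflow the current section.  */
  static bool
  simple_start_new_section (int bb_index ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT bb_size,
                            unsigned HOST_WIDE_INT estimate_max_section_size,
                            unsigned HOST_WIDE_INT last_section_size)
  {
    return (last_section_size != 0
            && last_section_size + bb_size > estimate_max_section_size);
  }

The SPU implementation above is more involved only because critical
sequences, namely DMA commands, event-mask accesses, and jump-tables,
must not be split across sections.)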
Index: config/spu/spu.h
===================================================================
--- config/spu/spu.h	(revision 141911)
+++ config/spu/spu.h	(working copy)
@@ -633,3 +633,7 @@ targetm.resolve_overloaded_builtin = spu
 
 extern GTY(()) rtx spu_compare_op0;
 extern GTY(()) rtx spu_compare_op1;
+
+#define HAS_LONG_COND_BRANCH 1
+#define HAS_LONG_UNCOND_BRANCH 1
Index: cfgrtl.c
===================================================================
--- cfgrtl.c	(revision 141911)
+++ cfgrtl.c	(working copy)
@@ -1742,7 +1742,9 @@ rtl_verify_flow_info_1 (void)
 
   for (insn = bb->il.rtl->header; insn; insn = NEXT_INSN (insn))
     if (!BARRIER_P (insn)
-	&& BLOCK_FOR_INSN (insn) != NULL)
+	&& BLOCK_FOR_INSN (insn) != NULL
+	&& !(NOTE_P (insn)
+	     && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION))
       {
 	error ("insn %d in header of bb %d has non-NULL basic block",
 	       INSN_UID (insn), bb->index);
@@ -1914,7 +1916,8 @@ rtl_verify_flow_info_1 (void)
 	  if (x == BB_END (bb))
 	    break;
 
-	  if (control_flow_insn_p (x))
+	  if (control_flow_insn_p (x)
+	      && !block_ends_with_call_p (bb))
 	    {
 	      error ("in basic block %d:", bb->index);
 	      fatal_insn ("flow control insn inside a basic block", x);
@@ -1967,7 +1970,9 @@ rtl_verify_flow_info (void)
 
       /* And that the code outside of basic blocks has NULL bb field.  */
       if (!BARRIER_P (x)
-	  && BLOCK_FOR_INSN (x) != NULL)
+	  && BLOCK_FOR_INSN (x) != NULL
+	  && !(NOTE_P (x)
+	       && NOTE_KIND (x) == NOTE_INSN_VAR_LOCATION))
 	{
 	  error ("insn %d outside of basic blocks has non-NULL bb field",
 		 INSN_UID (x));
@@ -2057,7 +2062,9 @@ rtl_verify_flow_info (void)
 
   /* Check that the code before the first basic block has NULL
      bb field.  */
   if (!BARRIER_P (x)
-      && BLOCK_FOR_INSN (x) != NULL)
+      && BLOCK_FOR_INSN (x) != NULL
+      && !(NOTE_P (x)
+	   && NOTE_KIND (x) == NOTE_INSN_VAR_LOCATION))
     {
       error ("insn %d outside of basic blocks has non-NULL bb field",
 	     INSN_UID (x));
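(To inspect the result after compiling with the new flag, the emitted
sections can be listed with the standard binutils tools, for example:

  objdump -h foo.o

A function partitioned by the pass should show up as several text
sections in the section table; the exact section names depend on how
text_part_section names them, so none are listed here.)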