This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] New pass to partition single function into multiple (resubmission)
- From: Diego Novillo <dnovillo at google dot com>
- To: Revital1 Eres <ERES at il dot ibm dot com>
- Cc: Steven Bosscher <stevenb dot gcc at gmail dot com>, gcc-patches at gcc dot gnu dot org, Jan Hubicha <jh at suse dot cz>
- Date: Wed, 15 Jul 2009 16:10:23 -0400
- Subject: Re: [PATCH] New pass to partition single function into multiple (resubmission)
- References: <OF92C32BEE.9C242D7E-ONC22575AD.003A0103-C22575AD.003A28EE@il.ibm.com> <571f6b510905050509oe83494dyc5d06a45175666bf@mail.gmail.com> <OFF8D89692.8B5697D5-ONC22575C9.00270131-C22575C9.004BCF7C@il.ibm.com>
On Tue, Jun 2, 2009 at 09:48, Revital1 Eres<ERES@il.ibm.com> wrote:
> ===================================================================
> --- toplev.c (revision 148013)
> +++ toplev.c (working copy)
> @@ -1989,6 +1989,8 @@ process_options (void)
> warning (0, "-fdata-sections not supported for this target");
> flag_data_sections = 0;
> }
> + if (flag_partition_functions_into_sections)
> + sorry ("-ffunction-sections not supported for this target ");
> }
>
> if (flag_function_sections && profile_flag)
> @@ -1996,7 +1998,10 @@ process_options (void)
> warning (0, "-ffunction-sections disabled; it makes profiling impossible");
> flag_function_sections = 0;
> }
> -
> + if (flag_partition_functions_into_sections && profile_flag)
> + sorry ("-fpartition-functions-into-sections flag "
> + "does not work with profiling");
> +
> #ifndef HAVE_prefetch
> if (flag_prefetch_loop_arrays)
> {
> @@ -2019,6 +2024,12 @@ process_options (void)
> flag_prefetch_loop_arrays = 0;
> }
>
> + /* TODO: Support debugging for -fpartition-functions-into-sections. */
> + if (flag_partition_functions_into_sections
> + && ((write_symbols != NO_DEBUG) || flag_unwind_tables))
> + sorry ("-fpartition-functions-into-sections flag "
> + "does not work with debugging or unwind tables");
> +
> /* The presence of IEEE signaling NaNs, implies all math can trap. */
> if (flag_signaling_nans)
> flag_trapping_math = 1;
> Index: opts.c
> ===================================================================
> --- opts.c (revision 148013)
> +++ opts.c (working copy)
> @@ -1041,6 +1041,10 @@ decode_options (unsigned int argc, const
> flag_reorder_blocks = 1;
> }
>
> + if (flag_exceptions && flag_partition_functions_into_sections)
> + sorry ("-fpartition-functions-into-sections does not "
> + "work with exceptions");
> +
> /* If user requested unwind info, then turn off the partitioning
> optimization. */
>
> @@ -1052,6 +1056,11 @@ decode_options (unsigned int argc, const
> flag_reorder_blocks = 1;
> }
>
> + if (flag_unwind_tables && ! targetm.unwind_tables_default
> + && flag_partition_functions_into_sections)
> + sorry ("-fpartition-functions-into-sections "
> + "does not support unwind info");
> +
> /* If the target requested unwind info, then turn off the partitioning
> optimization with a different message. Likewise, if the target does not
> support named sections. */
> @@ -1066,6 +1075,20 @@ decode_options (unsigned int argc, const
> flag_reorder_blocks = 1;
> }
>
> + if (flag_partition_functions_into_sections
> + && (!targetm.have_named_sections
> + || (flag_unwind_tables && targetm.unwind_tables_default)))
> + sorry ("-fpartition-functions-into-sections does not work "
> + "on this architecture");
> + if (flag_partition_functions_into_sections
> + && (!targetm.have_named_sections))
> + sorry ("-fpartition-functions-into-sections does not work on this "
> + "architecture");
> + if (flag_partition_functions_into_sections
> + && (!HAS_LONG_COND_BRANCH || !HAS_LONG_UNCOND_BRANCH))
> + sorry ("-fpartition-functions-into-sections does not work on this "
> + "architecture");
> +
> /* Pipelining of outer loops is only possible when general pipelining
> capabilities are requested. */
> if (!flag_sel_sched_pipelining)
> @@ -1900,6 +1923,16 @@ common_handle_option (size_t scode, cons
> flag_sched_stalled_insns = -1;
> break;
>
> + case OPT_fpartition_functions_into_sections_:
> + flag_partition_functions_into_sections = value;
> + if (flag_partition_functions_into_sections != 0)
> + {
> + flag_function_sections = 1;
> + /* TODO: Use asm directives to handle multiple sections. */
> + flag_dwarf2_cfi_asm = 0;
> + }
> + break;
> +
> case OPT_fsched_stalled_insns_dep_:
> flag_sched_stalled_insns_dep = value;
> break;
All the limitations flagged with sorry() should be mentioned in
the manual entry for -fpartition-functions-into-sections.
> + if (dsn && !flag_profile_use)
> + {
> + name = (char *) alloca (TREE_STRING_LENGTH (dsn) + 1);
> + memcpy (name, TREE_STRING_POINTER (dsn), TREE_STRING_LENGTH (dsn) + 1);
> + stripped_name = targetm.strip_name_encoding (name);
I don't follow. If function partitioning does not work with
profiling, why check for fprofile-use here? Or does it not work
only when *generating* profiling info?
> +/* Tell assembler to switch to a new section with SECTION_ID. */
> +
> +section *
> +text_part_section (int section_id)
> +{
> + char section_id_str[2 + 2 * HOST_BITS_PER_INT / 4 + 1];
Add a comment here describing this size expression.
> +#define TARGET_BB_PARTITIONING \
> + { \
> + TARGET_ESTIMATE_SECTION_OVERHEAD, \
> + TARGET_ESTIMATE_INSTRUCTION_SIZE, \
> + TARGET_START_NEW_SECTION, \
> + TARGET_LEGAL_BREAKPOINT \
> + }
These should be documented in the internals manual. Together
with the target hooks for bb partitioning.
> +/* This structure holds callback functions which are used when
> + partitioning blocks into sections. */
> +struct partition_callbacks
> +{
> + /* A callback to initial the partition. */
> + void (*init) (void);
> +
> + /* A callback to indicate whether an edge cross sections. */
> + bool (*crossing_edge_p) (edge);
> +
> + /* A callback to mark section boundary. */
> + void (*insert_section_boundary_note) (void);
> +
> + /* A callback to partition blocks into sections. */
> + void (*partition) (void);
> +
> + /* A callback to finalize the partition. */
> + void (*finalize) (void);
> +};
Likewise.
Additionally, if these target hooks are not defined we should not
accept the -fpartition... flag.
> +/* This is a callback function, called from
> + mark_crossing_edges function. Return true if E is an edge which
> + crosses between hot and cold sections. */
> +static bool
Blank line after comment.
> +/* Mark edges that cross between sections with EDGE_CROSSING; store them
> + in a vector and return the vector. Use CALLBACKS in the decision. */
> +static VEC (edge, heap) *
Likewise.
> + FOR_EACH_BB (bb)
> + FOR_EACH_EDGE (e, ei, bb->succs)
> + {
> + if ((*callbacks->crossing_edge_p) (e))
> + {
> + e->flags |= EDGE_CROSSING;
> + VEC_safe_push (edge, heap, crossing_edges, e);
> + }
> + else
> + e->flags &= ~EDGE_CROSSING;
> + }
Would it make sense to just return early if
callbacks->crossing_edge_p is not defined?
> +/* The current size (in elements) of the following dynamic array. */
> +static unsigned HOST_WIDE_INT fbb_data_size = 0;
> +
> +/* The array which holds needed information for basic blocks. */
> +static funcpart_basic_block_data *fbb_data;
I'd rather make fbb_data a VEC() so you won't have to keep track
of its size separately.
> - /* Mark every edge that crosses between sections. */
> +/* Given basic block BB return it's size in bytes. */
s/it's/its/
> +static basic_block
> +split_bb (basic_block bb, unsigned HOST_WIDE_INT first_partition_size,
> + unsigned HOST_WIDE_INT * first_partition_actual_size)
no space after '*'.
> +/* Insert a NOTE_INSN_SWITCH_TEXT_SECTIONS note before basic-blocks that
> + are markred as BB_FIRST_AFTER_SECTION_SWITCH. */
s/markred/marked/
> + if (bb->prev_bb && (BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb))
> + && first_switch_p && flag_reorder_blocks_and_partition)
Align all parts of the predicate vertically.
> +/* Start a new section at the beginning of basic-block BB with
> + SECTION_ID. */
s/basic-block/basic block/. There are a few other instances of
this.
> + /* The loops are sorted in loops_info_list according to the loop size
> + (in bytes); where the loop with the samllest layout appears first.
> + */
End comment on previous line.
> + a) if adding the basic-block to the last section causes the last
> + section to exceed the max section size and it's size is less
> + than max section size.
s/it's/its/
> + if (((last_section_size + bb_size) >
> + estimate_max_section_size) || start_new_section_for_loop_p
> + || start_new_section_due_to_hotness_prop_p || start_new_sction_md_p)
Align predicate vertically.
> + 4) There is a machine-specific reasons. */
s/is/are/
> + /* Start a new section if one of following conditions
> + are fulfilled:
s/are/is/
> +
> + /* Split the basic-block. Try to insert it's first partition
s/it's/its/
> + }
> + /* Upadte section id. */
s/Upadte/Update/
> + /* We assume that if two loops are disjoint they can not interleave
> + and if they are not disjoint one is completely contained in the
> + other. This assumption help us to avoid the case of interleaved
s/helps us to/helps us/
> +/* Given RTX_FIRST which is the first instruction we start from; check
> + that there is no unexpected insns outside basic-blocks that could
> + effect the size of the sections, e.g. unexpected read-only code. */
s/effect/affect/
Leave blank line after the comment.
> + error ("Unexpected insns outside basic-blocks;"
> + " -fpartition-functions-into-sections may be effected.");
s/effected/affected/
> + error ("Unexpected insns outside basic-blocks;"
> + " -fpartition-functions-into-sections "
> + "may be effected.");
Likewise.
> + error ("Unexpected insns outside basic-blocks;"
> + " -fpartition-functions-into-sections may "
> + "be effected.");
Likewise.
> +/* This is a callback function to initialize the basic-blocks partitioning
> + into sections of maximum size, called from partition_basic_blocks
> + function. */
> +static void
Blank line after comment.
> + if (targetm.bb_partitioning.estimate_section_overhead != 0)
> + {
> + /* The machine depndent pass could add extra instructions
s/depndent/dependent/
> + /* Add the size of the new branch that will be created for each
> + sections. */
s/sections/section/
> +static bool
> +gate_handle_partition_blocks_size (void)
> +{
> + if ((flag_partition_functions_into_sections == 0)
> + || DECL_ONE_ONLY (current_function_decl)
> + || user_defined_section_attribute)
> + return 0;
> + return 1;
s/0/false/
s/1/true/
> + Preparing the code for the overlay technique consists of a
> + preprocessing static stage which is done by the compiler and linker.
> + The compiler first partition the code into sections which are later
s/partition/partitions/
> + constructed into an overlaid program by the linker. At execution
> + time the overlay manager automatically swap pieces of code in and
s/swap/swaps/
> + if (section_size > (unsigned) flag_partition_functions_into_sections)
> + {
> + warning (OPT_Wpartition_functions_into_sections,
> + "section %d (" HOST_WIDE_INT_PRINT_DEC
> + "B) exceeds section threshold %dB",
> + NOTE_TEXT_SECTION (insn) - 1, section_size,
> + flag_partition_functions_into_sections);
What happens when the sizes estimated by the compiler are lower
than the actualy size of the runtime image? I presume the linker
or the loader will do a final check?
I have only briefed gone through the SPU-specific portions of the
patch. If an SPU maintainer is OK with those bits, the rest
seems fine to me (with the changes I marked above).
Diego.