This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH] New pass to partition single function into multiple (resubmission)

From: Diego Novillo <dnovillo at google dot com>
To: Revital1 Eres <ERES at il dot ibm dot com>
Cc: Steven Bosscher <stevenb dot gcc at gmail dot com>, gcc-patches at gcc dot gnu dot org, Jan Hubicha <jh at suse dot cz>
Date: Wed, 15 Jul 2009 16:10:23 -0400
Subject: Re: [PATCH] New pass to partition single function into multiple (resubmission)
References: <OF92C32BEE.9C242D7E-ONC22575AD.003A0103-C22575AD.003A28EE@il.ibm.com> <571f6b510905050509oe83494dyc5d06a45175666bf@mail.gmail.com> <OFF8D89692.8B5697D5-ONC22575C9.00270131-C22575C9.004BCF7C@il.ibm.com>
On Tue, Jun 2, 2009 at 09:48, Revital1 Eres<ERES@il.ibm.com> wrote:

> ===================================================================
> --- toplev.c	(revision 148013)
> +++ toplev.c	(working copy)
> @@ -1989,6 +1989,8 @@ process_options (void)
>  	  warning (0, "-fdata-sections not supported for this target");
>  	  flag_data_sections = 0;
>  	}
> +      if (flag_partition_functions_into_sections)
> +        sorry ("-ffunction-sections not supported for this target ");
>      }
>
>    if (flag_function_sections && profile_flag)
> @@ -1996,7 +1998,10 @@ process_options (void)
>        warning (0, "-ffunction-sections disabled; it makes profiling impossible");
>        flag_function_sections = 0;
>      }
> -
> +  if (flag_partition_functions_into_sections && profile_flag)
> +    sorry ("-fpartition-functions-into-sections flag "
> +	   "does not work with profiling");
> +
>  #ifndef HAVE_prefetch
>    if (flag_prefetch_loop_arrays)
>      {
> @@ -2019,6 +2024,12 @@ process_options (void)
>        flag_prefetch_loop_arrays = 0;
>      }
>
> +  /* TODO: Support debugging for -fpartition-functions-into-sections.  */
> +  if (flag_partition_functions_into_sections
> +      && ((write_symbols != NO_DEBUG) || flag_unwind_tables))
> +    sorry ("-fpartition-functions-into-sections flag "
> +           "does not work with debugging or unwind tables");
> +
>    /* The presence of IEEE signaling NaNs, implies all math can trap.  */
>    if (flag_signaling_nans)
>      flag_trapping_math = 1;
> Index: opts.c
> ===================================================================
> --- opts.c	(revision 148013)
> +++ opts.c	(working copy)
> @@ -1041,6 +1041,10 @@ decode_options (unsigned int argc, const
>        flag_reorder_blocks = 1;
>      }
>
> +  if (flag_exceptions && flag_partition_functions_into_sections)
> +    sorry ("-fpartition-functions-into-sections does not "
> +	   "work with exceptions");
> +
>    /* If user requested unwind info, then turn off the partitioning
>       optimization.  */
>
> @@ -1052,6 +1056,11 @@ decode_options (unsigned int argc, const
>        flag_reorder_blocks = 1;
>      }
>
> +  if (flag_unwind_tables && ! targetm.unwind_tables_default
> +      && flag_partition_functions_into_sections)
> +    sorry ("-fpartition-functions-into-sections "
> +           "does not support unwind info");
> +
>    /* If the target requested unwind info, then turn off the partitioning
>       optimization with a different message.  Likewise, if the target does not
>       support named sections.  */
> @@ -1066,6 +1075,20 @@ decode_options (unsigned int argc, const
>        flag_reorder_blocks = 1;
>      }
>
> +  if (flag_partition_functions_into_sections
> +      && (!targetm.have_named_sections
> +          || (flag_unwind_tables && targetm.unwind_tables_default)))
> +    sorry ("-fpartition-functions-into-sections does not work "
> +           "on this architecture");
> +  if (flag_partition_functions_into_sections
> +      && (!targetm.have_named_sections))
> +    sorry ("-fpartition-functions-into-sections does not work on this "
> +           "architecture");
> +  if (flag_partition_functions_into_sections
> +      && (!HAS_LONG_COND_BRANCH || !HAS_LONG_UNCOND_BRANCH))
> +    sorry ("-fpartition-functions-into-sections does not work on this "
> +           "architecture");
> +
>    /* Pipelining of outer loops is only possible when general pipelining
>       capabilities are requested.  */
>    if (!flag_sel_sched_pipelining)
> @@ -1900,6 +1923,16 @@ common_handle_option (size_t scode, cons
>  	flag_sched_stalled_insns = -1;
>        break;
>
> +    case OPT_fpartition_functions_into_sections_:
> +      flag_partition_functions_into_sections = value;
> +      if (flag_partition_functions_into_sections != 0)
> +       {
> +          flag_function_sections = 1;
> +          /* TODO: Use asm directives to handle multiple sections.  */
> +          flag_dwarf2_cfi_asm = 0;
> +        }
> +      break;
> +
>      case OPT_fsched_stalled_insns_dep_:
>        flag_sched_stalled_insns_dep = value;
>        break;

All the limitations flagged with sorry() should be mentioned in
the manual entry for -fpartition-functions-into-sections.

> +  if (dsn && !flag_profile_use)
> +    {
> +      name = (char *) alloca (TREE_STRING_LENGTH (dsn) + 1);
> +      memcpy (name, TREE_STRING_POINTER (dsn), TREE_STRING_LENGTH (dsn) + 1);
> +      stripped_name = targetm.strip_name_encoding (name);

I don't follow.  If function partitioning does not work with
profiling, why check for fprofile-use here?  Or does it not work
only when *generating* profiling info?

> +/* Tell assembler to switch to a new section with SECTION_ID.  */
> +
> +section *
> +text_part_section (int section_id)
> +{
> +  char section_id_str[2 + 2 * HOST_BITS_PER_INT / 4 + 1];

Add a comment here describing this size expression.

> +#define TARGET_BB_PARTITIONING						\
> +  {									\
> +     TARGET_ESTIMATE_SECTION_OVERHEAD,					\
> +     TARGET_ESTIMATE_INSTRUCTION_SIZE,					\
> +     TARGET_START_NEW_SECTION,						\
> +     TARGET_LEGAL_BREAKPOINT   						\
> +  }

These should be documented in the internals manual.  Together
with the target hooks for bb partitioning.

> +/* This structure holds callback functions which are used when
> +   partitioning blocks into sections.  */
> +struct partition_callbacks
> +{
> +  /* A callback to initial the partition.  */
> +  void (*init) (void);
> +
> +  /* A callback to indicate whether an edge cross sections.  */
> +  bool (*crossing_edge_p) (edge);
> +
> +  /* A callback to mark section boundary.  */
> +  void (*insert_section_boundary_note) (void);
> +
> +  /* A callback to partition blocks into sections.  */
> +  void (*partition) (void);
> +
> +  /* A callback to finalize the partition.  */
> +  void (*finalize) (void);
> +};

Likewise.

Additionally, if these target hooks are not defined we should not
accept the -fpartition... flag.

> +/* This is a callback function, called from
> +   mark_crossing_edges function.  Return true if E is an edge which
> +   crosses between hot and cold sections.  */
> +static bool

Blank line after comment.

> +/* Mark edges that cross between sections with EDGE_CROSSING; store them
> +   in a vector and return the vector.  Use CALLBACKS in the decision.  */
> +static VEC (edge, heap) *

Likewise.

> +  FOR_EACH_BB (bb)
> +    FOR_EACH_EDGE (e, ei, bb->succs)
> +      {
> +        if ((*callbacks->crossing_edge_p) (e))
> +	  {
> +	    e->flags |= EDGE_CROSSING;
> +	    VEC_safe_push (edge, heap, crossing_edges, e);
> +	  }
> +        else
> +	  e->flags &= ~EDGE_CROSSING;
> +      }

Would it make sense to just return early if
callbacks->crossing_edge_p is not defined?

> +/* The current size (in elements) of the following dynamic array.  */
> +static unsigned HOST_WIDE_INT fbb_data_size = 0;
> +
> +/* The array which holds needed information for basic blocks.  */
> +static funcpart_basic_block_data *fbb_data;

I'd rather make fbb_data a VEC() so you won't have to keep track
of its size separately.

> -  /* Mark every edge that crosses between sections.  */
> +/* Given basic block BB return it's size in bytes.  */

s/it's/its/

> +static basic_block
> +split_bb (basic_block bb, unsigned HOST_WIDE_INT first_partition_size,
> +	  unsigned HOST_WIDE_INT * first_partition_actual_size)

no space after '*'.

> +/* Insert a NOTE_INSN_SWITCH_TEXT_SECTIONS note before basic-blocks that
> +   are markred as BB_FIRST_AFTER_SECTION_SWITCH.  */

s/markred/marked/

> +	  if (bb->prev_bb && (BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb))
> +	      && first_switch_p && flag_reorder_blocks_and_partition)

Align all parts of the predicate vertically.

> +/* Start a new section at the beginning of basic-block BB with
> +   SECTION_ID.  */

s/basic-block/basic block/.  There are a few other instances of
this.

> +  /* The loops are sorted in loops_info_list according to the loop size
> +     (in bytes); where the loop with the samllest layout appears first.
> +   */

End comment on previous line.

> +   	a) if adding the basic-block to the last section causes the last
> +           section to exceed the max section size and it's size is less
> +           than max section size.

s/it's/its/

> +      if (((last_section_size + bb_size) >
> +	   estimate_max_section_size) || start_new_section_for_loop_p
> +	  || start_new_section_due_to_hotness_prop_p || start_new_sction_md_p)

Align predicate vertically.

> +	     4) There is a machine-specific reasons.  */

s/is/are/

> +	  /* Start a new section if one of following conditions
> +	     are fulfilled:

s/are/is/

> +
> +	  /* Split the basic-block.  Try to insert it's first partition

s/it's/its/

> +		}
> +	      /* Upadte section id.  */

s/Upadte/Update/

> +  /* We assume that if two loops are disjoint they can not interleave
> +     and if they are not disjoint one is completely contained in the
> +     other.  This assumption help us to avoid the case of interleaved

s/helps us to/helps us/

> +/* Given RTX_FIRST which is the first instruction we start from; check
> +   that there is no unexpected insns outside basic-blocks that could
> +   effect the size of the sections, e.g. unexpected read-only code.  */

s/effect/affect/
Leave blank line after the comment.

> +	    error ("Unexpected insns outside basic-blocks;"
> +		   " -fpartition-functions-into-sections may be effected.");

s/effected/affected/

> +			  error ("Unexpected insns outside basic-blocks;"
> +				 " -fpartition-functions-into-sections "
> +                                 "may be effected.");

Likewise.

> +		      error ("Unexpected insns outside basic-blocks;"
> +			     " -fpartition-functions-into-sections may "
> +                             "be effected.");

Likewise.

> +/* This is a callback function to initialize the basic-blocks partitioning
> +   into sections of maximum size, called from partition_basic_blocks
> +   function.  */
> +static void

Blank line after comment.

> +  if (targetm.bb_partitioning.estimate_section_overhead != 0)
> +    {
> +      /* The machine depndent pass could add extra instructions

s/depndent/dependent/

> +  /* Add the size of the new branch that will be created for each
> +     sections.  */

s/sections/section/

> +static bool
> +gate_handle_partition_blocks_size (void)
> +{
> +  if ((flag_partition_functions_into_sections == 0)
> +      || DECL_ONE_ONLY (current_function_decl)
> +      || user_defined_section_attribute)
> +    return 0;
> +  return 1;

s/0/false/
s/1/true/

> +   Preparing the code for the overlay technique consists of a
> +   preprocessing static stage which is done by the compiler and linker.
> +   The compiler first partition the code into sections which are later

s/partition/partitions/

> +   constructed into an overlaid program by the linker.  At execution
> +   time the overlay manager automatically swap pieces of code in and

s/swap/swaps/

> +	  if (section_size > (unsigned) flag_partition_functions_into_sections)
> +	    {
> +	      warning (OPT_Wpartition_functions_into_sections,
> +		       "section %d (" HOST_WIDE_INT_PRINT_DEC
> +		       "B) exceeds section threshold %dB",
> +		       NOTE_TEXT_SECTION (insn) - 1, section_size,
> +		       flag_partition_functions_into_sections);

What happens when the sizes estimated by the compiler are lower
than the actualy size of the runtime image?  I presume the linker
or the loader will do a final check?

I have only briefed gone through the SPU-specific portions of the
patch.  If an SPU maintainer is OK with those bits, the rest
seems fine to me (with the changes I marked above).


Diego.
Follow-Ups:
- Re: [PATCH] New pass to partition single function into multiple (resubmission)
  - From: Richard Guenther
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]