
Re: [PATCH 1/2] New pass to partition single function into multiple sections


On Mon, Aug 18, 2008 at 11:49:16AM +0300, Revital1 Eres wrote:

And now for the nit-picky comments.  Note, I may have used the wrong address
for myself in the To: field of the last reply.

As a global note, you might want to check each variable declared 'int' to see
whether it should be unsigned HOST_WIDE_INT or HOST_WIDE_INT.  For things like
sizes I think we should use HOST_WIDE_INT to prevent weird bugs from happening
when GCC is used to compile really large programs.

Another note: in the FOR_EACH_BB (xx) loops, the { ... } bodies should be
indented by 2 spaces, as per the GNU coding standards.
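I.e., the loop bodies should look roughly like this:

    FOR_EACH_BB (bb)
      {
        /* ... loop body, indented two spaces from the braces ...  */
      }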

When you do the diff, it is useful to reviewers to have the function name in
each hunk; you can get this by using:

	svn diff -x -up --diff-cmd diff files...

> Index: target.h
> ===================================================================
> --- target.h	(revision 138847)
> +++ target.h	(working copy)
> @@ -694,6 +694,37 @@
>       delayed-branch scheduling.  */
>    void (* machine_dependent_reorg) (void);
>  
> +    /* Estimate the number of extra instructions that will be added for each
> +       section.  Called when partitioning a function into sections.  */
> +  int (* est_section_overhead) (void);
> +
> +  /* Return the size of instruction in bytes.  Take into account the
> +     size of extra machine depndent instructions that can be added as
> +     a result of insn. (like branch-hints for branch instructions).
> +     Called when partitioning a function into sections.   */
> +  int (* est_instruction_size) (rtx);
> +
> +  /* Set specific flags; if needed; when partitioning a function into
> +     several sections.  */
> +  void (* part_func_to_sections_flags) (void);
> +
> +  /* True if an instruction (which is the first parameter) is the
> +     begining/end of a critical section.  Called when partitioning a
> +     function into sections.  Define this hook if a sequence of
> +     instructions must reside in the same section.
> +     The second parameter is the type of the critical section.  */
> +  bool (* begin_critical_section) (rtx, int *);
> +  bool (* end_critical_section) (rtx, int *);
> +
> +  /* Record the critical section which contains a jump-table.  The first
> +     two parameters are the jump-instruction and the label of the
> +     jump-table.  The last two parameters are the indices of the
> +     basic-blocks which indicate the boundaries of the critical section.
> +     Called when partitioning a function into sections.  The critical
> +     section should contain the jump-table and the code that reads the
> +     table to make sure they will appear in the same section.  */
> +  void (* record_jump_table) (rtx, rtx, int *, int *);
> +
>    /* Create the __builtin_va_list type.  */
>    tree (* build_builtin_va_list) (void);

Obviously if you follow the suggestions in my previous message, these will all
change.  Depending on the hooks that you create, you should probably consider
making a substructure to hold all of the partitioning hooks.  If you keep the
est_* hooks, I would spell them estimate_ instead of est_, but that is minor.
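For example, the hooks could be grouped along these lines (just a sketch; the
struct and field names are only illustrative, and the exact contents depend on
which hooks survive the earlier comments):

    /* Hooks used when partitioning a single function into sections.  */
    struct function_partitioning
    {
      int (* estimate_section_overhead) (void);
      int (* estimate_instruction_size) (rtx);
      void (* set_flags) (void);
      bool (* begin_critical_section) (rtx, int *);
      bool (* end_critical_section) (rtx, int *);
      void (* record_jump_table) (rtx, rtx, int *, int *);
    } function_partitioning;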
  
> Index: final.c
> ===================================================================
> --- final.c	(revision 138847)
> +++ final.c	(working copy)
> @@ -1819,7 +1819,10 @@
>  #endif
>  	    (*debug_hooks->switch_text_section) ();
>  
> -	  switch_to_section (current_function_section ());
> +          if (flag_partition_functions_into_sections)
> +            switch_to_section (text_part_section (NOTE_TEXT_SECTION (insn)));
> +          else
> +            switch_to_section (current_function_section ());
>  	  break;

This would change if we move the section into the BB structure.

>  	case NOTE_INSN_BASIC_BLOCK:
> @@ -4209,7 +4212,7 @@
>    0,                                    /* properties_required */
>    0,                                    /* properties_provided */
>    0,                                    /* properties_destroyed */
> -  0,                                    /* todo_flags_start */
> +  TODO_check_sections,                  /* todo_flags_start */
>    TODO_ggc_collect                      /* todo_flags_finish */
>   }
>  };

> Index: opts.c
> ===================================================================
> --- opts.c	(revision 138847)
> +++ opts.c	(working copy)
> @@ -1128,6 +1128,22 @@
>        optimization_current_node = optimization_default_node;
>        first_time_p = false;
>      }
> +  if (flag_partition_functions_into_sections
> +      && (!targetm.have_named_sections))
> +    {
> +      warning 
> +        (0, "-fpartition-functions-into-sections does not work on this "
> +         "architecture");
> +      flag_partition_functions_into_sections = 0;
> +    }
> +  if (flag_partition_functions_into_sections 
> +      && (!HAS_LONG_COND_BRANCH || !HAS_LONG_UNCOND_BRANCH))
> +    {
> +      warning
> +        (0, "-fpartition-functions-into-sections does not work on this "
> +         "architecture");
> +      flag_partition_functions_into_sections = 0;
> +    }
>  }
>  
>  #define LEFT_COLUMN	27
> @@ -1902,7 +1918,18 @@
>      case OPT_fsched_stalled_insns_dep_:
>        flag_sched_stalled_insns_dep = value;
>        break;
> -
> +    
> +    case OPT_fpartition_functions_into_sections_:
> +      flag_partition_functions_into_sections = value;
> +      if (flag_partition_functions_into_sections != 0)
> +        {
> +          if (targetm.part_func_to_sections_flags != 0)
> +            targetm.part_func_to_sections_flags ();
> +          flag_function_sections = 1;
> +          /* TODO: Use asm directives to handle multiple sections.  */
> +          flag_dwarf2_cfi_asm = 0;
> +        }
> +      break;
>      case OPT_fstack_limit:
>        /* The real switch is -fno-stack-limit.  */
>        if (value)

Obviously we will want to flesh out the TODO stuff later.

> Index: function.h
> ===================================================================
> --- function.h	(revision 138847)
> +++ function.h	(working copy)
> @@ -236,6 +236,10 @@
>    rtx internal_arg_pointer;
>  };
>  
> +typedef const char *char_p;    /* For DEF_VEC_P.  */
> +DEF_VEC_P(char_p);
> +DEF_VEC_ALLOC_P(char_p, gc);
> +
>  /* Data for function partitioning.  */
>  struct function_subsections GTY(())
>  {
> @@ -252,6 +256,32 @@
>       targetm.asm_out.named_section.  */
>  
>    const char *unlikely_text_section_name;
> +
> +  /* Arrays to hold assembly labels for the text sections, to
> +     be used by debugger functions for determining the size of text
> +     sections.  */
> +
> +  VEC(char_p,gc) *section_start_labels;
> +  VEC(char_p,gc) *section_end_labels;
> +
> + /* String to be used for names of the arbitrary cold text sections
> +    parts, via targetm.asm_out.named_section.  */
> +
> +  const char *unlikely_part_text_section_name;
> +
> +  /* String to be used for names of the arbitrary hot text section
> +     parts, via targetm.asm_out.named_section.  */
> +
> +  const char *part_text_section_name;
> +
> +  /* A variable to hold the number of sections in the function.  */
> +
> +  unsigned int number_of_sections;
> +
> +  /* The first section with different hotness attribute.  */
> +
> +  unsigned int first_text_section_part_changed;
> +
>  };

I think this should probably go into the named_section structure in output.h.

>  /* Datastructures maintained for currently processed function in RTL form.  */
> Index: print-rtl.c
> ===================================================================
> --- print-rtl.c	(revision 138848)
> +++ print-rtl.c	(working copy)
> @@ -312,7 +312,9 @@
>  		{
>  #ifndef GENERATOR_FILE
>  		  basic_block bb = NOTE_BASIC_BLOCK (in_rtx);
> -		  if (bb != 0)
> +                  /* When partitioning into multiple sections
> +                     NOTE_BASIC_BLOCK holds the section id.  */
> +		  if (bb != 0 && !flag_partition_functions_into_sections)
>  		    fprintf (outfile, " [bb %d]", bb->index);
>  #endif
>  		  break;

If you accept my changes for having the section in the BB structure, this would
change to print that section name.

> Index: varasm.c
> ===================================================================
> --- varasm.c	(revision 138847)
> +++ varasm.c	(working copy)
> @@ -71,6 +71,10 @@
>  struct constant_descriptor_rtx;
>  struct rtx_constant_pool;
>  
> +/* If the function is spread on multiple sections; this variable saves
> +   the name of the last seen section.  */
> +const char *last_part_text_section_name;
> +
>  #define n_deferred_constants (crtl->varasm.deferred_constants)
>  
>  /* Number for making the label on the next
> @@ -168,6 +172,8 @@
>     at the cold section.  */
>  bool in_cold_section_p;
>  
> +bool in_part_section_p;
> +
>  /* A linked list of all the unnamed sections.  */
>  static GTY(()) section *unnamed_sections;
>  
> @@ -684,6 +690,110 @@
>      return get_named_section (NULL, UNLIKELY_EXECUTED_TEXT_SECTION_NAME, 0);
>  }
>  
> +/* Initialize the string which will be part of the section name.
> +   It should contain the function name.  UNLIKELY_P is true if the
> +   section is cold, otherwise it is false.  */
> +
> +static void
> +initialize_part_section_name (bool unlikely_p)
> +{
> +  const char *stripped_name;
> +  char *name;
> +  int section_name_len = 0;
> +  char *section_name, *buffer;
> +  tree dsn;
> +
> +  gcc_assert (cfun && current_function_decl);
> +
> +  if (crtl->subsections.unlikely_text_section_name
> +      && crtl->subsections.unlikely_part_text_section_name)
> +    return;
> +
> +  dsn = DECL_SECTION_NAME (current_function_decl);
> +
> +  /* DECL_SECTION_NAME usually holds the function name.  However, if
> +     -fprofile-use flag is used then DECL_SECTION_NAME would hold
> +     .text.hot or .text.unlikely.  The function name is used to
> +     differentiate between sections in different functions which has
> +     the same section id, so we need to get it.  */
> +  if (dsn && !flag_profile_use)
> +    {
> +      name = (char *) alloca (TREE_STRING_LENGTH (dsn) + 1);
> +      memcpy (name, TREE_STRING_POINTER (dsn), TREE_STRING_LENGTH (dsn) + 1);
> +      stripped_name = targetm.strip_name_encoding (name);
> +    }
> +  else
> +    stripped_name = current_function_name ();
> +
> +  if (unlikely_p)
> +    {
> +      buffer = ACONCAT ((stripped_name, "_unlikely", NULL));
> +      section_name_len = strlen (buffer) + strlen (".part") + 1;
> +      section_name = (char *) alloca (section_name_len);
> +      snprintf (section_name, section_name_len, "%s.part", buffer);
> +      crtl->subsections.unlikely_part_text_section_name =
> +	ggc_strdup (section_name);
> +    }
> +  else
> +    {
> +      section_name_len = strlen (stripped_name) + strlen (".part") + 1;
> +      section_name = (char *) alloca (section_name_len);
> +      snprintf (section_name, section_name_len, "%s.part", stripped_name);
> +      crtl->subsections.part_text_section_name = ggc_strdup (section_name);
> +    }
> +}

I think this function would be subsumed by moving the different parts to real
sections in output.h rather than having subsections.

> +/* Tell assembler to switch to a new section with SECTION_ID.  */
> +
> +section *
> +text_part_section (int section_id)
> +{
> +  char section_id_str[4069];

This is a rather unusual size.  I suspect you initially meant to use 4096
instead of 4069, but since the buffer is only used to write out an integer, you
can use a much smaller size:

    char section_id_str[2 + 2*HOST_BITS_PER_INT/4 + 1];

> +  int section_name_len = 0;
> +  char *section_name;
> +  const char *prefix_str;
> +  bool unlikely_p = false;
> +
> +  in_part_section_p = true;
> +  gcc_assert (flag_function_sections && cfun);
> +
> +  if ((first_function_block_is_cold
> +       && (unsigned) section_id <
> +       crtl->subsections.first_text_section_part_changed)
> +      || (!first_function_block_is_cold
> +	  && (unsigned) section_id >=
> +	  crtl->subsections.first_text_section_part_changed))
> +    {
> +      unlikely_p = true;
> +    }
> +
> +  if (!unlikely_p && !crtl->subsections.part_text_section_name)
> +    initialize_part_section_name (false);
> +  if (unlikely_p && !crtl->subsections.unlikely_part_text_section_name)
> +    initialize_part_section_name (true);
> +
> +  snprintf (section_id_str, 4069, "%d", section_id);

Use sizeof (section_id_str) instead of 4069.

> +  if (unlikely_p)
> +    {
> +      prefix_str = crtl->subsections.unlikely_part_text_section_name;
> +      section_name_len = strlen (section_id_str) + strlen (prefix_str) + 1;
> +      section_name = (char *) alloca (section_name_len);
> +      snprintf (section_name, section_name_len, "%s%d",
> +		prefix_str, section_id);
> +    }
> +  else
> +    {
> +      prefix_str = crtl->subsections.part_text_section_name;
> +      section_name_len = strlen (section_id_str) + strlen (prefix_str) + 1;
> +      section_name = (char *) alloca (section_name_len);
> +      snprintf (section_name, section_name_len, "%s%d",
> +		prefix_str, section_id);
> +    }

You might move the last two lines of the then/else branches to after the if,
since they are the same.
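I.e., roughly:

    if (unlikely_p)
      prefix_str = crtl->subsections.unlikely_part_text_section_name;
    else
      prefix_str = crtl->subsections.part_text_section_name;

    section_name_len = strlen (section_id_str) + strlen (prefix_str) + 1;
    section_name = (char *) alloca (section_name_len);
    snprintf (section_name, section_name_len, "%s%d", prefix_str, section_id);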

> +  last_part_text_section_name = ggc_strdup (section_name);
> +  return get_named_section (NULL, section_name, 0);
> +}
> +
> +
>  /* When called within a function context, return true if the function
>     has been assigned a cold text section and if SECT is that section.
>     When called outside a function context, return true if SECT is the
> @@ -834,6 +944,9 @@
>    else
>      return targetm.asm_out.select_section (decl, reloc, DECL_ALIGN (decl));
>  #else
> +   if (flag_partition_functions_into_sections
> +       && crtl->subsections.number_of_sections)
> +    return text_part_section (0);
>    return reloc ? unlikely_text_section () : hot_function_section (decl);
>  #endif
>  }
> @@ -852,6 +965,8 @@
>  					   in_cold_section_p,
>  					   DECL_ALIGN (current_function_decl));
>  #else
> +  if (in_part_section_p)
> +    return get_named_section (NULL, last_part_text_section_name, 0);
>    return (in_cold_section_p
>  	  ? unlikely_text_section ()
>  	  : hot_function_section (current_function_decl));
> @@ -1619,6 +1734,30 @@
>      }
>  }
>  
> +/* If the text section is partitioned; make sure all the sections are
> +   properly aligned.  */
> +
> +static void
> +output_sections (void)
> +{
> +  unsigned int i;
> +  const char *tmp;
> +
> +  gcc_assert (cfun && flag_partition_functions_into_sections);
> +
> +  /* Output for each one of the sections the label which marks the
> +     beginning of its address range.  */
> +  for (i = 1;
> +       VEC_iterate (char_p, crtl->subsections.section_start_labels, i,
> +		    tmp); i++)
> +    {
> +      switch_to_section (text_part_section (i));
> +      assemble_align (FUNCTION_BOUNDARY);
> +      if (crtl->subsections.section_start_labels != NULL)
> +	ASM_OUTPUT_LABEL (asm_out_file, tmp);
> +    }
> +}
> +

Again, this might change depending on whether you move the stuff to full
sections, instead of subsections.

>  /* Output assembler code for the constant pool of a function and associated
>     with defining the name of the function.  DECL describes the function.
>     NAME is the function's name.  For the constant pool, we use the current
> @@ -1634,8 +1773,31 @@
>    crtl->subsections.unlikely_text_section_name = NULL;
>  
>    first_function_block_is_cold = false;
> -  if (flag_reorder_blocks_and_partition)
> +  if (flag_partition_functions_into_sections
> +      && cfun
> +      && (crtl->subsections.number_of_sections > 0))
>      {
> +      unsigned i;
> +      int length = crtl->subsections.number_of_sections;
> +
> +      crtl->subsections.section_start_labels = 
> +          VEC_alloc (char_p, gc, length);
> +      crtl->subsections.section_end_labels = 
> +      VEC_alloc (char_p, gc, length);
> +
> +      for (i = 0; i < crtl->subsections.number_of_sections; i++)
> +        {
> +          ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LSECTIONB", const_labelno);
> +          VEC_safe_push (char_p, gc, crtl->subsections.section_start_labels,
> +                         ggc_strdup (tmp_label));
> +          ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LSECTIONE", const_labelno);
> +          VEC_safe_push (char_p, gc, crtl->subsections.section_end_labels,
> +                         ggc_strdup (tmp_label));
> +          const_labelno++;
> +        }
> +    }
> +  else if (flag_reorder_blocks_and_partition)
> +    {
>        ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LHOTB", const_labelno);
>        crtl->subsections.hot_section_label = ggc_strdup (tmp_label);
>        ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LCOLDB", const_labelno);
> @@ -1668,7 +1830,8 @@
>       has both hot and cold sections, because we don't want to re-set
>       the alignment when the section switch happens mid-function.  */
>  
> -  if (flag_reorder_blocks_and_partition)
> +  if (flag_reorder_blocks_and_partition
> +      && (flag_partition_functions_into_sections == 0))
>      {
>        switch_to_section (unlikely_text_section ());
>        assemble_align (DECL_ALIGN (decl));
> @@ -1687,7 +1850,8 @@
>  	  first_function_block_is_cold = true;
>  	}
>      }
> -  else if (DECL_SECTION_NAME (decl))
> +   else if (DECL_SECTION_NAME (decl)
> +           && (flag_partition_functions_into_sections == 0))
>      {
>        /* Calls to function_section rely on first_function_block_is_cold
>  	 being accurate.  The first block may be cold even if we aren't

This all may change if you use sections instead of subsections.

> @@ -1701,6 +1865,19 @@
>  		     crtl->subsections.unlikely_text_section_name) == 0)
>  	first_function_block_is_cold = true;
>      }
> +   if (flag_partition_functions_into_sections)
> +    {
> +      if (!crtl->is_thunk
> +          && BB_PARTITION (ENTRY_BLOCK_PTR->next_bb) == BB_COLD_PARTITION)
> +        {
> +          first_function_block_is_cold = true;
> +        }
> +      if (DECL_SECTION_NAME (decl)
> +          && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
> +                     UNLIKELY_EXECUTED_TEXT_SECTION_NAME) == 0)
> +        first_function_block_is_cold = true;
> +      output_sections ();
> +    }
>  
>    in_cold_section_p = first_function_block_is_cold;
>  
> @@ -1708,9 +1885,19 @@
>  
>    switch_to_section (function_section (decl));
>    if (flag_reorder_blocks_and_partition
> +      && (flag_partition_functions_into_sections == 0)
>        && !hot_label_written)
>      ASM_OUTPUT_LABEL (asm_out_file, crtl->subsections.hot_section_label);
> +  else if (flag_partition_functions_into_sections
> +           && crtl->subsections.section_start_labels != 0
> +           && crtl->subsections.number_of_sections > 0)
> +   {
> +     const char *tmp;
>  
> +     tmp = VEC_index (char_p, crtl->subsections.section_start_labels, 0);
> +     ASM_OUTPUT_LABEL (asm_out_file, tmp);
> +   }
> +
>    /* Tell assembler to move to target machine's alignment for functions.  */
>    align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT);
>    if (align > 0)

I have a feeling that this will be simplified with the change to sections.

> @@ -1795,6 +1982,25 @@
>        ASM_OUTPUT_LABEL (asm_out_file, crtl->subsections.hot_section_end_label);
>        switch_to_section (save_text_section);
>      }
> +    else if (flag_partition_functions_into_sections
> +            && crtl->subsections.section_end_labels != NULL)
> +    {
> +       unsigned int i;
> +       const char *tmp;
> +
> +        /* Output for each one of the sections the label which marks the
> +           end of its address range.  */
> +        for (i = 0;
> +               VEC_iterate (char_p, crtl->subsections.section_end_labels, i,
> +                            tmp); i++)
> +         {
> +         switch_to_section (text_part_section (i));
> +         tmp =  VEC_index (char_p, crtl->subsections.section_end_labels, i);
> +         ASM_OUTPUT_LABEL (asm_out_file, tmp);
> +       }
> +
> +    }
> +
>  }

Ditto here.

>  
>  /* Assemble code to leave SIZE bytes of zeros.  */
> @@ -5592,6 +5798,15 @@
>      flags = SECTION_CODE;
>    else if (decl && decl_readonly_section (decl, reloc))
>      flags = 0;
> +  else if (flag_partition_functions_into_sections
> +           && crtl->subsections.unlikely_part_text_section_name
> +           && strstr (name,
> +                      crtl->subsections.unlikely_part_text_section_name) != 0)
> +    flags = SECTION_CODE;
> +  else if (flag_partition_functions_into_sections
> +           && crtl->subsections.part_text_section_name
> +           && strstr (name, crtl->subsections.part_text_section_name) != 0)
> +    flags = SECTION_CODE;
>    else if (current_function_decl
>  	   && cfun
>  	   && crtl->subsections.unlikely_text_section_name

strstr is a somewhat expensive function; I'm wondering if there is a way to
eliminate its use.  It isn't major; I'm just thinking out loud here.

> Index: target-def.h
> ===================================================================
> --- target-def.h	(revision 138847)
> +++ target-def.h	(working copy)
> @@ -413,6 +413,19 @@
>  #define TARGET_SECTION_TYPE_FLAGS default_section_type_flags
>  #endif
>  
> +#define TARGET_EST_SECTION_OVERHEAD 0
> +
> +#define TARGET_EST_INSTRUCTION_SIZE 0
> +
> +#define TARGET_PART_FUNC_TO_SECTIONS_FLAGS 0
> +
> +#define TARGET_BEGIN_CRITICAL_SECTION 0
> +
> +#define TARGET_END_CRITICAL_SECTION 0
> +
> +#define TARGET_RECORD_JUMP_TABLE 0
> +
> +
>  #ifndef TARGET_STRIP_NAME_ENCODING
>  #define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding
>  #endif
> @@ -874,6 +887,12 @@
>    TARGET_FIXED_CONDITION_CODE_REGS,		\
>    TARGET_CC_MODES_COMPATIBLE,			\
>    TARGET_MACHINE_DEPENDENT_REORG,		\
> +  TARGET_EST_SECTION_OVERHEAD,                  \
> +  TARGET_EST_INSTRUCTION_SIZE,                  \
> +  TARGET_PART_FUNC_TO_SECTIONS_FLAGS,           \
> +  TARGET_BEGIN_CRITICAL_SECTION,                \
> +  TARGET_END_CRITICAL_SECTION,                  \
> +  TARGET_RECORD_JUMP_TABLE,                     \
>    TARGET_BUILD_BUILTIN_VA_LIST,			\
>    TARGET_FN_ABI_VA_LIST,			\
>    TARGET_CANONICAL_VA_LIST_TYPE,			\

Obviously these would change depending on the global changes.  As I said
before, this should probably be a substructure to move all of the like
information together.

> Index: rtl.h
> ===================================================================
> --- rtl.h	(revision 138847)
> +++ rtl.h	(working copy)
> @@ -861,6 +861,7 @@
>  #define NOTE_DELETED_LABEL_NAME(INSN) XCSTR (INSN, 4, NOTE)
>  #define SET_INSN_DELETED(INSN) set_insn_deleted (INSN);
>  #define NOTE_BLOCK(INSN)	XCTREE (INSN, 4, NOTE)
> +#define NOTE_TEXT_SECTION(INSN) XCINT (INSN, 4, NOTE)
>  #define NOTE_EH_HANDLER(INSN)	XCINT (INSN, 4, NOTE)
>  #define NOTE_BASIC_BLOCK(INSN)	XCBBDEF (INSN, 4, NOTE)
>  #define NOTE_VAR_LOCATION(INSN)	XCEXP (INSN, 4, NOTE)

If you move the section stuff to just the basic block structure, then you won't
need to keep track of the section on a per insn basis.

> Index: output.h
> ===================================================================
> --- output.h	(revision 138847)
> +++ output.h	(working copy)
> @@ -567,6 +567,7 @@
>  					    unsigned int);
>  extern section *function_section (tree);
>  extern section *unlikely_text_section (void);
> +extern section *text_part_section (int);
>  extern section *current_function_section (void);
>  
>  /* Return the numbered .ctors.N (if CONSTRUCTOR_P) or .dtors.N (if
> 

If you do the global changes, you likely will have other changes to output.h.

> Index: bb-reorder.c
> ===================================================================
> --- bb-reorder.c	(revision 138847)
> +++ bb-reorder.c	(working copy)
> @@ -85,6 +85,10 @@
>  #include "toplev.h"
>  #include "tree-pass.h"
>  #include "df.h"
> +#include "cfgloop.h"
> +#include "langhooks.h"
> +#include "hashtab.h"
> +#include "vec.h"
>  
>  #ifndef HAVE_conditional_execution
>  #define HAVE_conditional_execution 0
> @@ -1265,7 +1269,624 @@
>     Convert any fall-through crossing edges (for blocks that do not contain
>     a jump) to unconditional jumps.  */
>  
> +/* Structure to hold needed information for each loop.  */
> +struct loop_info_def
> +{
> +  /* The index of the loop.  */
> +  unsigned index;

You probably should use unsigned HOST_WIDE_INT here instead of int.  I'm not
sure whether some code out there has more than 65k loops, but it is probably
better to be safe.

> +  /* The size (in bytes) of the loop layout.  */
> +  unsigned size;

You probably should use unsigned HOST_WIDE_INT here instead of int.

> +  /* The following fields are the first and last basic-blocks in the
> +     current loop layout.  They are used to calculate the loop's size.  */
> +   basic_block first;
> +   basic_block last;
> +};
> +
> +typedef struct loop_info_def *loop_info;
> +DEF_VEC_P(loop_info);
> +DEF_VEC_ALLOC_P(loop_info,heap);
> +
> +/* Structure to hold needed information for each basic block.  */
> +typedef struct funcpart_basic_block_data_def
> +{
> +  /* Section id which is attached to each section.  */
> +  int section_id;
> +
> +  /* The list of loops which begin at this basic-block.  */
> +  VEC(loop_info,heap) *loops_info_list;
> +
> +  /* True if this basic-block appears in critical section.  */
> +  bool in_critical_section_p;
> +
> +  /* True if this basic-block is the first in critical section.  */
> +  bool first_bb_in_critical_section;
> +
> +  /* The size in bytes of the critical section which begins in this
> +     basic-block.  */
> +  int critical_section_size;

You probably should use unsigned HOST_WIDE_INT here instead of int.

> +  /* The size in bytes of the basic-block.  */
> +  int bb_size;

You probably should use unsigned HOST_WIDE_INT here instead of int.

> +}funcpart_basic_block_data;
> +
> +/* The current size (in elements) of the following dynamic array.  */
> +static int fbb_data_size = 0;

You probably should use unsigned HOST_WIDE_INT here instead of int.

> +
> +/* The array which holds needed information for basic blocks.  */
> +static funcpart_basic_block_data *fbb_data;
> +
> +/* Information regarding insn.  */
> +struct insn_aux
> +{
> +  /* The insn.  */ 
> +  rtx insn;                        
> +
> +  /* The estimated size in bytes of insn.  */
> +  int size;            
> +};
> +
> +/* Hash-table of insns in the function which holds their size (in bytes).  */
> +htab_t insns_aux;

Given that you have INSN_UID, you don't need a hash table to map an insn to its
size; instead, allocate a vector indexed by INSN_UID and fill in the sizes.
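Something along these lines (sketch only; assuming get_max_uid () is an upper
bound on the uids you will see at this point):

    /* Per-uid size cache, indexed by INSN_UID, 0 meaning "not cached".  */
    static int *insn_size;

    insn_size = XCNEWVEC (int, get_max_uid ());
    ...
    insn_size[INSN_UID (insn)] = size;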

> +/* A variable to hold the overhead introduced by splitting a function
> +   into sections.  This overhead contains extra branch instructions
> +   between section which might effect code size.  */
> +static int estimate_section_overhead = 0;

You probably should use unsigned HOST_WIDE_INT here instead of int.

> +/* Estimate the maximum size (in bytes) of one section.  */
> +static int estimate_max_section_size = 0;

You probably should use unsigned HOST_WIDE_INT here instead of int.

> +/* Given basic-block with index BB_INDEX validate that fbb_data array
> +   has a corresponding element for it.  */
> +
>  static void
> +validate_fbb_data_element (int bb_index)
> +{
> +  int prev_size = fbb_data_size;
> +  int i;
> +
> +  if (bb_index < fbb_data_size)
> +    return;
> +
> +  /* Resize the array, if necessary.  */
> +  fbb_data_size *= 2;
> +  fbb_data = (funcpart_basic_block_data *)
> +    xrealloc (fbb_data, (fbb_data_size * sizeof (funcpart_basic_block_data)));
> +  for (i = prev_size; i < fbb_data_size; i++)
> +    {
> +      fbb_data[i].section_id = -1;
> +      fbb_data[i].loops_info_list = NULL;
> +      fbb_data[i].in_critical_section_p = false;
> +      fbb_data[i].critical_section_size = 0;
> +      fbb_data[i].first_bb_in_critical_section = false;
> +      fbb_data[i].bb_size = 0;
> +    }
> +}
> +
> +/* Given basic block BB return it's size in bytes.  */
> +
> +static int
> +est_size_of_insns_in_bb (basic_block bb)
> +{
> +  int size = 0;
> +  rtx insn;
> +  struct insn_aux *iaux;
> +
> +  /* Loop through all of the basic-block's insns.  */
> +  FOR_BB_INSNS (bb, insn)
> +  {

The '{' and the code inside should be indented 2 spaces through to the closing
'}'.

> +    if (INSN_P (insn))
> +      {
> +        struct insn_aux iaux_tmp;
> +
> +        /* Retrieve the size of the instruction. */
> +        iaux_tmp.insn = insn;
> +        iaux = (struct insn_aux *)htab_find (insns_aux, &iaux_tmp);
> +        if (iaux)
> +          size += iaux->size;
> +        else
> +          size += get_attr_min_length (insn);
> +      }
> +  }
> +#if ENABLE_CHECKING
> +  /* If we already calculated the size of this basic-block we can use it
> +     for checking.  */
> +  if (fbb_data[bb->index].bb_size != 0)
> +    gcc_assert (fbb_data[bb->index].bb_size == size);
> +#endif
> +
> +  return size;
> +}

As I mentioned before, if this is kept, you probably should name it estimate_
instead of est_, but that is minor.  The returned size should be unsigned
HOST_WIDE_INT and not int.

> +/* Split basic-block BB such that the first partition will not exceed the
> +   maximum size of FIRST_PARTITION_SIZE bytes.  Update the actual size
> +   of the first partition in FIRST_PARTITION_ACTUAL_SIZE.  Return the
> +   new basic-block if it can be created.  Else return NULL.  */
> +
> +static basic_block
> +split_bb (basic_block bb, int first_partition_size,
> +	  int *first_partition_actual_size)
> +{
> +  rtx insn;
> +  int size = 0;
> +  int prev_size = 0;
> +  edge e;
> +  rtx prev_insn = NULL;
> +  struct insn_aux *iaux;
> +
> +  /* Loop through all of the basic-block's insns.  */
> +  FOR_BB_INSNS (bb, insn)
> +  {
> +    if (INSN_P (insn))
> +      {
> +	struct insn_aux iaux_tmp;
> +
> +	prev_size = size;
> +
> +	/* Retrieve the size of the instruction. */
> +	iaux_tmp.insn = insn;
> +	iaux = (struct insn_aux *) htab_find (insns_aux, &iaux_tmp);

Here I don't think you need a hash table; just call the function to get the
size.  The hash table probably takes more resources to hash and look up the
information than recalculating it would.

> +	if (iaux)
> +	  size += iaux->size;
> +	else
> +	  size += get_attr_min_length (insn);
> +
> +	if (size > first_partition_size)
> +	  {
> +	    if (prev_insn == NULL)
> +	      return NULL;
> +	    break;
> +	  }
> +	prev_insn = insn;
> +      }
> +  }
> +
> +#ifdef ENABLE_CHECKING
> +  gcc_assert (size > first_partition_size);
> +#endif
> +
> +  e = split_block (bb, prev_insn);
> +  e->dest->aux = bb->aux;
> +  bb->aux = e->dest;
> +
> +  if (dump_file)
> +    fprintf (dump_file,
> +	     "\tSplit bb %d; create new bb: %d\n", bb->index, e->dest->index);
> +  *first_partition_actual_size = prev_size;
> +  return e->dest;
> +}
> +
> +/* Insert a NOTE_INSN_SWITCH_TEXT_SECTIONS note before basic-blocks that
> +   are markred as BB_FIRST_AFTER_SECTION_SWITCH.  */
> +
> +static void
> +insert_section_boundary_note_in_function (void)
> +{
> +  basic_block bb;
> +  rtx new_note;
> +  unsigned number_of_sections = 1;
> +  bool first_switch_p = true;
> +  bool only_one_section_p = true;
> +
> +  gcc_assert (cfun && flag_partition_functions_into_sections);
> +
> +  FOR_EACH_BB (bb)
> +  {
> +    if (bb->flags & BB_FIRST_AFTER_SECTION_SWITCH)
> +      {
> +	new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
> +				     BB_HEAD (bb));
> +	/* ??? This kind of note always lives between basic blocks,
> +	   but add_insn_before will set BLOCK_FOR_INSN anyway.  */
> +	BLOCK_FOR_INSN (new_note) = NULL;
> +	NOTE_TEXT_SECTION (new_note) = number_of_sections++;
> +	if (bb->prev_bb && (BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb))
> +	    && first_switch_p && flag_reorder_blocks_and_partition)
> +	  {
> +	    crtl->subsections.first_text_section_part_changed =
> +	      number_of_sections - 1;
> +	    first_switch_p = false;
> +	    only_one_section_p = false;
> +	  }
> +      }
> +  }
> +
> +  if (only_one_section_p)
> +    crtl->subsections.first_text_section_part_changed = number_of_sections;
> +
> +  crtl->subsections.number_of_sections = number_of_sections;
> +}
> +
> +/* Start a new section at the beginning of basic-block BB with SECTION_ID.
> +   */

SECTION_ID should probably be on the next line with the '*/'.
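I.e., something like:

    /* Start a new section at the beginning of basic-block BB with
       SECTION_ID.  */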

> +
> +static void
> +start_new_section (int *section_id, basic_block bb)
> +{
> +  bb->flags |= BB_FIRST_AFTER_SECTION_SWITCH;
> +  *section_id = *section_id + 1;
> +  if (dump_file)
> +    fprintf (dump_file, ";; Start new section %d\n", *section_id);
> +}
> +
> +/* Return true if the given basic-block BB starts a loop that can not
> +   fully be inserted into the last section and the loop size is less
> +   than the maximum section size threshold.  Use LAST_SECTION_SIZE in
> +   the calculation.  */
> +
> +static bool
> +start_new_section_for_loop (basic_block bb, int last_section_size)
> +{
> +  struct loop_info_def *cur_loop;
> +  unsigned i;
> +
> +  if (fbb_data[bb->index].loops_info_list == NULL)
> +    return false;
> +
> +  if (last_section_size == 0)
> +    return false;
> +
> +  /* The loops are sorted in loops_info_list according to the loop size
> +     (in bytes); where the loop with the samllest layout appears first.
> +     */
> +  for (i = 0;
> +       VEC_iterate (loop_info, fbb_data[bb->index].loops_info_list, i,
> +		    cur_loop); i++)
> +    {
> +      if ((last_section_size + cur_loop->size >
> +	   (unsigned) estimate_max_section_size)
> +	  && cur_loop->size <= (unsigned) estimate_max_section_size)
> +	{
> +	  if (dump_file)
> +	    fprintf (dump_file, "\tbb %d starts a loop (size %dB) "
> +		     "recommend to start a new section to not break it\n", 
> +                     bb->index, cur_loop->size);
> +
> +	  return true;
> +	}
> +    }
> +  return false;
> +}
> +
> +/* Return TRUE if basic-block BB contains critical section.  */
> +
> +static bool
> +in_critical_section (basic_block bb)
> +{
> +  if (fbb_data[bb->index].in_critical_section_p)
> +    return true;
> +  return false;
> +}
> +
> +/* Return true if BB is the first basic-block in critical section;
> +   otherwise return false.  */
> +
> +static bool
> +first_bb_in_critical_section (basic_block bb)
> +{
> +  if (fbb_data[bb->index].in_critical_section_p
> +      && fbb_data[bb->index].first_bb_in_critical_section)
> +    return true;
> +  return false;
> +}
> +
> +/* Return true if the given basic-block BB contains a critical section
> +   that can not fully be inserted into the last section.  Use
> +   LAST_SECTION_SIZE in the calculation.  */
> +
> +static bool
> +start_new_section_for_critical_section (basic_block bb, int last_section_size)
> +{
> +  int size;
> +
> +  /* This bb does not start a critical section.  */
> +  if (!fbb_data[bb->index].in_critical_section_p)
> +    return false;
> +
> +  size = fbb_data[bb->index].critical_section_size;
> +
> +  if (last_section_size == 0)
> +    return false;
> +
> +  if ((last_section_size + size >
> +       estimate_max_section_size) && size <= estimate_max_section_size)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file,
> +		 "\n\tbb %d starts a critical section (size %dB) "
> +		 "recommend to start a new section in order to not break it\n",
> +		 bb->index, size);
> +
> +      return true;
> +    }
> +  return false;
> +}

As I said, I think the critical stuff should be pushed to the back end, rather
than having the machine independent stuff deal with it.

> +
> +/* Return a hash for IAUX, which is really a "insn_aux *".  */
> +static hashval_t
> +iaux_info_hash (const void *iaux)
> +{
> +  return (hashval_t) INSN_UID (((const struct insn_aux *) iaux)->insn);
> +}
> +
> +/* Return true if IAUX1 and IAUX2 (which are really both of type
> +   "insn_aux *") refer to the same instruction.  */
> +static int
> +iaux_info_eq (const void *iaux1, const void *iaux2)
> +{
> +  const struct insn_aux *i1 = (const struct insn_aux *) iaux1;
> +  const struct insn_aux *i2 = (const struct insn_aux *) iaux2;
> +
> +  return i1->insn == i2->insn;
> +}

These go away if you don't need to hash the insn.

> +/* Create sections for the current function.  Return the edges that
> +   cross between sections in CROSSING_EDGES array which is of size
> +   N_CROSSING_EDGES so they could be fixed later.  
> +
> +   The partitioning is done by traversing the basic-blocks according to
> +   their order in the code layout.  When a new basic-block is encountered
> +   a decision is made whether to add it to the last section, split it
> +   or add it to a new section.
> +
> +   Here is a short description of decision process :
> +
> +   Start a new section if one of the following conditions exist:
> +
> +   	a) if adding the basic-block to the last section causes the last
> +           section to exceed the max section size and it's size is less
> +           than max section size. 
> +	b) if a loop starts at this basic-block and the loop can not
> +	   fully be inserted into the last section and it's size is less
> +	   than max section size. 
> +        c) the basic block hotness property is different from it's
> +           previous basic-block's hotness property. 
> +        d) if a critical section starts at this basic-block and it
> +           can not fully be inserted into the last section and it's size
> +           is less than max section size.
> +     
> +	If the above conditions are not true split the basic-block
> +	if adding the basic-block to the last section causes the last
> +	section to exceed the max section size and it is not part of a
> +	critical section.  Otherwise add it to the last section.  */
> +        
> +static void
> +create_sections (edge **crossing_edges, int *n_crossing_edges, int *max_idx)
> +{
> +  basic_block bb, new_bb;
> +  edge e;
> +  int i;
> +  edge_iterator ei;
> +  int last_section_size = 0;
> +  int current_section_id = 0;
> +  int first_partition_actual_size;
> +
> +  /* Mark which section each basic block belongs in.  */
> +  if (dump_file)
> +    fprintf (dump_file,
> +	     "\n\n--- partition functions into sections --- (%dB)\n\n"
> +	     ";; section overhead size %dB\n" ";; max section size %dB\n\n",
> +	     flag_partition_functions_into_sections, estimate_section_overhead,
> +	     estimate_max_section_size);
> +
> +  FOR_EACH_BB (bb)
> +  {

This code should be indented 2 spaces.

> +    int bb_size;
> +    bool start_new_section_for_loop_p = false;
> +    bool start_new_section_due_to_hotness_prop_p = false;
> +    bool start_new_section_for_critical_section_p = false;
> +    bool start_critical_section_p = false;
> +
> +    /* Validate that fbb_data array has a corresponding element for
> +       this bb, so we could access it directly.  */
> +    validate_fbb_data_element (bb->index);
> +   
> +    bb_size = est_size_of_insns_in_bb (bb);
> +    if (dump_file)
> +      fprintf (dump_file, ";; Trying to add bb %d (%dB) to section %d",
> +               bb->index, bb_size, current_section_id);
> +
> +     start_critical_section_p = first_bb_in_critical_section (bb);
> +     if (!start_critical_section_p && in_critical_section (bb))
> +     {
> +       /* Critical section must reside only in one section so add it to
> +          the current section.  */
> +       last_section_size += bb_size;
> +       if (dump_file)
> +         fprintf (dump_file,
> +                 "\n;; Basic-block %d is part of critical section."
> +                 " Add it to section %d (%dB)\n",
> +                 bb->index, current_section_id, last_section_size);
> +       /* Upadte section id.  */
> +        fbb_data[bb->index].section_id = current_section_id;
> +       continue;
> +     }
> +
> +    /* Check if a loop starts in this bb and we should start a new
> +       section for it.  */
> +    start_new_section_for_loop_p =
> +      start_new_section_for_loop (bb, last_section_size);
> +
> +    /* Check if the hotness property has been changed.   */
> +    start_new_section_due_to_hotness_prop_p =
> +      ((bb->prev_bb != NULL) && (last_section_size != 0)
> +       && BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb)
> +       && flag_reorder_blocks_and_partition);
> +
> +    /* Check if this bb starts a critical section and we should start
> +       a new section for it.  */
> +    if (start_critical_section_p)
> +      start_new_section_for_critical_section_p =
> +        start_new_section_for_critical_section (bb, last_section_size);
> +
> +    if (((last_section_size + bb_size) >
> +	 estimate_max_section_size) || start_new_section_for_loop_p
> +         || start_new_section_due_to_hotness_prop_p
> +         || start_new_section_for_critical_section_p)
> +      {
> +	/* Do not add the basic-block to the last section.  This is due
> +	   to the possible reasons:
> +	   1) the addition of the basic block to last section will cause
> +	   the last section to exceed the max section size.
> +	   2) the basic-block starts a loop that can not fully be inserted
> +	   to the last section without exceeding the max section size
> +	   and it's size is less than the max section size.  
> +           3) The hotness properaty of this basic-block is different then
> +           it's previous.
> +           4) The basic-block starts critical section that can not fully
> +           be inserted to the last section without exceeding the max
> +           section size and it's size is less than the max section size.  */
> +        if (start_critical_section_p && dump_file)
> +           fprintf (dump_file,
> +                   " ...FAIL\n\tbb %d starts a critical section", bb->index);
> +        else if (start_new_section_for_loop_p && dump_file)
> +          fprintf (dump_file,
> +                   " ...FAIL\n\tbb %d begins a loop that can not fully"
> +                   " be inserted to the last section\n", bb->index);
> +        else if (start_new_section_due_to_hotness_prop_p && dump_file)
> +          fprintf (dump_file,
> +                   " ...FAIL\n\tbb %d hotness property is different then it's"
> +                   " previous basic-block\n", bb->index);
> +	else if (((last_section_size + bb_size) > estimate_max_section_size)
> +	    && dump_file)
> +	  fprintf (dump_file,
> +		   " ...FAIL\n\tSize of section (%dB) + bb %d (%dB)"
> +		   " exceeds threshold \n",
> +		   last_section_size, bb->index, bb_size);

You might want to wrap all of the ifs inside a single if (dump_file) { ... }
rather than testing dump_file in each condition, since all of the ifs are just
for debug information.
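I.e., something like:

    if (dump_file)
      {
        if (start_critical_section_p)
          fprintf (dump_file, " ...FAIL\n\tbb %d starts a critical section",
                   bb->index);
        else if (start_new_section_for_loop_p)
          fprintf (dump_file, " ...FAIL\n\tbb %d begins a loop that can not"
                   " fully be inserted to the last section\n", bb->index);
        /* ... and so on for the remaining cases ...  */
      }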

> +
> +	/* Start a new section if one of following conditions
> +	   are fulfilled:
> +	   1) the last section is not empty and the bb size is less than
> +	   the section size threshold.
> +	   2) the basic-block starts a loop that its size is less than
> +	   the section size threshold.  
> +           3) The hotness property of this basic block is different then
> +           the previous.
> +           4) the basic-block starts a critical section that its size
> +           is less than the section size threshold.  */
> +	if ((last_section_size != 0
> +	     && bb_size <= estimate_max_section_size)
> +	    || start_new_section_for_loop_p
> +            || start_new_section_due_to_hotness_prop_p
> +            || start_new_section_for_critical_section_p)
> +	  {
> +	    if (dump_file)
> +	      fprintf (dump_file,
> +		       ";; Close section %d (%dB)\n", current_section_id,
> +		       last_section_size);
> +	    start_new_section (&current_section_id, bb);
> +	    last_section_size = 0;
> +	    bb = bb->prev_bb;
> +	    continue;
> +	  }
> +       if (start_critical_section_p)
> +        {
> +          /* The basic-block begins a critical section so we can not
> +             split it.  Add it to the current section.  */
> +          last_section_size += bb_size;
> +          if (dump_file)
> +            fprintf (dump_file, "\n\tbb %d starts a critical section. "
> +                     "Add it to section %d (%dB)\n",
> +                     bb->index, current_section_id, last_section_size);
> +          /* Upadte section id.  */
> +          fbb_data[bb->index].section_id = current_section_id;
> +          continue;
> +        }
> +	/* Split the basic-block.  Try to insert it's first partition
> +	   to the last section such that the section size will not exceed
> +	   the section size threshold.  */
> +	new_bb =
> +	  split_bb (bb, 
> +		    estimate_max_section_size - last_section_size,
> +		    &first_partition_actual_size);
> +
> +	if (new_bb != NULL)
> +	  {
> +	    /* Success.  The new basic-block was created and was added to the
> +	       last section.  Close this section and open a new one.  */
> +	    if (dump_file)
> +	      {
> +		fprintf (dump_file,
> +			 "\tNew edge: (idx %d , size %dB) -> BB (idx %d , "
> +			 "size %dB)\n",
> +			 bb->index, first_partition_actual_size,
> +			 new_bb->index,
> +			 bb_size - first_partition_actual_size);
> +		fprintf (dump_file,
> +			 "\tAdd (idx %d; size %dB) to section %d\n",
> +			 bb->index, first_partition_actual_size,
> +			 current_section_id);
> +		fprintf (dump_file, ";; Close section %d (%dB)\n",
> +			 current_section_id,
> +			 last_section_size + first_partition_actual_size);
> +	      }
> +	    /* Upadte section id.  */
> +	    fbb_data[bb->index].section_id = current_section_id;
> +	    /* Start new section.  */
> +	    start_new_section (&current_section_id, new_bb);
> +	    last_section_size = 0;
> +	  }
> +	else
> +	  {
> +	    /* Failed to create the new basic-block.  Close this section
> +	       with the current bb inside and open a new one.  */
> +            last_section_size += bb_size;
> +	    if (dump_file)
> +	      fprintf (dump_file,
> +		       "\n;; Failed to split bb. Close section %d (%dB)\n", 
> +		       current_section_id, last_section_size);
> +            if (bb->next_bb)
> +	      start_new_section (&current_section_id, bb->next_bb);
> +	    last_section_size = 0;
> +	    continue;
> +	  }
> +      }
> +    else
> +      {
> +	/* The basic-block can be fully added to the last section.  */
> +	last_section_size += bb_size;
> +	if (dump_file)
> +	  fprintf (dump_file, " ...OK\n\tSection new size: %dB\n",
> +		   last_section_size);
> +	/* Upadte section id.  */
> +	fbb_data[bb->index].section_id = current_section_id;
> +      }
> +  }
> +  if (dump_file)
> +    fprintf (dump_file, "\n\n");
> +
> +  /* Mark every edge that crosses between sections.  */
> +  i = 0;
> +  FOR_EACH_BB (bb) FOR_EACH_EDGE (e, ei, bb->succs)
> +  {
> +    validate_fbb_data_element (bb->index);
> +
> +    if (e->src != ENTRY_BLOCK_PTR
> +	&& e->dest != EXIT_BLOCK_PTR
> +	&& (fbb_data[(e->src)->index].section_id != -1)
> +	&& (fbb_data[(e->dest)->index].section_id != -1)
> +	&& (fbb_data[(e->src)->index].section_id !=
> +	    fbb_data[(e->dest)->index].section_id))
> +      {
> +	e->flags |= EDGE_CROSSING;
> +	/* Resize the array, if necessary.  */
> +	if (i == *max_idx)
> +	  {
> +	    *max_idx *= 2;
> +	    (*crossing_edges) = (edge *) xrealloc (*crossing_edges,
> +					  (*max_idx) * sizeof (edge));
> +	  }

Here, rather than doing the resize yourself, it is probably better to switch to
using a VEC structure and let the common code handle it.
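E.g. (a sketch; assuming an edge VEC with heap allocation is available here, or
can be declared):

    VEC(edge,heap) *crossing_edges = VEC_alloc (edge, heap, 16);
    ...
    VEC_safe_push (edge, heap, crossing_edges, e);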

> +	(*crossing_edges)[i++] = e;

Please put a space after the ')'.

> +      }
> +    else
> +      e->flags &= ~EDGE_CROSSING;
> +  }
> +  *n_crossing_edges = i;
> +}
> +
> +static void
>  add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
>  {
>    int i;
> @@ -1435,10 +2056,14 @@
>  			}
>  		    }
>  		}
> -
>  	      if (cond_jump_crosses || !invert_worked)
> +                      
>  		{
>  		  /* This is the case where both edges out of the basic
> +                   else
> +                   {
> +                      fall_thru->flags |= EDGE_CROSSING;
> +                   }

Eliminate the '{'/'}' since fall_thru->flags is a simple statement.  This is minor.

>  		     block are crossing edges. Here we will fix up the
>  		     fall through edge. The jump edge will be taken care
>  		     of later.  The EDGE_CROSSING flag of fall_thru edge 
> @@ -1956,7 +2581,10 @@
>    rtx new_note;
>    int first_partition = 0;
>  
> -  if (flag_reorder_blocks_and_partition)
> +  /* If -fpartition-functions-into-sections flag is set then
> +     NOTE_INSN_SWITCH_TEXT_SECTIONS note will be inserted later.  */
> +  if (flag_reorder_blocks_and_partition
> +      && (flag_partition_functions_into_sections == 0))
>      FOR_EACH_BB (bb)
>      {
>        if (!first_partition)
> @@ -2201,8 +2829,628 @@
>  
>    cfg_layout_finalize ();
>  }
> -
> +
> +/* For each loop in LOOPS calculate the first and last basic-block that appear 
> +   in their layout.  */
> +static void
> +calculate_loop_boundary (struct loop_info_def *loops_aux)
> +{
> +  struct loop *loop;
> +  basic_block bb;
> +
> +  if (!loops_aux)
> +    return;
> +
> +  FOR_EACH_BB (bb)
> +  {

Indent 2 spaces.

> +    for (loop = bb->loop_father; loop_outer (loop); loop = loop_outer (loop))
> +      {
> +	loops_aux[loop->num].last = bb;
> +	if (!loops_aux[loop->num].first)
> +	  loops_aux[loop->num].first = bb;
> +      }
> +  }
> +}
> +
> +/* Return true if loop_info A has less size than loop_info B, in order
> +   to give them an ordering.  */
>  static bool
> +loop_size_is_less (const loop_info a, const loop_info b)
> +{
> +  if (a->size < b->size)
> +    return true;
> +  return false;
> +}
> +
> +/* Cache the size (in bytes) of the instructions in the function
> +   to avoid re-calculation.  */
> +static void
> +record_insns_size_estimation (void)
> +{
> +  basic_block bb;
> +  rtx insn;
> +  struct insn_aux *iaux;
> +  PTR *slot;
> +
> +  FOR_EACH_BB (bb)
> +  {

Indentation.

> +    int size;
> +    int bb_size = 0;
> +
> +    /* Loop through all of the basic-block's insns.  */
> +    FOR_BB_INSNS (bb, insn) 
> +      if (INSN_P (insn) || NOTE_P (insn))
> +      {
> +	if (targetm.est_instruction_size != 0)
> +	  size = targetm.est_instruction_size (insn);
> +	else
> +	  size = get_attr_min_length (insn);
> +     
> +	if (NOTE_P (insn))
> +	  continue;
> +        bb_size += size;
> +	/* Cache the insn's size for later re-use.  */
> +	iaux = (struct insn_aux *)xmalloc (sizeof (struct insn_aux));
> +	iaux->insn = insn;
> +	iaux->size = size;
> +        
> +	slot = htab_find_slot (insns_aux, iaux, INSERT);
> +	*slot = iaux;
> +      }
> +    /* Store the size of the basic-block for later validation.  */
> +    validate_fbb_data_element (bb->index);
> +    fbb_data[bb->index].bb_size = bb_size;
> +  }
> +}
> +
> +/*  Calculate the size (in bytes) of each loop.
> +    This information will be used later in the code partitioning phase.
> +    */
> +static void
> +record_loops_boundaries (void)
> +{
> +  unsigned i;
> +  basic_block bb;
> +  loop_iterator li;
> +  struct loop *loop;
> +  int total_loop_size;
> +  struct loop_info_def *cur_loop;
> +  struct loop_info_def *loops_aux = NULL;
> +  unsigned int place;
> +  struct insn_aux *iaux;
> +
> +  loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_RECORDED_EXITS);
> +  if (number_of_loops () <= 1)
> +    {
> +      loop_optimizer_finalize ();
> +      return;	/* There are no loops.  */
> +    }
> +  loops_aux =
> +    (struct loop_info_def *) xmalloc (number_of_loops () *
> +				      sizeof (struct loop_info_def));
> +  memset (loops_aux, 0, number_of_loops () * sizeof (struct loop_info_def));

Rather than doing xmalloc (a*b); memset (ptr, 0, a*b), consider using
xcalloc (a, b) instead.
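I.e.:

    loops_aux = (struct loop_info_def *)
      xcalloc (number_of_loops (), sizeof (struct loop_info_def));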

> +  /* Calculate insns size in bytes.  */
> +  record_insns_size_estimation ();
> +
> +  /* Get the start and end insns of each loop.  */
> +  calculate_loop_boundary (loops_aux);
> +
> +  /* We assume that if two loops are disjoint they can not interleave
> +     and if they are not disjoint one is completely contained in the
> +     other.  This assumption help us to avoid the case of interleaved
> +     loop intervals (which is somewhat unlikely to happen in practice).
> +     TODO: calculate all the loops's size in one pass.  */
> +  FOR_EACH_LOOP (li, loop, 0)
> +  {

Indentation.

> +    rtx insn;
> +    rtx start, end;
> +
> +    if (!loop)
> +      continue;
> +
> +    /* Calculate the size of the loop.  */
> +    total_loop_size = 0;
> +    start = BB_HEAD (loops_aux[loop->num].first);
> +    end = BB_END (loops_aux[loop->num].last);
> +
> +    for (insn = start; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
> +      {
> +	if (INSN_P (insn))
> +	  {
> +	    struct insn_aux iaux_tmp;
> +
> +	    /* Retrieve the size of the instruction. */
> +	    iaux_tmp.insn = insn;
> +	    iaux = (struct insn_aux *) htab_find (insns_aux, &iaux_tmp);
> +	    if (iaux)
> +	      total_loop_size += iaux->size;
> +	    else
> +	      total_loop_size += get_attr_min_length (insn);
> +	  }
> +      }

Again, do you need to use a hash table here, or is it simpler to just
recalculate the size or use an index into an array allocated per INSN_UID?

> +    /* Create the loop_info variable, used to hold information about
> +       the loop.  */
> +    cur_loop =
> +      (struct loop_info_def *) xcalloc (1, sizeof (struct loop_info_def));
> +
> +    /* Store the information.  */
> +    cur_loop->size = total_loop_size;
> +    cur_loop->index = loop->num;
> +
> +    validate_fbb_data_element (loops_aux[loop->num].first->index);
> +    /* Allocate memory for the loops info in the loop's first
> +       basic-block; if it does not exist already.  */
> +    if (fbb_data[loops_aux[loop->num].first->index].loops_info_list == NULL)
> +      fbb_data[loops_aux[loop->num].first->index].loops_info_list =
> +	VEC_alloc (loop_info, heap, 1);
> +    /* Insert the loop information in loop->first basic-block.  */
> +    place =
> +      VEC_lower_bound (loop_info,
> +		       fbb_data[loops_aux[loop->num].first->index].
> +		       loops_info_list, cur_loop, loop_size_is_less);
> +    VEC_safe_insert (loop_info, heap,
> +		     fbb_data[loops_aux[loop->num].first->index].
> +		     loops_info_list, place, cur_loop);
> +  }
> +
> +  /* Dump the information that was collected. */
> +  if (dump_file)
> +    FOR_EACH_BB (bb)
> +    {

Indentation.

> +      validate_fbb_data_element (bb->index);
> +
> +      if (fbb_data[bb->index].loops_info_list == NULL)
> +	continue;
> +
> +      for (i = 0;
> +	   VEC_iterate (loop_info, fbb_data[bb->index].loops_info_list, i,
> +			cur_loop); i++)

I would put the i++ on a separate line here.

> +	{
> +	  fprintf (dump_file,
> +		   "\n;; bb %d is the first basic-block in the "
> +		   "loop %d layout (total loop size %dK)",
> +		   bb->index, cur_loop->index, cur_loop->size);
> +	}
> +    }
> +  free (loops_aux);
> +  loop_optimizer_finalize ();
> +}
> +
> +/* An auxiliary structure for recognizing critical sections.  */
> +struct critical_sections_aux
> +{
> +  /* The basic-block which contains the critical section.  */
> +  basic_block start_bb;
> +
> +  /* The type of the critical section.  */
> +  int type;

Would an enum be better here than an int?
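E.g. something like (the enumerator names are purely illustrative, since the
actual types seem to be defined by the target hooks):

    enum critical_section_type
    {
      CRIT_SECTION_NONE,
      CRIT_SECTION_JUMP_TABLE,
      CRIT_SECTION_OTHER
    };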

> +  /* True if the end of the critical section was recognized.  */
> +  bool closed_p;
> +};
> +
> +typedef struct critical_sections_aux *critical_sections;
> +DEF_VEC_P(critical_sections);
> +DEF_VEC_ALLOC_P(critical_sections,heap);
> +
> +/* Given basic-block BB which contains an instruction that ends a
> +   critical section; close an open critical section that is recorded in
> +   START_SEQUENCE; which appears in the same basic-block as bb and is
> +   of type TYPE.  */
> +static void
> +close_critical_sections (VEC(critical_sections, heap) *start_sequence,
> +			 basic_block bb, int type)
> +{
> +  struct critical_sections_aux *crit;
> +  unsigned int i;
> +  bool bb_close_critical_section_p = false;
> +
> +  /* Loop through all of the critical sections.  */
> +  for (i = 0; VEC_iterate (critical_sections, start_sequence, i, crit); i++)
> +    {
> +      /* First check that this bb closes a critical section of the
> +         right type.  */
> +      if (crit->type == type && !crit->closed_p)
> +	{
> +	  int index = crit->start_bb->index;
> +
> +	  if (bb->index == index)
> +	    {
> +	      /* Mark this section as closed.  */
> +	      crit->closed_p = true;
> +
> +	      validate_fbb_data_element (index);
> +	      gcc_assert (!fbb_data[i].in_critical_section_p
> +			  && !fbb_data[i].first_bb_in_critical_section);
> +	      fbb_data[index].in_critical_section_p = true;
> +	      fbb_data[index].critical_section_size = fbb_data[index].bb_size;
> +	      fbb_data[index].first_bb_in_critical_section = true;
> +	      bb_close_critical_section_p = true;
> +	      break;
> +	    }
> +	}
> +    }
> +  if (!bb_close_critical_section_p)
> +    warning (0,
> +	     "Unexpected end of critical section in basic-block %d.  ",
> +	     bb->index);
> +}
> +
> +/* Record the critical section which starts at START_CRIT and ends in
> +   END_CRIT instructions.  This section is part of a jump-table.
> +   By marking this section as critical we validate that the table and
> +   code that reads the table appears in the same section.  */
> +static void
> +record_tablejump (int start_crit, int end_crit)
> +{
> +  int critical_section_size = 0;
> +  int i;
> +  basic_block bb;
> +  basic_block start = BASIC_BLOCK (start_crit);
> +  basic_block end = BASIC_BLOCK (end_crit);
> +
> +  FOR_BB_BETWEEN (bb, start, end, next_bb)
> +    {
> +      i = bb->index;
> +      validate_fbb_data_element (i);
> +      gcc_assert (!fbb_data[i].in_critical_section_p
> +		  && !fbb_data[i].first_bb_in_critical_section);
> +      critical_section_size += fbb_data[i].bb_size;
> +      fbb_data[i].in_critical_section_p = true;
> +    }
> +  validate_fbb_data_element (end->index);
> +  critical_section_size += fbb_data[end->index].bb_size;
> +  fbb_data[end->index].in_critical_section_p = true;
> +
> +  if (fbb_data[start->index].critical_section_size < critical_section_size)
> +    fbb_data[start->index].critical_section_size = critical_section_size;
> +  fbb_data[start->index].first_bb_in_critical_section = true;
> +}

As I said, whether this should be pushed to the backend should be considered.

> +
> +/* Record critical sections.  The instructions in a critical
> +   section must reside in a single section.  */
> +static void
> +record_critical_sections (void)
> +{
> +  basic_block bb;
> +  rtx insn, table, label;
> +  VEC (critical_sections, heap) * start_sequence;
> +  int type, i;
> +  struct critical_sections_aux *crit;
> +
> +  /* If the target does not define any critical section then quit now.  */
> +  if ((targetm.record_jump_table == 0)
> +      && (targetm.begin_critical_section == 0))
> +    return;

You might want to consider using !targetm.record_jump_table and
!targetm.begin_critical_section instead of == 0.  This is minor.
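I.e.:

	  if (!targetm.record_jump_table && !targetm.begin_critical_section)
	    return;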

> +
> +  start_sequence = VEC_alloc (critical_sections, heap, 1);
> +
> +  FOR_EACH_BB (bb)
> +  {
> +    /* Loop through all of the basic-block's insns.  */
> +    FOR_BB_INSNS (bb, insn)
> +    {

Indentation again.

> +      if (tablejump_p (insn, &label, &table))
> +	{
> +	  int start = -1, end = -1;

Should these be ints or HOST_WIDE_INT?

> +
> +	  if (targetm.record_jump_table != 0)
> +	    {
> +	      targetm.record_jump_table (insn, label, &start, &end);
> +
> +	      if (start == -1 || end == -1)
> +		warning (0,
> +			 "Unexpected critical section in basic-block %d.  ",
> +			 bb->index);
> +	      else
> +		/* Record the critical section.  */
> +		record_tablejump (start, end);
> +	    }
> +	}
> +
> +      /* Check whether this insn can begin a critical section.  */
> +      if (targetm.begin_critical_section != 0)
> +	if (targetm.begin_critical_section (insn, &type))
> +	  {
> +	    struct critical_sections_aux *crit;
> +
> +	    crit =
> +	      (struct critical_sections_aux *) xcalloc (1,
> +							sizeof (struct
> +								critical_sections_aux));
> +	    crit->start_bb = bb;
> +	    crit->type = type;
> +	    crit->closed_p = false;
> +	    /* Record the start instruction.  */
> +	    VEC_safe_push (critical_sections, heap, start_sequence, crit);

Given you are pushing a pointer here, I wonder whether it would be better to
have the VEC use a structure instead of pointer to structure?  I'm just
thinking out loud here.  It would save having to do two levels of allocation,
but would involve some structure copies.
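Untested, but the object-VEC variant would look roughly like:

	typedef struct critical_sections_aux critical_sections_aux;
	DEF_VEC_O (critical_sections_aux);
	DEF_VEC_ALLOC_O (critical_sections_aux, heap);

	/* ... and at the push site, copy the structure by value.  */
	struct critical_sections_aux crit_entry;

	crit_entry.start_bb = bb;
	crit_entry.type = type;
	crit_entry.closed_p = false;
	VEC_safe_push (critical_sections_aux, heap, start_sequence, &crit_entry);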

> +	  }
> +
> +      /* Check whether this insn can end a critical section.  */
> +      if (targetm.end_critical_section != 0)
> +	if (targetm.end_critical_section (insn, &type))
> +	  {
> +	    /* Close any critical section that this instruction may end.  */
> +	    close_critical_sections (start_sequence, bb, type);
> +	  }
> +    }
> +  }
> +
> +  for (i = 0; VEC_iterate (critical_sections, start_sequence, i, crit); i++)
> +    {
> +      if (!crit->closed_p)
> +	warning (0,
> +		 "Unexpected start of critical section in basic-block %d.  ",
> +		 crit->start_bb->index);
> +    }

Should this be something like a gcc_assert or error instead of warning?
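E.g., if it really is a "should never happen" condition:

	  gcc_assert (crit->closed_p);

or, if legitimate input can trigger it:

	  if (!crit->closed_p)
	    error ("start of critical section in basic-block %d was never closed",
		   crit->start_bb->index);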

> +
> +  /* Dump the information that was collected. */
> +  if (dump_file)
> +    FOR_EACH_BB (bb)
> +    {
> +      validate_fbb_data_element (bb->index);
> +
> +      if (fbb_data[bb->index].in_critical_section_p
> +	  && fbb_data[bb->index].first_bb_in_critical_section)
> +	{
> +	  fprintf (dump_file,
> +		   "\n;; bb %d starts a critical section (%dB)", bb->index,
> +		   fbb_data[bb->index].critical_section_size);
> +	}
> +    }
> +
> +  /* Free data.  */
> +  for (i = 0; VEC_iterate (critical_sections, start_sequence, i, crit); i++)
> +    {
> +      free (crit);
> +    }

If you make the VEC use the structure instead of pointer to structure, this
loop goes away.

> +  VEC_free (critical_sections, heap, start_sequence);
> +}
> +
> +static void
> +free_fbb_data (void)
> +{
> +  int i, j;

You probably should consider using unsigned HOST_WIDE_INT for the sizes.

> +  struct loop_info_def *cur_loop;
> +
> +  for (i = 0; i < fbb_data_size; i++)
> +    {
> +      if (fbb_data[i].loops_info_list != NULL)
> +	{
> +	  for (j = 0;
> +	       VEC_iterate (loop_info, fbb_data[i].loops_info_list, j,
> +			    cur_loop); j++)

I would break the j++ onto a separate line.

> +	    FREE (cur_loop);
> +
> +	  VEC_free (loop_info, heap, fbb_data[i].loops_info_list);
> +	}
> +    }
> +  FREE (fbb_data);
> +}
> +
> +/* Given RTX_FIRST, the first instruction we start from, check
> +   that there are no unexpected insns outside basic-blocks that could
> +   affect the size of the sections, e.g. unexpected read-only code.  */
> +static void
> +check_unexpected_insns (rtx rtx_first)
> +{
> +  rtx tmp_rtx;
> +  VEC (rtx, heap) *jump_tables_list;
> +  enum bb_state
> +  { NOT_IN_BB, IN_ONE_BB, IN_MULTIPLE_BB };

The indentation here is unusual.  I would fold it to one line instead of having
the '{'/'}' on a second line.
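I.e.:

	  enum bb_state { NOT_IN_BB, IN_ONE_BB, IN_MULTIPLE_BB };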

> +  int max_uid = get_max_uid ();
> +  basic_block *start =
> +    (basic_block *) xcalloc (max_uid, sizeof (basic_block));
> +  basic_block *end = (basic_block *) xcalloc (max_uid, sizeof (basic_block));
> +  enum bb_state *in_bb_p =
> +    (enum bb_state *) xcalloc (max_uid, sizeof (enum bb_state));
> +  basic_block bb;
> +  rtx insn;
> +
> +  jump_tables_list = VEC_alloc (rtx, heap, 1);
> +
> +  if (rtx_first == 0)
> +    return;
> +
> +  FOR_EACH_BB_REVERSE (bb)
> +  {

Indentation.

> +    rtx x, table, label;
> +
> +    start[INSN_UID (BB_HEAD (bb))] = bb;
> +    end[INSN_UID (BB_END (bb))] = bb;
> +    for (x = BB_HEAD (bb); x != NULL_RTX; x = NEXT_INSN (x))
> +      {
> +	enum bb_state state = IN_MULTIPLE_BB;
> +
> +	if (in_bb_p[INSN_UID (x)] == NOT_IN_BB)
> +	  state = IN_ONE_BB;
> +	in_bb_p[INSN_UID (x)] = state;
> +
> +	if (INSN_P (x) && tablejump_p (x, &label, &table))
> +	  {
> +	    /* Record the information regarding the jump table instruction
> +	       which we expect to appear outside of basic-block.  */
> +	    VEC_safe_push (rtx, heap, jump_tables_list, table);
> +	    VEC_safe_push (rtx, heap, jump_tables_list, label);
> +	  }
> +
> +
> +	if (x == BB_END (bb))
> +	  break;
> +      }
> +  }
> +  for (tmp_rtx = rtx_first; NULL != tmp_rtx; tmp_rtx = NEXT_INSN (tmp_rtx))
> +    {
> +      if (in_bb_p[INSN_UID (tmp_rtx)] == NOT_IN_BB
> +	  && !NOTE_P (tmp_rtx) && !BARRIER_P (tmp_rtx))
> +	{
> +	  int i;
> +	  bool found_p = false;
> +	  rtx label;
> +
> +	  for (i = 0; VEC_iterate (rtx, jump_tables_list, i, insn); i++)
> +	    {
> +	      /* We expect that the table itself would appear outside
> +	         of basic-block.  */
> +	      if (rtx_equal_p (tmp_rtx, insn))
> +		{
> +		  found_p = true;
> +		  break;
> +		}
> +	    }
> +	  if (!found_p)
> +	    warning (0, "Unexpected insns outside basic-blocks;"
> +		     " -fpartition-functions-into-sections may be affected.");

Should this be a gcc_assert/error instead of warning?

> +	  else
> +	    {
> +	      /* Validate that the table and code that reads the table
> +	         will appear in the same section.  */
> +	      if (GET_CODE (tmp_rtx) == CODE_LABEL)
> +		{
> +		  if (tablejump_p (prev_active_insn (tmp_rtx), &label, NULL))
> +		    {
> +		      if (!rtx_equal_p (tmp_rtx, label))
> +			{
> +			  warning (0, "Unexpected insns outside basic-blocks;"
> +				   " -fpartition-functions-into-sections "
> +                                   "may be affected.");
> +			  return;
> +			}
> +
> +		    }
> +		  else
> +		    {
> +		      warning (0, "Unexpected insns outside basic-blocks;"
> +			       " -fpartition-functions-into-sections may "
> +                               "be affected.");
> +		      return;
> +		    }
> +		}
> +	    }
> +	}
> +
> +    }
> +
> +  free (start);
> +  free (end);
> +  free (in_bb_p);
> +  VEC_free (rtx, heap, jump_tables_list);
> +}
> +
> +/* Main entry point for partitioning functions into sections in order
> +   to make them fit a small local store.
> +
> +   The motivation:
> +
> +   The local store of embedded processors usually imposes a strict
> +   threshold on the code size of programs.  Having limited memory (and
> +   no hardware i-cache) creates cases where a single program's code
> +   cannot fit into it.  To overcome this problem, the programmer can
> +   partition the code into multiple pieces and manually swap them in and
> +   out of the local store.  To simplify this task, an overlay technique
> +   can be used which handles the memory management tasks automatically
> +   and relieves the programmer from managing the local store manually.
> +
> +   Preparing the code for the overlay technique consists of a
> +   preprocessing static stage which is done by the compiler and linker.
> +   The compiler first partitions the code into sections which are later
> +   constructed into an overlaid program by the linker.  At execution
> +   time the overlay manager automatically swaps pieces of code in and
> +   out of the local store.
> +
> +   The ability to place each function in a different section already
> +   exists in GCC (-ffunction-sections).  However, using the granularity
> +   of a single function for partitioning is not sufficient when a single
> +   function's body cannot fully fit in the local store.  This new pass
> +   overcomes this problem by breaking up large functions.  It can also
> +   break a function into fixed sized sections that could be used for
> +   a software i-cache scheme implementation, as an alternative to the
> +   overlay technique.  */
> +
> +static void
> +partition_function_into_sections (void)
> +{
> +  edge *crossing_edges;
> +  int n_crossing_edges = 0;

Should this be unsigned HOST_WIDE_INT instead of int?

> +  int max_edges = 10 * last_basic_block;
> +  basic_block cur_bb;
> +  int i;

Should this be unsigned HOST_WIDE_INT instead of int?

> +
> +  fbb_data_size = 10 * last_basic_block;
> +
> +  if (n_basic_blocks < 1)
> +    return;
> +
> +  /* Allocate the initial hunk of the fbb_data.  */
> +  fbb_data =
> +    (funcpart_basic_block_data *) xmalloc (fbb_data_size *
> +					   sizeof
> +					   (funcpart_basic_block_data));

You might consider using xcalloc here, and then only initializing the
section_id to -1 in the loop.
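Roughly:

	  fbb_data = (funcpart_basic_block_data *)
	    xcalloc (fbb_data_size, sizeof (funcpart_basic_block_data));

	  for (i = 0; i < fbb_data_size; i++)
	    fbb_data[i].section_id = -1;

(xcalloc zeroes the memory, which takes care of the NULL/false/0 fields.)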

> +  for (i = 0; i < fbb_data_size; i++)
> +    {
> +      fbb_data[i].section_id = -1;
> +      fbb_data[i].loops_info_list = NULL;
> +      fbb_data[i].in_critical_section_p = false;
> +      fbb_data[i].first_bb_in_critical_section = false;
> +      fbb_data[i].critical_section_size = 0;
> +      fbb_data[i].bb_size = 0;
> +    }
> +
> +  insns_aux = htab_create (10 * last_basic_block,
> +			   iaux_info_hash, iaux_info_eq, free);
> +

Would it be better to use a VEC here instead of a hash table, based on the
basic block ID?

> +  check_unexpected_insns (get_insns ());
> +
> +  /* Update the maximum section size (in bytes).  */
> +  estimate_max_section_size =
> +    flag_partition_functions_into_sections - estimate_section_overhead;
> +
> +  crossing_edges = (edge *) xcalloc (max_edges, sizeof (edge));
> +
> +  /* Initialize structures for layout changes.  */
> +  cfg_layout_initialize (0);
> +
> +  /* Record the start and size of each loop in fbb_data array.  */
> +  record_loops_boundaries ();
> +
> +  /* Record the critical sections that must reside in a single section.  */
> +  record_critical_sections ();
> +
> +  FOR_EACH_BB (cur_bb)
> +  {

Indentation.

> +    cur_bb->flags &= ~BB_FIRST_AFTER_SECTION_SWITCH;
> +
> +    if (cur_bb->next_bb != EXIT_BLOCK_PTR)
> +      cur_bb->aux = cur_bb->next_bb;
> +  }
> +
> +  create_sections (&crossing_edges, &n_crossing_edges, &max_edges);
> +
> +  if (n_crossing_edges > 0)
> +    {
> +      /* Make sure the source of any crossing edge ends in a jump and the
> +         destination of any crossing edge has a label.  */
> +      add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);
> +      /* Convert all crossing fall_thru edges to non-crossing fall
> +         thrus to unconditional jumps (that jump to the original fall
> +         thru dest).  */
> +      fix_up_fall_thru_edges ();
> +    }
> +
> +  /* Free data.  */
> +  FREE (crossing_edges);
> +  free_fbb_data ();
> +  /* Finalize layout changes.  */
> +  cfg_layout_finalize ();
> +
> +  free_dominance_info (CDI_DOMINATORS);
> +  if (insns_aux)
> +    htab_delete (insns_aux);
> +
> +  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
> +  insert_section_boundary_note_in_function ();
> +}
> +
> +static bool
>  gate_handle_reorder_blocks (void)
>  {
>    if (targetm.cannot_modify_jumps_p ())
> @@ -2296,4 +3544,154 @@
>   }
>  };
>  
> +/* Estimate the overhead (in bytes) of creating a new section; the
> +   unconditional branch instruction added between sections should also be
> +   taken into account.  */
> +static void
> +get_est_section_overhead (void)
> +{
> +  gcc_assert (flag_partition_functions_into_sections != 0);
>  
> +  estimate_section_overhead = 0;
> +  estimate_max_section_size = 0;
> +
> +  if (uncond_jump_length == 0)
> +    uncond_jump_length = get_uncond_jump_length ();
> +
> +  if (targetm.est_section_overhead != 0)
> +    {
> +      /* The machine dependent pass could add extra instructions
> +         as a result of the new branches.  */
> +      estimate_section_overhead =
> +        targetm.est_section_overhead ();
> +    }
> +  /* Add the size of the new branch that will be created for each
> +     section.  */
> +  else
> +    estimate_section_overhead += uncond_jump_length;
> +}
> +
> +/* Return TRUE if an instruction exists such that it exceeds the threshold
> +   of the section size.  Otherwise return FALSE.  */
> +static bool
> +instruction_size_exceeds_threshold (void)
> +{
> +  basic_block bb;
> +  rtx insn;
> +  int size = 0;

Sizes should be unsigned HOST_WIDE_INT.

> +
> +  FOR_EACH_BB (bb)
> +  {
> +    FOR_BB_INSNS (bb, insn)
> +    {

Indentation.

> +      if (INSN_P (insn) || NOTE_P (insn))
> +	{
> +          if (targetm.est_instruction_size != 0)
> +            size = targetm.est_instruction_size (insn);
> +          else
> +            size = get_attr_min_length (insn);
> +
> +	  if (size >
> +	      (flag_partition_functions_into_sections -
> +	       estimate_section_overhead))
> +	    {
> +	      warning (0,
> +		       "Section threshold value is too small.  ");
> +	      return true;
> +	    }
> +	}
> +    }
> +  }
> +  return false;
> +}
> +
> +static bool
> +gate_handle_partition_functions (void)
> +{
> +  if ((flag_partition_functions_into_sections == 0)
> +      || DECL_ONE_ONLY (current_function_decl)
> +      || user_defined_section_attribute)
> +    return 0;
> + 
> +  /* Estimate the overhead of creating a new section in terms of the new
> +     instructions that are needed to support it.  */
> +  get_est_section_overhead ();
> +    
> +  /* Make sure there is no instruction with size that exceeds the
> +     estimated section size.  */
> +  return (!instruction_size_exceeds_threshold ());
> +}
> +
> +static unsigned int 
> +rest_of_handle_partition_functions (void)
> +{
> +  partition_function_into_sections ();
> +  return 0;
> +}
> +
> +struct rtl_opt_pass pass_partition_functions =
> +{
> +  {
> +   RTL_PASS,
> +   "fpart",                              /* name */
> +   gate_handle_partition_functions,      /* gate */
> +   rest_of_handle_partition_functions,   /* execute */
> +   NULL,                                 /* sub */
> +   NULL,                                 /* next */
> +   0,                                    /* static_pass_number */
> +   TV_FUNCTION_PARTITION,                /* tv_id */
> +   0,                                    /* properties_required */
> +   0,                                    /* properties_provided */
> +   0,                                    /* properties_destroyed */
> +   TODO_dump_func,                       /* todo_flags_start */
> +   TODO_dump_func,                       /* todo_flags_finish */
> +  }
> +};
> +
> +/* Validate that the size (in bytes) of each section does not exceed
> +   the maximum section size the user defined.  */
> +
> +void
> +check_sections (void)
> +{
> +  rtx insn;
> +  int section_size = 0;

Sizes should be unsigned HOST_WIDE_INT.

> +  int number_of_sections = 1;
> +
> +  if (flag_partition_functions_into_sections == 0)
> +    return;
> +
> +  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
> +    {
> +      if (INSN_P (insn))
> +	{
> +	  section_size += get_attr_min_length (insn);
> +	  continue;
> +	}
> +      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> +	{
> +	  number_of_sections++;
> +	  if (section_size > flag_partition_functions_into_sections)
> +	    {
> +	      warning (0,
> +		       "section %d (%dB) exceeds section threshold "
> +		       "%dB",
> +		       NOTE_TEXT_SECTION (insn) - 1, section_size,
> +		       flag_partition_functions_into_sections);
> +	    }

You might want to think about whether you should use a particular value for
warning instead of 0.
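I don't know of an existing warning class that is an exact fit;
OPT_Wdisabled_optimization might be close enough, e.g.:

	  warning (OPT_Wdisabled_optimization,
		   "section %d (%dB) exceeds section threshold %dB",
		   NOTE_TEXT_SECTION (insn) - 1, section_size,
		   flag_partition_functions_into_sections);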

> +	}
> +      section_size = 0;
> +    }
> +
> +  /* Check the last section.  */
> +  if (section_size > flag_partition_functions_into_sections)
> +    {
> +      warning (0,
> +	       "section %d (%dB) exceeds section threshold "
> +	       "%dB",
> +	       number_of_sections, section_size,
> +	       flag_partition_functions_into_sections);
> +    }
> +
> +}
> Index: basic-block.h
> ===================================================================
> --- basic-block.h	(revision 138847)
> +++ basic-block.h	(working copy)
> @@ -332,7 +332,10 @@
>  
>    /* Set on blocks that cannot be threaded through.
>       Only used in cfgcleanup.c.  */
> -  BB_NONTHREADABLE_BLOCK = 1 << 11
> +  BB_NONTHREADABLE_BLOCK = 1 << 11,
> +
> +  /* Set on blocks that are first in a section.  */
> +  BB_FIRST_AFTER_SECTION_SWITCH = 1 << 12 
>  };
>  
>  /* Dummy flag for convenience in the hot/cold partitioning code.  */
> @@ -941,6 +944,7 @@
>  unsigned bb_dom_dfs_in (enum cdi_direction, basic_block);
>  unsigned bb_dom_dfs_out (enum cdi_direction, basic_block);
>  
> +extern void check_sections (void);
>  extern edge try_redirect_by_replacing_jump (edge, basic_block, bool);
>  extern void break_superblocks (void);
>  extern void relink_block_chain (bool);

Again if the global changes that I suggested are made, this will likely change.

> Index: passes.c
> ===================================================================
> --- passes.c	(revision 138847)
> +++ passes.c	(working copy)
> @@ -805,6 +805,7 @@
>  	  NEXT_PASS (pass_compute_alignments);
>  	  NEXT_PASS (pass_duplicate_computed_gotos);
>  	  NEXT_PASS (pass_variable_tracking);
> +          NEXT_PASS (pass_partition_functions);
>  	  NEXT_PASS (pass_free_cfg);
>  	  NEXT_PASS (pass_machine_reorg);
>  	  NEXT_PASS (pass_cleanup_barriers);
> @@ -996,6 +997,9 @@
>        else
>  	gcc_unreachable ();
>      }
> +  if (flags & TODO_check_sections
> +      && flag_partition_functions_into_sections != 0)
> +     check_sections ();
>  
>  #if defined ENABLE_CHECKING
>    if (flags & TODO_verify_ssa)
> Index: config/spu/spu.c
> ===================================================================
> --- config/spu/spu.c	(revision 138847)
> +++ config/spu/spu.c	(working copy)
> @@ -138,6 +138,12 @@
>  static int spu_builtin_vectorization_cost (bool);
>  static bool spu_vector_alignment_reachable (const_tree, bool);
>  static int spu_sms_res_mii (struct ddg *g);
> +static int spu_est_section_overhead (void);
> +static int spu_est_instruction_size (rtx);
> +static bool spu_begin_critical_section (rtx, int *);
> +static bool spu_end_critical_section (rtx, int *);
> +static void spu_record_jump_table (rtx, rtx, int *, int *);
> +static void part_func_to_sections_flags (void);
>  
>  extern const char *reg_names[];
>  rtx spu_compare_op0, spu_compare_op1;
> @@ -297,6 +303,24 @@
>  #undef TARGET_SCHED_SMS_RES_MII
>  #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
>  
> +#undef TARGET_EST_SECTION_OVERHEAD
> +#define TARGET_EST_SECTION_OVERHEAD spu_est_section_overhead
> +
> +#undef TARGET_EST_INSTRUCTION_SIZE
> +#define TARGET_EST_INSTRUCTION_SIZE spu_est_instruction_size
> +
> +#undef TARGET_BEGIN_CRITICAL_SECTION
> +#define TARGET_BEGIN_CRITICAL_SECTION spu_begin_critical_section
> +
> +#undef TARGET_END_CRITICAL_SECTION
> +#define TARGET_END_CRITICAL_SECTION spu_end_critical_section
> +
> +#undef TARGET_RECORD_JUMP_TABLE
> +#define TARGET_RECORD_JUMP_TABLE spu_record_jump_table
> +
> +#undef TARGET_PART_FUNC_TO_SECTIONS_FLAGS
> +#define TARGET_PART_FUNC_TO_SECTIONS_FLAGS part_func_to_sections_flags
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;

These will change if you do the global changes.

>  
>  void
> @@ -1969,6 +1993,347 @@
>  
>    return gen_rtx_CONST_VECTOR (mode, v);
>  }
> +
> +/* Set specific flags for the pass which partitions a function into
> +   multiple sections.  */
> +static void 
> +part_func_to_sections_flags (void)
> +{
> +  fix_range ("75-79");
> +}
> +
> +/* Return the size of the stub that the linker will insert for INSN when
> +   it is a branch to another section.  */
> +static int
> +get_stub_size (rtx insn)
> +{
> +  int stub_size = 0;
> +
> +  if (!JUMP_P (insn) && !CALL_P (insn))
> +    return 0;
> +
> +  if (JUMP_P (insn))
> +    {
> +      rtx set, src;
> +
> +      stub_size = spu_stub_size;
> +
> +      /* If the branch instruction and the branch target are in the
> +         same basic-block they will probably be in the same section
> +         as well.  Do not add the stub size in this case.  */
> +      if (!tablejump_p (insn, NULL, NULL)
> +	  && JUMP_LABEL (insn)
> +	  && (BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn)))
> +	stub_size = 0;
> +
> +      /* For indirect branches including jump-tables (not including the
> +         cases) and indirect function calls; stubs will be created in the
> +         non-overlay local store so their stub size is not inserted to the
> +         section calculation.  */
> +      /* Return statements */
> +      if (GET_CODE (PATTERN (insn)) == RETURN)
> +	stub_size = 0;
> +
> +      /* jump table */
> +      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
> +	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
> +	stub_size = 0;
> +
> +      set = single_set (insn);
> +      if (set)
> +	{
> +	  src = SET_SRC (set);
> +	  if (GET_CODE (SET_DEST (set)) != PC)
> +	    abort ();
> +
> +	  if (GET_CODE (src) == IF_THEN_ELSE)
> +	    {
> +	      rtx lab = 0;
> +
> +	      if (GET_CODE (src) == IF_THEN_ELSE)
> +		{
> +		  rtx lab = 0;
> +		  if (GET_CODE (XEXP (src, 1)) != PC)
> +		    lab = XEXP (src, 1);
> +		  else if (GET_CODE (XEXP (src, 2)) != PC)
> +		    lab = XEXP (src, 2);
> +		}
> +	      if (lab && REG_P (lab))
> +		stub_size = 0;
> +	    }
> +	}
> +    }
> +  else if (CALL_P (insn))
> +    {
> +      rtx call;
> +
> +      stub_size = spu_stub_size;
> +      /* All of our call patterns are in a PARALLEL and the CALL is
> +         the first pattern in the PARALLEL. */
> +      if (GET_CODE (PATTERN (insn)) != PARALLEL)
> +	abort ();
> +      call = XVECEXP (PATTERN (insn), 0, 0);
> +      if (GET_CODE (call) == SET)
> +	call = SET_SRC (call);
> +      if (GET_CODE (call) != CALL)
> +	abort ();
> +      if (REG_P (XEXP (XEXP (call, 0), 0)))
> +	stub_size = 0;
> +    }
> +
> +   return stub_size;
> +}
> +
> +/* Return the size of instruction INSN in bytes.  Take into account the
> +   size of extra machine dependent instructions that can be added
> +   as a result of insn (like branch-hints for branch instructions).
> +   Called when partitioning a function into sections.  */
> +static int
> +spu_est_instruction_size (rtx insn)
> +{

It is minor, but you might want to consider spelling out 'est'.  The return
value should probably be unsigned HOST_WIDE_INT.

You might want to consider moving this computation to be an attribute in the
spu.md file, instead of being just C code.

> +  int size;
> +  rtx table;
> +
> +  if (NOTE_P (insn))
> +    {
> +      return 0;
> +    }
> +
> +  size = get_attr_min_length (insn);
> +  if (tablejump_p (insn, NULL, &table))
> +  {
> +     rtvec vec;
> +
> +     if (GET_CODE (PATTERN (table)) == ADDR_VEC)
> +        vec = XVEC (PATTERN (table), 0);
> +      else
> +        vec = XVEC (PATTERN (table), 1);
> +
> +     /* Add the size of the table to the insn size.  */
> +     size += (GET_NUM_ELEM (vec) * 4);
> +  }
> +
> +  size += get_stub_size (insn);
> +
> +  if (!TARGET_BRANCH_HINTS || optimize == 0)
> +    return size;
> +
> +  /* Add the nops and branch hint which are added for each branch.
> +     For hints the worst case is 4 nops for every branch.  */
> +  if ((JUMP_P (insn) || CALL_P (insn))
> +      && !tablejump_p (insn, NULL, NULL))
> +    {
> +      /* 4 nops + 1 hint.  */
> +      size += 20;
> +    }
> +  return size;
> +}
> +
> +/* Estimate the size in bytes of the extra instructions that will be
> +   generated for each section as a result of creating a new branch for
> +   that section.  Called when partitioning a function into sections.  */
> +static int
> +spu_est_section_overhead (void)
> +{
> +  int extra_branch_insns = 0;
> +
> +  if (TARGET_BRANCH_HINTS && optimize != 0)
> +    {
> +      /* Add the nops and branch hint which are added for each branch.
> +         For hints the worst case is 4 nops for every branch.  */
> +      extra_branch_insns = 5;
> +    }
> +   /* Take into account the new branch instruction, its extra
> +      instructions if they are created and the size of the stub that the
> +      linker will add to it.  */
> +  return ((1 + extra_branch_insns) * 4 + spu_stub_size);
> +}

Again sizes should probably be unsigned HOST_WIDE_INT.

> +
> +/* The following are types of critical section.
> +   Those sequences must reside in a single section.  */
> +/* DMA sequence (wrch 16-21).  */
> +#define CRITICAL_DMA_SEQ        1
> +/* Event mask read/write/ack sequence (rd/wrch 0-2).  */
> +#define CRITICAL_EVENT_MASK     2
> +
> +/* Return true if INSN begins a critical section.
> +   Record its type in TYPE.  */
> +static bool
> +spu_begin_critical_section (rtx insn, int *type)
> +{
> +  rtx body, set;
> +
> +  if (!INSN_P (insn))
> +    return false;
> +  
> +  if (GET_CODE (PATTERN (insn)) == PARALLEL)
> +    return false;
> +
> +  set = single_set (insn);
> +
> +  if (set != 0)
> +    {
> +      /* We are looking for this type of instruction:
> + 
> +         (insn (set (reg)
> +         (unspec_volatile [(const_int)])) {spu_rdch_noclobber})
> +      */
> +      body = SET_SRC (set);
> +      if (GET_CODE (body) == UNSPEC_VOLATILE)
> +        if (XINT (body, 1) == UNSPEC_RDCH)
> +          {
> +            rtx tmp = XVECEXP (body, 0, 0);
> +
> +            if (tmp &&  GET_CODE (tmp) == CONST_INT && INTVAL (tmp) == 0)
> +              {
> +                *type = CRITICAL_EVENT_MASK;
> +                return true;
> +              }
> +          }
> +    }
> +  else
> +    {
> +      /* We are looking for this type of instruction:
> +
> +         (insn (unspec_volatile [(const_int) (subreg)])  
> +               {spu_wrch_noclobber})
> +      */
> +
> +      body = PATTERN (insn);
> +
> +      if (GET_CODE (body) == UNSPEC_VOLATILE)
> +        if (XINT (body, 1) == UNSPEC_WRCH)
> +          {
> +            rtx tmp = XVECEXP (body, 0, 0);
> +
> +            /* MFC_LSA 16  */
> +            if (tmp &&  (GET_CODE (tmp) == CONST_INT) && INTVAL (tmp) == 16)
> +              {
> +                *type = CRITICAL_DMA_SEQ;
> +                return true;
> +              }
> +          }
> +
> +    }
> +  return false;
> +}

This whole function might be better replaced by an attribute in spu.md.

> +
> +/* Return true if INSN ends a critical section.
> +   Record its type in TYPE.  */
> +static bool
> +spu_end_critical_section (rtx insn, int *type)
> +{
> +  rtx body;
> +
> +  if (!INSN_P (insn))
> +    return false;
> +
> +  body = PATTERN (insn);
> +
> +  /* We are looking for the following type of instruction:
> +
> +     (insn (parallel [
> +           (unspec_volatile [(const_int) (subreg)])
> +                             (clobber (mem))]) {spu_wrch_clobber}) */
> +
> +  if (GET_CODE (body) == PARALLEL)
> +    {
> +      rtx tmp = XVECEXP (body, 0, 0);
> +
> +      if (GET_CODE (tmp) == UNSPEC_VOLATILE)
> +	{
> +	  if (XINT (tmp, 1) == UNSPEC_WRCH)
> +	    {
> +	      rtx const_val = XVECEXP (tmp, 0, 0);
> +
> +	      /* MFC_Cmd 21  */
> +	      if (GET_CODE (const_val) == CONST_INT
> +		  && INTVAL (const_val) == 21)
> +		{
> +		  *type = CRITICAL_DMA_SEQ;
> +		  return true;
> +		}
> +	    }
> +	}
> +    }
> +  else
> +    {
> +      /* We are looking for the following type of instruction:
> +
> +        (insn (unspec_volatile [(const_int]) (subreg)])  
> +               (spu_wrch_noclobber}) */
> +
> +      if (GET_CODE (body) == UNSPEC_VOLATILE)
> +        if (XINT (body, 1) == UNSPEC_WRCH)
> +          {
> +            rtx tmp = XVECEXP (body, 0, 0);
> +
> +            /*  SPU_WrEventAck 2  */
> +            if (tmp && (GET_CODE (tmp) == CONST_INT) && INTVAL (tmp) == 2)
> +              {
> +                *type = CRITICAL_EVENT_MASK;
> +                return true;
> +              }
> +          }
> +
> +    }
> +  return false;
> +}

This whole function might be better replaced by an attribute in spu.md.

> +
> +/* Record the critical section which is part of the jump-table. START and
> +   END are the indices of the basic-blocks which indicate the boundaries
> +   of the critical section.  The critical section should contain the
> +   jump-table and code that reads the table to make sure they will
> +   appear in the same section.  We are looking for the following sequence
> +   of instructions:
> + 
> +   ila ,LABEL 
> +   .
> +   .
> +   .
> +   JUMP_INSN (use label)
> +   LABEL:
> +   table
> +   .
> +   .
> +   .
> +   end of table
> +   */
> +static void
> +spu_record_jump_table (rtx jump_insn, rtx label, int *start,
> +		       int *end)
> +{
> +  rtx insn, set;
> +  basic_block bb;
> +
> +  for (insn = jump_insn; insn != NULL; insn = PREV_INSN (insn))
> +    {
> +      rtx src;
> +
> +      set = single_set (insn);
> +
> +      if (!set)
> +	continue;
> +
> +      bb = BLOCK_FOR_INSN (insn);
> +
> +      src = SET_SRC (set);
> +      if (GET_CODE (src) == LABEL_REF)
> +	{
> +	  rtx label1 = XEXP (src, 0);
> +
> +	  if (rtx_equal_p (label, label1))
> +	    {
> +              /* Record the section.  */
> +	      *start = bb->index;
> +	      *end = BLOCK_FOR_INSN (jump_insn)->index;
> +	      return;
> +	    }
> +	}
> +
> +    }
> +}
> +
>  
>  /* branch hint stuff */
>  
> Index: config/spu/spu.h
> ===================================================================
> --- config/spu/spu.h	(revision 138847)
> +++ config/spu/spu.h	(working copy)
> @@ -631,3 +631,7 @@
>  extern GTY(()) rtx spu_compare_op0;
>  extern GTY(()) rtx spu_compare_op1;
>  
> +#define HAS_LONG_COND_BRANCH 1
> +#define HAS_LONG_UNCOND_BRANCH 1

I think these probably should go.  If they stay, they might be more appropriate
as entries in the targetm structure instead of via #ifdef.
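If they do stay, a rough sketch of the targetm variant (the names here are
just placeholders, not existing hooks):

	  /* In struct gcc_target:  */
	  bool has_long_cond_branch;
	  bool has_long_uncond_branch;

	  /* In spu.c:  */
	  #undef TARGET_HAS_LONG_COND_BRANCH
	  #define TARGET_HAS_LONG_COND_BRANCH true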

> +
> +
> Index: config/spu/spu.opt
> ===================================================================
> --- config/spu/spu.opt	(revision 138847)
> +++ config/spu/spu.opt	(working copy)
> @@ -35,6 +35,11 @@
>  Target Report RejectNegative InverseMask(SAFE_DMA)
>  volatile must be specified on any memory that is effected by DMA
>  
> +mstub-size=
> +Target RejectNegative Joined UInteger Var(spu_stub_size) Init(16)
> +Specify the stub size which is inserted by the linker when an
> +overlay manager is used (default is 16 bytes)
> +
>  mstdmain
>  Target Report Mask(STD_MAIN)
>  Use standard main function as entry for startup
> 
> Index: cfgrtl.c
> ===================================================================
> --- cfgrtl.c	(revision 138950)
> +++ cfgrtl.c	(working copy)
> @@ -1737,7 +1737,9 @@
>  
>        for (insn = bb->il.rtl->header; insn; insn = NEXT_INSN (insn))
>  	if (!BARRIER_P (insn)
> -	    && BLOCK_FOR_INSN (insn) != NULL)
> +	    && BLOCK_FOR_INSN (insn) != NULL
> +            && !(GET_CODE (insn) == NOTE
> +                 && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION))
>  	  {
>  	    error ("insn %d in header of bb %d has non-NULL basic block",
>  		   INSN_UID (insn), bb->index);
> @@ -1909,7 +1911,8 @@
>  	    if (x == BB_END (bb))
>  	      break;
>  
> -	    if (control_flow_insn_p (x))
> +	    if (control_flow_insn_p (x)
> +                && (!block_ends_with_call_p (bb)))
>  	      {
>  		error ("in basic block %d:", bb->index);
>  		fatal_insn ("flow control insn inside a basic block", x);
> @@ -1962,7 +1965,9 @@
>  
>  	  /* And that the code outside of basic blocks has NULL bb field.  */
>  	if (!BARRIER_P (x)
> -	    && BLOCK_FOR_INSN (x) != NULL)
> +	    && BLOCK_FOR_INSN (x) != NULL
> +            && !(GET_CODE (x) == NOTE
> +                 && NOTE_KIND (x) == NOTE_INSN_VAR_LOCATION))
>  	  {
>  	    error ("insn %d outside of basic blocks has non-NULL bb field",
>  		   INSN_UID (x));
> @@ -2052,7 +2057,9 @@
>        /* Check that the code before the first basic block has NULL
>  	 bb field.  */
>        if (!BARRIER_P (x)
> -	  && BLOCK_FOR_INSN (x) != NULL)
> +	  && BLOCK_FOR_INSN (x) != NULL
> +          && !(GET_CODE (x) == NOTE
> +               && NOTE_KIND (x) == NOTE_INSN_VAR_LOCATION))
>  	{
>  	  error ("insn %d outside of basic blocks has non-NULL bb field",
>  		 INSN_UID (x));

-- 
Michael Meissner
email: gnu@the-meissners.org
http://www.the-meissners.org

