This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/2] New pass to partition single function into multiple sections (take 2)


Hello,

Attached is an updated version of the patch, following your review.
(Please find below replies to your comments)

The second part of the patch, which contains
support for debug and unwind info, was not reviewed yet
(http://gcc.gnu.org/ml/gcc-patches/2008-08/msg02234.html), so I disabled
-fpartition-functions-into-sections for those cases. (it should be.
enabled once the second part will be accepted)

It passes bootstrap and regtest on powerpc64 and x86_64. On SPU it.
passes regtest (c, c++, and fortran).

Does this patch OK for mainline?

Thanks again,
Revital


> > Index: target.h
> > ===================================================================
> > --- target.h   (revision 138847)
> > +++ target.h   (working copy)
> > @@ -694,6 +694,37 @@
> >       delayed-branch scheduling.  */
> >    void (* machine_dependent_reorg) (void);
> >
> > +    /* Estimate the number of extra instructions that will be added
for each
> > +       section.  Called when partitioning a function into sections.
*/
> > +  int (* est_section_overhead) (void);
> > +
> > +  /* Return the size of instruction in bytes.  Take into account the
> > +     size of extra machine depndent instructions that can be added as
> > +     a result of insn. (like branch-hints for branch instructions).
> > +     Called when partitioning a function into sections.   */
> > +  int (* est_instruction_size) (rtx);
> > +
> > +  /* Set specific flags; if needed; when partitioning a function into
> > +     several sections.  */
> > +  void (* part_func_to_sections_flags) (void);
> > +
> > +  /* True if an instruction (which is the first parameter) is the
> > +     begining/end of a critical section.  Called when partitioning a
> > +     function into sections.  Define this hook if a sequence of
> > +     instructions must reside in the same section.
> > +     The second parameter is the type of the critical section.  */
> > +  bool (* begin_critical_section) (rtx, int *);
> > +  bool (* end_critical_section) (rtx, int *);
> > +
> > +  /* Record the critical section which contains a jump-table.  The
first
> > +     two parameters are the jump-instruction and the label of the
> > +     jump-table.  The last two parameters are the indices of the
> > +     basic-blocks which indicate the boundaries of the critical
section.
> > +     Called when partitioning a function into sections.  The critical
> > +     section should contain the jump-table and the code that reads the
> > +     table to make sure they will appear in the same section.  */
> > +  void (* record_jump_table) (rtx, rtx, int *, int *);
> > +
> >    /* Create the __builtin_va_list type.  */
> >    tree (* build_builtin_va_list) (void);
>
> Obviously if you follow the suggestions in my previous message, these
will all
> change.  Depending on the hooks that you create, you probably should
consider
> making a substructure to hold all of the partitioning hooks.  If you keep
in
> the est_* hooks, I would spell them estimate_ instead of est_, but that
is minor.

Done. I used a substructure and replaced est_ with estimate_.

> > Index: final.c
> > ===================================================================
> > --- final.c   (revision 138847)
> > +++ final.c   (working copy)
> > @@ -1819,7 +1819,10 @@
> >  #endif
> >         (*debug_hooks->switch_text_section) ();
> >
> > -     switch_to_section (current_function_section ());
> > +          if (flag_partition_functions_into_sections)
> > +            switch_to_section (text_part_section (NOTE_TEXT_SECTION
(insn)));
> > +          else
> > +            switch_to_section (current_function_section ());
> >       break;
>
> This would change if we move the section into the BB structure.
>
> >     case NOTE_INSN_BASIC_BLOCK:
> > @@ -4209,7 +4212,7 @@
> >    0,                                    /* properties_required */
> >    0,                                    /* properties_provided */
> >    0,                                    /* properties_destroyed */
> > -  0,                                    /* todo_flags_start */
> > +  TODO_check_sections,                  /* todo_flags_start */
> >    TODO_ggc_collect                      /* todo_flags_finish */
> >   }
> >  };
>
> > Index: opts.c
> > ===================================================================
> > --- opts.c   (revision 138847)
> > +++ opts.c   (working copy)
> > @@ -1128,6 +1128,22 @@
> >        optimization_current_node = optimization_default_node;
> >        first_time_p = false;
> >      }
> > +  if (flag_partition_functions_into_sections
> > +      && (!targetm.have_named_sections))
> > +    {
> > +      warning
> > +        (0, "-fpartition-functions-into-sections does not work on this
"
> > +         "architecture");
> > +      flag_partition_functions_into_sections = 0;
> > +    }
> > +  if (flag_partition_functions_into_sections
> > +      && (!HAS_LONG_COND_BRANCH || !HAS_LONG_UNCOND_BRANCH))
> > +    {
> > +      warning
> > +        (0, "-fpartition-functions-into-sections does not work on this
"
> > +         "architecture");
> > +      flag_partition_functions_into_sections = 0;
> > +    }
> >  }
> >
> >  #define LEFT_COLUMN   27
> > @@ -1902,7 +1918,18 @@
> >      case OPT_fsched_stalled_insns_dep_:
> >        flag_sched_stalled_insns_dep = value;
> >        break;
> > -
> > +
> > +    case OPT_fpartition_functions_into_sections_:
> > +      flag_partition_functions_into_sections = value;
> > +      if (flag_partition_functions_into_sections != 0)
> > +        {
> > +          if (targetm.part_func_to_sections_flags != 0)
> > +            targetm.part_func_to_sections_flags ();
> > +          flag_function_sections = 1;
> > +          /* TODO: Use asm directives to handle multiple sections.  */
> > +          flag_dwarf2_cfi_asm = 0;
> > +        }
> > +      break;
> >      case OPT_fstack_limit:
> >        /* The real switch is -fno-stack-limit.  */
> >        if (value)
>
> Obviously we will want to flesh out the TODO stuff later.

Sure.

> > Index: function.h
> > ===================================================================
> > --- function.h   (revision 138847)
> > +++ function.h   (working copy)
> > @@ -236,6 +236,10 @@
> >    rtx internal_arg_pointer;
> >  };
> >
> > +typedef const char *char_p;    /* For DEF_VEC_P.  */
> > +DEF_VEC_P(char_p);
> > +DEF_VEC_ALLOC_P(char_p, gc);
> > +
> >  /* Data for function partitioning.  */
> >  struct function_subsections GTY(())
> >  {
> > @@ -252,6 +256,32 @@
> >       targetm.asm_out.named_section.  */
> >
> >    const char *unlikely_text_section_name;
> > +
> > +  /* Arrays to hold assembly labels for the text sections, to
> > +     be used by debugger functions for determining the size of text
> > +     sections.  */
> > +
> > +  VEC(char_p,gc) *section_start_labels;
> > +  VEC(char_p,gc) *section_end_labels;
> > +
> > + /* String to be used for names of the arbitrary cold text sections
> > +    parts, via targetm.asm_out.named_section.  */
> > +
> > +  const char *unlikely_part_text_section_name;
> > +
> > +  /* String to be used for names of the arbitrary hot text section
> > +     parts, via targetm.asm_out.named_section.  */
> > +
> > +  const char *part_text_section_name;
> > +
> > +  /* A variable to hold the number of sections in the function.  */
> > +
> > +  unsigned int number_of_sections;
> > +
> > +  /* The first section with different hotness attribute.  */
> > +
> > +  unsigned int first_text_section_part_changed;
> > +
> >  };
>
> I think this should probably go into the named_section structure in
output.h.
>
> >  /* Datastructures maintained for currently processed function in RTL
form.  */
> > Index: print-rtl.c
> > ===================================================================
> > --- print-rtl.c   (revision 138848)
> > +++ print-rtl.c   (working copy)
> > @@ -312,7 +312,9 @@
> >        {
> >  #ifndef GENERATOR_FILE
> >          basic_block bb = NOTE_BASIC_BLOCK (in_rtx);
> > -        if (bb != 0)
> > +                  /* When partitioning into multiple sections
> > +                     NOTE_BASIC_BLOCK holds the section id.  */
> > +        if (bb != 0 && !flag_partition_functions_into_sections)
> >            fprintf (outfile, " [bb %d]", bb->index);
> >  #endif
> >          break;
>
> If you accept my changes for having the section in the BB structure, this
would
> change to print that section name.
>
> > Index: varasm.c
> > ===================================================================
> > --- varasm.c   (revision 138847)
> > +++ varasm.c   (working copy)
> > @@ -71,6 +71,10 @@
> >  struct constant_descriptor_rtx;
> >  struct rtx_constant_pool;
> >
> > +/* If the function is spread on multiple sections; this variable saves
> > +   the name of the last seen section.  */
> > +const char *last_part_text_section_name;
> > +
> >  #define n_deferred_constants (crtl->varasm.deferred_constants)
> >
> >  /* Number for making the label on the next
> > @@ -168,6 +172,8 @@
> >     at the cold section.  */
> >  bool in_cold_section_p;
> >
> > +bool in_part_section_p;
> > +
> >  /* A linked list of all the unnamed sections.  */
> >  static GTY(()) section *unnamed_sections;
> >
> > @@ -684,6 +690,110 @@
> >      return get_named_section (NULL,
UNLIKELY_EXECUTED_TEXT_SECTION_NAME, 0);
> >  }
> >
> > +/* Initialize the string which will be part of the section name.
> > +   It should contain the function name.  UNLIKELY_P is true if the
> > +   section is cold, otherwise it is false.  */
> > +
> > +static void
> > +initialize_part_section_name (bool unlikely_p)
> > +{
> > +  const char *stripped_name;
> > +  char *name;
> > +  int section_name_len = 0;
> > +  char *section_name, *buffer;
> > +  tree dsn;
> > +
> > +  gcc_assert (cfun && current_function_decl);
> > +
> > +  if (crtl->subsections.unlikely_text_section_name
> > +      && crtl->subsections.unlikely_part_text_section_name)
> > +    return;
> > +
> > +  dsn = DECL_SECTION_NAME (current_function_decl);
> > +
> > +  /* DECL_SECTION_NAME usually holds the function name.  However, if
> > +     -fprofile-use flag is used then DECL_SECTION_NAME would hold
> > +     .text.hot or .text.unlikely.  The function name is used to
> > +     differentiate between sections in different functions which has
> > +     the same section id, so we need to get it.  */
> > +  if (dsn && !flag_profile_use)
> > +    {
> > +      name = (char *) alloca (TREE_STRING_LENGTH (dsn) + 1);
> > +      memcpy (name, TREE_STRING_POINTER (dsn), TREE_STRING_LENGTH
(dsn) + 1);
> > +      stripped_name = targetm.strip_name_encoding (name);
> > +    }
> > +  else
> > +    stripped_name = current_function_name ();
> > +
> > +  if (unlikely_p)
> > +    {
> > +      buffer = ACONCAT ((stripped_name, "_unlikely", NULL));
> > +      section_name_len = strlen (buffer) + strlen (".part") + 1;
> > +      section_name = (char *) alloca (section_name_len);
> > +      snprintf (section_name, section_name_len, "%s.part", buffer);
> > +      crtl->subsections.unlikely_part_text_section_name =
> > +   ggc_strdup (section_name);
> > +    }
> > +  else
> > +    {
> > +      section_name_len = strlen (stripped_name) + strlen (".part") +
1;
> > +      section_name = (char *) alloca (section_name_len);
> > +      snprintf (section_name, section_name_len, "%s.part",
stripped_name);
> > +      crtl->subsections.part_text_section_name = ggc_strdup
(section_name);
> > +    }
> > +}
>
> I think this function would be subsumed by moving the different sections
to
> sections in output.h rather than having subsections.
>
> > +/* Tell assembler to switch to a new section with SECTION_ID.  */
> > +
> > +section *
> > +text_part_section (int section_id)
> > +{
> > +  char section_id_str[4069];
>
> This is a rather unusual size.  I suspect initially you meant to use 4096
> instead of 4069, but since the only used to write out integers, you can
use a
> much smaller size:
>
>     char section_id_str[2 + 2*HOST_BITS_PER_INT/4 + 1];

Done.
>
> > +  int section_name_len = 0;
> > +  char *section_name;
> > +  const char *prefix_str;
> > +  bool unlikely_p = false;
> > +
> > +  in_part_section_p = true;
> > +  gcc_assert (flag_function_sections && cfun);
> > +
> > +  if ((first_function_block_is_cold
> > +       && (unsigned) section_id <
> > +       crtl->subsections.first_text_section_part_changed)
> > +      || (!first_function_block_is_cold
> > +     && (unsigned) section_id >=
> > +     crtl->subsections.first_text_section_part_changed))
> > +    {
> > +      unlikely_p = true;
> > +    }
> > +
> > +  if (!unlikely_p && !crtl->subsections.part_text_section_name)
> > +    initialize_part_section_name (false);
> > +  if (unlikely_p && !crtl->
subsections.unlikely_part_text_section_name)
> > +    initialize_part_section_name (true);
> > +
> > +  snprintf (section_id_str, 4069, "%d", section_id);
>
> Use sizeof (section_id_str) instead of 4069.

Done.

>
> > +  if (unlikely_p)
> > +    {
> > +      prefix_str = crtl->subsections.unlikely_part_text_section_name;
> > +      section_name_len = strlen (section_id_str) + strlen (prefix_str)
+ 1;
> > +      section_name = (char *) alloca (section_name_len);
> > +      snprintf (section_name, section_name_len, "%s%d",
> > +      prefix_str, section_id);
> > +    }
> > +  else
> > +    {
> > +      prefix_str = crtl->subsections.part_text_section_name;
> > +      section_name_len = strlen (section_id_str) + strlen (prefix_str)
+ 1;
> > +      section_name = (char *) alloca (section_name_len);
> > +      snprintf (section_name, section_name_len, "%s%d",
> > +      prefix_str, section_id);
> > +    }
>
> You might move the last two lines of the then/else afterwards, since they
are
> the same.

Done.

> > +  last_part_text_section_name = ggc_strdup (section_name);
> > +  return get_named_section (NULL, section_name, 0);
> > +}
> > +
> > +
> >  /* When called within a function context, return true if the function
> >     has been assigned a cold text section and if SECT is that section.
> >     When called outside a function context, return true if SECT is the
> > @@ -834,6 +944,9 @@
> >    else
> >      return targetm.asm_out.select_section (decl, reloc, DECL_ALIGN
(decl));
> >  #else
> > +   if (flag_partition_functions_into_sections
> > +       && crtl->subsections.number_of_sections)
> > +    return text_part_section (0);
> >    return reloc ? unlikely_text_section () : hot_function_section
(decl);
> >  #endif
> >  }
> > @@ -852,6 +965,8 @@
> >                    in_cold_section_p,
> >                    DECL_ALIGN (current_function_decl));
> >  #else
> > +  if (in_part_section_p)
> > +    return get_named_section (NULL, last_part_text_section_name, 0);
> >    return (in_cold_section_p
> >       ? unlikely_text_section ()
> >       : hot_function_section (current_function_decl));
> > @@ -1619,6 +1734,30 @@
> >      }
> >  }
> >
> > +/* If the text section is partitioned; make sure all the sections are
> > +   properly aligned.  */
> > +
> > +static void
> > +output_sections (void)
> > +{
> > +  unsigned int i;
> > +  const char *tmp;
> > +
> > +  gcc_assert (cfun && flag_partition_functions_into_sections);
> > +
> > +  /* Output for each one of the sections the label which marks the
> > +     beginning of its address range.  */
> > +  for (i = 1;
> > +       VEC_iterate (char_p, crtl->subsections.section_start_labels, i,
> > +          tmp); i++)
> > +    {
> > +      switch_to_section (text_part_section (i));
> > +      assemble_align (FUNCTION_BOUNDARY);
> > +      if (crtl->subsections.section_start_labels != NULL)
> > +   ASM_OUTPUT_LABEL (asm_out_file, tmp);
> > +    }
> > +}
> > +
>
> Again, this might change depending on whether you move the stuff to full
> sections, instead of subsections.
>
> >  /* Output assembler code for the constant pool of a function and
associated
> >     with defining the name of the function.  DECL describes the
function.
> >     NAME is the function's name.  For the constant pool, we use the
current
> > @@ -1634,8 +1773,31 @@
> >    crtl->subsections.unlikely_text_section_name = NULL;
> >
> >    first_function_block_is_cold = false;
> > -  if (flag_reorder_blocks_and_partition)
> > +  if (flag_partition_functions_into_sections
> > +      && cfun
> > +      && (crtl->subsections.number_of_sections > 0))
> >      {
> > +      unsigned i;
> > +      int length = crtl->subsections.number_of_sections;
> > +
> > +      crtl->subsections.section_start_labels =
> > +          VEC_alloc (char_p, gc, length);
> > +      crtl->subsections.section_end_labels =
> > +      VEC_alloc (char_p, gc, length);
> > +
> > +      for (i = 0; i < crtl->subsections.number_of_sections; i++)
> > +        {
> > +          ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LSECTIONB",
const_labelno);
> > +          VEC_safe_push (char_p, gc, crtl->
subsections.section_start_labels,
> > +                         ggc_strdup (tmp_label));
> > +          ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LSECTIONE",
const_labelno);
> > +          VEC_safe_push (char_p, gc, crtl->
subsections.section_end_labels,
> > +                         ggc_strdup (tmp_label));
> > +          const_labelno++;
> > +        }
> > +    }
> > +  else if (flag_reorder_blocks_and_partition)
> > +    {
> >        ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LHOTB", const_labelno);
> >        crtl->subsections.hot_section_label = ggc_strdup (tmp_label);
> >        ASM_GENERATE_INTERNAL_LABEL (tmp_label, "LCOLDB",
const_labelno);
> > @@ -1668,7 +1830,8 @@
> >       has both hot and cold sections, because we don't want to re-set
> >       the alignment when the section switch happens mid-function.  */
> >
> > -  if (flag_reorder_blocks_and_partition)
> > +  if (flag_reorder_blocks_and_partition
> > +      && (flag_partition_functions_into_sections == 0))
> >      {
> >        switch_to_section (unlikely_text_section ());
> >        assemble_align (DECL_ALIGN (decl));
> > @@ -1687,7 +1850,8 @@
> >       first_function_block_is_cold = true;
> >     }
> >      }
> > -  else if (DECL_SECTION_NAME (decl))
> > +   else if (DECL_SECTION_NAME (decl)
> > +           && (flag_partition_functions_into_sections == 0))
> >      {
> >        /* Calls to function_section rely on
first_function_block_is_cold
> >      being accurate.  The first block may be cold even if we aren't
>
> This all may change if you use sections instead of subsections.
>
> > @@ -1701,6 +1865,19 @@
> >             crtl->subsections.unlikely_text_section_name) == 0)
> >     first_function_block_is_cold = true;
> >      }
> > +   if (flag_partition_functions_into_sections)
> > +    {
> > +      if (!crtl->is_thunk
> > +          && BB_PARTITION (ENTRY_BLOCK_PTR->next_bb) ==
BB_COLD_PARTITION)
> > +        {
> > +          first_function_block_is_cold = true;
> > +        }
> > +      if (DECL_SECTION_NAME (decl)
> > +          && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
> > +                     UNLIKELY_EXECUTED_TEXT_SECTION_NAME) == 0)
> > +        first_function_block_is_cold = true;
> > +      output_sections ();
> > +    }
> >
> >    in_cold_section_p = first_function_block_is_cold;
> >
> > @@ -1708,9 +1885,19 @@
> >
> >    switch_to_section (function_section (decl));
> >    if (flag_reorder_blocks_and_partition
> > +      && (flag_partition_functions_into_sections == 0)
> >        && !hot_label_written)
> >      ASM_OUTPUT_LABEL (asm_out_file, crtl->
subsections.hot_section_label);
> > +  else if (flag_partition_functions_into_sections
> > +           && crtl->subsections.section_start_labels != 0
> > +           && crtl->subsections.number_of_sections > 0)
> > +   {
> > +     const char *tmp;
> >
> > +     tmp = VEC_index (char_p, crtl->subsections.section_start_labels,
0);
> > +     ASM_OUTPUT_LABEL (asm_out_file, tmp);
> > +   }
> > +
> >    /* Tell assembler to move to target machine's alignment for
functions.  */
> >    align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT);
> >    if (align > 0)
>
> I have a feeling that this will be simplified with the change to
sections.
>
> > @@ -1795,6 +1982,25 @@
> >        ASM_OUTPUT_LABEL (asm_out_file, crtl->
subsections.hot_section_end_label);
> >        switch_to_section (save_text_section);
> >      }
> > +    else if (flag_partition_functions_into_sections
> > +            && crtl->subsections.section_end_labels != NULL)
> > +    {
> > +       unsigned int i;
> > +       const char *tmp;
> > +
> > +        /* Output for each one of the sections the label which marks
the
> > +           end of its address range.  */
> > +        for (i = 0;
> > +               VEC_iterate (char_p, crtl->
subsections.section_end_labels, i,
> > +                            tmp); i++)
> > +         {
> > +         switch_to_section (text_part_section (i));
> > +         tmp =  VEC_index (char_p, crtl->
subsections.section_end_labels, i);
> > +         ASM_OUTPUT_LABEL (asm_out_file, tmp);
> > +       }
> > +
> > +    }
> > +
> >  }
>
> Ditto here.
>
> >
> >  /* Assemble code to leave SIZE bytes of zeros.  */
> > @@ -5592,6 +5798,15 @@
> >      flags = SECTION_CODE;
> >    else if (decl && decl_readonly_section (decl, reloc))
> >      flags = 0;
> > +  else if (flag_partition_functions_into_sections
> > +           && crtl->subsections.unlikely_part_text_section_name
> > +           && strstr (name,
> > +                      crtl->
subsections.unlikely_part_text_section_name) != 0)
> > +    flags = SECTION_CODE;
> > +  else if (flag_partition_functions_into_sections
> > +           && crtl->subsections.part_text_section_name
> > +           && strstr (name, crtl->
subsections.part_text_section_name) != 0)
> > +    flags = SECTION_CODE;
> >    else if (current_function_decl
> >        && cfun
> >        && crtl->subsections.unlikely_text_section_name
>
> strstr is a somewhat expensive function, I'm wondering if there is a way
to
> eliminate its use.  It isn't major, I'm just thinking out loud here.
>
> > Index: target-def.h
> > ===================================================================
> > --- target-def.h   (revision 138847)
> > +++ target-def.h   (working copy)
> > @@ -413,6 +413,19 @@
> >  #define TARGET_SECTION_TYPE_FLAGS default_section_type_flags
> >  #endif
> >
> > +#define TARGET_EST_SECTION_OVERHEAD 0
> > +
> > +#define TARGET_EST_INSTRUCTION_SIZE 0
> > +
> > +#define TARGET_PART_FUNC_TO_SECTIONS_FLAGS 0
> > +
> > +#define TARGET_BEGIN_CRITICAL_SECTION 0
> > +
> > +#define TARGET_END_CRITICAL_SECTION 0
> > +
> > +#define TARGET_RECORD_JUMP_TABLE 0
> > +
> > +
> >  #ifndef TARGET_STRIP_NAME_ENCODING
> >  #define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding
> >  #endif
> > @@ -874,6 +887,12 @@
> >    TARGET_FIXED_CONDITION_CODE_REGS,      \
> >    TARGET_CC_MODES_COMPATIBLE,         \
> >    TARGET_MACHINE_DEPENDENT_REORG,      \
> > +  TARGET_EST_SECTION_OVERHEAD,                  \
> > +  TARGET_EST_INSTRUCTION_SIZE,                  \
> > +  TARGET_PART_FUNC_TO_SECTIONS_FLAGS,           \
> > +  TARGET_BEGIN_CRITICAL_SECTION,                \
> > +  TARGET_END_CRITICAL_SECTION,                  \
> > +  TARGET_RECORD_JUMP_TABLE,                     \
> >    TARGET_BUILD_BUILTIN_VA_LIST,         \
> >    TARGET_FN_ABI_VA_LIST,         \
> >    TARGET_CANONICAL_VA_LIST_TYPE,         \
>
> Obviously these would change depending on the global changes.  As I said
> before, this should probably be a substructure to move all of the like
> information together.
>
> > Index: rtl.h
> > ===================================================================
> > --- rtl.h   (revision 138847)
> > +++ rtl.h   (working copy)
> > @@ -861,6 +861,7 @@
> >  #define NOTE_DELETED_LABEL_NAME(INSN) XCSTR (INSN, 4, NOTE)
> >  #define SET_INSN_DELETED(INSN) set_insn_deleted (INSN);
> >  #define NOTE_BLOCK(INSN)   XCTREE (INSN, 4, NOTE)
> > +#define NOTE_TEXT_SECTION(INSN) XCINT (INSN, 4, NOTE)
> >  #define NOTE_EH_HANDLER(INSN)   XCINT (INSN, 4, NOTE)
> >  #define NOTE_BASIC_BLOCK(INSN)   XCBBDEF (INSN, 4, NOTE)
> >  #define NOTE_VAR_LOCATION(INSN)   XCEXP (INSN, 4, NOTE)
>
> If you move the section stuff to just the basic block structure, then you
won't
> need to keep track of the section on a per insn basis.
>
> > Index: output.h
> > ===================================================================
> > --- output.h   (revision 138847)
> > +++ output.h   (working copy)
> > @@ -567,6 +567,7 @@
> >                     unsigned int);
> >  extern section *function_section (tree);
> >  extern section *unlikely_text_section (void);
> > +extern section *text_part_section (int);
> >  extern section *current_function_section (void);
> >
> >  /* Return the numbered .ctors.N (if CONSTRUCTOR_P) or .dtors.N (if
> >
>
> If you do the global changes, you likely will have other changes to
output.h.
>
> > Index: bb-reorder.c
> > ===================================================================
> > --- bb-reorder.c   (revision 138847)
> > +++ bb-reorder.c   (working copy)
> > @@ -85,6 +85,10 @@
> >  #include "toplev.h"
> >  #include "tree-pass.h"
> >  #include "df.h"
> > +#include "cfgloop.h"
> > +#include "langhooks.h"
> > +#include "hashtab.h"
> > +#include "vec.h"
> >
> >  #ifndef HAVE_conditional_execution
> >  #define HAVE_conditional_execution 0
> > @@ -1265,7 +1269,624 @@
> >     Convert any fall-through crossing edges (for blocks that do not
contain
> >     a jump) to unconditional jumps.  */
> >
> > +/* Structure to hold needed information for each loop.  */
> > +struct loop_info_def
> > +{
> > +  /* The index of the loop.  */
> > +  unsigned index;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.  I'm
not
> sure here whether some code out there has more than 65k loops, but it is
> probably better to be safe.>

Done.
>
> > +  /* The size (in bytes) of the loop layout.  */
> > +  unsigned size;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.

Done.
>
> > +  /* The following fields are the first and last basic-blocks in the
> > +     current loop layout.  They are used to calculate the loop's size.
*/
> > +   basic_block first;
> > +   basic_block last;
> > +};
> > +
> > +typedef struct loop_info_def *loop_info;
> > +DEF_VEC_P(loop_info);
> > +DEF_VEC_ALLOC_P(loop_info,heap);
> > +
> > +/* Structure to hold needed information for each basic block.  */
> > +typedef struct funcpart_basic_block_data_def
> > +{
> > +  /* Section id which is attached to each section.  */
> > +  int section_id;
> > +
> > +  /* The list of loops which begin at this basic-block.  */
> > +  VEC(loop_info,heap) *loops_info_list;
> > +
> > +  /* True if this basic-block appears in critical section.  */
> > +  bool in_critical_section_p;
> > +
> > +  /* True if this basic-block is the first in critical section.  */
> > +  bool first_bb_in_critical_section;
> > +
> > +  /* The size in bytes of the critical section which begins in this
> > +     basic-block.  */
> > +  int critical_section_size;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.

This field was removed.
>
> > +  /* The size in bytes of the basic-block.  */
> > +  int bb_size;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.

Done.

> > +}funcpart_basic_block_data;
> > +
> > +/* The current size (in elements) of the following dynamic array.  */
> > +static int fbb_data_size = 0;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.

Done.
>
> > +
> > +/* The array which holds needed information for basic blocks.  */
> > +static funcpart_basic_block_data *fbb_data;
> > +
> > +/* Information regarding insn.  */
> > +struct insn_aux
> > +{
> > +  /* The insn.  */
> > +  rtx insn;
> > +
> > +  /* The estimated size in bytes of insn.  */
> > +  int size;
> > +};
> > +
> > +/* Hash-table of insns in the function which holds their size (in
bytes).  */
> > +htab_t insns_aux;
>
> Given that you have INSN_UID you don't need a hash table to map insn to
size,
> but instead allocate a vector and fill in the size.

Done.

> > +/* A variable to hold the overhead introduced by splitting a function
> > +   into sections.  This overhead contains extra branch instructions
> > +   between section which might effect code size.  */
> > +static int estimate_section_overhead = 0;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.>

Done.

> > +/* Estimate the maximum size (in bytes) of one section.  */
> > +static int estimate_max_section_size = 0;
>
> You probably should use unsigned HOST_WIDE_INT here instead of int.

Done.

> > +/* Given basic-block with index BB_INDEX validate that fbb_data array
> > +   has a corresponding element for it.  */
> > +
> >  static void
> > +validate_fbb_data_element (int bb_index)
> > +{
> > +  int prev_size = fbb_data_size;
> > +  int i;
> > +
> > +  if (bb_index < fbb_data_size)
> > +    return;
> > +
> > +  /* Resize the array, if necessary.  */
> > +  fbb_data_size *= 2;
> > +  fbb_data = (funcpart_basic_block_data *)
> > +    xrealloc (fbb_data, (fbb_data_size * sizeof
(funcpart_basic_block_data)));
> > +  for (i = prev_size; i < fbb_data_size; i++)
> > +    {
> > +      fbb_data[i].section_id = -1;
> > +      fbb_data[i].loops_info_list = NULL;
> > +      fbb_data[i].in_critical_section_p = false;
> > +      fbb_data[i].critical_section_size = 0;
> > +      fbb_data[i].first_bb_in_critical_section = false;
> > +      fbb_data[i].bb_size = 0;
> > +    }
> > +}
> > +
> > +/* Given basic block BB return it's size in bytes.  */
> > +
> > +static int
> > +est_size_of_insns_in_bb (basic_block bb)
> > +{
> > +  int size = 0;
> > +  rtx insn;
> > +  struct insn_aux *iaux;
> > +
> > +  /* Loop through all of the basic-block's insns.  */
> > +  FOR_BB_INSNS (bb, insn)
> > +  {
>
> The '{' and the code inside should be indented 2 spaces through to the
closing
> '}'.

Done.

> > +    if (INSN_P (insn))
> > +      {
> > +        struct insn_aux iaux_tmp;
> > +
> > +        /* Retrieve the size of the instruction. */
> > +        iaux_tmp.insn = insn;
> > +        iaux = (struct insn_aux *)htab_find (insns_aux, &iaux_tmp);
> > +        if (iaux)
> > +          size += iaux->size;
> > +        else
> > +          size += get_attr_min_length (insn);
> > +      }
> > +  }
> > +#if ENABLE_CHECKING
> > +  /* If we already calculated the size of this basic-block we can use
it
> > +     for checking.  */
> > +  if (fbb_data[bb->index].bb_size != 0)
> > +    gcc_assert (fbb_data[bb->index].bb_size == size);
> > +#endif
> > +
> > +  return size;
> > +}
>
> As I mentioned before, if this is kept in, you probably should name it
> estimate_ instead of est_, but that is minor.  The return size should be
> unsigned HOST_WIDE_INT and not int.

Done.

> > +/* Split basic-block BB such that the first partition will not exceed
the
> > +   maximum size of FIRST_PARTITION_SIZE bytes.  Update the actual size
> > +   of the first partition in FIRST_PARTITION_ACTUAL_SIZE.  Return the
> > +   new basic-block if it can be created.  Else return NULL.  */
> > +
> > +static basic_block
> > +split_bb (basic_block bb, int first_partition_size,
> > +     int *first_partition_actual_size)
> > +{
> > +  rtx insn;
> > +  int size = 0;
> > +  int prev_size = 0;
> > +  edge e;
> > +  rtx prev_insn = NULL;
> > +  struct insn_aux *iaux;
> > +
> > +  /* Loop through all of the basic-block's insns.  */
> > +  FOR_BB_INSNS (bb, insn)
> > +  {
> > +    if (INSN_P (insn))
> > +      {
> > +   struct insn_aux iaux_tmp;
> > +
> > +   prev_size = size;
> > +
> > +   /* Retrieve the size of the instruction. */
> > +   iaux_tmp.insn = insn;
> > +   iaux = (struct insn_aux *) htab_find (insns_aux, &iaux_tmp);
>
> Here I don't think you need a hash table, just call the function to get
the
> size.  The hash table probably takes up more resources to hash the
information
> than you would be calculating it.

Done.

> > +   if (iaux)
> > +     size += iaux->size;
> > +   else
> > +     size += get_attr_min_length (insn);
> > +
> > +   if (size > first_partition_size)
> > +     {
> > +       if (prev_insn == NULL)
> > +         return NULL;
> > +       break;
> > +     }
> > +   prev_insn = insn;
> > +      }
> > +  }
> > +
> > +#ifdef ENABLE_CHECKING
> > +  gcc_assert (size > first_partition_size);
> > +#endif
> > +
> > +  e = split_block (bb, prev_insn);
> > +  e->dest->aux = bb->aux;
> > +  bb->aux = e->dest;
> > +
> > +  if (dump_file)
> > +    fprintf (dump_file,
> > +        "\tSplit bb %d; create new bb: %d\n", bb->index, e->dest->
index);
> > +  *first_partition_actual_size = prev_size;
> > +  return e->dest;
> > +}
> > +
> > +/* Insert a NOTE_INSN_SWITCH_TEXT_SECTIONS note before basic-blocks
that
> > +   are markred as BB_FIRST_AFTER_SECTION_SWITCH.  */
> > +
> > +static void
> > +insert_section_boundary_note_in_function (void)
> > +{
> > +  basic_block bb;
> > +  rtx new_note;
> > +  unsigned number_of_sections = 1;
> > +  bool first_switch_p = true;
> > +  bool only_one_section_p = true;
> > +
> > +  gcc_assert (cfun && flag_partition_functions_into_sections);
> > +
> > +  FOR_EACH_BB (bb)
> > +  {
> > +    if (bb->flags & BB_FIRST_AFTER_SECTION_SWITCH)
> > +      {
> > +   new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
> > +                 BB_HEAD (bb));
> > +   /* ??? This kind of note always lives between basic blocks,
> > +      but add_insn_before will set BLOCK_FOR_INSN anyway.  */
> > +   BLOCK_FOR_INSN (new_note) = NULL;
> > +   NOTE_TEXT_SECTION (new_note) = number_of_sections++;
> > +   if (bb->prev_bb && (BB_PARTITION (bb) != BB_PARTITION (bb->
prev_bb))
> > +       && first_switch_p && flag_reorder_blocks_and_partition)
> > +     {
> > +       crtl->subsections.first_text_section_part_changed =
> > +         number_of_sections - 1;
> > +       first_switch_p = false;
> > +       only_one_section_p = false;
> > +     }
> > +      }
> > +  }
> > +
> > +  if (only_one_section_p)
> > +    crtl->subsections.first_text_section_part_changed =
number_of_sections;
> > +
> > +  crtl->subsections.number_of_sections = number_of_sections;
> > +}
> > +
> > +/* Start a new section at the beginning of basic-block BB with
SECTION_ID.
> > +   */
>
> SECTION_ID should probably be on the next line with the '*/'.

Done.

> > +
> > +static void
> > +start_new_section (int *section_id, basic_block bb)
> > +{
> > +  bb->flags |= BB_FIRST_AFTER_SECTION_SWITCH;
> > +  *section_id = *section_id + 1;
> > +  if (dump_file)
> > +    fprintf (dump_file, ";; Start new section %d\n", *section_id);
> > +}
> > +
> > +/* Return true if the given basic-block BB starts a loop that can not
> > +   fully be inserted into the last section and the loop size is less
> > +   than the maximum section size threshold.  Use LAST_SECTION_SIZE in
> > +   the calculation.  */
> > +
> > +static bool
> > +start_new_section_for_loop (basic_block bb, int last_section_size)
> > +{
> > +  struct loop_info_def *cur_loop;
> > +  unsigned i;
> > +
> > +  if (fbb_data[bb->index].loops_info_list == NULL)
> > +    return false;
> > +
> > +  if (last_section_size == 0)
> > +    return false;
> > +
> > +  /* The loops are sorted in loops_info_list according to the loop
size
> > +     (in bytes); where the loop with the samllest layout appears
first.
> > +     */
> > +  for (i = 0;
> > +       VEC_iterate (loop_info, fbb_data[bb->index].loops_info_list, i,
> > +          cur_loop); i++)
> > +    {
> > +      if ((last_section_size + cur_loop->size >
> > +      (unsigned) estimate_max_section_size)
> > +     && cur_loop->size <= (unsigned) estimate_max_section_size)
> > +   {
> > +     if (dump_file)
> > +       fprintf (dump_file, "\tbb %d starts a loop (size %dB) "
> > +           "recommend to start a new section to not break it\n",
> > +                     bb->index, cur_loop->size);
> > +
> > +     return true;
> > +   }
> > +    }
> > +  return false;
> > +}
> > +
> > +/* Return TRUE if basic-block BB contains critical section.  */
> > +
> > +static bool
> > +in_critical_section (basic_block bb)
> > +{
> > +  if (fbb_data[bb->index].in_critical_section_p)
> > +    return true;
> > +  return false;
> > +}
> > +
> > +/* Return true if BB is the first basic-block in critical section;
> > +   otherwise return false.  */
> > +
> > +static bool
> > +first_bb_in_critical_section (basic_block bb)
> > +{
> > +  if (fbb_data[bb->index].in_critical_section_p
> > +      && fbb_data[bb->index].first_bb_in_critical_section)
> > +    return true;
> > +  return false;
> > +}
> > +
> > +/* Return true if the given basic-block BB contains a critical section
> > +   that can not fully be inserted into the last section.  Use
> > +   LAST_SECTION_SIZE in the calculation.  */
> > +
> > +static bool
> > +start_new_section_for_critical_section (basic_block bb, int
last_section_size)
> > +{
> > +  int size;
> > +
> > +  /* This bb does not start a critical section.  */
> > +  if (!fbb_data[bb->index].in_critical_section_p)
> > +    return false;
> > +
> > +  size = fbb_data[bb->index].critical_section_size;
> > +
> > +  if (last_section_size == 0)
> > +    return false;
> > +
> > +  if ((last_section_size + size >
> > +       estimate_max_section_size) && size <=
estimate_max_section_size)
> > +    {
> > +      if (dump_file)
> > +   fprintf (dump_file,
> > +       "\n\tbb %d starts a critical section (size %dB) "
> > +       "recommend to start a new section in order to not break it\n",
> > +       bb->index, size);
> > +
> > +      return true;
> > +    }
> > +  return false;
> > +}
>
> As I said, I think the critical stuff should be pushed to the back end,
rather
> than having the machine independent stuff deal with it.

Done.

> > +
> > +/* Return a hash for IAUX, which is really a "insn_aux *".  */
> > +static hashval_t
> > +iaux_info_hash (const void *iaux)
> > +{
> > +  return (hashval_t) INSN_UID (((const struct insn_aux *) iaux)->
insn);
> > +}
> > +
> > +/* Return true if IAUX1 and IAUX2 (which are really both of type
> > +   "insn_aux *") refer to the same instruction.  */
> > +static int
> > +iaux_info_eq (const void *iaux1, const void *iaux2)
> > +{
> > +  const struct insn_aux *i1 = (const struct insn_aux *) iaux1;
> > +  const struct insn_aux *i2 = (const struct insn_aux *) iaux2;
> > +
> > +  return i1->insn == i2->insn;
> > +}
>
> These go away if you don't need to hash the insn.

Done.
>
> > +/* Create sections for the current function.  Return the edges that
> > +   cross between sections in CROSSING_EDGES array which is of size
> > +   N_CROSSING_EDGES so they could be fixed later.
> > +
> > +   The partitioning is done by traversing the basic-blocks according
to
> > +   their order in the code layout.  When a new basic-block is
encountered
> > +   a decision is made whether to add it to the last section, split it
> > +   or add it to a new section.
> > +
> > +   Here is a short description of decision process :
> > +
> > +   Start a new section if one of the following conditions exist:
> > +
> > +      a) if adding the basic-block to the last section causes the last
> > +           section to exceed the max section size and it's size is
less
> > +           than max section size.
> > +   b) if a loop starts at this basic-block and the loop can not
> > +      fully be inserted into the last section and it's size is less
> > +      than max section size.
> > +        c) the basic block hotness property is different from it's
> > +           previous basic-block's hotness property.
> > +        d) if a critical section starts at this basic-block and it
> > +           can not fully be inserted into the last section and it's
size
> > +           is less than max section size.
> > +
> > +   If the above conditions are not true split the basic-block
> > +   if adding the basic-block to the last section causes the last
> > +   section to exceed the max section size and it is not part of a
> > +   critical section.  Otherwise add it to the last section.  */
> > +
> > +static void
> > +create_sections (edge **crossing_edges, int *n_crossing_edges, int
*max_idx)
> > +{
> > +  basic_block bb, new_bb;
> > +  edge e;
> > +  int i;
> > +  edge_iterator ei;
> > +  int last_section_size = 0;
> > +  int current_section_id = 0;
> > +  int first_partition_actual_size;
> > +
> > +  /* Mark which section each basic block belongs in.  */
> > +  if (dump_file)
> > +    fprintf (dump_file,
> > +        "\n\n--- partition functions into sections --- (%dB)\n\n"
> > +        ";; section overhead size %dB\n" ";; max section size %dB\n
\n",
> > +        flag_partition_functions_into_sections,
estimate_section_overhead,
> > +        estimate_max_section_size);
> > +
> > +  FOR_EACH_BB (bb)
> > +  {
>
> This code should be indented 2 spaces.

Done.

> > +    int bb_size;
> > +    bool start_new_section_for_loop_p = false;
> > +    bool start_new_section_due_to_hotness_prop_p = false;
> > +    bool start_new_section_for_critical_section_p = false;
> > +    bool start_critical_section_p = false;
> > +
> > +    /* Validate that fbb_data array has a corresponding element for
> > +       this bb, so we could access it directly.  */
> > +    validate_fbb_data_element (bb->index);
> > +
> > +    bb_size = est_size_of_insns_in_bb (bb);
> > +    if (dump_file)
> > +      fprintf (dump_file, ";; Trying to add bb %d (%dB) to section
%d",
> > +               bb->index, bb_size, current_section_id);
> > +
> > +     start_critical_section_p = first_bb_in_critical_section (bb);
> > +     if (!start_critical_section_p && in_critical_section (bb))
> > +     {
> > +       /* Critical section must reside only in one section so add it
to
> > +          the current section.  */
> > +       last_section_size += bb_size;
> > +       if (dump_file)
> > +         fprintf (dump_file,
> > +                 "\n;; Basic-block %d is part of critical section."
> > +                 " Add it to section %d (%dB)\n",
> > +                 bb->index, current_section_id, last_section_size);
> > +       /* Upadte section id.  */
> > +        fbb_data[bb->index].section_id = current_section_id;
> > +       continue;
> > +     }
> > +
> > +    /* Check if a loop starts in this bb and we should start a new
> > +       section for it.  */
> > +    start_new_section_for_loop_p =
> > +      start_new_section_for_loop (bb, last_section_size);
> > +
> > +    /* Check if the hotness property has been changed.   */
> > +    start_new_section_due_to_hotness_prop_p =
> > +      ((bb->prev_bb != NULL) && (last_section_size != 0)
> > +       && BB_PARTITION (bb) != BB_PARTITION (bb->prev_bb)
> > +       && flag_reorder_blocks_and_partition);
> > +
> > +    /* Check if this bb starts a critical section and we should start
> > +       a new section for it.  */
> > +    if (start_critical_section_p)
> > +      start_new_section_for_critical_section_p =
> > +        start_new_section_for_critical_section (bb,
last_section_size);
> > +
> > +    if (((last_section_size + bb_size) >
> > +    estimate_max_section_size) || start_new_section_for_loop_p
> > +         || start_new_section_due_to_hotness_prop_p
> > +         || start_new_section_for_critical_section_p)
> > +      {
> > +   /* Do not add the basic-block to the last section.  This is due
> > +      to the possible reasons:
> > +      1) the addition of the basic block to last section will cause
> > +      the last section to exceed the max section size.
> > +      2) the basic-block starts a loop that can not fully be inserted
> > +      to the last section without exceeding the max section size
> > +      and it's size is less than the max section size.
> > +           3) The hotness properaty of this basic-block is different
then
> > +           it's previous.
> > +           4) The basic-block starts critical section that can not
fully
> > +           be inserted to the last section without exceeding the max
> > +           section size and it's size is less than the max section
size.  */
> > +        if (start_critical_section_p && dump_file)
> > +           fprintf (dump_file,
> > +                   " ...FAIL\n\tbb %d starts a critical section", bb->
index);
> > +        else if (start_new_section_for_loop_p && dump_file)
> > +          fprintf (dump_file,
> > +                   " ...FAIL\n\tbb %d begins a loop that can not
fully"
> > +                   " be inserted to the last section\n", bb->index);
> > +        else if (start_new_section_due_to_hotness_prop_p && dump_file)
> > +          fprintf (dump_file,
> > +                   " ...FAIL\n\tbb %d hotness property is different
then it's"
> > +                   " previous basic-block\n", bb->index);
> > +   else if (((last_section_size + bb_size) >
estimate_max_section_size)
> > +       && dump_file)
> > +     fprintf (dump_file,
> > +         " ...FAIL\n\tSize of section (%dB) + bb %d (%dB)"
> > +         " exceeds threshold \n",
> > +         last_section_size, bb->index, bb_size);
>
> You might want to wrap all of the if's inside of an if (dump_file)
{ ... }
> rather than testing for dump_file on each test, since all of the ifs are
just
> for debug information.

Done.

> > +
> > +   /* Start a new section if one of following conditions
> > +      are fulfilled:
> > +      1) the last section is not empty and the bb size is less than
> > +      the section size threshold.
> > +      2) the basic-block starts a loop that its size is less than
> > +      the section size threshold.
> > +           3) The hotness property of this basic block is different
then
> > +           the previous.
> > +           4) the basic-block starts a critical section that its size
> > +           is less than the section size threshold.  */
> > +   if ((last_section_size != 0
> > +        && bb_size <= estimate_max_section_size)
> > +       || start_new_section_for_loop_p
> > +            || start_new_section_due_to_hotness_prop_p
> > +            || start_new_section_for_critical_section_p)
> > +     {
> > +       if (dump_file)
> > +         fprintf (dump_file,
> > +             ";; Close section %d (%dB)\n", current_section_id,
> > +             last_section_size);
> > +       start_new_section (&current_section_id, bb);
> > +       last_section_size = 0;
> > +       bb = bb->prev_bb;
> > +       continue;
> > +     }
> > +       if (start_critical_section_p)
> > +        {
> > +          /* The basic-block begins a critical section so we can not
> > +             split it.  Add it to the current section.  */
> > +          last_section_size += bb_size;
> > +          if (dump_file)
> > +            fprintf (dump_file, "\n\tbb %d starts a critical section.
"
> > +                     "Add it to section %d (%dB)\n",
> > +                     bb->index, current_section_id,
last_section_size);
> > +          /* Upadte section id.  */
> > +          fbb_data[bb->index].section_id = current_section_id;
> > +          continue;
> > +        }
> > +   /* Split the basic-block.  Try to insert it's first partition
> > +      to the last section such that the section size will not exceed
> > +      the section size threshold.  */
> > +   new_bb =
> > +     split_bb (bb,
> > +          estimate_max_section_size - last_section_size,
> > +          &first_partition_actual_size);
> > +
> > +   if (new_bb != NULL)
> > +     {
> > +       /* Success.  The new basic-block was created and was added to
the
> > +          last section.  Close this section and open a new one.  */
> > +       if (dump_file)
> > +         {
> > +      fprintf (dump_file,
> > +          "\tNew edge: (idx %d , size %dB) -> BB (idx %d , "
> > +          "size %dB)\n",
> > +          bb->index, first_partition_actual_size,
> > +          new_bb->index,
> > +          bb_size - first_partition_actual_size);
> > +      fprintf (dump_file,
> > +          "\tAdd (idx %d; size %dB) to section %d\n",
> > +          bb->index, first_partition_actual_size,
> > +          current_section_id);
> > +      fprintf (dump_file, ";; Close section %d (%dB)\n",
> > +          current_section_id,
> > +          last_section_size + first_partition_actual_size);
> > +         }
> > +       /* Upadte section id.  */
> > +       fbb_data[bb->index].section_id = current_section_id;
> > +       /* Start new section.  */
> > +       start_new_section (&current_section_id, new_bb);
> > +       last_section_size = 0;
> > +     }
> > +   else
> > +     {
> > +       /* Failed to create the new basic-block.  Close this section
> > +          with the current bb inside and open a new one.  */
> > +            last_section_size += bb_size;
> > +       if (dump_file)
> > +         fprintf (dump_file,
> > +             "\n;; Failed to split bb. Close section %d (%dB)\n",
> > +             current_section_id, last_section_size);
> > +            if (bb->next_bb)
> > +         start_new_section (&current_section_id, bb->next_bb);
> > +       last_section_size = 0;
> > +       continue;
> > +     }
> > +      }
> > +    else
> > +      {
> > +   /* The basic-block can be fully added to the last section.  */
> > +   last_section_size += bb_size;
> > +   if (dump_file)
> > +     fprintf (dump_file, " ...OK\n\tSection new size: %dB\n",
> > +         last_section_size);
> > +   /* Upadte section id.  */
> > +   fbb_data[bb->index].section_id = current_section_id;
> > +      }
> > +  }
> > +  if (dump_file)
> > +    fprintf (dump_file, "\n\n");
> > +
> > +  /* Mark every edge that crosses between sections.  */
> > +  i = 0;
> > +  FOR_EACH_BB (bb) FOR_EACH_EDGE (e, ei, bb->succs)
> > +  {
> > +    validate_fbb_data_element (bb->index);
> > +
> > +    if (e->src != ENTRY_BLOCK_PTR
> > +   && e->dest != EXIT_BLOCK_PTR
> > +   && (fbb_data[(e->src)->index].section_id != -1)
> > +   && (fbb_data[(e->dest)->index].section_id != -1)
> > +   && (fbb_data[(e->src)->index].section_id !=
> > +       fbb_data[(e->dest)->index].section_id))
> > +      {
> > +   e->flags |= EDGE_CROSSING;
> > +   /* Resize the array, if necessary.  */
> > +   if (i == *max_idx)
> > +     {
> > +       *max_idx *= 2;
> > +       (*crossing_edges) = (edge *) xrealloc (*crossing_edges,
> > +                 (*max_idx) * sizeof (edge));
> > +     }
>
> Here, rather than doing the resize yourself, it is probably better to
switch to
> use a VEC structure and let the common code handle it.

Done.

> > +   (*crossing_edges)[i++] = e;
>
> Please put a space after the ')'.
>
> > +      }
> > +    else
> > +      e->flags &= ~EDGE_CROSSING;
> > +  }
> > +  *n_crossing_edges = i;
> > +}
> > +
> > +static void
> >  add_labels_and_missing_jumps (edge *crossing_edges, int
n_crossing_edges)
> >  {
> >    int i;
> > @@ -1435,10 +2056,14 @@
> >           }
> >            }
> >        }
> > -
> >           if (cond_jump_crosses || !invert_worked)
> > +
> >        {
> >          /* This is the case where both edges out of the basic
> > +                   else
> > +                   {
> > +                      fall_thru->flags |= EDGE_CROSSING;
> > +                   }
>
> Eliminate the '{'/'}' since fall_thru->flags is a simple statement.  This
is minor.
>
> >             block are crossing edges. Here we will fix up the
> >             fall through edge. The jump edge will be taken care
> >             of later.  The EDGE_CROSSING flag of fall_thru edge
> > @@ -1956,7 +2581,10 @@
> >    rtx new_note;
> >    int first_partition = 0;
> >
> > -  if (flag_reorder_blocks_and_partition)
> > +  /* If -fpartition-functions-into-sections flag is set then
> > +     NOTE_INSN_SWITCH_TEXT_SECTIONS note will be inserted later.  */
> > +  if (flag_reorder_blocks_and_partition
> > +      && (flag_partition_functions_into_sections == 0))
> >      FOR_EACH_BB (bb)
> >      {
> >        if (!first_partition)
> > @@ -2201,8 +2829,628 @@
> >
> >    cfg_layout_finalize ();
> >  }
> > -
> > +
> > +/* For each loop in LOOPS calculate the first and last basic-block
that appear
> > +   in their layout.  */
> > +static void
> > +calculate_loop_boundary (struct loop_info_def *loops_aux)
> > +{
> > +  struct loop *loop;
> > +  basic_block bb;
> > +
> > +  if (!loops_aux)
> > +    return;
> > +
> > +  FOR_EACH_BB (bb)
> > +  {
>
> Indent 2 spaces.

Done.

>
> > +    for (loop = bb->loop_father; loop_outer (loop); loop = loop_outer
(loop))
> > +      {
> > +   loops_aux[loop->num].last = bb;
> > +   if (!loops_aux[loop->num].first)
> > +     loops_aux[loop->num].first = bb;
> > +      }
> > +  }
> > +}
> > +
> > +/* Return true if loop_info A has less size than loop_info B, in order
> > +   to give them an ordering.  */
> >  static bool
> > +loop_size_is_less (const loop_info a, const loop_info b)
> > +{
> > +  if (a->size < b->size)
> > +    return true;
> > +  return false;
> > +}
> > +
> > +/* Cache the size (in bytes) of the instructions in the function
> > +   to avoid re-calculation.  */
> > +static void
> > +record_insns_size_estimation (void)
> > +{
> > +  basic_block bb;
> > +  rtx insn;
> > +  struct insn_aux *iaux;
> > +  PTR *slot;
> > +
> > +  FOR_EACH_BB (bb)
> > +  {
>
> Indentation.

Done.

> > +    int size;
> > +    int bb_size = 0;
> > +
> > +    /* Loop through all of the basic-block's insns.  */
> > +    FOR_BB_INSNS (bb, insn)
> > +      if (INSN_P (insn) || NOTE_P (insn))
> > +      {
> > +   if (targetm.est_instruction_size != 0)
> > +     size = targetm.est_instruction_size (insn);
> > +   else
> > +     size = get_attr_min_length (insn);
> > +
> > +   if (NOTE_P (insn))
> > +     continue;
> > +        bb_size += size;
> > +   /* Cache the insn's size for later re-use.  */
> > +   iaux = (struct insn_aux *)xmalloc (sizeof (struct insn_aux));
> > +   iaux->insn = insn;
> > +   iaux->size = size;
> > +
> > +   slot = htab_find_slot (insns_aux, iaux, INSERT);
> > +   *slot = iaux;
> > +      }
> > +    /* Store the size of the basic-block for later validation.  */
> > +    validate_fbb_data_element (bb->index);
> > +    fbb_data[bb->index].bb_size = bb_size;
> > +  }
> > +}
> > +
> > +/*  Calculate the size (in bytes) of each loop.
> > +    This information will be used later in the code partitioning
phase.
> > +    */
> > +static void
> > +record_loops_boundaries (void)
> > +{
> > +  unsigned i;
> > +  basic_block bb;
> > +  loop_iterator li;
> > +  struct loop *loop;
> > +  int total_loop_size;
> > +  struct loop_info_def *cur_loop;
> > +  struct loop_info_def *loops_aux = NULL;
> > +  unsigned int place;
> > +  struct insn_aux *iaux;
> > +
> > +  loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
LOOPS_HAVE_RECORDED_EXITS);
> > +  if (number_of_loops () <= 1)
> > +    {
> > +      loop_optimizer_finalize ();
> > +      return;   /* There are no loops.  */
> > +    }
> > +  loops_aux =
> > +    (struct loop_info_def *) xmalloc (number_of_loops () *
> > +                  sizeof (struct loop_info_def));
> > +  memset (loops_aux, 0, number_of_loops () * sizeof (struct
loop_info_def));
>
> Rather than doing xmalloc (a*b); memset (ptr, 0, a*b), consider using
> xcalloc (a, b) instead.);

Done.

> > +  /* Calculate insns size in bytes.  */
> > +  record_insns_size_estimation ();
> > +
> > +  /* Get the start and end insns of each loop.  */
> > +  calculate_loop_boundary (loops_aux);
> > +
> > +  /* We assume that if two loops are disjoint they can not interleave
> > +     and if they are not disjoint one is completely contained in the
> > +     other.  This assumption help us to avoid the case of interleaved
> > +     loop intervals (which is somewhat unlikely to happen in
practice).
> > +     TODO: calculate all the loops's size in one pass.  */
> > +  FOR_EACH_LOOP (li, loop, 0)
> > +  {
>
> Indentation.

Done.
>
> > +    rtx insn;
> > +    rtx start, end;
> > +
> > +    if (!loop)
> > +      continue;
> > +
> > +    /* Calculate the size of the loop.  */
> > +    total_loop_size = 0;
> > +    start = BB_HEAD (loops_aux[loop->num].first);
> > +    end = BB_END (loops_aux[loop->num].last);
> > +
> > +    for (insn = start; insn != NEXT_INSN (end); insn = NEXT_INSN
(insn))
> > +      {
> > +   if (INSN_P (insn))
> > +     {
> > +       struct insn_aux iaux_tmp;
> > +
> > +       /* Retrieve the size of the instruction. */
> > +       iaux_tmp.insn = insn;
> > +       iaux = (struct insn_aux *) htab_find (insns_aux, &iaux_tmp);
> > +       if (iaux)
> > +         total_loop_size += iaux->size;
> > +       else
> > +         total_loop_size += get_attr_min_length (insn);
> > +     }
> > +      }
>
> Again, do you need to use a hash table here, or is it simpler to just
> recalculate it or use an index into an allocated array of INSN_UID.

Done.

>
> > +    /* Create the loop_info variable, used to hold information about
> > +       the loop.  */
> > +    cur_loop =
> > +      (struct loop_info_def *) xcalloc (1, sizeof (struct
loop_info_def));
> > +
> > +    /* Store the information.  */
> > +    cur_loop->size = total_loop_size;
> > +    cur_loop->index = loop->num;
> > +
> > +    validate_fbb_data_element (loops_aux[loop->num].first->index);
> > +    /* Allocate memory for the loops info in the loop's first
> > +       basic-block; if it does not exist already.  */
> > +    if (fbb_data[loops_aux[loop->num].first->index].loops_info_list ==
NULL)
> > +      fbb_data[loops_aux[loop->num].first->index].loops_info_list =
> > +   VEC_alloc (loop_info, heap, 1);
> > +    /* Insert the loop information in loop->first basic-block.  */
> > +    place =
> > +      VEC_lower_bound (loop_info,
> > +             fbb_data[loops_aux[loop->num].first->index].
> > +             loops_info_list, cur_loop, loop_size_is_less);
> > +    VEC_safe_insert (loop_info, heap,
> > +           fbb_data[loops_aux[loop->num].first->index].
> > +           loops_info_list, place, cur_loop);
> > +  }
> > +
> > +  /* Dump the information that was collected. */
> > +  if (dump_file)
> > +    FOR_EACH_BB (bb)
> > +    {
>
> Indentation.

Done.
>
> > +      validate_fbb_data_element (bb->index);
> > +
> > +      if (fbb_data[bb->index].loops_info_list == NULL)
> > +   continue;
> > +
> > +      for (i = 0;
> > +      VEC_iterate (loop_info, fbb_data[bb->index].loops_info_list, i,
> > +         cur_loop); i++)
>
> I would put the i++ on a separate line here.

Done.
>
> > +   {
> > +     fprintf (dump_file,
> > +         "\n;; bb %d is the first basic-block in the "
> > +         "loop %d layout (total loop size %dK)",
> > +         bb->index, cur_loop->index, cur_loop->size);
> > +   }
> > +    }
> > +  free (loops_aux);
> > +  loop_optimizer_finalize ();
> > +}
> > +
> > +/* An auxiliary structure for recognizing critical sections.  */
> > +struct critical_sections_aux
> > +{
> > +  /* The basic-block which contains the critical section.  */
> > +  basic_block start_bb;
> > +
> > +  /* The type of the critical section.  */
> > +  int type;
>
> Would an enum be better here than an int?

Done (in spu.c).
>
> > +  /* True if the end of the critical section was recognized.  */
> > +  bool closed_p;
> > +};
> > +
> > +typedef struct critical_sections_aux *critical_sections;
> > +DEF_VEC_P(critical_sections);
> > +DEF_VEC_ALLOC_P(critical_sections,heap);
> > +
> > +/* Given basic-block BB which contains an instruction that ends a
> > +   critical section; close an open critical section that is recorded
in
> > +   START_SEQUENCE; which appears in the same basic-block as bb and is
> > +   of type TYPE.  */
> > +static void
> > +close_critical_sections (VEC(critical_sections, heap) *start_sequence,
> > +          basic_block bb, int type)
> > +{
> > +  struct critical_sections_aux *crit;
> > +  unsigned int i;
> > +  bool bb_close_critical_section_p = false;
> > +
> > +  /* Loop through all of the critical sections.  */
> > +  for (i = 0; VEC_iterate (critical_sections, start_sequence, i,
crit); i++)
> > +    {
> > +      /* First check that this bb closes a critical section of the
> > +         right type.  */
> > +      if (crit->type == type && !crit->closed_p)
> > +   {
> > +     int index = crit->start_bb->index;
> > +
> > +     if (bb->index == index)
> > +       {
> > +         /* Mark this section as closed.  */
> > +         crit->closed_p = true;
> > +
> > +         validate_fbb_data_element (index);
> > +         gcc_assert (!fbb_data[i].in_critical_section_p
> > +           && !fbb_data[i].first_bb_in_critical_section);
> > +         fbb_data[index].in_critical_section_p = true;
> > +         fbb_data[index].critical_section_size = fbb_data
[index].bb_size;
> > +         fbb_data[index].first_bb_in_critical_section = true;
> > +         bb_close_critical_section_p = true;
> > +         break;
> > +       }
> > +   }
> > +    }
> > +  if (!bb_close_critical_section_p)
> > +    warning (0,
> > +        "Unexpected end of critical section in basic-block %d.  ",
> > +        bb->index);
> > +}
> > +
> > +/* Record the critical section which starts at START_CRIT and ends in
> > +   END_CRIT instructions.  These section is part of a jump-table.
> > +   By marking this section as critical we validate that the table and
> > +   code that reads the table appears in the same section.  */
> > +static void
> > +record_tablejump (int start_crit, int end_crit)
> > +{
> > +  int critical_section_size = 0;
> > +  int i;
> > +  basic_block bb;
> > +  basic_block start = BASIC_BLOCK (start_crit);
> > +  basic_block end = BASIC_BLOCK (end_crit);
> > +
> > +  FOR_BB_BETWEEN (bb, start, end, next_bb)
> > +    {
> > +      i = bb->index;
> > +      validate_fbb_data_element (i);
> > +      gcc_assert (!fbb_data[i].in_critical_section_p
> > +        && !fbb_data[i].first_bb_in_critical_section);
> > +      critical_section_size += fbb_data[i].bb_size;
> > +      fbb_data[i].in_critical_section_p = true;
> > +    }
> > +  validate_fbb_data_element (end->index);
> > +  critical_section_size += fbb_data[end->index].bb_size;
> > +  fbb_data[end->index].in_critical_section_p = true;
> > +
> > +  if (fbb_data[start->index].critical_section_size <
critical_section_size)
> > +    fbb_data[start->index].critical_section_size =
critical_section_size;
> > +  fbb_data[start->index].first_bb_in_critical_section = true;
> > +}
>
> As I said, whether this should be pushed to the backend should be
considered.

Done.
>
> > +
> > +/* Record critical sections.  The instructions in a critical
> > +   section must reside in a single section.  */
> > +static void
> > +record_critical_sections (void)
> > +{
> > +  basic_block bb;
> > +  rtx insn, table, label;
> > +  VEC (critical_sections, heap) * start_sequence;
> > +  int type, i;
> > +  struct critical_sections_aux *crit;
> > +
> > +  /* If the target does not define any critical section then quit now.
*/
> > +  if ((targetm.record_jump_table == 0)
> > +      && (targetm.begin_critical_section == 0))
> > +    return;
>
> You might want to consider using !targetm.record_jump_table and
> !targetm.begin_critical_section instead of == 0.  This is minor.
>
> > +
> > +  start_sequence = VEC_alloc (critical_sections, heap, 1);
> > +
> > +  FOR_EACH_BB (bb)
> > +  {
> > +    /* Loop through all of the basic-block's insns.  */
> > +    FOR_BB_INSNS (bb, insn)
> > +    {
>
> Indentation again.

Done.
>
> > +      if (tablejump_p (insn, &label, &table))
> > +   {
> > +     int start = -1, end = -1;
>
> Should these be ints or HOST_WIDE_INT?
>
> > +
> > +     if (targetm.record_jump_table != 0)
> > +       {
> > +         targetm.record_jump_table (insn, label, &start, &end);
> > +
> > +         if (start == -1 || end == -1)
> > +      warning (0,
> > +          "Unexpected critical section in basic-block %d.  ",
> > +          bb->index);
> > +         else
> > +      /* Record the critical section.  */
> > +      record_tablejump (start, end);
> > +       }
> > +   }
> > +
> > +      /* Check whether this insn can begin a critical sections.  */
> > +      if (targetm.begin_critical_section != 0)
> > +   if (targetm.begin_critical_section (insn, &type))
> > +     {
> > +       struct critical_sections_aux *crit;
> > +
> > +       crit =
> > +         (struct critical_sections_aux *) xcalloc (1,
> > +                     sizeof (struct
> > +                        critical_sections_aux));
> > +       crit->start_bb = bb;
> > +       crit->type = type;
> > +       crit->closed_p = false;
> > +       /* Record the start instruction.  */
> > +       VEC_safe_push (critical_sections, heap, start_sequence, crit);
>
> Given you are pushing a pointer here, I wonder whether it would be better
to
> have the VEC use a structure instead of pointer to structure?  I'm just
> thinking out loud here.  It would save having to do two levels of
allocation,
> but would involve some structure copies.

Done. (in spu.c)
>
> > +     }
> > +
> > +      /* Check whether this insn can end a critical sections.  */
> > +      if (targetm.end_critical_section != 0)
> > +   if (targetm.end_critical_section (insn, &type))
> > +     {
> > +       /* Close any critical section that this instruction may end.
*/
> > +       close_critical_sections (start_sequence, bb, type);
> > +     }
> > +    }
> > +  }
> > +
> > +  for (i = 0; VEC_iterate (critical_sections, start_sequence, i,
crit); i++)
> > +    {
> > +      if (!crit->closed_p)
> > +   warning (0,
> > +       "Unexpected start of critical section in basic-block %d.  ",
> > +       crit->start_bb->index);
> > +    }
>
> Should this be something like a gcc_assert or error instead of warning?

Done. (in spu.c)
>
> > +
> > +  /* Dump the information that was collected. */
> > +  if (dump_file)
> > +    FOR_EACH_BB (bb)
> > +    {
> > +      validate_fbb_data_element (bb->index);
> > +
> > +      if (fbb_data[bb->index].in_critical_section_p
> > +     && fbb_data[bb->index].first_bb_in_critical_section)
> > +   {
> > +     fprintf (dump_file,
> > +         "\n;; bb %d starts a critical section (%dB)", bb->index,
> > +         fbb_data[bb->index].critical_section_size);
> > +   }
> > +    }
> > +
> > +  /* Free data.  */
> > +  for (i = 0; VEC_iterate (critical_sections, start_sequence, i,
crit); i++)
> > +    {
> > +      free (crit);
> > +    }
>
> If you make the VEC use the structure instead of pointer to structure,
this
> loop goes away.
>
> > +  VEC_free (critical_sections, heap, start_sequence);
> > +}
> > +
> > +static void
> > +free_fbb_data (void)
> > +{
> > +  int i, j;
>
> You probably should consider using unsigned WIDE_INT for the sizes.

Done.

> > +  struct loop_info_def *cur_loop;
> > +
> > +  for (i = 0; i < fbb_data_size; i++)
> > +    {
> > +      if (fbb_data[i].loops_info_list != NULL)
> > +   {
> > +     for (j = 0;
> > +          VEC_iterate (loop_info, fbb_data[i].loops_info_list, j,
> > +             cur_loop); j++)
>
> I would break the j++ on to a separate line.

Done.

> > +       FREE (cur_loop);
> > +
> > +     VEC_free (loop_info, heap, fbb_data[i].loops_info_list);
> > +   }
> > +    }
> > +  FREE (fbb_data);
> > +}
> > +
> > +/* Given RTX_FIRST which is the first instruction we start from; check
> > +   that there is no unexpected insns outside basic-blocks that could
> > +   effect the size of the sections, e.g. unexpected read-only code.
*/
> > +static void
> > +check_unexpected_insns (rtx rtx_first)
> > +{
> > +  rtx tmp_rtx;
> > +  VEC (rtx, heap) *jump_tables_list;
> > +  enum bb_state
> > +  { NOT_IN_BB, IN_ONE_BB, IN_MULTIPLE_BB };
>
> The indentation here is unusual.  I would fold it to one line instead of
having
> the '{'/'}' on a second line.

Done.

> > +  int max_uid = get_max_uid ();
> > +  basic_block *start =
> > +    (basic_block *) xcalloc (max_uid, sizeof (basic_block));
> > +  basic_block *end = (basic_block *) xcalloc (max_uid, sizeof
(basic_block));
> > +  enum bb_state *in_bb_p =
> > +    (enum bb_state *) xcalloc (max_uid, sizeof (enum bb_state));
> > +  basic_block bb;
> > +  rtx insn;
> > +
> > +  jump_tables_list = VEC_alloc (rtx, heap, 1);
> > +
> > +  if (rtx_first == 0)
> > +    return;
> > +
> > +  FOR_EACH_BB_REVERSE (bb)
> > +  {
>
> Indentation.

Done.
>
> > +    rtx x, table, label;
> > +
> > +    start[INSN_UID (BB_HEAD (bb))] = bb;
> > +    end[INSN_UID (BB_END (bb))] = bb;
> > +    for (x = BB_HEAD (bb); x != NULL_RTX; x = NEXT_INSN (x))
> > +      {
> > +   enum bb_state state = IN_MULTIPLE_BB;
> > +
> > +   if (in_bb_p[INSN_UID (x)] == NOT_IN_BB)
> > +     state = IN_ONE_BB;
> > +   in_bb_p[INSN_UID (x)] = state;
> > +
> > +   if (INSN_P (x) && tablejump_p (x, &label, &table))
> > +     {
> > +       /* Record the information regarding the jump table instruction
> > +          which we expect to appear outside of basic-block.  */
> > +       VEC_safe_push (rtx, heap, jump_tables_list, table);
> > +       VEC_safe_push (rtx, heap, jump_tables_list, label);
> > +     }
> > +
> > +
> > +   if (x == BB_END (bb))
> > +     break;
> > +      }
> > +  }
> > +  for (tmp_rtx = rtx_first; NULL != tmp_rtx; tmp_rtx = NEXT_INSN
(tmp_rtx))
> > +    {
> > +      if (in_bb_p[INSN_UID (tmp_rtx)] == NOT_IN_BB
> > +     && !NOTE_P (tmp_rtx) && !BARRIER_P (tmp_rtx))
> > +   {
> > +     int i;
> > +     bool found_p = false;
> > +     rtx label;
> > +
> > +     for (i = 0; VEC_iterate (rtx, jump_tables_list, i, insn); i++)
> > +       {
> > +         /* We expect that the table itself would appear outside
> > +            of basic-block.  */
> > +         if (rtx_equal_p (tmp_rtx, insn))
> > +      {
> > +        found_p = true;
> > +        break;
> > +      }
> > +       }
> > +     if (!found_p)
> > +       warning (0, "Unexpected insns outside basic-blocks;"
> > +           " -fpartition-functions-into-sections may be effected.");
>
> Should this be a gcc_asert/error instead of warning?

Done.
>
> > +     else
> > +       {
> > +         /* Validate that the table and code that reads the table
> > +            will appear in the same section.  */
> > +         if (GET_CODE (tmp_rtx) == CODE_LABEL)
> > +      {
> > +        if (tablejump_p (prev_active_insn (tmp_rtx), &label, NULL))
> > +          {
> > +            if (!rtx_equal_p (tmp_rtx, label))
> > +         {
> > +           warning (0, "Unexpected insns outside basic-blocks;"
> > +               " -fpartition-functions-into-sections "
> > +                                   "may be effected.");
> > +           return;
> > +         }
> > +
> > +          }
> > +        else
> > +          {
> > +            warning (0, "Unexpected insns outside basic-blocks;"
> > +                " -fpartition-functions-into-sections may "
> > +                               "be effected.");
> > +            return;
> > +          }
> > +      }
> > +       }
> > +   }
> > +
> > +    }
> > +
> > +  free (start);
> > +  free (end);
> > +  free (in_bb_p);
> > +  VEC_free (rtx, heap, jump_tables_list);
> > +}
> > +
> > +/* Main entry point for partitioning functions into sections in order
> > +   to make it fit small local store.
> > +
> > +   The motivation:
> > +
> > +   The local store of embedded processors usually imposes a strict
> > +   threshold on the code size of programs.  Having a limited memory
(and
> > +   lack of hardware i-cache) creates cases where a single program code
> > +   can not fit into it.  To overcome this problem, the programmer can
> > +   partition the code into multiple pieces and manually swap them in
and
> > +   out of the local store.  To simplify this task, an overlay
technique
> > +   can be used which handles the memory management tasks automatically
> > +   and relieves the programmer from managing the local store manually.
> > +
> > +   Preparing the code for the overlay technique consists of a
> > +   preprocessing static stage which is done by the compiler and
linker.
> > +   The compiler first partition the code into sections which are later
> > +   constructed into an overlaid program by the linker.  At execution
> > +   time the overlay manager automatically swap pieces of code in and
> > +   out of the local store.
> > +
> > +   The ability to place each function in a different section already
> > +   exists in GCC (-ffunction-sections).  However, using the
granularity
> > +   of a single function for partitioning is not sufficient when a
single
> > +   function's body can not fully fit the local store.  This new pass
> > +   overcomes this problem by breaking the large functions.  It can
also
> > +   break a function into fixed sized sections that could be used for
> > +   a software i-cache scheme implementation, as an alternative to the
> > +   overlay technique.  */
> > +
> > +static void
> > +partition_function_into_sections (void)
> > +{
> > +  edge *crossing_edges;
> > +  int n_crossing_edges = 0;
>
> Should this be unsigned WIDE_INT instead of int?

This variable was eliminated. (using VEC instead)

> > +  int max_edges = 10 * last_basic_block;
> > +  basic_block cur_bb;
> > +  int i;
>
> Should this be unsigned WIDE_INT instead of int?

Done.

> > +
> > +  fbb_data_size = 10 * last_basic_block;
> > +
> > +  if (n_basic_blocks < 1)
> > +    return;
> > +
> > +  /* Allocate the initial hunk of the fbb_data.  */
> > +  fbb_data =
> > +    (funcpart_basic_block_data *) xmalloc (fbb_data_size *
> > +                  sizeof
> > +                  (funcpart_basic_block_data));
>
> You might consider using xcalloc here, and then only initializing the
> section_id to -1 in the loop.

Done.

> > +  for (i = 0; i < fbb_data_size; i++)
> > +    {
> > +      fbb_data[i].section_id = -1;
> > +      fbb_data[i].loops_info_list = NULL;
> > +      fbb_data[i].in_critical_section_p = false;
> > +      fbb_data[i].first_bb_in_critical_section = false;
> > +      fbb_data[i].critical_section_size = 0;
> > +      fbb_data[i].bb_size = 0;
> > +    }
> > +
> > +  insns_aux = htab_create (10 * last_basic_block,
> > +            iaux_info_hash, iaux_info_eq, free);
> > +
>
> Would it be better to use a VEC here instead of a hash table, based on
the
> basic block ID?>

Done.

> > +  check_unexpected_insns (get_insns ());
> > +
> > +  /* Update the maximum section size (in bytes).  */
> > +  estimate_max_section_size =
> > +    flag_partition_functions_into_sections -
estimate_section_overhead;
> > +
> > +  crossing_edges = (edge *) xcalloc (max_edges, sizeof (edge));
> > +
> > +  /* Initialize structures for layout changes.  */
> > +  cfg_layout_initialize (0);
> > +
> > +  /* Record the start and size of each loop in fbb_data array.  */
> > +  record_loops_boundaries ();
> > +
> > +  /* Record the critical sections that must reside in a single
section.  */
> > +  record_critical_sections ();
> > +
> > +  FOR_EACH_BB (cur_bb)
> > +  {
>
> Indentation.

Done.
>
> > +    cur_bb->flags &= ~BB_FIRST_AFTER_SECTION_SWITCH;
> > +
> > +    if (cur_bb->next_bb != EXIT_BLOCK_PTR)
> > +      cur_bb->aux = cur_bb->next_bb;
> > +  }
> > +
> > +  create_sections (&crossing_edges, &n_crossing_edges, &max_edges);
> > +
> > +  if (n_crossing_edges > 0)
> > +    {
> > +      /* Make sure the source of any crossing edge ends in a jump and
the
> > +         destination of any crossing edge has a label.  */
> > +      add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);
> > +      /* Convert all crossing fall_thru edges to non-crossing fall
> > +         thrus to unconditional jumps (that jump to the original fall
> > +         thru dest).  */
> > +      fix_up_fall_thru_edges ();
> > +    }
> > +
> > +  /* Free data.  */
> > +  FREE (crossing_edges);
> > +  free_fbb_data ();
> > +  /* Finalize layout changes.  */
> > +  cfg_layout_finalize ();
> > +
> > +  free_dominance_info (CDI_DOMINATORS);
> > +  if (insns_aux)
> > +    htab_delete (insns_aux);
> > +
> > +  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
> > +  insert_section_boundary_note_in_function ();
> > +}
> > +
> > +static bool
> >  gate_handle_reorder_blocks (void)
> >  {
> >    if (targetm.cannot_modify_jumps_p ())
> > @@ -2296,4 +3544,154 @@
> >   }
> >  };
> >
> > +/* The overhead (in bytes) of creating a new section includes adding
> > +   unconditional branch instruction between sections should also be
> > +   taken into account.  */
> > +static void
> > +get_est_section_overhead (void)
> > +{
> > +  gcc_assert (flag_partition_functions_into_sections != 0);
> >
> > +  estimate_section_overhead = 0;
> > +  estimate_max_section_size = 0;
> > +
> > +  if (uncond_jump_length == 0)
> > +    uncond_jump_length = get_uncond_jump_length ();
> > +
> > +  if (targetm.est_section_overhead != 0)
> > +    {
> > +      /* The machine depndent pass could add extra instructions
> > +         as a result of the new branches.  */
> > +      estimate_section_overhead =
> > +        targetm.est_section_overhead ();
> > +    }
> > +  /* Add the size of the new branch that will be created for each
> > +     sections.  */
> > +  else
> > +    estimate_section_overhead += uncond_jump_length;
> > +}
> > +
> > +/* Return TRUE if an instruction exists such that it exceeds the
threshold
> > +   of the section size.  Otherwise return FALSE.  */
> > +static bool
> > +instruction_size_exceeds_threshold (void)
> > +{
> > +  basic_block bb;
> > +  rtx insn;
> > +  int size = 0;
>
> Sizes should be unsigned HOST_WIDE_INT.

Done.

> > +
> > +  FOR_EACH_BB (bb)
> > +  {
> > +    FOR_BB_INSNS (bb, insn)
> > +    {
>
> Indentation.

Done.
>
> > +      if (INSN_P (insn) || NOTE_P (insn))
> > +   {
> > +          if (targetm.est_instruction_size != 0)
> > +            size = targetm.est_instruction_size (insn);
> > +          else
> > +            size = get_attr_min_length (insn);
> > +
> > +     if (size >
> > +         (flag_partition_functions_into_sections -
> > +          estimate_section_overhead))
> > +       {
> > +         warning (0,
> > +             "Section threshold value is too small.  ");
> > +         return true;
> > +       }
> > +   }
> > +    }
> > +  }
> > +  return false;
> > +}
> > +
> > +static bool
> > +gate_handle_partition_functions (void)
> > +{
> > +  if ((flag_partition_functions_into_sections == 0)
> > +      || DECL_ONE_ONLY (current_function_decl)
> > +      || user_defined_section_attribute)
> > +    return 0;
> > +
> > +  /* Estimate the overhead in creating new section in term of the new
> > +     instruction that are needed to support it and need to be
considered.
> > +     */
> > +  get_est_section_overhead ();
> > +
> > +  /* Make sure there is no instruction with size that exceeds the
> > +     estimated section size.  */
> > +  return (!instruction_size_exceeds_threshold ());
> > +}
> > +
> > +static unsigned int
> > +rest_of_handle_partition_functions (void)
> > +{
> > +  partition_function_into_sections ();
> > +  return 0;
> > +}
> > +
> > +struct rtl_opt_pass pass_partition_functions =
> > +{
> > +  {
> > +   RTL_PASS,
> > +   "fpart",                              /* name */
> > +   gate_handle_partition_functions,      /* gate */
> > +   rest_of_handle_partition_functions,   /* execute */
> > +   NULL,                                 /* sub */
> > +   NULL,                                 /* next */
> > +   0,                                    /* static_pass_number */
> > +   TV_FUNCTION_PARTITION,                /* tv_id */
> > +   0,                                    /* properties_required */
> > +   0,                                    /* properties_provided */
> > +   0,                                    /* properties_destroyed */
> > +   TODO_dump_func,                       /* todo_flags_start */
> > +   TODO_dump_func,                       /* todo_flags_finish */
> > +  }
> > +};
> > +
> > +/* Validate that the size (in bytes) of each section does not exceed
> > +   the maximum section size the user defined.  */
> > +
> > +void
> > +check_sections (void)
> > +{
> > +  rtx insn;
> > +  int section_size = 0;
>
> sizes should be unsigned HOST_INT.

Done.
>
> > +  int number_of_sections = 1;
> > +
> > +  if (flag_partition_functions_into_sections == 0)
> > +    return;
> > +
> > +  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
> > +    {
> > +      if (INSN_P (insn))
> > +   {
> > +     section_size += get_attr_min_length (insn);
> > +     continue;
> > +   }
> > +      if (NOTE_P (insn) && NOTE_KIND (insn) ==
NOTE_INSN_SWITCH_TEXT_SECTIONS)
> > +   {
> > +     number_of_sections++;
> > +     if (section_size > flag_partition_functions_into_sections)
> > +       {
> > +         warning (0,
> > +             "section %d (%dB) exceeds section threshold "
> > +             "%dB",
> > +             NOTE_TEXT_SECTION (insn) - 1, section_size,
> > +             flag_partition_functions_into_sections);
> > +       }
>
> You might want to think whether you should use a particular value for
warning
> instead of 0.
>
> > +   }
> > +      section_size = 0;
> > +    }
> > +
> > +  /* Check the last section.  */
> > +  if (section_size > flag_partition_functions_into_sections)
> > +    {
> > +      warning (0,
> > +          "section %d (%dB) exceeds section threshold "
> > +          "%dB",
> > +          number_of_sections, section_size,
> > +          flag_partition_functions_into_sections);
> > +    }
> > +
> > +}
> > Index: basic-block.h
> > ===================================================================
> > --- basic-block.h   (revision 138847)
> > +++ basic-block.h   (working copy)
> > @@ -332,7 +332,10 @@
> >
> >    /* Set on blocks that cannot be threaded through.
> >       Only used in cfgcleanup.c.  */
> > -  BB_NONTHREADABLE_BLOCK = 1 << 11
> > +  BB_NONTHREADABLE_BLOCK = 1 << 11,
> > +
> > +  /* Set on blocks that are first in a section.  */
> > +  BB_FIRST_AFTER_SECTION_SWITCH = 1 << 12
> >  };
> >
> >  /* Dummy flag for convenience in the hot/cold partitioning code.  */
> > @@ -941,6 +944,7 @@
> >  unsigned bb_dom_dfs_in (enum cdi_direction, basic_block);
> >  unsigned bb_dom_dfs_out (enum cdi_direction, basic_block);
> >
> > +extern void check_sections (void);
> >  extern edge try_redirect_by_replacing_jump (edge, basic_block, bool);
> >  extern void break_superblocks (void);
> >  extern void relink_block_chain (bool);
>
> Again if the global changes that I suggested are made, this will likely
change.
>
> > Index: passes.c
> > ===================================================================
> > --- passes.c   (revision 138847)
> > +++ passes.c   (working copy)
> > @@ -805,6 +805,7 @@
> >       NEXT_PASS (pass_compute_alignments);
> >       NEXT_PASS (pass_duplicate_computed_gotos);
> >       NEXT_PASS (pass_variable_tracking);
> > +          NEXT_PASS (pass_partition_functions);
> >       NEXT_PASS (pass_free_cfg);
> >       NEXT_PASS (pass_machine_reorg);
> >       NEXT_PASS (pass_cleanup_barriers);
> > @@ -996,6 +997,9 @@
> >        else
> >     gcc_unreachable ();
> >      }
> > +  if (flags & TODO_check_sections
> > +      && flag_partition_functions_into_sections != 0)
> > +     check_sections ();
> >
> >  #if defined ENABLE_CHECKING
> >    if (flags & TODO_verify_ssa)
> > Index: config/spu/spu.c
> > ===================================================================
> > --- config/spu/spu.c   (revision 138847)
> > +++ config/spu/spu.c   (working copy)
> > @@ -138,6 +138,12 @@
> >  static int spu_builtin_vectorization_cost (bool);
> >  static bool spu_vector_alignment_reachable (const_tree, bool);
> >  static int spu_sms_res_mii (struct ddg *g);
> > +static int spu_est_section_overhead (void);
> > +static int spu_est_instruction_size (rtx);
> > +static bool spu_begin_critical_section (rtx, int *);
> > +static bool spu_end_critical_section (rtx, int *);
> > +static void spu_record_jump_table (rtx, rtx, int *, int *);
> > +static void part_func_to_sections_flags (void);
> >
> >  extern const char *reg_names[];
> >  rtx spu_compare_op0, spu_compare_op1;
> > @@ -297,6 +303,24 @@
> >  #undef TARGET_SCHED_SMS_RES_MII
> >  #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
> >
> > +#undef TARGET_EST_SECTION_OVERHEAD
> > +#define TARGET_EST_SECTION_OVERHEAD spu_est_section_overhead
> > +
> > +#undef TARGET_EST_INSTRUCTION_SIZE
> > +#define TARGET_EST_INSTRUCTION_SIZE spu_est_instruction_size
> > +
> > +#undef TARGET_BEGIN_CRITICAL_SECTION
> > +#define TARGET_BEGIN_CRITICAL_SECTION spu_begin_critical_section
> > +
> > +#undef TARGET_END_CRITICAL_SECTION
> > +#define TARGET_END_CRITICAL_SECTION spu_end_critical_section
> > +
> > +#undef TARGET_RECORD_JUMP_TABLE
> > +#define TARGET_RECORD_JUMP_TABLE spu_record_jump_table
> > +
> > +#undef TARGET_PART_FUNC_TO_SECTIONS_FLAGS
> > +#define TARGET_PART_FUNC_TO_SECTIONS_FLAGS part_func_to_sections_flags
> > +
> >  struct gcc_target targetm = TARGET_INITIALIZER;
>
> These will change if you do the global changes.
>
> >
> >  void
> > @@ -1969,6 +1993,347 @@
> >
> >    return gen_rtx_CONST_VECTOR (mode, v);
> >  }
> > +
> > +/* Specific flags for the pass which partition function into multiple
> > + *    sections.  */
> > +static void
> > +part_func_to_sections_flags (void)
> > +{
> > +  fix_range ("75-79");
> > +}
> > +
> > +/* Return the size of the stub that the linker will insert for INSN
that
> > +   are branches to another section.  */
> > +static int
> > +get_stub_size (rtx insn)
> > +{
> > +  int stub_size = 0;
> > +
> > +  if (!JUMP_P (insn) && !CALL_P (insn))
> > +    return 0;
> > +
> > +  if (JUMP_P (insn))
> > +    {
> > +      rtx set, src;
> > +
> > +      stub_size = spu_stub_size;
> > +
> > +      /* If the branch instruction and the branch target are in the
> > +         same basic-block they will probably be in the same section
> > +         as well.  Do not add the stub size in this case.  */
> > +      if (!tablejump_p (insn, NULL, NULL)
> > +     && JUMP_LABEL (insn)
> > +     && (BLOCK_NUM (JUMP_LABEL (insn)) == BLOCK_NUM (insn)))
> > +   stub_size = 0;
> > +
> > +      /* For indirect branches including jump-tables (not including
the
> > +         cases) and indirect function calls; stubs will be created in
the
> > +         non-overlay local store so their stub size is not inserted to
the
> > +         section calculation.  */
> > +      /* Return statements */
> > +      if (GET_CODE (PATTERN (insn)) == RETURN)
> > +   stub_size = 0;
> > +
> > +      /* jump table */
> > +      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
> > +     || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
> > +   stub_size = 0;
> > +
> > +      set = single_set (insn);
> > +      if (set)
> > +   {
> > +     src = SET_SRC (set);
> > +     if (GET_CODE (SET_DEST (set)) != PC)
> > +       abort ();
> > +
> > +     if (GET_CODE (src) == IF_THEN_ELSE)
> > +       {
> > +         rtx lab = 0;
> > +
> > +         if (GET_CODE (src) == IF_THEN_ELSE)
> > +      {
> > +        rtx lab = 0;
> > +        if (GET_CODE (XEXP (src, 1)) != PC)
> > +          lab = XEXP (src, 1);
> > +        else if (GET_CODE (XEXP (src, 2)) != PC)
> > +          lab = XEXP (src, 2);
> > +      }
> > +         if (lab && REG_P (lab))
> > +      stub_size = 0;
> > +       }
> > +   }
> > +    }
> > +  else if (CALL_P (insn))
> > +    {
> > +      rtx call;
> > +
> > +      stub_size = spu_stub_size;
> > +      /* All of our call patterns are in a PARALLEL and the CALL is
> > +         the first pattern in the PARALLEL. */
> > +      if (GET_CODE (PATTERN (insn)) != PARALLEL)
> > +   abort ();
> > +      call = XVECEXP (PATTERN (insn), 0, 0);
> > +      if (GET_CODE (call) == SET)
> > +   call = SET_SRC (call);
> > +      if (GET_CODE (call) != CALL)
> > +   abort ();
> > +      if (REG_P (XEXP (XEXP (call, 0), 0)))
> > +   stub_size = 0;
> > +    }
> > +
> > +   return stub_size;
> > +}
> > +
> > +/* Return the size of instruction INSN in bytes.  Take into account
the
> > +   size of extra machine depndent instructions that can be added
> > +   as a result of insn (like branch-hints for branch instructions).
> > +   Called when partitioning a function into sections.  */
> > +static int
> > +spu_est_instruction_size (rtx insn)
> > +{
>
> It is minor, but you might want to consider spelling out 'est'.  The
return
> value should probably be unsigned HOST_WIDE_INT.

Done.

> You might want to consider moving this file to be an attribute in the
spu.md
> file, instead of being just C code.

This function was changed as a result of the
patch for better insertion of hint and hbrp insns
(http://gcc.gnu.org/ml/gcc-patches/2008-08/msg01996.html).  The estimation
of instruction size is now depended on other instructions seen so far
(using accumulators), so I left it in the c file.

> > +  int size;
> > +  rtx table;
> > +
> > +  if (NOTE_P (insn))
> > +    {
> > +      return 0;
> > +    }
> > +
> > +  size = get_attr_min_length (insn);
> > +  if (tablejump_p (insn, NULL, &table))
> > +  {
> > +     rtvec vec;
> > +
> > +     if (GET_CODE (PATTERN (table)) == ADDR_VEC)
> > +        vec = XVEC (PATTERN (table), 0);
> > +      else
> > +        vec = XVEC (PATTERN (table), 1);
> > +
> > +     /* Add the size of the table to the insn size.  */
> > +     size += (GET_NUM_ELEM (vec) * 4);
> > +  }
> > +
> > +  size += get_stub_size (insn);
> > +
> > +  if (!TARGET_BRANCH_HINTS || optimize == 0)
> > +    return size;
> > +
> > +  /* Add the nops and branch hint which are added for each branch.
> > +     For hints the worst case is 4 nops for every branch.  */
> > +  if ((JUMP_P (insn) || CALL_P (insn))
> > +      && !tablejump_p (insn, NULL, NULL))
> > +    {
> > +      /* 4 nops + 1 hint.  */
> > +      size += 20;
> > +    }
> > +  return size;
> > +}
> > +
> > +/* Estimate the size in bytes of the extra instructions that will be
> > +   generated for each section as a result of creating a new branch for
> > +   that section.  Called when partitioning a function into sections.
*/
> > +static int
> > +spu_est_section_overhead (void)
> > +{
> > +  int extra_branch_insns = 0;
> > +
> > +  if (TARGET_BRANCH_HINTS && optimize != 0)
> > +    {
> > +      /* Add the nops and branch hint which are added for each branch.
> > +         For hints the worst case is 4 nops for every branch.  */
> > +      extra_branch_insns = 5;
> > +    }
> > +   /* Take into account the new branch instruction, its extra
> > +      instructions if they are created and the size of the stub that
the
> > +      linker will add to it.  */
> > +  return ((1 + extra_branch_insns) * 4 + spu_stub_size);
> > +}
>
> Again sizes should probably be unsigned HOST_WIDE_INT.

Done.

> > +
> > +/* The following are types of critical section.
> > +   Those sequences must reside in a single section.  */
> > +/* DMA sequence (wrch 16-21).  */
> > +#define CRITICAL_DMA_SEQ        1
> > +/* Event mask read/write/ack sequence (rd/wrch 0-2).  */
> > +#define CRITICAL_EVENT_MASK     2
> > +
> > +/* Return true if INSN begins a critical section.
> > +   Record it's type in TYPE.  */
> > +static bool
> > +spu_begin_critical_section (rtx insn, int *type)
> > +{
> > +  rtx body, set;
> > +
> > +  if (!INSN_P (insn))
> > +    return false;
> > +
> > +  if (GET_CODE (PATTERN (insn)) == PARALLEL)
> > +    return false;
> > +
> > +  set = single_set (insn);
> > +
> > +  if (set != 0)
> > +    {
> > +      /* We are looking for this type of instruction:
> > +
> > +         (insn (set (reg)
> > +         (unspec_volatile [(const_int)])) {spu_rdch_noclobber})
> > +      */
> > +      body = SET_SRC (set);
> > +      if (GET_CODE (body) == UNSPEC_VOLATILE)
> > +        if (XINT (body, 1) == UNSPEC_RDCH)
> > +          {
> > +            rtx tmp = XVECEXP (body, 0, 0);
> > +
> > +            if (tmp &&  GET_CODE (tmp) == CONST_INT && INTVAL (tmp) ==
0)
> > +              {
> > +                *type = CRITICAL_EVENT_MASK;
> > +                return true;
> > +              }
> > +          }
> > +    }
> > +  else
> > +    {
> > +      /* We are looking for this type of instruction:
> > +
> > +         (insn (unspec_volatile [(const_int) (subreg)])
> > +               {spu_wrch_noclobber})
> > +      */
> > +
> > +      body = PATTERN (insn);
> > +
> > +      if (GET_CODE (body) == UNSPEC_VOLATILE)
> > +        if (XINT (body, 1) == UNSPEC_WRCH)
> > +          {
> > +            rtx tmp = XVECEXP (body, 0, 0);
> > +
> > +            /* MFC_LSA 16  */
> > +            if (tmp &&  (GET_CODE (tmp) == CONST_INT) && INTVAL (tmp)
== 16)
> > +              {
> > +                *type = CRITICAL_DMA_SEQ;
> > +                return true;
> > +              }
> > +          }
> > +
> > +    }
> > +  return false;
> > +}
>
> This whole function might be better replaced by an attribute in spu.md.

I prefer to leave the recognition of the critical sections in the c
file at this stage because it might be changed in the future (using user
directive).

> > +
> > +/* Return true if INSN ends a critical section.
> > +   Record it's type in TYPE.  */
> > +static bool
> > +spu_end_critical_section (rtx insn, int *type)
> > +{
> > +  rtx body;
> > +
> > +  if (!INSN_P (insn))
> > +    return false;
> > +
> > +  body = PATTERN (insn);
> > +
> > +  /* We are looking for the following type of instruction:
> > +
> > +     (insn (parallel [
> > +           (unspec_volatile [(const_int) (subreg)])
> > +                             (clobber (mem))]) {spu_wrch_clobber}) */
> > +
> > +  if (GET_CODE (body) == PARALLEL)
> > +    {
> > +      rtx tmp = XVECEXP (body, 0, 0);
> > +
> > +      if (GET_CODE (tmp) == UNSPEC_VOLATILE)
> > +   {
> > +     if (XINT (tmp, 1) == UNSPEC_WRCH)
> > +       {
> > +         rtx const_val = XVECEXP (tmp, 0, 0);
> > +
> > +         /* MFC_Cmd 21  */
> > +         if (GET_CODE (const_val) == CONST_INT
> > +        && INTVAL (const_val) == 21)
> > +      {
> > +        *type = CRITICAL_DMA_SEQ;
> > +        return true;
> > +      }
> > +       }
> > +   }
> > +    }
> > +  else
> > +    {
> > +      /* We are looking for the following type of instruction:
> > +
> > +        (insn (unspec_volatile [(const_int]) (subreg)])
> > +               (spu_wrch_noclobber}) */
> > +
> > +      if (GET_CODE (body) == UNSPEC_VOLATILE)
> > +        if (XINT (body, 1) == UNSPEC_WRCH)
> > +          {
> > +            rtx tmp = XVECEXP (body, 0, 0);
> > +
> > +            /*  SPU_WrEventAck 2  */
> > +            if (tmp && (GET_CODE (tmp) == CONST_INT) && INTVAL (tmp)
== 2)
> > +              {
> > +                *type = CRITICAL_EVENT_MASK;
> > +                return true;
> > +              }
> > +          }
> > +
> > +    }
> > +  return false;
> > +}
>
> This whole function might be better replaced by an attribute in spu.md.
>
> > +
> > +/* Record the critical section which is part of the jump-table. START
and
> > +   END are the indices of the basic-blocks which indicate the
boundaries
> > +   of the critical section.  The critical section should contain the
> > +   jump-table and code that reads the table to make sure they will
> > +   appear in the same section.  We are looking for the following
sequence
> > +   of instructions:
> > +
> > +   ila ,LABEL
> > +   .
> > +   .
> > +   .
> > +   JUMP_INSN (use label)
> > +   LABEL:
> > +   table
> > +   .
> > +   .
> > +   .
> > +   end of table
> > +   */
> > +static void
> > +spu_record_jump_table (rtx jump_insn, rtx label, int *start,
> > +             int *end)
> > +{
> > +  rtx insn, set;
> > +  basic_block bb;
> > +
> > +  for (insn = jump_insn; insn != NULL; insn = PREV_INSN (insn))
> > +    {
> > +      rtx src;
> > +
> > +      set = single_set (insn);
> > +
> > +      if (!set)
> > +   continue;
> > +
> > +      bb = BLOCK_FOR_INSN (insn);
> > +
> > +      src = SET_SRC (set);
> > +      if (GET_CODE (src) == LABEL_REF)
> > +   {
> > +     rtx label1 = XEXP (src, 0);
> > +
> > +     if (rtx_equal_p (label, label1))
> > +       {
> > +              /* Record the section.  */
> > +         *start = bb->index;
> > +         *end = BLOCK_FOR_INSN (jump_insn)->index;
> > +         return;
> > +       }
> > +   }
> > +
> > +    }
> > +}
> > +
> >
> >  /* branch hint stuff */
> >
> > Index: config/spu/spu.h
> > ===================================================================
> > --- config/spu/spu.h   (revision 138847)
> > +++ config/spu/spu.h   (working copy)
> > @@ -631,3 +631,7 @@
> >  extern GTY(()) rtx spu_compare_op0;
> >  extern GTY(()) rtx spu_compare_op1;
> >
> > +#define HAS_LONG_COND_BRANCH 1
> > +#define HAS_LONG_UNCOND_BRANCH 1
>
> I think these probably should go.  If they stay, they might be more
appropriate
> as entries in the targetm structure instead of via #ifdef.
>
> > +
> > +
> > Index: config/spu/spu.opt
> > ===================================================================
> > --- config/spu/spu.opt   (revision 138847)
> > +++ config/spu/spu.opt   (working copy)
> > @@ -35,6 +35,11 @@
> >  Target Report RejectNegative InverseMask(SAFE_DMA)
> >  volatile must be specified on any memory that is effected by DMA
> >
> > +mstub-size=
> > +Target RejectNegative Joined UInteger Var(spu_stub_size) Init(16)
> > +Specify the stub size which is inserted by the linker when an
> > +overlay manger is used. (Default to 16 bytes)
> > +
> >  mstdmain
> >  Target Report Mask(STD_MAIN)
> >  Use standard main function as entry for startup
> >
> > Index: cfgrtl.c
> > ===================================================================
> > --- cfgrtl.c   (revision 138950)
> > +++ cfgrtl.c   (working copy)
> > @@ -1737,7 +1737,9 @@
> >
> >        for (insn = bb->il.rtl->header; insn; insn = NEXT_INSN (insn))
> >     if (!BARRIER_P (insn)
> > -       && BLOCK_FOR_INSN (insn) != NULL)
> > +       && BLOCK_FOR_INSN (insn) != NULL
> > +            && !(GET_CODE (insn) == NOTE
> > +                 && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION))
> >       {
> >         error ("insn %d in header of bb %d has non-NULL basic block",
> >           INSN_UID (insn), bb->index);
> > @@ -1909,7 +1911,8 @@
> >         if (x == BB_END (bb))
> >           break;
> >
> > -       if (control_flow_insn_p (x))
> > +       if (control_flow_insn_p (x)
> > +                && (!block_ends_with_call_p (bb)))
> >           {
> >        error ("in basic block %d:", bb->index);
> >        fatal_insn ("flow control insn inside a basic block", x);
> > @@ -1962,7 +1965,9 @@
> >
> >       /* And that the code outside of basic blocks has NULL bb field.
*/
> >     if (!BARRIER_P (x)
> > -       && BLOCK_FOR_INSN (x) != NULL)
> > +       && BLOCK_FOR_INSN (x) != NULL
> > +            && !(GET_CODE (x) == NOTE
> > +                 && NOTE_KIND (x) == NOTE_INSN_VAR_LOCATION))
> >       {
> >         error ("insn %d outside of basic blocks has non-NULL bb field",
> >           INSN_UID (x));
> > @@ -2052,7 +2057,9 @@
> >        /* Check that the code before the first basic block has NULL
> >      bb field.  */
> >        if (!BARRIER_P (x)
> > -     && BLOCK_FOR_INSN (x) != NULL)
> > +     && BLOCK_FOR_INSN (x) != NULL
> > +          && !(GET_CODE (x) == NOTE
> > +               && NOTE_KIND (x) == NOTE_INSN_VAR_LOCATION))
> >     {
> >       error ("insn %d outside of basic blocks has non-NULL bb field",
> >         INSN_UID (x));
>
> --
> Michael Meissner
> email: gnu@the-meissners.org
> http://www.the-meissners.org

(See attached file: patch_partition_8_9.txt)(See attached file:
Change_log_8_9.txt)(See attached file: patch_testsuite_7_9.txt)

Attachment: patch_partition_8_9.txt
Description: Text document

Attachment: Change_log_8_9.txt
Description: Text document

Attachment: patch_testsuite_7_9.txt
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]