[PATCH 3/6] Implement fork-based parallelism engine

Richard Biener richard.guenther@gmail.com
Mon Aug 31 09:33:01 GMT 2020


On Thu, Aug 27, 2020 at 8:27 PM Giuliano Belinassi
<giuliano.belinassi@usp.br> wrote:
>
> Hi, Honza.
>
> Thank you for your detailed review!
>
> On 08/27, Jan Hubicka wrote:
> > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> > > index c0b45795059..22405098dc5 100644
> > > --- a/gcc/cgraph.c
> > > +++ b/gcc/cgraph.c
> > > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl)
> > >    decl_node->remove ();
> > >  }
> > >
> > > +/* Release function dominator info if present.  */
> > > +
> > > +void
> > > +cgraph_node::maybe_release_dominators (void)
> > > +{
> > > +  struct function *fun = DECL_STRUCT_FUNCTION (decl);
> > > +
> > > +  if (fun && fun->cfg)
> > > +    {
> > > +      if (dom_info_available_p (fun, CDI_DOMINATORS))
> > > +   free_dominance_info (fun, CDI_DOMINATORS);
> > > +      if (dom_info_available_p (fun, CDI_POST_DOMINATORS))
> > > +   free_dominance_info (fun, CDI_POST_DOMINATORS);
> > > +    }
> > > +}
> >
> > I am not sure if that needs to be member function, but if so we want to
> > merge it with other places in cgraph.c and cgraphunit.c where dominators
> > are freed.  I do not think you need to check avalability.
>
> This is necessary to remove some nodes from the callgraph.  For some
> reason, if I node->remove () and it still have the dominance info
> available, it will fail some assertions on the compiler.
>
> However, with regard to code layout, this can be moved to lto-cgraph.c,
> as it is only used there.
>
> > > +
> > >  /* Record that DECL1 and DECL2 are semantically identical function
> > >     versions.  */
> > >  void
> > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > > index b4a7871bd3d..72ac19f9672 100644
> > > --- a/gcc/cgraph.h
> > > +++ b/gcc/cgraph.h
> > > @@ -463,6 +463,15 @@ public:
> > >       Return NULL if there's no such node.  */
> > >    static symtab_node *get_for_asmname (const_tree asmname);
> > >
> > > +  /* Get symtab node by order.  */
> > > +  static symtab_node *find_by_order (int order);
> >
> > This is quadratic and moreover seems unused. Why do you add it?
>
> I added this for debugging, since I used this a lot inside GDB.
> Sure, I can remove this without any problems, or print a warning
> for the developer to avoid calling this in production code.
>
> > > +
> > > +  /* Get symtab_node by its name.  */
> > > +  static symtab_node *find_by_name (const char *);
> >
> > Similarly here, note that names are not really very meaningful as lookup
> > things, since they get duplicated.
> > > +
> > > +  /* Get symtab_node by its ASM name.  */
> > > +  static symtab_node *find_by_asm_name (const char *);
> >
> > For this we have get_for_asmname (which also populates asm name hash as
> > needed and is not quadratic)
>
> Cool. I will surely remove this then :)
>
> > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> > > index d10d635e942..73e4bed3b61 100644
> > > --- a/gcc/cgraphunit.c
> > > +++ b/gcc/cgraphunit.c
> > > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void)
> > >  {
> > >    location_t saved_loc;
> > >
> > > +  /* FIXME: Find out why body-removed nodes are marked for output.  */
> > > +  if (body_removed)
> > > +    return;
> >
> > Indeed, we should know :)
>
> Looks like this was due an early problem. I removed this and bootstrap
> is working OK.
>
> > > +
> > > +
> > >    /* We ought to not compile any inline clones.  */
> > >    gcc_assert (!inlined_to);
> > >
> > > @@ -2658,6 +2663,7 @@ ipa_passes (void)
> > >
> > >        execute_ipa_summary_passes
> > >     ((ipa_opt_pass_d *) passes->all_regular_ipa_passes);
> > > +
> > This seems accidental.
>
> Yes.
>
> > >      }
> > >
> > >    /* Some targets need to handle LTO assembler output specially.  */
> > > @@ -2687,10 +2693,17 @@ ipa_passes (void)
> > >    if (flag_generate_lto || flag_generate_offload)
> > >      targetm.asm_out.lto_end ();
> > >
> > > -  if (!flag_ltrans
> > > +  if (split_outputs)
> > > +    flag_ltrans = true;
> > > +
> > > +  if ((!flag_ltrans || split_outputs)
> > >        && ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO)
> > >       || !flag_lto || flag_fat_lto_objects))
> > >      execute_ipa_pass_list (passes->all_regular_ipa_passes);
> > > +
> > > +  if (split_outputs)
> > > +    flag_ltrans = false;
> > > +
> > >    invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL);
> > >
> > >    bitmap_obstack_release (NULL);
> > > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void)
> > >        }
> > >  }
> > >
> > > +static bool is_number (const char *str)
> > > +{
> > > +  while (*str != '\0')
> > > +    switch (*str++)
> > > +      {
> > > +   case '0':
> > > +   case '1':
> > > +   case '2':
> > > +   case '3':
> > > +   case '4':
> > > +   case '5':
> > > +   case '6':
> > > +   case '7':
> > > +   case '8':
> > > +   case '9':
> > > +     continue;
> > > +   default:
> > > +     return false;
> > > +      }
> > > +
> > > +  return true;
> > > +}
> >
> > This looks odd, we have other places where we parse number from command
> > line :)
>
> isdigit () is poisoned in GCC. But I guess I should look how -flto=
> does this.

You need to use the all-caps variants, ISDIGIT in this case.

> > > +
> > > +/* If forked, which child am I?  */
> > > +
> > > +static int childno = -1;
> > > +
> > > +static bool
> > > +maybe_compile_in_parallel (void)
> > > +{
> > > +  struct symtab_node *node;
> > > +  int partitions, i, j;
> > > +  int *pids;
> > > +
> > > +  bool promote_statics = param_promote_statics;
> > > +  bool balance = param_balance_partitions;
> > > +  bool jobserver = false;
> > > +  bool job_auto = false;
> > > +  int num_jobs = -1;
> > > +
> > > +  if (!flag_parallel_jobs || !split_outputs)
> > > +    return false;
> > > +
> > > +  if (!strcmp (flag_parallel_jobs, "auto"))
> > > +    {
> > > +      jobserver = jobserver_initialize ();
> > > +      job_auto = true;
> > > +    }
> > > +  else if (!strcmp (flag_parallel_jobs, "jobserver"))
> > > +    jobserver = jobserver_initialize ();
> > > +  else if (is_number (flag_parallel_jobs))
> > > +    num_jobs = atoi (flag_parallel_jobs);
> > > +  else
> > > +    gcc_unreachable ();
> > > +
> > > +  if (job_auto && !jobserver)
> > > +    {
> > > +      num_jobs = sysconf (_SC_NPROCESSORS_CONF);
> > > +      if (num_jobs > 2)
> > > +   num_jobs = 2;
> > > +    }
> > > +
> > > +  if (num_jobs < 0 && !jobserver)
> > > +    {
> > > +      inform (UNKNOWN_LOCATION,
> > > +         "-fparallel-jobs=jobserver, but no GNU Jobserver found");
> > > +      return false;
> > > +    }
> > > +
> > > +  if (jobserver)
> > > +    num_jobs = 2;
> > > +
> > > +  if (num_jobs == 0)
> > > +    {
> > > +      inform (UNKNOWN_LOCATION, "-fparallel-jobs=0 makes no sense");
> > > +      return false;
> > > +    }
> > > +
> > > +  /* Trick the compiler to think that we are in WPA.  */
> > > +  flag_wpa = "";
> > > +  symtab_node::checking_verify_symtab_nodes ();
> >
> > Messing with WPA/ltrans flags is not good idea.  You already have
> > split_output for parallel build.  I sort of see why you set ltrans, but
> > why WPA?
>
> Some assertions expected flag_wpa to be present.  Sure, I can add
> split_outputs in these assertions.
>
> > > +
> > > +  /* Partition the program so that COMDATs get mapped to the same
> > > +     partition.  If promote_statics is true, it also maps statics
> > > +     to the same partition.  If balance is true, try to balance the
> > > +     partitions for compilation performance.  */
> > > +  lto_merge_comdat_map (balance, promote_statics, num_jobs);
> > > +
> > > +  /* AUX pointers are used by partitioning code to bookkeep number of
> > > +     partitions symbol is in.  This is no longer needed.  */
> > > +  FOR_EACH_SYMBOL (node)
> > > +    node->aux = NULL;
> > > +
> > > +  /* We decided that partitioning is a bad idea.  In this case, just
> > > +     proceed with the default compilation method.  */
> > > +  if (ltrans_partitions.length () <= 1)
> > > +    {
> > > +      flag_wpa = NULL;
> > > +      jobserver_finalize ();
> > > +      return false;
> > > +    }
> > > +
> > > +  /* Find out statics that need to be promoted
> > > +     to globals with hidden visibility because they are accessed from
> > > +     multiple partitions.  */
> > > +  lto_promote_cross_file_statics (promote_statics);
> > > +
> > > +  /* Check if we have variables being referenced across partitions.  */
> > > +  lto_check_usage_from_other_partitions ();
> > > +
> > > +  /* Trick the compiler to think we are not in WPA anymore.  */
> > > +  flag_wpa = NULL;
> > > +
> > > +  partitions = ltrans_partitions.length ();
> > > +  pids = XALLOCAVEC (pid_t, partitions);
> > > +
> > > +  /* There is no point in launching more jobs than we have partitions.  */
> > > +  if (num_jobs > partitions)
> > > +    num_jobs = partitions;
> > > +
> > > +  /* Trick the compiler to think we are in LTRANS mode.  */
> > > +  flag_ltrans = true;
> > > +
> > > +  init_additional_asm_names_file ();
> > > +
> > > +  /* Flush asm file, so we don't get repeated output as we fork.  */
> > > +  fflush (asm_out_file);
> > > +
> > > +  /* Insert a token for child to consume.  */
> > > +  if (jobserver)
> > > +    {
> > > +      num_jobs = partitions;
> > > +      jobserver_return_token ('p');
> > > +    }
> > > +
> > > +  /* Spawn processes.  Spawn as soon as there is a free slot.  */
> > > +  for (j = 0, i = -num_jobs; i < partitions; i++, j++)
> > > +    {
> > > +      if (i >= 0)
> > > +   {
> > > +     int wstatus, ret;
> > > +     ret = waitpid (pids[i], &wstatus, 0);
> > > +
> > > +     if (ret < 0)
> > > +       internal_error ("Unable to wait child %d to finish", i);
> > > +     else if (WIFEXITED (wstatus))
> > > +       {
> > > +         if (WEXITSTATUS (wstatus) != 0)
> > > +           error ("Child %d exited with error", i);
> > > +       }
> > > +     else if (WIFSIGNALED (wstatus))
> > > +       error ("Child %d aborted with error", i);
> > > +   }
> > > +
> > > +      if (j < partitions)
> > > +   {
> > > +     gcc_assert (ltrans_partitions[j]->symbols > 0);
> > > +
> > > +     if (jobserver)
> > > +       jobserver_get_token ();
> > > +
> > > +     pids[j] = fork ();
> > > +     if (pids[j] == 0)
> > > +       {
> > > +         childno = j;
> > > +         lto_apply_partition_mask (ltrans_partitions[j]);
> > > +         return true;
> > > +       }
> > > +   }
> > > +    }
> > > +
> > > +  /* Get the token which parent inserted for the childs, which they returned by
> > > +     now.  */
> > > +  if (jobserver)
> > > +    jobserver_get_token ();
> > > +  exit (0);
> > > +}
> > > +
> > > +
> > >  /* Perform simple optimizations based on callgraph.  */
> > >
> > >  void
> > > @@ -2768,6 +2960,7 @@ symbol_table::compile (void)
> > >    {
> > >      timevar_start (TV_CGRAPH_IPA_PASSES);
> > >      ipa_passes ();
> > > +    maybe_compile_in_parallel ();
> > >      timevar_stop (TV_CGRAPH_IPA_PASSES);
> > >    }
> > >    /* Do nothing else if any IPA pass found errors or if we are just streaming LTO.  */
> > > @@ -2790,6 +2983,9 @@ symbol_table::compile (void)
> > >    timevar_pop (TV_CGRAPHOPT);
> > >
> > >    /* Output everything.  */
> > > +  if (split_outputs)
> > > +    handle_additional_asm (childno);
> > What this is doin?
>
> Create an auxiliary file in which we will write the name of every
> assembler output.  This will change, as it is a better idea to write
> them using the main process rather than the child, as Richi already
> pointed out.
>
> > > +
> > >    switch_to_section (text_section);
> > >    (*debug_hooks->assembly_start) ();
> > >    if (!quiet_flag)
> > > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
> > > index 2cfab40156e..bc500df4853 100644
> > > --- a/gcc/ipa-fnsummary.c
> > > +++ b/gcc/ipa-fnsummary.c
> > > @@ -4610,7 +4610,7 @@ public:
> > >        gcc_assert (n == 0);
> > >        small_p = param;
> > >      }
> > > -  virtual bool gate (function *) { return true; }
> > > +  virtual bool gate (function *) { return !(flag_ltrans && split_outputs); }
> > >    virtual unsigned int execute (function *)
> > >      {
> > >        ipa_free_fn_summary ();
> > > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
> > > index 069de9d82fb..6a5657c7507 100644
> > > --- a/gcc/ipa-icf.c
> > > +++ b/gcc/ipa-icf.c
> > > @@ -2345,7 +2345,8 @@ sem_item_optimizer::filter_removed_items (void)
> > >          {
> > >       cgraph_node *cnode = static_cast <sem_function *>(item)->get_node ();
> > >
> > > -     if (in_lto_p && (cnode->alias || cnode->body_removed))
> > > +     if ((in_lto_p || split_outputs)
> > > +         && (cnode->alias || cnode->body_removed))
> >
> >
> > And I wonder why you need these. IPA passes are run before we split,
> > right?
>
> This was due to an early problem.  I removed this and bootstrap is
> still working.
>
> > >         remove_item (item);
> > >       else
> > >         filtered.safe_push (item);
> > > diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c
> > > index 7c854f471e8..4d9e11482d3 100644
> > > --- a/gcc/ipa-visibility.c
> > > +++ b/gcc/ipa-visibility.c
> > > @@ -540,7 +540,8 @@ optimize_weakref (symtab_node *node)
> > >  static void
> > >  localize_node (bool whole_program, symtab_node *node)
> > >  {
> > > -  gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl));
> > > +  gcc_assert (split_outputs || whole_program || in_lto_p
> > > +         || !TREE_PUBLIC (node->decl));
> > >
> > >    /* It is possible that one comdat group contains both hidden and non-hidden
> > >       symbols.  In this case we can privatize all hidden symbol but we need
> > > diff --git a/gcc/ipa.c b/gcc/ipa.c
> > > index 288b58cf73d..b397ea2fed8 100644
> > > --- a/gcc/ipa.c
> > > +++ b/gcc/ipa.c
> > > @@ -350,7 +350,7 @@ symbol_table::remove_unreachable_nodes (FILE *file)
> > >
> > >    /* Mark variables that are obviously needed.  */
> > >    FOR_EACH_DEFINED_VARIABLE (vnode)
> > > -    if (!vnode->can_remove_if_no_refs_p()
> > > +    if (!vnode->can_remove_if_no_refs_p ()
> > >     && !vnode->in_other_partition)
> > >        {
> > >     reachable.add (vnode);
> > > @@ -564,7 +564,7 @@ symbol_table::remove_unreachable_nodes (FILE *file)
> > >     }
> > >        else
> > >     gcc_assert (node->clone_of || !node->has_gimple_body_p ()
> > > -               || in_lto_p || DECL_RESULT (node->decl));
> > > +               || in_lto_p || split_outputs || DECL_RESULT (node->decl));
> > >      }
> > >
> > >    /* Inline clones might be kept around so their materializing allows further
> > > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
> > > index 93a99f3465b..12be8546d9c 100644
> > > --- a/gcc/lto-cgraph.c
> > > +++ b/gcc/lto-cgraph.c
> > > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
> > >  #include "omp-offload.h"
> > >  #include "stringpool.h"
> > >  #include "attribs.h"
> > > +#include "lto-partition.h"
> > >
> > >  /* True when asm nodes has been output.  */
> > >  bool asm_nodes_output = false;
> > > @@ -2065,3 +2066,174 @@ input_cgraph_opt_summary (vec<symtab_node *> nodes)
> > >     input_cgraph_opt_section (file_data, data, len, nodes);
> > >      }
> > >  }
> > > +
> > > +/* When analysing function for removal, we have mainly three states, as
> > > +   defined below.  */
> > > +
> > > +enum node_partition_state
> > > +{
> > > +  CAN_REMOVE,              /* This node can be removed, or is still to be
> > > +                      analysed.  */
> > > +  IN_CURRENT_PARTITION, /* This node is in current partition and should not be
> > > +                      touched.  */
> > > +  IN_BOUNDARY,             /* This node is in boundary, therefore being in other
> > > +                      partition or is a external symbol, and its body can
> > > +                      be released.  */
> > > +  IN_BOUNDARY_KEEP_BODY /* This symbol is in other partition but we may need its
> > > +                      body for inlining, for instance.  */
> > > +};
> > > +
> > > +/* Handle node that are in the LTRANS boundary, releasing its body and
> > > +   other informations if necessary.  */
> > > +
> > > +static void
> > > +handle_node_in_boundary (symtab_node *node, bool keep_body)
> > > +{
> > > +  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
> > > +    {
> > > +      if (cnode->inlined_to && cnode->inlined_to->aux2 != IN_CURRENT_PARTITION)
> > > +   {
> > > +     /* If marked to be inlined into a node not in current partition,
> > > +        then undo the inline.  */
> > > +
> > > +     if (cnode->callers) /* This edge could be removed.  */
> > > +       cnode->callers->inline_failed = CIF_UNSPECIFIED;
> > > +     cnode->inlined_to = NULL;
> > > +   }
> > > +
> > > +      if (cnode->has_gimple_body_p ())
> > > +   {
> > > +     if (!keep_body)
> > > +       {
> > > +         cnode->maybe_release_dominators ();
> > > +         cnode->remove_callees ();
> > > +         cnode->remove_all_references ();
> > > +
> > > +         /* FIXME: Releasing body of clones can release bodies of functions
> > > +            in current partition.  */
> > > +
> > > +         /* cnode->release_body ();  */
> > > +         cnode->body_removed = true;
> > > +         cnode->definition = false;
> > > +         cnode->analyzed = false;
> > > +       }
> > > +     cnode->cpp_implicit_alias = false;
> > > +     cnode->alias = false;
> > > +     cnode->transparent_alias = false;
> > > +     cnode->thunk.thunk_p = false;
> > > +     cnode->weakref = false;
> > > +     /* After early inlining we drop always_inline attributes on
> > > +        bodies of functions that are still referenced (have their
> > > +        address taken).  */
> > > +     DECL_ATTRIBUTES (cnode->decl)
> > > +       = remove_attribute ("always_inline",
> > > +                           DECL_ATTRIBUTES (node->decl));
> > > +
> > > +     cnode->in_other_partition = true;
> > > +   }
> > > +    }
> > > +  else if (is_a <varpool_node *> (node) && !DECL_EXTERNAL (node->decl))
> > > +    {
> > > +      DECL_EXTERNAL (node->decl) = true;
> > > +      node->in_other_partition = true;
> > > +    }
> > > +}
> > > +
> > > +/* Check the boundary and expands it if necessary, including more nodes or
> > > +   promoting then to a state where their body is required.  */
> > > +
> > > +static void
> > > +compute_boundary (ltrans_partition partition)
> > > +{
> > > +  vec<lto_encoder_entry> &nodes = partition->encoder->nodes;
> > > +  symtab_node *node;
> > > +  cgraph_node *cnode;
> > > +  auto_vec<symtab_node *, 16> mark_to_remove;
> > > +  unsigned int i;
> > > +
> > > +  FOR_EACH_SYMBOL (node)
> > > +    node->aux2 = CAN_REMOVE;
> >
> > There is boundary computation in lto-cgraph.c so we should merge the
> > logic...
>
> Agree.
>
> > If you keep the lto-partition datastructures it will compute boundary
> > for you and you can just remove the rest (I got that working at one
> > point).
>
> This is interesting, because I could not get that working out of the
> box. The lto_promote_cross_statics did not provide a fully working
> boundary that I could simple remove everything else. If you take a
> closer look, you will see that I am using the already computed boundary
> as a base, and incrementing that with the extra stuff required
> (mainly inline clones body).
>
> >
> > I am also not sure about copy-on-write effect of this. It may be better
> > to keep things around and just teach late passes to not compile things
> > in other partitions, but that is definitly for incremental change.
>
> Well, this looks like a lot of work :)
>
> > > +void
> > > +init_additional_asm_names_file (void)
> > > +{
> > > +  gcc_assert (split_outputs);
> > > +
> > > +  additional_asm_filenames = fopen (split_outputs, "w");
> > > +  if (!additional_asm_filenames)
> > > +    error ("Unable to create a temporary write-only file.");
> > > +
> > > +  fclose (additional_asm_filenames);
> > > +}
> >
> > Aha, that is what it does :)
> > I wonder if creating the file conditionally (and late) can not lead to
> > race condition where we will use same tmp file name for other build
> > executed in parallel by make.
>
> Hummm. True. Added to my TODO list :)
> Well, I never had any sort of issues with race condition here, even
> after stressing it, but this certainly is not proof that it is free of
> race conditition :)
>
> > > +
> > > +/* Reinitialize the assembler file and store it in the additional asm file.  */
> > > +
> > > +void
> > > +handle_additional_asm (int childno)
> > > +{
> > > +  gcc_assert (split_outputs);
> > > +
> > > +  if (childno < 0)
> > > +    return;
> > > +
> > > +  const char *temp_asm_name = make_temp_file (".s");
> > > +  asm_file_name = temp_asm_name;
> > > +
> > > +  if (asm_out_file == stdout)
> > > +    fatal_error (UNKNOWN_LOCATION, "Unexpected asm output to stdout");
> > > +
> > > +  fclose (asm_out_file);
> > > +
> > > +  asm_out_file = fopen (temp_asm_name, "w");
> > > +  if (!asm_out_file)
> > > +    fatal_error (UNKNOWN_LOCATION, "Unable to create asm output file");
> > > +
> > > +  /* Reopen file as append mode.  Here we assume that write to append file is
> > > +     atomic, as it is in Linux.  */
> > > +  additional_asm_filenames = fopen (split_outputs, "a");
> > > +  if (!additional_asm_filenames)
> > > +    fatal_error (UNKNOWN_LOCATION,
> > > +            "Unable to open the temporary asm files container");
> > > +
> > > +  fprintf (additional_asm_filenames, "%d %s\n", childno, asm_file_name);
> > > +  fclose (additional_asm_filenames);
> > > +}
> > > +
> > >  /* A helper function; used as the reallocator function for cpp's line
> > >     table.  */
> > >  static void *
> > > @@ -2311,7 +2359,7 @@ do_compile ()
> > >
> > >            timevar_stop (TV_PHASE_SETUP);
> > >
> > > -          compile_file ();
> > > +     compile_file ();
> > >          }
> > >        else
> > >          {
> > > @@ -2477,6 +2525,12 @@ toplev::main (int argc, char **argv)
> > >
> > >    finalize_plugins ();
> > >
> > > +  if (jobserver_initialized)
> > > +    {
> > > +      jobserver_return_token (JOBSERVER_NULL_TOKEN);
> > > +      jobserver_finalize ();
> > > +    }
> > > +
> > >    after_memory_report = true;
> > >
> > >    if (seen_error () || werrorcount)
> > > diff --git a/gcc/toplev.h b/gcc/toplev.h
> > > index d6c316962b0..3abbf74cd02 100644
> > > --- a/gcc/toplev.h
> > > +++ b/gcc/toplev.h
> > > @@ -103,4 +103,7 @@ extern void parse_alignment_opts (void);
> > >
> > >  extern void initialize_rtl (void);
> > >
> > > +extern void init_additional_asm_names_file (void);
> > > +extern void handle_additional_asm (int);
> > > +
> > >  #endif /* ! GCC_TOPLEV_H */
> > > diff --git a/gcc/varasm.c b/gcc/varasm.c
> > > index 4070f9c17e8..84df52013d7 100644
> > > --- a/gcc/varasm.c
> > > +++ b/gcc/varasm.c
> > > @@ -110,7 +110,7 @@ static void decode_addr_const (tree, class addr_const *);
> > >  static hashval_t const_hash_1 (const tree);
> > >  static int compare_constant (const tree, const tree);
> > >  static void output_constant_def_contents (rtx);
> > > -static void output_addressed_constants (tree);
> > > +static void output_addressed_constants (tree, int);
> > >  static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
> > >                                            unsigned int, bool, bool);
> > >  static void globalize_decl (tree);
> > > @@ -2272,7 +2272,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED,
> > >
> > >    /* Output any data that we will need to use the address of.  */
> > >    if (DECL_INITIAL (decl) && DECL_INITIAL (decl) != error_mark_node)
> > > -    output_addressed_constants (DECL_INITIAL (decl));
> > > +    output_addressed_constants (DECL_INITIAL (decl), 0);
> > >
> > >    /* dbxout.c needs to know this.  */
> > >    if (sect && (sect->common.flags & SECTION_CODE) != 0)
> > > @@ -3426,11 +3426,11 @@ build_constant_desc (tree exp)
> > >     already have labels.  */
> > >
> > >  static constant_descriptor_tree *
> > > -add_constant_to_table (tree exp)
> > > +add_constant_to_table (tree exp, int defer)
> > >  {
> > >    /* The hash table methods may call output_constant_def for addressed
> > >       constants, so handle them first.  */
> > > -  output_addressed_constants (exp);
> > > +  output_addressed_constants (exp, defer);
> > >
> > >    /* Sanity check to catch recursive insertion.  */
> > >    static bool inserting;
> > > @@ -3474,7 +3474,7 @@ add_constant_to_table (tree exp)
> > >  rtx
> > >  output_constant_def (tree exp, int defer)
> > >  {
> > > -  struct constant_descriptor_tree *desc = add_constant_to_table (exp);
> > > +  struct constant_descriptor_tree *desc = add_constant_to_table (exp, defer);
> > >    maybe_output_constant_def_contents (desc, defer);
> > >    return desc->rtl;
> > >  }
> > > @@ -3544,7 +3544,7 @@ output_constant_def_contents (rtx symbol)
> > >
> > >    /* Make sure any other constants whose addresses appear in EXP
> > >       are assigned label numbers.  */
> > > -  output_addressed_constants (exp);
> > > +  output_addressed_constants (exp, 0);
> > >
> > >    /* We are no longer deferring this constant.  */
> > >    TREE_ASM_WRITTEN (decl) = TREE_ASM_WRITTEN (exp) = 1;
> > > @@ -3608,7 +3608,7 @@ lookup_constant_def (tree exp)
> > >  tree
> > >  tree_output_constant_def (tree exp)
> > >  {
> > > -  struct constant_descriptor_tree *desc = add_constant_to_table (exp);
> > > +  struct constant_descriptor_tree *desc = add_constant_to_table (exp, 1);
> > >    tree decl = SYMBOL_REF_DECL (XEXP (desc->rtl, 0));
> > >    varpool_node::finalize_decl (decl);
> > >    return decl;
> > > @@ -4327,7 +4327,7 @@ compute_reloc_for_constant (tree exp)
> > >     Indicate whether an ADDR_EXPR has been encountered.  */
> > >
> > >  static void
> > > -output_addressed_constants (tree exp)
> > > +output_addressed_constants (tree exp, int defer)
> > >  {
> > >    tree tem;
> > >
> > > @@ -4347,21 +4347,21 @@ output_addressed_constants (tree exp)
> > >     tem = DECL_INITIAL (tem);
> > >
> > >        if (CONSTANT_CLASS_P (tem) || TREE_CODE (tem) == CONSTRUCTOR)
> > > -   output_constant_def (tem, 0);
> > > +   output_constant_def (tem, defer);
> > >
> > >        if (TREE_CODE (tem) == MEM_REF)
> > > -   output_addressed_constants (TREE_OPERAND (tem, 0));
> > > +   output_addressed_constants (TREE_OPERAND (tem, 0), defer);
> > >        break;
> > >
> > >      case PLUS_EXPR:
> > >      case POINTER_PLUS_EXPR:
> > >      case MINUS_EXPR:
> > > -      output_addressed_constants (TREE_OPERAND (exp, 1));
> > > +      output_addressed_constants (TREE_OPERAND (exp, 1), defer);
> > >        gcc_fallthrough ();
> > >
> > >      CASE_CONVERT:
> > >      case VIEW_CONVERT_EXPR:
> > > -      output_addressed_constants (TREE_OPERAND (exp, 0));
> > > +      output_addressed_constants (TREE_OPERAND (exp, 0), defer);
> > >        break;
> > >
> > >      case CONSTRUCTOR:
> > > @@ -4369,7 +4369,7 @@ output_addressed_constants (tree exp)
> > >     unsigned HOST_WIDE_INT idx;
> > >     FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, tem)
> > >       if (tem != 0)
> > > -       output_addressed_constants (tem);
> > > +       output_addressed_constants (tem, defer);
> > >        }
> > >        break;
> >
> > Nice job :)
> > Honza
>
> Thank you,
> Giuliano.


More information about the Gcc-patches mailing list