[PATCH 3/6] Implement fork-based parallelism engine
Richard Biener
richard.guenther@gmail.com
Mon Aug 31 09:33:01 GMT 2020
On Thu, Aug 27, 2020 at 8:27 PM Giuliano Belinassi
<giuliano.belinassi@usp.br> wrote:
>
> Hi, Honza.
>
> Thank you for your detailed review!
>
> On 08/27, Jan Hubicka wrote:
> > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> > > index c0b45795059..22405098dc5 100644
> > > --- a/gcc/cgraph.c
> > > +++ b/gcc/cgraph.c
> > > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl)
> > > decl_node->remove ();
> > > }
> > >
> > > +/* Release function dominator info if present. */
> > > +
> > > +void
> > > +cgraph_node::maybe_release_dominators (void)
> > > +{
> > > + struct function *fun = DECL_STRUCT_FUNCTION (decl);
> > > +
> > > + if (fun && fun->cfg)
> > > + {
> > > + if (dom_info_available_p (fun, CDI_DOMINATORS))
> > > + free_dominance_info (fun, CDI_DOMINATORS);
> > > + if (dom_info_available_p (fun, CDI_POST_DOMINATORS))
> > > + free_dominance_info (fun, CDI_POST_DOMINATORS);
> > > + }
> > > +}
> >
> > I am not sure if that needs to be member function, but if so we want to
> > merge it with other places in cgraph.c and cgraphunit.c where dominators
> > are freed. I do not think you need to check avalability.
>
> This is necessary to remove some nodes from the callgraph. For some
> reason, if I node->remove () and it still have the dominance info
> available, it will fail some assertions on the compiler.
>
> However, with regard to code layout, this can be moved to lto-cgraph.c,
> as it is only used there.
>
> > > +
> > > /* Record that DECL1 and DECL2 are semantically identical function
> > > versions. */
> > > void
> > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > > index b4a7871bd3d..72ac19f9672 100644
> > > --- a/gcc/cgraph.h
> > > +++ b/gcc/cgraph.h
> > > @@ -463,6 +463,15 @@ public:
> > > Return NULL if there's no such node. */
> > > static symtab_node *get_for_asmname (const_tree asmname);
> > >
> > > + /* Get symtab node by order. */
> > > + static symtab_node *find_by_order (int order);
> >
> > This is quadratic and moreover seems unused. Why do you add it?
>
> I added this for debugging, since I used this a lot inside GDB.
> Sure, I can remove this without any problems, or print a warning
> for the developer to avoid calling this in production code.
>
> > > +
> > > + /* Get symtab_node by its name. */
> > > + static symtab_node *find_by_name (const char *);
> >
> > Similarly here, note that names are not really very meaningful as lookup
> > things, since they get duplicated.
> > > +
> > > + /* Get symtab_node by its ASM name. */
> > > + static symtab_node *find_by_asm_name (const char *);
> >
> > For this we have get_for_asmname (which also populates asm name hash as
> > needed and is not quadratic)
>
> Cool. I will surely remove this then :)
>
> > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> > > index d10d635e942..73e4bed3b61 100644
> > > --- a/gcc/cgraphunit.c
> > > +++ b/gcc/cgraphunit.c
> > > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void)
> > > {
> > > location_t saved_loc;
> > >
> > > + /* FIXME: Find out why body-removed nodes are marked for output. */
> > > + if (body_removed)
> > > + return;
> >
> > Indeed, we should know :)
>
> Looks like this was due an early problem. I removed this and bootstrap
> is working OK.
>
> > > +
> > > +
> > > /* We ought to not compile any inline clones. */
> > > gcc_assert (!inlined_to);
> > >
> > > @@ -2658,6 +2663,7 @@ ipa_passes (void)
> > >
> > > execute_ipa_summary_passes
> > > ((ipa_opt_pass_d *) passes->all_regular_ipa_passes);
> > > +
> > This seems accidental.
>
> Yes.
>
> > > }
> > >
> > > /* Some targets need to handle LTO assembler output specially. */
> > > @@ -2687,10 +2693,17 @@ ipa_passes (void)
> > > if (flag_generate_lto || flag_generate_offload)
> > > targetm.asm_out.lto_end ();
> > >
> > > - if (!flag_ltrans
> > > + if (split_outputs)
> > > + flag_ltrans = true;
> > > +
> > > + if ((!flag_ltrans || split_outputs)
> > > && ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO)
> > > || !flag_lto || flag_fat_lto_objects))
> > > execute_ipa_pass_list (passes->all_regular_ipa_passes);
> > > +
> > > + if (split_outputs)
> > > + flag_ltrans = false;
> > > +
> > > invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL);
> > >
> > > bitmap_obstack_release (NULL);
> > > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void)
> > > }
> > > }
> > >
> > > +static bool is_number (const char *str)
> > > +{
> > > + while (*str != '\0')
> > > + switch (*str++)
> > > + {
> > > + case '0':
> > > + case '1':
> > > + case '2':
> > > + case '3':
> > > + case '4':
> > > + case '5':
> > > + case '6':
> > > + case '7':
> > > + case '8':
> > > + case '9':
> > > + continue;
> > > + default:
> > > + return false;
> > > + }
> > > +
> > > + return true;
> > > +}
> >
> > This looks odd, we have other places where we parse number from command
> > line :)
>
> isdigit () is poisoned in GCC. But I guess I should look how -flto=
> does this.
You need to use the all-caps variants, ISDIGIT in this case.
> > > +
> > > +/* If forked, which child am I? */
> > > +
> > > +static int childno = -1;
> > > +
> > > +static bool
> > > +maybe_compile_in_parallel (void)
> > > +{
> > > + struct symtab_node *node;
> > > + int partitions, i, j;
> > > + int *pids;
> > > +
> > > + bool promote_statics = param_promote_statics;
> > > + bool balance = param_balance_partitions;
> > > + bool jobserver = false;
> > > + bool job_auto = false;
> > > + int num_jobs = -1;
> > > +
> > > + if (!flag_parallel_jobs || !split_outputs)
> > > + return false;
> > > +
> > > + if (!strcmp (flag_parallel_jobs, "auto"))
> > > + {
> > > + jobserver = jobserver_initialize ();
> > > + job_auto = true;
> > > + }
> > > + else if (!strcmp (flag_parallel_jobs, "jobserver"))
> > > + jobserver = jobserver_initialize ();
> > > + else if (is_number (flag_parallel_jobs))
> > > + num_jobs = atoi (flag_parallel_jobs);
> > > + else
> > > + gcc_unreachable ();
> > > +
> > > + if (job_auto && !jobserver)
> > > + {
> > > + num_jobs = sysconf (_SC_NPROCESSORS_CONF);
> > > + if (num_jobs > 2)
> > > + num_jobs = 2;
> > > + }
> > > +
> > > + if (num_jobs < 0 && !jobserver)
> > > + {
> > > + inform (UNKNOWN_LOCATION,
> > > + "-fparallel-jobs=jobserver, but no GNU Jobserver found");
> > > + return false;
> > > + }
> > > +
> > > + if (jobserver)
> > > + num_jobs = 2;
> > > +
> > > + if (num_jobs == 0)
> > > + {
> > > + inform (UNKNOWN_LOCATION, "-fparallel-jobs=0 makes no sense");
> > > + return false;
> > > + }
> > > +
> > > + /* Trick the compiler to think that we are in WPA. */
> > > + flag_wpa = "";
> > > + symtab_node::checking_verify_symtab_nodes ();
> >
> > Messing with WPA/ltrans flags is not good idea. You already have
> > split_output for parallel build. I sort of see why you set ltrans, but
> > why WPA?
>
> Some assertions expected flag_wpa to be present. Sure, I can add
> split_outputs in these assertions.
>
> > > +
> > > + /* Partition the program so that COMDATs get mapped to the same
> > > + partition. If promote_statics is true, it also maps statics
> > > + to the same partition. If balance is true, try to balance the
> > > + partitions for compilation performance. */
> > > + lto_merge_comdat_map (balance, promote_statics, num_jobs);
> > > +
> > > + /* AUX pointers are used by partitioning code to bookkeep number of
> > > + partitions symbol is in. This is no longer needed. */
> > > + FOR_EACH_SYMBOL (node)
> > > + node->aux = NULL;
> > > +
> > > + /* We decided that partitioning is a bad idea. In this case, just
> > > + proceed with the default compilation method. */
> > > + if (ltrans_partitions.length () <= 1)
> > > + {
> > > + flag_wpa = NULL;
> > > + jobserver_finalize ();
> > > + return false;
> > > + }
> > > +
> > > + /* Find out statics that need to be promoted
> > > + to globals with hidden visibility because they are accessed from
> > > + multiple partitions. */
> > > + lto_promote_cross_file_statics (promote_statics);
> > > +
> > > + /* Check if we have variables being referenced across partitions. */
> > > + lto_check_usage_from_other_partitions ();
> > > +
> > > + /* Trick the compiler to think we are not in WPA anymore. */
> > > + flag_wpa = NULL;
> > > +
> > > + partitions = ltrans_partitions.length ();
> > > + pids = XALLOCAVEC (pid_t, partitions);
> > > +
> > > + /* There is no point in launching more jobs than we have partitions. */
> > > + if (num_jobs > partitions)
> > > + num_jobs = partitions;
> > > +
> > > + /* Trick the compiler to think we are in LTRANS mode. */
> > > + flag_ltrans = true;
> > > +
> > > + init_additional_asm_names_file ();
> > > +
> > > + /* Flush asm file, so we don't get repeated output as we fork. */
> > > + fflush (asm_out_file);
> > > +
> > > + /* Insert a token for child to consume. */
> > > + if (jobserver)
> > > + {
> > > + num_jobs = partitions;
> > > + jobserver_return_token ('p');
> > > + }
> > > +
> > > + /* Spawn processes. Spawn as soon as there is a free slot. */
> > > + for (j = 0, i = -num_jobs; i < partitions; i++, j++)
> > > + {
> > > + if (i >= 0)
> > > + {
> > > + int wstatus, ret;
> > > + ret = waitpid (pids[i], &wstatus, 0);
> > > +
> > > + if (ret < 0)
> > > + internal_error ("Unable to wait child %d to finish", i);
> > > + else if (WIFEXITED (wstatus))
> > > + {
> > > + if (WEXITSTATUS (wstatus) != 0)
> > > + error ("Child %d exited with error", i);
> > > + }
> > > + else if (WIFSIGNALED (wstatus))
> > > + error ("Child %d aborted with error", i);
> > > + }
> > > +
> > > + if (j < partitions)
> > > + {
> > > + gcc_assert (ltrans_partitions[j]->symbols > 0);
> > > +
> > > + if (jobserver)
> > > + jobserver_get_token ();
> > > +
> > > + pids[j] = fork ();
> > > + if (pids[j] == 0)
> > > + {
> > > + childno = j;
> > > + lto_apply_partition_mask (ltrans_partitions[j]);
> > > + return true;
> > > + }
> > > + }
> > > + }
> > > +
> > > + /* Get the token which parent inserted for the childs, which they returned by
> > > + now. */
> > > + if (jobserver)
> > > + jobserver_get_token ();
> > > + exit (0);
> > > +}
> > > +
> > > +
> > > /* Perform simple optimizations based on callgraph. */
> > >
> > > void
> > > @@ -2768,6 +2960,7 @@ symbol_table::compile (void)
> > > {
> > > timevar_start (TV_CGRAPH_IPA_PASSES);
> > > ipa_passes ();
> > > + maybe_compile_in_parallel ();
> > > timevar_stop (TV_CGRAPH_IPA_PASSES);
> > > }
> > > /* Do nothing else if any IPA pass found errors or if we are just streaming LTO. */
> > > @@ -2790,6 +2983,9 @@ symbol_table::compile (void)
> > > timevar_pop (TV_CGRAPHOPT);
> > >
> > > /* Output everything. */
> > > + if (split_outputs)
> > > + handle_additional_asm (childno);
> > What this is doin?
>
> Create an auxiliary file in which we will write the name of every
> assembler output. This will change, as it is a better idea to write
> them using the main process rather than the child, as Richi already
> pointed out.
>
> > > +
> > > switch_to_section (text_section);
> > > (*debug_hooks->assembly_start) ();
> > > if (!quiet_flag)
> > > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
> > > index 2cfab40156e..bc500df4853 100644
> > > --- a/gcc/ipa-fnsummary.c
> > > +++ b/gcc/ipa-fnsummary.c
> > > @@ -4610,7 +4610,7 @@ public:
> > > gcc_assert (n == 0);
> > > small_p = param;
> > > }
> > > - virtual bool gate (function *) { return true; }
> > > + virtual bool gate (function *) { return !(flag_ltrans && split_outputs); }
> > > virtual unsigned int execute (function *)
> > > {
> > > ipa_free_fn_summary ();
> > > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
> > > index 069de9d82fb..6a5657c7507 100644
> > > --- a/gcc/ipa-icf.c
> > > +++ b/gcc/ipa-icf.c
> > > @@ -2345,7 +2345,8 @@ sem_item_optimizer::filter_removed_items (void)
> > > {
> > > cgraph_node *cnode = static_cast <sem_function *>(item)->get_node ();
> > >
> > > - if (in_lto_p && (cnode->alias || cnode->body_removed))
> > > + if ((in_lto_p || split_outputs)
> > > + && (cnode->alias || cnode->body_removed))
> >
> >
> > And I wonder why you need these. IPA passes are run before we split,
> > right?
>
> This was due to an early problem. I removed this and bootstrap is
> still working.
>
> > > remove_item (item);
> > > else
> > > filtered.safe_push (item);
> > > diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c
> > > index 7c854f471e8..4d9e11482d3 100644
> > > --- a/gcc/ipa-visibility.c
> > > +++ b/gcc/ipa-visibility.c
> > > @@ -540,7 +540,8 @@ optimize_weakref (symtab_node *node)
> > > static void
> > > localize_node (bool whole_program, symtab_node *node)
> > > {
> > > - gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl));
> > > + gcc_assert (split_outputs || whole_program || in_lto_p
> > > + || !TREE_PUBLIC (node->decl));
> > >
> > > /* It is possible that one comdat group contains both hidden and non-hidden
> > > symbols. In this case we can privatize all hidden symbol but we need
> > > diff --git a/gcc/ipa.c b/gcc/ipa.c
> > > index 288b58cf73d..b397ea2fed8 100644
> > > --- a/gcc/ipa.c
> > > +++ b/gcc/ipa.c
> > > @@ -350,7 +350,7 @@ symbol_table::remove_unreachable_nodes (FILE *file)
> > >
> > > /* Mark variables that are obviously needed. */
> > > FOR_EACH_DEFINED_VARIABLE (vnode)
> > > - if (!vnode->can_remove_if_no_refs_p()
> > > + if (!vnode->can_remove_if_no_refs_p ()
> > > && !vnode->in_other_partition)
> > > {
> > > reachable.add (vnode);
> > > @@ -564,7 +564,7 @@ symbol_table::remove_unreachable_nodes (FILE *file)
> > > }
> > > else
> > > gcc_assert (node->clone_of || !node->has_gimple_body_p ()
> > > - || in_lto_p || DECL_RESULT (node->decl));
> > > + || in_lto_p || split_outputs || DECL_RESULT (node->decl));
> > > }
> > >
> > > /* Inline clones might be kept around so their materializing allows further
> > > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
> > > index 93a99f3465b..12be8546d9c 100644
> > > --- a/gcc/lto-cgraph.c
> > > +++ b/gcc/lto-cgraph.c
> > > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
> > > #include "omp-offload.h"
> > > #include "stringpool.h"
> > > #include "attribs.h"
> > > +#include "lto-partition.h"
> > >
> > > /* True when asm nodes has been output. */
> > > bool asm_nodes_output = false;
> > > @@ -2065,3 +2066,174 @@ input_cgraph_opt_summary (vec<symtab_node *> nodes)
> > > input_cgraph_opt_section (file_data, data, len, nodes);
> > > }
> > > }
> > > +
> > > +/* When analysing function for removal, we have mainly three states, as
> > > + defined below. */
> > > +
> > > +enum node_partition_state
> > > +{
> > > + CAN_REMOVE, /* This node can be removed, or is still to be
> > > + analysed. */
> > > + IN_CURRENT_PARTITION, /* This node is in current partition and should not be
> > > + touched. */
> > > + IN_BOUNDARY, /* This node is in boundary, therefore being in other
> > > + partition or is a external symbol, and its body can
> > > + be released. */
> > > + IN_BOUNDARY_KEEP_BODY /* This symbol is in other partition but we may need its
> > > + body for inlining, for instance. */
> > > +};
> > > +
> > > +/* Handle node that are in the LTRANS boundary, releasing its body and
> > > + other informations if necessary. */
> > > +
> > > +static void
> > > +handle_node_in_boundary (symtab_node *node, bool keep_body)
> > > +{
> > > + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
> > > + {
> > > + if (cnode->inlined_to && cnode->inlined_to->aux2 != IN_CURRENT_PARTITION)
> > > + {
> > > + /* If marked to be inlined into a node not in current partition,
> > > + then undo the inline. */
> > > +
> > > + if (cnode->callers) /* This edge could be removed. */
> > > + cnode->callers->inline_failed = CIF_UNSPECIFIED;
> > > + cnode->inlined_to = NULL;
> > > + }
> > > +
> > > + if (cnode->has_gimple_body_p ())
> > > + {
> > > + if (!keep_body)
> > > + {
> > > + cnode->maybe_release_dominators ();
> > > + cnode->remove_callees ();
> > > + cnode->remove_all_references ();
> > > +
> > > + /* FIXME: Releasing body of clones can release bodies of functions
> > > + in current partition. */
> > > +
> > > + /* cnode->release_body (); */
> > > + cnode->body_removed = true;
> > > + cnode->definition = false;
> > > + cnode->analyzed = false;
> > > + }
> > > + cnode->cpp_implicit_alias = false;
> > > + cnode->alias = false;
> > > + cnode->transparent_alias = false;
> > > + cnode->thunk.thunk_p = false;
> > > + cnode->weakref = false;
> > > + /* After early inlining we drop always_inline attributes on
> > > + bodies of functions that are still referenced (have their
> > > + address taken). */
> > > + DECL_ATTRIBUTES (cnode->decl)
> > > + = remove_attribute ("always_inline",
> > > + DECL_ATTRIBUTES (node->decl));
> > > +
> > > + cnode->in_other_partition = true;
> > > + }
> > > + }
> > > + else if (is_a <varpool_node *> (node) && !DECL_EXTERNAL (node->decl))
> > > + {
> > > + DECL_EXTERNAL (node->decl) = true;
> > > + node->in_other_partition = true;
> > > + }
> > > +}
> > > +
> > > +/* Check the boundary and expands it if necessary, including more nodes or
> > > + promoting then to a state where their body is required. */
> > > +
> > > +static void
> > > +compute_boundary (ltrans_partition partition)
> > > +{
> > > + vec<lto_encoder_entry> &nodes = partition->encoder->nodes;
> > > + symtab_node *node;
> > > + cgraph_node *cnode;
> > > + auto_vec<symtab_node *, 16> mark_to_remove;
> > > + unsigned int i;
> > > +
> > > + FOR_EACH_SYMBOL (node)
> > > + node->aux2 = CAN_REMOVE;
> >
> > There is boundary computation in lto-cgraph.c so we should merge the
> > logic...
>
> Agree.
>
> > If you keep the lto-partition datastructures it will compute boundary
> > for you and you can just remove the rest (I got that working at one
> > point).
>
> This is interesting, because I could not get that working out of the
> box. The lto_promote_cross_statics did not provide a fully working
> boundary that I could simple remove everything else. If you take a
> closer look, you will see that I am using the already computed boundary
> as a base, and incrementing that with the extra stuff required
> (mainly inline clones body).
>
> >
> > I am also not sure about copy-on-write effect of this. It may be better
> > to keep things around and just teach late passes to not compile things
> > in other partitions, but that is definitly for incremental change.
>
> Well, this looks like a lot of work :)
>
> > > +void
> > > +init_additional_asm_names_file (void)
> > > +{
> > > + gcc_assert (split_outputs);
> > > +
> > > + additional_asm_filenames = fopen (split_outputs, "w");
> > > + if (!additional_asm_filenames)
> > > + error ("Unable to create a temporary write-only file.");
> > > +
> > > + fclose (additional_asm_filenames);
> > > +}
> >
> > Aha, that is what it does :)
> > I wonder if creating the file conditionally (and late) can not lead to
> > race condition where we will use same tmp file name for other build
> > executed in parallel by make.
>
> Hummm. True. Added to my TODO list :)
> Well, I never had any sort of issues with race condition here, even
> after stressing it, but this certainly is not proof that it is free of
> race conditition :)
>
> > > +
> > > +/* Reinitialize the assembler file and store it in the additional asm file. */
> > > +
> > > +void
> > > +handle_additional_asm (int childno)
> > > +{
> > > + gcc_assert (split_outputs);
> > > +
> > > + if (childno < 0)
> > > + return;
> > > +
> > > + const char *temp_asm_name = make_temp_file (".s");
> > > + asm_file_name = temp_asm_name;
> > > +
> > > + if (asm_out_file == stdout)
> > > + fatal_error (UNKNOWN_LOCATION, "Unexpected asm output to stdout");
> > > +
> > > + fclose (asm_out_file);
> > > +
> > > + asm_out_file = fopen (temp_asm_name, "w");
> > > + if (!asm_out_file)
> > > + fatal_error (UNKNOWN_LOCATION, "Unable to create asm output file");
> > > +
> > > + /* Reopen file as append mode. Here we assume that write to append file is
> > > + atomic, as it is in Linux. */
> > > + additional_asm_filenames = fopen (split_outputs, "a");
> > > + if (!additional_asm_filenames)
> > > + fatal_error (UNKNOWN_LOCATION,
> > > + "Unable to open the temporary asm files container");
> > > +
> > > + fprintf (additional_asm_filenames, "%d %s\n", childno, asm_file_name);
> > > + fclose (additional_asm_filenames);
> > > +}
> > > +
> > > /* A helper function; used as the reallocator function for cpp's line
> > > table. */
> > > static void *
> > > @@ -2311,7 +2359,7 @@ do_compile ()
> > >
> > > timevar_stop (TV_PHASE_SETUP);
> > >
> > > - compile_file ();
> > > + compile_file ();
> > > }
> > > else
> > > {
> > > @@ -2477,6 +2525,12 @@ toplev::main (int argc, char **argv)
> > >
> > > finalize_plugins ();
> > >
> > > + if (jobserver_initialized)
> > > + {
> > > + jobserver_return_token (JOBSERVER_NULL_TOKEN);
> > > + jobserver_finalize ();
> > > + }
> > > +
> > > after_memory_report = true;
> > >
> > > if (seen_error () || werrorcount)
> > > diff --git a/gcc/toplev.h b/gcc/toplev.h
> > > index d6c316962b0..3abbf74cd02 100644
> > > --- a/gcc/toplev.h
> > > +++ b/gcc/toplev.h
> > > @@ -103,4 +103,7 @@ extern void parse_alignment_opts (void);
> > >
> > > extern void initialize_rtl (void);
> > >
> > > +extern void init_additional_asm_names_file (void);
> > > +extern void handle_additional_asm (int);
> > > +
> > > #endif /* ! GCC_TOPLEV_H */
> > > diff --git a/gcc/varasm.c b/gcc/varasm.c
> > > index 4070f9c17e8..84df52013d7 100644
> > > --- a/gcc/varasm.c
> > > +++ b/gcc/varasm.c
> > > @@ -110,7 +110,7 @@ static void decode_addr_const (tree, class addr_const *);
> > > static hashval_t const_hash_1 (const tree);
> > > static int compare_constant (const tree, const tree);
> > > static void output_constant_def_contents (rtx);
> > > -static void output_addressed_constants (tree);
> > > +static void output_addressed_constants (tree, int);
> > > static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
> > > unsigned int, bool, bool);
> > > static void globalize_decl (tree);
> > > @@ -2272,7 +2272,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED,
> > >
> > > /* Output any data that we will need to use the address of. */
> > > if (DECL_INITIAL (decl) && DECL_INITIAL (decl) != error_mark_node)
> > > - output_addressed_constants (DECL_INITIAL (decl));
> > > + output_addressed_constants (DECL_INITIAL (decl), 0);
> > >
> > > /* dbxout.c needs to know this. */
> > > if (sect && (sect->common.flags & SECTION_CODE) != 0)
> > > @@ -3426,11 +3426,11 @@ build_constant_desc (tree exp)
> > > already have labels. */
> > >
> > > static constant_descriptor_tree *
> > > -add_constant_to_table (tree exp)
> > > +add_constant_to_table (tree exp, int defer)
> > > {
> > > /* The hash table methods may call output_constant_def for addressed
> > > constants, so handle them first. */
> > > - output_addressed_constants (exp);
> > > + output_addressed_constants (exp, defer);
> > >
> > > /* Sanity check to catch recursive insertion. */
> > > static bool inserting;
> > > @@ -3474,7 +3474,7 @@ add_constant_to_table (tree exp)
> > > rtx
> > > output_constant_def (tree exp, int defer)
> > > {
> > > - struct constant_descriptor_tree *desc = add_constant_to_table (exp);
> > > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, defer);
> > > maybe_output_constant_def_contents (desc, defer);
> > > return desc->rtl;
> > > }
> > > @@ -3544,7 +3544,7 @@ output_constant_def_contents (rtx symbol)
> > >
> > > /* Make sure any other constants whose addresses appear in EXP
> > > are assigned label numbers. */
> > > - output_addressed_constants (exp);
> > > + output_addressed_constants (exp, 0);
> > >
> > > /* We are no longer deferring this constant. */
> > > TREE_ASM_WRITTEN (decl) = TREE_ASM_WRITTEN (exp) = 1;
> > > @@ -3608,7 +3608,7 @@ lookup_constant_def (tree exp)
> > > tree
> > > tree_output_constant_def (tree exp)
> > > {
> > > - struct constant_descriptor_tree *desc = add_constant_to_table (exp);
> > > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, 1);
> > > tree decl = SYMBOL_REF_DECL (XEXP (desc->rtl, 0));
> > > varpool_node::finalize_decl (decl);
> > > return decl;
> > > @@ -4327,7 +4327,7 @@ compute_reloc_for_constant (tree exp)
> > > Indicate whether an ADDR_EXPR has been encountered. */
> > >
> > > static void
> > > -output_addressed_constants (tree exp)
> > > +output_addressed_constants (tree exp, int defer)
> > > {
> > > tree tem;
> > >
> > > @@ -4347,21 +4347,21 @@ output_addressed_constants (tree exp)
> > > tem = DECL_INITIAL (tem);
> > >
> > > if (CONSTANT_CLASS_P (tem) || TREE_CODE (tem) == CONSTRUCTOR)
> > > - output_constant_def (tem, 0);
> > > + output_constant_def (tem, defer);
> > >
> > > if (TREE_CODE (tem) == MEM_REF)
> > > - output_addressed_constants (TREE_OPERAND (tem, 0));
> > > + output_addressed_constants (TREE_OPERAND (tem, 0), defer);
> > > break;
> > >
> > > case PLUS_EXPR:
> > > case POINTER_PLUS_EXPR:
> > > case MINUS_EXPR:
> > > - output_addressed_constants (TREE_OPERAND (exp, 1));
> > > + output_addressed_constants (TREE_OPERAND (exp, 1), defer);
> > > gcc_fallthrough ();
> > >
> > > CASE_CONVERT:
> > > case VIEW_CONVERT_EXPR:
> > > - output_addressed_constants (TREE_OPERAND (exp, 0));
> > > + output_addressed_constants (TREE_OPERAND (exp, 0), defer);
> > > break;
> > >
> > > case CONSTRUCTOR:
> > > @@ -4369,7 +4369,7 @@ output_addressed_constants (tree exp)
> > > unsigned HOST_WIDE_INT idx;
> > > FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, tem)
> > > if (tem != 0)
> > > - output_addressed_constants (tem);
> > > + output_addressed_constants (tem, defer);
> > > }
> > > break;
> >
> > Nice job :)
> > Honza
>
> Thank you,
> Giuliano.
More information about the Gcc-patches
mailing list