This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/n] OpenMP 4.0 offloading infrastructure: LTO streaming


> 2014-09-27  Ilya Verbin  <ilya.verbin@intel.com>
> 	    Ilya Tocar  <ilya.tocar@intel.com>
> 	    Andrey Turetskiy  <andrey.turetskiy@intel.com>
> 	    Bernd Schmidt  <bernds@codesourcery.com>
> gcc/
> 	* cgraph.h (symtab_node): Add need_dump flag.
> 	* cgraphunit.c: Include lto-section-names.h.
> 	(initialize_offload): New function.
> 	(ipa_passes): Initialize offload and call ipa_write_summaries if there
> 	is something to write to OMP_SECTION_NAME_PREFIX sections.
> 	(symbol_table::compile): Call lto_streamer_hooks_init under flag_openmp.
> 	* ipa-inline-analysis.c (inline_generate_summary): Do not exit under
> 	flag_openmp.
> 	(inline_free_summary): Always remove hooks.
> 	* lto-cgraph.c (lto_set_symtab_encoder_in_partition): Exit if there is
> 	no need to encode the node.
> 	(referenced_from_other_partition_p, reachable_from_other_partition_p):
> 	Ignore references from non-target functions to target functions if we
> 	are streaming out target-side bytecode (offload lto mode).
> 	(select_what_to_dump): New function.
> 	* lto-section-names.h (OMP_SECTION_NAME_PREFIX): Define.
> 	(section_name_prefix): Declare.
> 	* lto-streamer.c (offload_lto_mode): New variable.
> 	(section_name_prefix): New variable.
> 	(lto_get_section_name): Use section_name_prefix instead of
> 	LTO_SECTION_NAME_PREFIX.
> 	* lto-streamer.h (select_what_to_dump): Declare.
> 	(offload_lto_mode): Declare.
> 	* omp-low.c (is_targetreg_ctx): New function.
> 	(create_omp_child_function, check_omp_nesting_restrictions): Use it.
> 	(expand_omp_target): Set mark_force_output for the target functions.
> 	(lower_omp_critical): Add target attribute for omp critical symbol.
> 	* passes.c (ipa_write_summaries): Call select_what_to_dump.
> gcc/lto/
> 	* lto-object.c (lto_obj_add_section): Use section_name_prefix instead of
> 	LTO_SECTION_NAME_PREFIX.
> 	* lto-partition.c (add_symbol_to_partition_1): Always set
> 	node->need_dump to true.
> 	(lto_promote_cross_file_statics): Call select_what_to_dump.
> 	* lto.c (lto_section_with_id): Use section_name_prefix instead of
> 	LTO_SECTION_NAME_PREFIX.
> 	(read_cgraph_and_symbols): Read OMP_SECTION_NAME_PREFIX sections, if
> 	being built as an offload compiler.
> 
> Thanks,
>   -- Ilya
> 
> ---
> 
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 7481906..9ab970d 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -444,6 +444,11 @@ public:
>    /* Set when init priority is set.  */
>    unsigned in_init_priority_hash : 1;
>  
> +  /* Set when symbol needs to be dumped into LTO bytecode for LTO,
> +     or in pragma omp target case, for separate compilation targeting
> +     a different architecture.  */
> +  unsigned need_dump : 1;

To me, "dump" implies a debug dump. LTO output is usually called streaming, so perhaps
need_lto_streaming?

> +/* Check whether there is at least one function or global variable to offload.
> +   */
> +
> +static bool
> +initialize_offload (void)

Perhaps have_offload_p? Nothing is initialized here...
> +{
> +  bool have_offload = false;
> +  struct cgraph_node *node;
> +  struct varpool_node *vnode;
> +
> +  FOR_EACH_DEFINED_FUNCTION (node)
> +    if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (node->decl)))
> +      {
> +	have_offload = true;
> +	break;
> +      }
> +
> +  FOR_EACH_DEFINED_VARIABLE (vnode)
> +    {
> +      if (!lookup_attribute ("omp declare target",
> +			     DECL_ATTRIBUTES (vnode->decl))
> +	  || TREE_CODE (vnode->decl) != VAR_DECL
> +	  || DECL_SIZE (vnode->decl) == 0)
> +	continue;
> +      have_offload = true;
> +    }
> +
> +  return have_offload;
> +}
> +
>  static void
>  ipa_passes (void)
>  {
> +  bool have_offload = false;
>    gcc::pass_manager *passes = g->get_passes ();
>  
>    set_cfun (NULL);
> @@ -2004,6 +2036,14 @@ ipa_passes (void)
>    gimple_register_cfg_hooks ();
>    bitmap_obstack_initialize (NULL);
>  
> +  if (!in_lto_p && flag_openmp)
> +    {
> +      have_offload = initialize_offload ();
> +      /* OpenMP offloading requires LTO infrastructure.  */
> +      if (have_offload)
> +	flag_generate_lto = 1;
> +    }
> +
>    invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL);
>  
>    if (!in_lto_p)
> @@ -2041,7 +2081,20 @@ ipa_passes (void)
>      targetm.asm_out.lto_start ();
>  
>    if (!in_lto_p)
> -    ipa_write_summaries ();
> +    {
> +      if (have_offload)
> +	{
> +	  offload_lto_mode = true;
> +	  section_name_prefix = OMP_SECTION_NAME_PREFIX;
> +	  ipa_write_summaries ();
> +	}
> +      if (flag_lto)
> +	{
> +	  offload_lto_mode = false;
> +	  section_name_prefix = LTO_SECTION_NAME_PREFIX;
> +	  ipa_write_summaries ();
> +	}

How does LTO combine with offloading?
> @@ -4325,11 +4325,6 @@ void
>  inline_free_summary (void)
>  {
>    struct cgraph_node *node;
> -  if (!inline_edge_summary_vec.exists ())
> -    return;
> -  FOR_EACH_DEFINED_FUNCTION (node)
> -    if (!node->alias)
> -      reset_inline_summary (node);
>    if (function_insertion_hook_holder)
>      symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder);
>    function_insertion_hook_holder = NULL;
> @@ -4345,6 +4340,11 @@ inline_free_summary (void)
>    if (edge_duplication_hook_holder)
>      symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
>    edge_duplication_hook_holder = NULL;
> +  if (!inline_edge_summary_vec.exists ())
> +    return;
> +  FOR_EACH_DEFINED_FUNCTION (node)
> +    if (!node->alias)
> +      reset_inline_summary (node);

Why is this needed?
> diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
> index 0584946..78b7fc8 100644
> --- a/gcc/lto-cgraph.c
> +++ b/gcc/lto-cgraph.c
> @@ -239,6 +239,9 @@ void
>  lto_set_symtab_encoder_in_partition (lto_symtab_encoder_t encoder,
>  				     symtab_node *node)
>  {
> +  /* Ignore not needed nodes.  */
> +  if (!node->need_dump)
> +    return;

I think this should be done on the caller side (in the loop setting what to output)
rather than in this simple data-structure accessor.

>    int index = lto_symtab_encoder_encode (encoder, node);
>    encoder->nodes[index].in_partition = true;
>  }
> @@ -321,6 +324,12 @@ referenced_from_other_partition_p (symtab_node *node, lto_symtab_encoder_t encod
>  
>    for (i = 0; node->iterate_referring (i, ref); i++)
>      {
> +      /* Ignore references from non-target functions in offload lto mode.  */
> +      if (offload_lto_mode
> +	  && !lookup_attribute ("omp declare target",
> +				DECL_ATTRIBUTES (ref->referring->decl)))
> +	continue;

Those are quite busy loops; you may consider making offload a flag.  Why can't you test
need_dump here?

I think you also need to run free lang data when you decide to stream something.

Otherwise the cgraph bits seem reasonable. I think Richi will want to comment on the LTO
part.
Honza


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]