This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 3/n] OpenMP 4.0 offloading infrastructure: offload tables


On Wed, 5 Nov 2014, Ilya Verbin wrote:

> On 08 Oct 11:23, Jakub Jelinek wrote:
> > LGTM, with the requested var/section renames.
> > Would like if Honza and/or Richard had a look at the cgraph/LTO stuff
> > in the patch though.
> 
> Since patch 2 was updated, this patch also should be updated.
> Now the offload_vars array is filled in varpool_node::get_create .
> 
> Richard, is it OK for trunk?

The LTO parts are ok for trunk.

Thanks,
Richard.

> Thanks,
>   -- Ilya
> 
> 
> ---
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index f31af05..3db30bf 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -2303,6 +2303,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
>    $(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \
>    $(srcdir)/tree-parloops.c \
>    $(srcdir)/omp-low.c \
> +  $(srcdir)/omp-low.h \
>    $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
>    $(srcdir)/cgraphclones.c \
>    $(srcdir)/tree-phinodes.c \
> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> index 83ab419..bafbadb 100644
> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -218,6 +218,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-nested.h"
>  #include "gimplify.h"
>  #include "dbgcnt.h"
> +#include "omp-low.h"
>  #include "lto-section-names.h"
>  
>  /* Queue of cgraph nodes scheduled to be added into cgraph.  This is a
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 5036d4f..6a5a031 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11205,6 +11205,12 @@ If defined, this function returns an appropriate alignment in bits for an atomic
>  ISO C11 requires atomic compound assignments that may raise floating-point exceptions to raise exceptions corresponding to the arithmetic operation whose result was successfully stored in a compare-and-exchange sequence.  This requires code equivalent to calls to @code{feholdexcept}, @code{feclearexcept} and @code{feupdateenv} to be generated at appropriate points in the compare-and-exchange sequence.  This hook should set @code{*@var{hold}} to an expression equivalent to the call to @code{feholdexcept}, @code{*@var{clear}} to an expression equivalent to the call to @code{feclearexcept} and @code{*@var{update}} to an expression equivalent to the call to @code{feupdateenv}.  The three expressions are @code{NULL_TREE} on entry to the hook and may be left as @code{NULL_TREE} if no code is required in a particular place.  The default implementation leaves all three expressions as @code{NULL_TREE}.  The @code{__atomic_feraiseexcept} function from @code{libatomic} may be of use as part 
 of the code generated in @code{*@var{update}}.
>  @end deftypefn
>  
> +@deftypefn {Target Hook} void TARGET_RECORD_OFFLOAD_SYMBOL (tree)
> +Used when offloaded functions are seen in the compilation unit and no named
> +sections are available.  It is called once for each symbol that must be
> +recorded in the offload function and variable table.
> +@end deftypefn
> +
>  @defmac TARGET_SUPPORTS_WIDE_INT
>  
>  On older ports, large integers are stored in @code{CONST_DOUBLE} rtl
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 5674e6c..cadf05d 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8167,6 +8167,8 @@ and the associated definitions of those functions.
>  
>  @hook TARGET_ATOMIC_ASSIGN_EXPAND_FENV
>  
> +@hook TARGET_RECORD_OFFLOAD_SYMBOL
> +
>  @defmac TARGET_SUPPORTS_WIDE_INT
>  
>  On older ports, large integers are stored in @code{CONST_DOUBLE} rtl
> diff --git a/gcc/gengtype.c b/gcc/gengtype.c
> index e48b448..06c37d5 100644
> --- a/gcc/gengtype.c
> +++ b/gcc/gengtype.c
> @@ -1843,7 +1843,7 @@ open_base_files (void)
>        "tree-ssa.h", "reload.h", "cpp-id-data.h", "tree-chrec.h",
>        "except.h", "output.h",  "cfgloop.h",
>        "target.h", "ipa-prop.h", "lto-streamer.h", "target-globals.h",
> -      "ipa-inline.h", "dwarf2out.h", NULL
> +      "ipa-inline.h", "dwarf2out.h", "omp-low.h", NULL
>      };
>      const char *const *ifp;
>      outf_p gtype_desc_c;
> diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
> index 45655ba..6c442cf 100644
> --- a/gcc/lto-cgraph.c
> +++ b/gcc/lto-cgraph.c
> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "context.h"
>  #include "pass_manager.h"
>  #include "ipa-utils.h"
> +#include "omp-low.h"
>  
>  /* True when asm nodes has been output.  */
>  bool asm_nodes_output = false;
> @@ -1057,6 +1058,49 @@ read_string (struct lto_input_block *ib)
>    return str;
>  }
>  
> +/* Output function/variable tables that will allow libgomp to look up offload
> +   target code.  OFFLOAD_FUNCS is filled in expand_omp_target, OFFLOAD_VARS is
> +   filled in ipa_passes.  In WHOPR (partitioned) mode during the WPA stage both
> +   OFFLOAD_FUNCS and OFFLOAD_VARS are filled by input_offload_tables.  */
> +
> +void
> +output_offload_tables (void)
> +{
> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
> +    return;
> +
> +  struct lto_simple_output_block *ob
> +    = lto_create_simple_output_block (LTO_section_offload_table);
> +
> +  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
> +    {
> +      streamer_write_enum (ob->main_stream, LTO_symtab_tags,
> +			   LTO_symtab_last_tag, LTO_symtab_unavail_node);
> +      lto_output_fn_decl_index (ob->decl_state, ob->main_stream,
> +				(*offload_funcs)[i]);
> +    }
> +
> +  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
> +    {
> +      streamer_write_enum (ob->main_stream, LTO_symtab_tags,
> +			   LTO_symtab_last_tag, LTO_symtab_variable);
> +      lto_output_var_decl_index (ob->decl_state, ob->main_stream,
> +				 (*offload_vars)[i]);
> +    }
> +
> +  streamer_write_uhwi_stream (ob->main_stream, 0);
> +  lto_destroy_simple_output_block (ob);
> +
> +  /* In WHOPR mode during the WPA stage the joint offload tables need to be
> +     streamed to one partition only.  That's why we free offload_funcs and
> +     offload_vars after the first call of output_offload_tables.  */
> +  if (flag_wpa)
> +    {
> +      vec_free (offload_funcs);
> +      vec_free (offload_vars);
> +    }
> +}
> +
>  /* Overwrite the information in NODE based on FILE_DATA, TAG, FLAGS,
>     STACK_SIZE, SELF_TIME and SELF_SIZE.  This is called either to initialize
>     NODE or to replace the values in it, for instance because the first
> @@ -1756,6 +1800,55 @@ input_symtab (void)
>      }
>  }
>  
> +/* Input function/variable tables that will allow libgomp to look up offload
> +   target code, and store them into OFFLOAD_FUNCS and OFFLOAD_VARS.  */
> +
> +void
> +input_offload_tables (void)
> +{
> +  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
> +  struct lto_file_decl_data *file_data;
> +  unsigned int j = 0;
> +
> +  while ((file_data = file_data_vec[j++]))
> +    {
> +      const char *data;
> +      size_t len;
> +      struct lto_input_block *ib
> +	= lto_create_simple_input_block (file_data, LTO_section_offload_table,
> +					 &data, &len);
> +      if (!ib)
> +	continue;
> +
> +      enum LTO_symtab_tags tag
> +	= streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
> +      while (tag)
> +	{
> +	  if (tag == LTO_symtab_unavail_node)
> +	    {
> +	      int decl_index = streamer_read_uhwi (ib);
> +	      tree fn_decl
> +		= lto_file_decl_data_get_fn_decl (file_data, decl_index);
> +	      vec_safe_push (offload_funcs, fn_decl);
> +	    }
> +	  else if (tag == LTO_symtab_variable)
> +	    {
> +	      int decl_index = streamer_read_uhwi (ib);
> +	      tree var_decl
> +		= lto_file_decl_data_get_var_decl (file_data, decl_index);
> +	      vec_safe_push (offload_vars, var_decl);
> +	    }
> +	  else
> +	    fatal_error ("invalid offload table in %s", file_data->file_name);
> +
> +	  tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
> +	}
> +
> +      lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
> +				      ib, data, len);
> +    }
> +}
> +
>  /* True when we need optimization summary for NODE.  */
>  
>  static int
> diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c
> index 2485da9..83c8166 100644
> --- a/gcc/lto-section-in.c
> +++ b/gcc/lto-section-in.c
> @@ -66,7 +66,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
>    "cgraphopt",
>    "inline",
>    "ipcp_trans",
> -  "icf"
> +  "icf",
> +  "offload_table"
>  };
>  
>  
> diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h
> index f5dbed2..d84deac 100644
> --- a/gcc/lto-section-names.h
> +++ b/gcc/lto-section-names.h
> @@ -35,4 +35,7 @@ extern const char *section_name_prefix;
>  
>  #define LTO_SEGMENT_NAME "__GNU_LTO"
>  
> +#define OFFLOAD_VAR_TABLE_SECTION_NAME ".gnu.offload_vars"
> +#define OFFLOAD_FUNC_TABLE_SECTION_NAME ".gnu.offload_funcs"
> +
>  #endif /* GCC_LTO_SECTION_NAMES_H */
> diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
> index 39931e3..6d99439 100644
> --- a/gcc/lto-streamer-out.c
> +++ b/gcc/lto-streamer-out.c
> @@ -2304,6 +2304,8 @@ lto_output (void)
>       statements using the statement UIDs.  */
>    output_symtab ();
>  
> +  output_offload_tables ();
> +
>  #ifdef ENABLE_CHECKING
>    lto_bitmap_free (output);
>  #endif
> diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
> index 0b3fb6a..3f9a8a9 100644
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -248,6 +248,7 @@ enum lto_section_type
>    LTO_section_inline_summary,
>    LTO_section_ipcp_transform,
>    LTO_section_ipa_icf,
> +  LTO_section_offload_table,
>    LTO_N_SECTION_TYPES		/* Must be last.  */
>  };
>  
> @@ -823,6 +824,8 @@ bool lto_symtab_encoder_encode_initializer_p (lto_symtab_encoder_t,
>  					      varpool_node *);
>  void output_symtab (void);
>  void input_symtab (void);
> +void output_offload_tables (void);
> +void input_offload_tables (void);
>  bool referenced_from_other_partition_p (struct ipa_ref_list *,
>  				        lto_symtab_encoder_t);
>  bool reachable_from_other_partition_p (struct cgraph_node *,
> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> index 0451e71..06887a8 100644
> --- a/gcc/lto/lto.c
> +++ b/gcc/lto/lto.c
> @@ -3024,6 +3024,8 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames)
>    /* Read the symtab.  */
>    input_symtab ();
>  
> +  input_offload_tables ();
> +
>    /* Store resolutions into the symbol table.  */
>  
>    ld_plugin_symbol_resolution_t *res;
> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> index 1404b5e..79e6ab3 100644
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -73,6 +73,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "optabs.h"
>  #include "cfgloop.h"
>  #include "target.h"
> +#include "common/common-target.h"
>  #include "omp-low.h"
>  #include "gimple-low.h"
>  #include "tree-cfgcleanup.h"
> @@ -82,6 +83,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-eh.h"
>  #include "cilk.h"
>  #include "context.h"
> +#include "lto-section-names.h"
>  
>  
>  /* Lowering of OpenMP parallel and workshare constructs proceeds in two
> @@ -230,6 +232,9 @@ static tree scan_omp_1_op (tree *, int *, void *);
>        *handled_ops_p = false; \
>        break;
>  
> +/* Holds offload tables with decls.  */
> +vec<tree, va_gc> *offload_funcs, *offload_vars;
> +
>  /* Convenience function for calling scan_omp_1_op on tree operands.  */
>  
>  static inline tree
> @@ -8406,6 +8411,9 @@ expand_omp_target (struct omp_region *region)
>        DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
>        cgraph_node::add_new_function (child_fn, true);
>  
> +      /* Add the new function to the offload table.  */
> +      vec_safe_push (offload_funcs, child_fn);
> +
>        /* Fix the callgraph edges for child_cfun.  Those for cfun will be
>  	 fixed in a following pass.  */
>        push_cfun (child_cfun);
> @@ -12376,4 +12384,91 @@ make_pass_omp_simd_clone (gcc::context *ctxt)
>    return new pass_omp_simd_clone (ctxt);
>  }
>  
> +/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
> +   adds their addresses and sizes to constructor-vector V_CTOR.  */
> +static void
> +add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
> +					 vec<constructor_elt, va_gc> *v_ctor)
> +{
> +  unsigned len = vec_safe_length (v_decls);
> +  for (unsigned i = 0; i < len; i++)
> +    {
> +      tree it = (*v_decls)[i];
> +      bool is_function = TREE_CODE (it) != VAR_DECL;
> +
> +      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, build_fold_addr_expr (it));
> +      if (!is_function)
> +	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE,
> +				fold_convert (const_ptr_type_node,
> +					      DECL_SIZE_UNIT (it)));
> +    }
> +}
> +
> +/* Create new symbols containing (address, size) pairs for global variables,
> +   marked with "omp declare target" attribute, as well as addresses for the
> +   functions, which are outlined target regions.  */
> +void
> +omp_finish_file (void)
> +{
> +  unsigned num_funcs = vec_safe_length (offload_funcs);
> +  unsigned num_vars = vec_safe_length (offload_vars);
> +
> +  if (num_funcs == 0 && num_vars == 0)
> +    return;
> +
> +  if (targetm_common.have_named_sections)
> +    {
> +      vec<constructor_elt, va_gc> *v_f, *v_v;
> +      vec_alloc (v_f, num_funcs);
> +      vec_alloc (v_v, num_vars * 2);
> +
> +      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
> +      add_decls_addresses_to_decl_constructor (offload_vars, v_v);
> +
> +      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
> +						    num_vars * 2);
> +      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
> +						     num_funcs);
> +      TYPE_ALIGN (vars_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
> +      TYPE_ALIGN (funcs_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
> +      tree ctor_v = build_constructor (vars_decl_type, v_v);
> +      tree ctor_f = build_constructor (funcs_decl_type, v_f);
> +      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
> +      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
> +      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +				    get_identifier (".offload_func_table"),
> +				    funcs_decl_type);
> +      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +				   get_identifier (".offload_var_table"),
> +				   vars_decl_type);
> +      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
> +      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
> +	 otherwise a joint table in a binary will contain padding between
> +	 tables from multiple object files.  */
> +      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
> +      DECL_ALIGN (funcs_decl) = TYPE_ALIGN (funcs_decl_type);
> +      DECL_ALIGN (vars_decl) = TYPE_ALIGN (vars_decl_type);
> +      DECL_INITIAL (funcs_decl) = ctor_f;
> +      DECL_INITIAL (vars_decl) = ctor_v;
> +      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
> +      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
> +
> +      varpool_node::finalize_decl (vars_decl);
> +      varpool_node::finalize_decl (funcs_decl);
> +   }
> +  else
> +    {
> +      for (unsigned i = 0; i < num_funcs; i++)
> +	{
> +	  tree it = (*offload_funcs)[i];
> +	  targetm.record_offload_symbol (it);
> +	}
> +      for (unsigned i = 0; i < num_vars; i++)
> +	{
> +	  tree it = (*offload_vars)[i];
> +	  targetm.record_offload_symbol (it);
> +	}
> +    }
> +}
> +
>  #include "gt-omp-low.h"
> diff --git a/gcc/omp-low.h b/gcc/omp-low.h
> index d80c2d6..ac587d0 100644
> --- a/gcc/omp-low.h
> +++ b/gcc/omp-low.h
> @@ -27,5 +27,9 @@ extern void omp_expand_local (basic_block);
>  extern void free_omp_regions (void);
>  extern tree omp_reduction_init (tree, tree);
>  extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
> +extern void omp_finish_file (void);
> +
> +extern GTY(()) vec<tree, va_gc> *offload_funcs;
> +extern GTY(()) vec<tree, va_gc> *offload_vars;
>  
>  #endif /* GCC_OMP_LOW_H */
> diff --git a/gcc/target.def b/gcc/target.def
> index 4d90fc2..290a466 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -1760,6 +1760,14 @@ HOOK_VECTOR_END (vectorize)
>  #undef HOOK_PREFIX
>  #define HOOK_PREFIX "TARGET_"
>  
> +DEFHOOK
> +(record_offload_symbol,
> + "Used when offloaded functions are seen in the compilation unit and no named\n\
> +sections are available.  It is called once for each symbol that must be\n\
> +recorded in the offload function and variable table.",
> + void, (tree),
> + hook_void_tree)
> +
>  /* Allow target specific overriding of option settings after options have
>    been changed by an attribute or pragma or when it is reset at the
>    end of the code affected by an attribute or pragma.  */
> diff --git a/gcc/toplev.c b/gcc/toplev.c
> index 7d8cefa..d9d325b 100644
> --- a/gcc/toplev.c
> +++ b/gcc/toplev.c
> @@ -91,6 +91,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "ipa-prop.h"
>  #include "gcse.h"
>  #include "optabs.h"
> +#include "omp-low.h"
>  
>  #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
>  #include "dbxout.h"
> @@ -591,6 +592,8 @@ compile_file (void)
>        if (flag_sanitize & SANITIZE_THREAD)
>  	tsan_finish_file ();
>  
> +      omp_finish_file ();
> +
>        output_shared_constant_pool ();
>        output_object_blocks ();
>        finish_tm_clone_pairs ();
> diff --git a/gcc/varpool.c b/gcc/varpool.c
> index c508bf9..cd3710d 100644
> --- a/gcc/varpool.c
> +++ b/gcc/varpool.c
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "lto-streamer.h"
>  #include "hash-set.h"
>  #include "context.h"
> +#include "omp-low.h"
>  
>  const char * const tls_model_names[]={"none", "tls-emulated", "tls-real",
>  				      "tls-global-dynamic", "tls-local-dynamic",
> @@ -162,6 +163,8 @@ varpool_node::get_create (tree decl)
>      {
>        node->offloadable = 1;
>        g->have_offload = true;
> +      if (!in_lto_p)
> +	vec_safe_push (offload_vars, decl);
>      }
>  
>    node->register_symbol ();
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Jeff Hawn, Jennifer Guild, Felix Imendoerffer, HRB 21284
(AG Nuernberg)
Maxfeldstrasse 5, 90409 Nuernberg, Germany

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]