diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1a12eda..ff1533c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2013-09-24 Aldy Hernandez + + * Makefile.in (omp-low.o): Depend on PRETTY_PRINT_H. + * ipa-cp.c (determine_versionability): Nodes with SIMD clones are + not versionable. + * ggc.h (ggc_alloc_cleared_simd_clone_stat): New. + * cgraph.h (enum linear_stride_type): New. + (struct simd_clone_arg): New. + (struct simd_clone): New. + (struct cgraph_node): Add `simdclone' field. + Add `has_simd_clones' field. + * omp-low.c: Add new pass_omp_simd_clone support code. + (vecsize_mangle): New. + (ipa_omp_simd_clone, expand_simd_clones): New. + (simd_clone_clauses_extract): New. + (simd_clone_compute_base_data_type): New. + (simd_clone_compute_isa_and_simdlen): New. + (simd_clone_create): New. + (simd_clone_mangle): New. + (simd_clone_struct_alloc): New. + (simd_clone_struct_copy): New. + (class argno_map): New. + (argno_map::argno_map(tree)): New. + (argno_map::~argno_map): New. + (argno_map::to_tree): New. + * tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): New. + * tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document. + * tree-pass.h (make_pass_omp_simd_clone): New. + * passes.def (pass_omp_simd_clone): New. + * target.def: Define new hook prefix "TARGET_CILKPLUS_". + (default_vector_mangling_isa_code): New. + (max_vector_size_for_isa): New. + * doc/tm.texi.in: Add placeholder for + TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE, + TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA. + * doc/tm.texi: Regenerate. + * config/i386/i386.c (ix86_cilkplus_default_vector_mangling_isa_code): + New. + (ix86_cilkplus_max_vector_size_for_isa): New. + (TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE): Define. + (TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA): Define. 
+ 2013-09-19 Jakub Jelinek PR tree-optimization/58472 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c006711..4fc7e48 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2573,6 +2573,7 @@ omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(RTL_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_CORE_H) \ $(TREE_SSA_H) $(FLAGS_H) $(EXPR_H) $(DIAGNOSTIC_CORE_H) \ $(TREE_PASS_H) $(GGC_H) $(EXCEPT_H) $(SPLAY_TREE_H) $(OPTABS_H) \ + $(PRETTY_PRINT_H) \ $(CFGLOOP_H) tree-iterator.h $(TARGET_H) gt-omp-low.h tree-browser.o : tree-browser.c tree-browser.def $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(HASH_TABLE_H) $(TREE_H) $(TREE_PRETTY_PRINT_H) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 50e8743..3db29cd 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -248,6 +248,68 @@ struct GTY(()) cgraph_clone_info bitmap combined_args_to_skip; }; +enum linear_stride_type { + LINEAR_STRIDE_NO, + LINEAR_STRIDE_YES_CONSTANT, + LINEAR_STRIDE_YES_VARIABLE +}; + +/* Function arguments in the original function of a SIMD clone. + Supplementary data for `struct simd_clone'. */ + +struct GTY(()) simd_clone_arg { + /* A SIMD clone's argument can be either linear (constant or + variable), uniform, or vector. If the argument is neither linear + nor uniform, the default is vector. */ + + /* If the linear stride is a constant, `linear_stride' is + LINEAR_STRIDE_YES_CONSTANT, and `linear_stride_num' holds + the numeric stride. + + If the linear stride is variable, `linear_stride' is + LINEAR_STRIDE_YES_VARIABLE, and `linear_stride_num' contains + the function argument containing the stride (as an index into the + function arguments starting at 0). + + Otherwise, `linear_stride' is LINEAR_STRIDE_NO and + `linear_stride_num' is unused. */ + enum linear_stride_type linear_stride; + unsigned HOST_WIDE_INT linear_stride_num; + + /* Variable alignment if available, otherwise 0. */ + unsigned int alignment; + + /* True if variable is uniform. 
*/ + unsigned int uniform : 1; +}; + +/* Specific data for a SIMD function clone. */ + +struct GTY(()) simd_clone { + /* Number of SIMD lanes of this clone (`simdlen' clause, if given). */ + unsigned int simdlen; + + /* Number of annotated function arguments in `args'. This is + usually the number of named arguments in FNDECL. */ + unsigned int nargs; + + /* Max hardware vector size in bits. */ + unsigned int hw_vector_size; + + /* ISA character used in mangling (e.g. 'x'); 0 if not yet set. */ + unsigned char isa; + + /* True if this is the masked, in-branch version of the clone, + otherwise false. */ + unsigned int inbranch : 1; + + /* True if this is a Cilk Plus variant. */ + unsigned int cilk_elemental : 1; + + /* Annotated function arguments for the original function. */ + struct simd_clone_arg GTY((length ("%h.nargs"))) args[1]; +}; + /* The cgraph data structure. Each function decl has assigned cgraph_node listing callees and callers. */ @@ -282,6 +344,10 @@ struct GTY(()) cgraph_node { /* Declaration node used to be clone of. */ tree former_clone_of; + /* If this is a SIMD clone, this points to the SIMD specific + information for it. */ + struct simd_clone *simdclone; + /* Interprocedural passes scheduled to have their transform functions applied next time we execute local pass on them. We maintain it per-function in order to allow IPA passes to introduce new functions. */ @@ -323,6 +389,8 @@ struct GTY(()) cgraph_node { /* ?? We should be able to remove this. We have enough bits in cgraph to calculate it. */ unsigned tm_clone : 1; + /* True if this function has SIMD clones. */ + unsigned has_simd_clones : 1; /* True if this decl is a dispatcher for function versions. 
*/ unsigned dispatcher_function : 1; }; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 46c37d8..2d687cd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -42806,6 +42806,43 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) return val; } +/* Return the default vector mangling ISA code when none is specified + in a `processor' clause. */ + +static char +ix86_cilkplus_default_vector_mangling_isa_code (struct cgraph_node *clone + ATTRIBUTE_UNUSED) +{ + return 'x'; +} + +/* Return the maximum hardware vector size (in bits) for a given ISA. + ISA is an ISA character as specified in Intel's Vector ABI (section + on mangling). */ + +static unsigned int +ix86_cilkplus_max_vector_size_for_isa (char isa) +{ + /* ?? Intel currently has no ISA encoding character for AVX-512. */ + switch (isa) + { + case 'x': + /* xmm (SSE2). */ + return 128; + case 'y': + /* ymm1 (AVX1); falls through, same width as AVX2. */ + case 'Y': + /* ymm2 (AVX2). */ + return 256; + case 'z': + /* zmm (MIC). */ + return 512; + default: + gcc_unreachable (); + return 0; + } +} + /* Initialize the GCC target structure. */ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory @@ -43178,6 +43215,14 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_SPILL_CLASS #define TARGET_SPILL_CLASS ix86_spill_class +#undef TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE +#define TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE \ + ix86_cilkplus_default_vector_mangling_isa_code + +#undef TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA +#define TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA \ + ix86_cilkplus_max_vector_size_for_isa + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8d220f3..917c742 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5787,6 +5787,26 @@ The default is @code{NULL_TREE} which means to not vectorize gather loads. 
@end deftypefn +@deftypefn {Target Hook} char TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE (struct cgraph_node *@var{}) +This hook should return the default vector mangling ISA code when none +is specified in a Cilk Plus @code{processor} clause. This is as specified +in the Intel Vector ABI document. + +This hook, as well as @code{max_vector_size_for_isa} below must be set +to support the Cilk Plus @code{processor} clause. + +The only argument is a @var{cgraph_node} containing the clone. +@end deftypefn + +@deftypefn {Target Hook} {unsigned int} TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA (char) +This hook returns the maximum hardware vector size in bits for a given +@var{ISA} character. The @var{ISA} character is as described in Intel's +Vector ABI (see section on mangling). + +This hook must be defined in order to support the Cilk Plus @code{processor} +clause. +@end deftypefn + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 863e843a..fbc6095 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4414,6 +4414,10 @@ address; but often a machine-dependent strategy can generate better code. 
@hook TARGET_VECTORIZE_BUILTIN_GATHER +@hook TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE + +@hook TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses diff --git a/gcc/ggc.h b/gcc/ggc.h index b31bc80..eee90c6 100644 --- a/gcc/ggc.h +++ b/gcc/ggc.h @@ -276,4 +276,11 @@ ggc_alloc_cleared_gimple_statement_d_stat (size_t s MEM_STAT_DECL) ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT); } +static inline struct simd_clone * +ggc_alloc_cleared_simd_clone_stat (size_t s MEM_STAT_DECL) +{ + return (struct simd_clone *) + ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT); +} + #endif diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 56b27b2..a04ee90 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -446,6 +446,13 @@ determine_versionability (struct cgraph_node *node) reason = "not a tree_versionable_function"; else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) reason = "insufficient body availability"; + else if (node->has_simd_clones) + { + /* Ideally we should clone the SIMD clones themselves and create + vector copies of them, so IPA-cp and SIMD clones can happily + coexist, but that may not be worth the effort. */ + reason = "function has SIMD clones"; + } if (reason && dump_file && !node->symbol.alias && !node->thunk.thunk_p) fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n", diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 2d7898f..dbe4afd 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "optabs.h" #include "cfgloop.h" #include "target.h" +#include "pretty-print.h" /* Lowering of OpenMP parallel and workshare constructs proceeds in two @@ -10287,5 +10288,449 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt) { return new pass_diagnose_omp_blocks (ctxt); } + +/* SIMD clone support for functions marked "omp declare simd". */ + +/* A map for function arguments. 
This will map a zero-based integer
+   to the corresponding index into DECL_ARGUMENTS.  */
+class argno_map
+{
+  vec<tree> tree_args;
+ public:
+  /* Default constructor declared but not implemented by design.  The
+     only valid constructor is TREE version below.  */
+  argno_map ();
+  argno_map (tree fndecl);
+
+  ~argno_map () { tree_args.release (); }
+  tree to_tree (int n);
+};
+
+/* FNDECL is the function containing the arguments.  */
+
+argno_map::argno_map (tree fndecl)
+{
+  tree_args.create (5);
+  for (tree t = DECL_ARGUMENTS (fndecl); t; t = DECL_CHAIN (t))
+    tree_args.safe_push (t);
+}
+
+/* Return the DECL corresponding to the zero-based integer index into
+   the function arguments.  */
+
+tree
+argno_map::to_tree (int n)
+{
+  return tree_args[n];
+}
+
+/* Allocate a fresh, zero-initialized `simd_clone' and return it.  NARGS
+   is the number of arguments to reserve space for.  (The struct already
+   embeds one `simd_clone_arg', so one spare slot is reserved.)  */
+
+static struct simd_clone *
+simd_clone_struct_alloc (int nargs)
+{
+  size_t len = (sizeof (struct simd_clone)
+                + nargs * sizeof (struct simd_clone_arg));
+  return ggc_alloc_cleared_simd_clone_stat (len MEM_STAT_INFO);
+}
+
+/* Make a copy of the `struct simd_clone' in FROM to TO.  TO must have
+   been allocated with room for at least FROM->nargs arguments.  */
+
+static inline void
+simd_clone_struct_copy (struct simd_clone *to, struct simd_clone *from)
+{
+  memcpy (to, from, sizeof (struct simd_clone)
+          + from->nargs * sizeof (struct simd_clone_arg));
+}
+
+/* Given a simd clone in NEW_NODE, extract the simd specific
+   information from the OMP clauses passed in CLAUSES, and set the
+   relevant bits in the cgraph node.  *INBRANCH_SPECIFIED is set to
+   TRUE if the `inbranch' or `notinbranch' clause is specified, otherwise
+   set to FALSE.  
*/
+
+static void
+simd_clone_clauses_extract (struct cgraph_node *new_node, tree clauses,
+                            bool *inbranch_specified)
+{
+  tree t;
+  int n = 0;
+  *inbranch_specified = false;
+  for (t = DECL_ARGUMENTS (new_node->symbol.decl); t; t = DECL_CHAIN (t))
+    ++n;
+
+  /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
+     be cloned have a distinctive artificial label in addition to "omp
+     declare simd".  */
+  tree *attrs = &DECL_ATTRIBUTES (new_node->symbol.decl);
+  bool cilk_clone = (flag_enable_cilkplus
+                     && lookup_attribute ("cilk plus elemental", *attrs));
+  /* remove_attribute returns the updated list; it must be stored back.  */
+  if (cilk_clone)
+    *attrs = remove_attribute ("cilk plus elemental", *attrs);
+
+  struct simd_clone *clone_info = simd_clone_struct_alloc (n);
+  clone_info->nargs = n;
+  clone_info->cilk_elemental = cilk_clone;
+  gcc_assert (!new_node->simdclone);
+  new_node->simdclone = clone_info;
+
+  if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
+    return;
+
+  for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
+    {
+      switch (OMP_CLAUSE_CODE (t))
+        {
+        case OMP_CLAUSE_INBRANCH:
+          clone_info->inbranch = 1;
+          *inbranch_specified = true;
+          break;
+        case OMP_CLAUSE_NOTINBRANCH:
+          clone_info->inbranch = 0;
+          *inbranch_specified = true;
+          break;
+        case OMP_CLAUSE_SIMDLEN:
+          clone_info->simdlen
+            = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
+          break;
+        case OMP_CLAUSE_LINEAR:
+          {
+            tree decl = OMP_CLAUSE_DECL (t);
+            tree step = OMP_CLAUSE_LINEAR_STEP (t);
+            int argno = tree_low_cst (decl, 1);
+            if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
+              {
+                gcc_assert (!TREE_INT_CST_HIGH (step));
+                clone_info->args[argno].linear_stride
+                  = LINEAR_STRIDE_YES_VARIABLE;
+                clone_info->args[argno].linear_stride_num
+                  = TREE_INT_CST_LOW (step);
+              }
+            else
+              {
+                if (TREE_INT_CST_HIGH (step))
+                  {
+                    /* It looks like this can't really happen, since the
+                       front-ends generally issue:
+
+                       warning: integer constant is too large for its type.
+
+                       But let's assume somehow we got past all that.  */
+                    warning_at (OMP_CLAUSE_LOCATION (t), 0,
+                                "ignoring large linear step");
+                  }
+                else
+                  {
+                    clone_info->args[argno].linear_stride
+                      = LINEAR_STRIDE_YES_CONSTANT;
+                    clone_info->args[argno].linear_stride_num
+                      = TREE_INT_CST_LOW (step);
+                  }
+              }
+            break;
+          }
+        case OMP_CLAUSE_UNIFORM:
+          {
+            tree decl = OMP_CLAUSE_DECL (t);
+            int argno = tree_low_cst (decl, 1);
+            clone_info->args[argno].uniform = 1;
+            break;
+          }
+        case OMP_CLAUSE_ALIGNED:
+          {
+            tree decl = OMP_CLAUSE_DECL (t);
+            int argno = tree_low_cst (decl, 1);
+            clone_info->args[argno].alignment
+              = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
+            break;
+          }
+        default:
+          break;
+        }
+    }
+}
+
+/* Helper function for mangling vectors.  Given a vector size in bits,
+   return the corresponding mangling character.  */
+
+static char
+vecsize_mangle (unsigned int vecsize)
+{
+  switch (vecsize)
+    {
+      /* The Intel Vector ABI does not provide a mangling character
+         for a 64-bit ISA, but this feels like it's keeping with the
+         design.  */
+    case 64: return 'w';
+
+    case 128: return 'x';
+    case 256: return 'y';
+    case 512: return 'z';
+    default:
+      /* FIXME: We must come up with a default mangling bit.  */
+      return 'x';
+    }
+}
+
+/* Given a SIMD clone in NEW_NODE, calculate the characteristic data
+   type and return the corresponding type.  The characteristic data
+   type is computed as described in the Intel Vector ABI.  */
+
+static tree
+simd_clone_compute_base_data_type (struct cgraph_node *new_node)
+{
+  tree type = integer_type_node;
+  tree fndecl = new_node->symbol.decl;
+
+  /* a) For non-void function, the characteristic data type is the
+        return type.  */
+  if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
+    type = TREE_TYPE (TREE_TYPE (fndecl));
+
+  /* b) If the function has any non-uniform, non-linear parameters,
+        then the characteristic data type is the type of the first
+        such parameter.  */
+  else
+    {
+      argno_map map (fndecl);
+      for (unsigned int i = 0; i < new_node->simdclone->nargs; ++i)
+        {
+          struct simd_clone_arg arg = new_node->simdclone->args[i];
+          if (!arg.uniform && arg.linear_stride == LINEAR_STRIDE_NO)
+            {
+              type = TREE_TYPE (map.to_tree (i));
+              break;
+            }
+        }
+    }
+
+  /* c) If the characteristic data type determined by a) or b) above
+        is struct, union, or class type which is pass-by-value (except
+        for the type that maps to the built-in complex data type), the
+        characteristic data type is int.  */
+  if (RECORD_OR_UNION_TYPE_P (type)
+      && !aggregate_value_p (type, NULL)
+      && TREE_CODE (type) != COMPLEX_TYPE)
+    return integer_type_node;
+
+  /* d) If none of the above three classes is applicable, the
+        characteristic data type is int.  */
+
+  return type;
+
+  /* e) For Intel Xeon Phi native and offload compilation, if the
+        resulting characteristic data type is 8-bit or 16-bit integer
+        data type, the characteristic data type is int.  */
+  /* Well, we don't handle Xeon Phi yet.  */
+}
+
+/* Given a SIMD clone in NEW_NODE, compute the default ISA, simdlen,
+   and hardware vector size and store them in NEW_NODE->simdclone.  */
+
+static void
+simd_clone_compute_isa_and_simdlen (struct cgraph_node *new_node)
+{
+  char isa = new_node->simdclone->isa;
+  /* Vector size for this clone.  */
+  unsigned int vecsize = 0;
+  /* Base vector type, based on function arguments.  */
+  tree base_type = simd_clone_compute_base_data_type (new_node);
+  unsigned int base_type_size = GET_MODE_BITSIZE (TYPE_MODE (base_type));
+
+  /* Calculate everything for Cilk Plus clones with appropriate target
+     support.  This is as specified in the Intel Vector ABI.
+
+     Note: Any target which supports the Cilk Plus processor clause
+     must also provide appropriate target hooks for calculating
+     default ISA/processor (default_vector_mangling_isa_code), and for
+     calculating hardware vector size based on ISA/processor
+     (max_vector_size_for_isa).  */
+  if (new_node->simdclone->cilk_elemental
+      && targetm.cilkplus.default_vector_mangling_isa_code
+      && targetm.cilkplus.max_vector_size_for_isa)
+    {
+      if (!isa)
+        isa = targetm.cilkplus.default_vector_mangling_isa_code (new_node);
+      vecsize = targetm.cilkplus.max_vector_size_for_isa (isa);
+    }
+  /* Calculate everything else generically.  */
+  else
+    {
+      vecsize = GET_MODE_BITSIZE (targetm.vectorize.preferred_simd_mode
+                                  (TYPE_MODE (base_type)));
+      isa = vecsize_mangle (vecsize);
+    }
+  /* An explicit `simdlen' clause overrides the computed length.  */
+  if (!new_node->simdclone->simdlen)
+    new_node->simdclone->simdlen = vecsize / base_type_size;
+  new_node->simdclone->isa = isa;
+  new_node->simdclone->hw_vector_size = vecsize;
+}
+
+/* Give NEW_NODE, a SIMD clone of OLD_NODE, the assembler name "_ZGV",
+   ISA, mask, simdlen, per-argument codes, "_", then OLD_NODE's name.  */
+
+static void
+simd_clone_mangle (struct cgraph_node *old_node, struct cgraph_node *new_node)
+{
+  char isa = new_node->simdclone->isa;
+  char mask = new_node->simdclone->inbranch ? 'M' : 'N';
+  unsigned int simdlen = new_node->simdclone->simdlen;
+  unsigned int n;
+  pretty_printer vars_pp;
+
+  gcc_assert (isa && simdlen);
+
+  for (n = 0; n < new_node->simdclone->nargs; ++n)
+    {
+      struct simd_clone_arg arg = new_node->simdclone->args[n];
+
+      if (arg.uniform)
+        pp_character (&vars_pp, 'u');
+      else if (arg.linear_stride == LINEAR_STRIDE_YES_CONSTANT)
+        {
+          gcc_assert (arg.linear_stride_num != 0);
+          pp_character (&vars_pp, 'l');
+          if (arg.linear_stride_num > 1)
+            pp_unsigned_wide_integer (&vars_pp, arg.linear_stride_num);
+        }
+      else if (arg.linear_stride == LINEAR_STRIDE_YES_VARIABLE)
+        {
+          pp_character (&vars_pp, 's');
+          pp_unsigned_wide_integer (&vars_pp, arg.linear_stride_num);
+        }
+      else
+        pp_character (&vars_pp, 'v');
+      if (arg.alignment)
+        {
+          pp_character (&vars_pp, 'a');
+          pp_decimal_int (&vars_pp, arg.alignment);
+        }
+    }
+
+  pretty_printer pp;
+  pp_printf (&pp, "_ZGV%c%c%u%s_%s", isa, mask, simdlen,
+             pp_formatted_text (&vars_pp),
+             IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (old_node->symbol.decl)));
+  const char *str = pp_formatted_text (&pp);
+  change_decl_assembler_name (new_node->symbol.decl, get_identifier (str));
+}
+
+/* Create a simd clone of OLD_NODE and return it.  */
+
+static struct cgraph_node *
+simd_clone_create (struct cgraph_node *old_node)
+{
+  struct cgraph_node *new_node;
+  new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
+                                         NULL, NULL, "simdclone");
+
+  /* Keep cgraph friends from removing the clone.  */
+  new_node->symbol.externally_visible
+    = old_node->symbol.externally_visible;
+  TREE_PUBLIC (new_node->symbol.decl) = TREE_PUBLIC (old_node->symbol.decl);
+  old_node->has_simd_clones = true;
+
+  DECL_ATTRIBUTES (new_node->symbol.decl)
+    = remove_attribute ("omp declare simd",
+                        DECL_ATTRIBUTES (new_node->symbol.decl));
+
+  return new_node;
+}
+
+/* If the function in NODE is tagged as an elemental SIMD function,
+   create the appropriate SIMD clones.  */
+
+static void
+expand_simd_clones (struct cgraph_node *node)
+{
+  if (cgraph_function_body_availability (node) < AVAIL_OVERWRITABLE)
+    return;
+
+  tree attr = lookup_attribute ("omp declare simd",
+                                DECL_ATTRIBUTES (node->symbol.decl));
+  if (!attr)
+    return;
+  do
+    {
+      struct cgraph_node *new_node = simd_clone_create (node);
+
+      bool inbranch_clause;
+      simd_clone_clauses_extract (new_node, TREE_VALUE (attr),
+                                  &inbranch_clause);
+      simd_clone_compute_isa_and_simdlen (new_node);
+      simd_clone_mangle (node, new_node);
+
+      /* FIXME: Adjust clone parameters to their appropriate vector types.  */
+
+      /* If no inbranch clause was specified, we need both variants.
+         We have already created the not-in-branch version above, by
+         virtue of .inbranch being clear.  Create the masked in-branch
+         version.  */
+      if (!inbranch_clause)
+        {
+          struct cgraph_node *n = simd_clone_create (node);
+          struct simd_clone *clone
+            = simd_clone_struct_alloc (new_node->simdclone->nargs);
+          simd_clone_struct_copy (clone, new_node->simdclone);
+          clone->inbranch = 1;
+          n->simdclone = clone;
+          simd_clone_mangle (node, n);
+        }
+    }
+  while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
+}
+
+/* Entry point for IPA simd clone creation pass.  */
+
+static unsigned int
+ipa_omp_simd_clone (void)
+{
+  struct cgraph_node *node;
+  FOR_EACH_DEFINED_FUNCTION (node)
+    expand_simd_clones (node);
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_omp_simd_clone =
+{
+  SIMPLE_IPA_PASS, /* type */
+  "simdclone", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_NONE, /* tv_id */
+  ( PROP_ssa | PROP_cfg ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_omp_simd_clone : public simple_ipa_opt_pass
+{
+public:
+  pass_omp_simd_clone(gcc::context *ctxt)
+    : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate () { return flag_openmp || flag_enable_cilkplus; }
+  unsigned int execute () { return ipa_omp_simd_clone (); }
+};
+
+} // anon namespace
+
+simple_ipa_opt_pass *
+make_pass_omp_simd_clone (gcc::context *ctxt)
+{
+  return new pass_omp_simd_clone (ctxt);
+}
 
 #include "gt-omp-low.h"
diff --git a/gcc/passes.def b/gcc/passes.def
index 84eb3f3..6803399 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -97,6 +97,7 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_feedback_split_functions); POP_INSERT_PASSES () NEXT_PASS (pass_ipa_increase_alignment); + NEXT_PASS (pass_omp_simd_clone); NEXT_PASS (pass_ipa_tm); NEXT_PASS (pass_ipa_lower_emutls); TERMINATE_PASS_LIST () diff --git a/gcc/target.def b/gcc/target.def index 6de513f..a5ebd11 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1508,6 +1508,35 @@ hook_int_uint_mode_1) HOOK_VECTOR_END (sched) +/* Functions relating to Cilk Plus SIMD clone mangling. */ +#undef HOOK_PREFIX +#define HOOK_PREFIX "TARGET_CILKPLUS_" +HOOK_VECTOR (TARGET_CILKPLUS, cilkplus) + +DEFHOOK +(default_vector_mangling_isa_code, +"This hook should return the default vector mangling ISA code when none\n\ +is specified in a Cilk Plus @code{processor} clause. This is as specified\n\ +in the Intel Vector ABI document.\n\ +\n\ +This hook, as well as @code{max_vector_size_for_isa} below must be set\n\ +to support the Cilk Plus @code{processor} clause.\n\ +\n\ +The only argument is a @var{cgraph_node} containing the clone.", +char, (struct cgraph_node *), NULL) + +DEFHOOK +(max_vector_size_for_isa, +"This hook returns the maximum hardware vector size in bits for a given\n\ +@var{ISA} character. The @var{ISA} character is as described in Intel's\n\ +Vector ABI (see section on mangling).\n\ +\n\ +This hook must be defined in order to support the Cilk Plus @code{processor}\n\ +clause.", +unsigned int, (char), NULL) + +HOOK_VECTOR_END (cilkplus) + /* Functions relating to vectorization. */ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_VECTORIZE_" diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-1.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-1.c new file mode 100644 index 0000000..486b67a --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -fdump-tree-optimized -O3" } */ + +/* Test that functions that have SIMD clone counterparts are not + cloned by IPA-cp. For example, special_add() below has SIMD clones + created for it. 
However, if IPA-cp later decides to clone a + specialization of special_add(x, 666) when analyzing fillit(), we + will forever keep the vectorizer from using the SIMD versions of + special_add in a loop. + + If IPA-CP gets taught how to adjust the SIMD clones as well, this + test could be removed. */ + +#pragma omp declare simd simdlen(4) +static int __attribute__ ((noinline)) +special_add (int x, int y) +{ + if (y == 666) + return x + y + 123; + else + return x + y; +} + +void fillit(int *tot) +{ + int i; + + for (i=0; i < 10000; ++i) + tot[i] = special_add (i, 666); +} + +/* { dg-final { scan-tree-dump-not "special_add.constprop" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c new file mode 100644 index 0000000..8ab3131 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp -fdump-tree-optimized -O -msse2" } */ + +#pragma omp declare simd inbranch uniform(c) linear(b:66) // addit.simdclone.2 +#pragma omp declare simd notinbranch aligned(c:32) // addit.simdclone.1 +int addit(int a, int b, int c) +{ + return a + b; +} + +#pragma omp declare simd uniform(a) aligned(a:32) linear(k:1) notinbranch +float setArray(float *a, float x, int k) +{ + a[k] = a[k] + x; + return a[k]; +} + +/* { dg-final { scan-tree-dump "clone.0 \\(_ZGVxN4ua32vl_setArray" "optimized" } } */ +/* { dg-final { scan-tree-dump "clone.1 \\(_ZGVxN4vvva32_addit" "optimized" } } */ +/* { dg-final { scan-tree-dump "clone.2 \\(_ZGVxM4vl66u_addit" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-3.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-3.c new file mode 100644 index 0000000..1ce9692 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target i?86-*-* 
x86_64-*-* } } */ +/* { dg-options "-fopenmp -fdump-tree-optimized -O -msse2" } */ + +/* Test that if there are no *inbranch clauses, both the masked and + the unmasked versions are created. */ + +#pragma omp declare simd +int addit(int a, int b, int c) +{ + return a + b; +} + +/* { dg-final { scan-tree-dump "clone.* \\(_ZGVxN4vvv_addit" "optimized" } } */ +/* { dg-final { scan-tree-dump "clone.* \\(_ZGVxM4vvv_addit" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 4a0d437..c436b72 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -885,6 +885,9 @@ struct GTY(()) tree_base { CALL_ALLOCA_FOR_VAR_P in CALL_EXPR + OMP_CLAUSE_LINEAR_VARIABLE_STRIDE in + OMP_CLAUSE_LINEAR + side_effects_flag: TREE_SIDE_EFFECTS in diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index ea1a62f..718f259 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -474,6 +474,7 @@ extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_lto_finish_out (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); +extern simple_ipa_opt_pass *make_pass_omp_simd_clone (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_profile (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt); diff --git a/gcc/tree.h b/gcc/tree.h index b13cb2b..3e9818c 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1318,6 +1318,10 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_LINEAR_NO_COPYOUT(NODE) \ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR)) +/* True if a LINEAR clause has a stride that is variable. 
*/ +#define OMP_CLAUSE_LINEAR_VARIABLE_STRIDE(NODE) \ + TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR)) + #define OMP_CLAUSE_LINEAR_STEP(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR), 1)