This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Ping: [PATCH 3/6] Vectorize internal functions
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, richard dot sandiford at arm dot com
- Date: Tue, 17 Nov 2015 15:32:56 +0100
- Subject: Re: Ping: [PATCH 3/6] Vectorize internal functions
- Authentication-results: sourceware.org; auth=none
- References: <87io5bno02 dot fsf at e105548-lin dot cambridge dot arm dot com> <87611bnnoo dot fsf at e105548-lin dot cambridge dot arm dot com> <87wpthj7ma dot fsf_-_ at e105548-lin dot cambridge dot arm dot com>
On Tue, Nov 17, 2015 at 10:30 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Thanks for all the reviews for this series. I think the patch below
> is the only target-independent one that hasn't had any comments.
This patch is ok.
Thanks,
Richard.
> Richard
>
> Richard Sandiford <richard.sandiford@arm.com> writes:
>> This patch tries to vectorize built-in and internal functions as
>> internal functions first, falling back on the current built-in
>> target hooks otherwise.
>>
>>
>> gcc/
>> * internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
>> * internal-fn.c (direct_internal_fn_array): Update accordingly.
>> * tree-vectorizer.h (vectorizable_function): Delete.
>> * tree-vect-stmts.c: Include internal-fn.h.
>> (vectorizable_internal_function): New function.
>> (vectorizable_function): Inline into...
>> (vectorizable_call): ...here. Explicitly reject calls that read
>> from or write to memory. Try using an internal function before
>> falling back on the old vectorizable_function behavior.
>>
>> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> index 898c83d..a5bda2f 100644
>> --- a/gcc/internal-fn.c
>> +++ b/gcc/internal-fn.c
>> @@ -69,13 +69,13 @@ init_internal_fns ()
>>
>> /* Create static initializers for the information returned by
>> direct_internal_fn. */
>> -#define not_direct { -2, -2 }
>> -#define mask_load_direct { -1, -1 }
>> -#define load_lanes_direct { -1, -1 }
>> -#define mask_store_direct { 3, 3 }
>> -#define store_lanes_direct { 0, 0 }
>> -#define unary_direct { 0, 0 }
>> -#define binary_direct { 0, 0 }
>> +#define not_direct { -2, -2, false }
>> +#define mask_load_direct { -1, -1, false }
>> +#define load_lanes_direct { -1, -1, false }
>> +#define mask_store_direct { 3, 3, false }
>> +#define store_lanes_direct { 0, 0, false }
>> +#define unary_direct { 0, 0, true }
>> +#define binary_direct { 0, 0, true }
>>
>> const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
>> #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
>> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
>> index 6cb123f..aea6abd 100644
>> --- a/gcc/internal-fn.h
>> +++ b/gcc/internal-fn.h
>> @@ -134,6 +134,14 @@ struct direct_internal_fn_info
>> function isn't directly mapped to an optab. */
>> signed int type0 : 8;
>> signed int type1 : 8;
>> + /* True if the function is pointwise, so that it can be vectorized by
>> + converting the return type and all argument types to vectors of the
>> + same number of elements. E.g. we can vectorize an IFN_SQRT on
>> + floats as an IFN_SQRT on vectors of N floats.
>> +
>> + This only needs 1 bit, but occupies the full 16 to ensure a nice
>> + layout. */
>> + unsigned int vectorizable : 16;
>> };
>>
>> extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 75389c4..1142142 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "tree-scalar-evolution.h"
>> #include "tree-vectorizer.h"
>> #include "builtins.h"
>> +#include "internal-fn.h"
>>
>> /* For lang_hooks.types.type_for_mode. */
>> #include "langhooks.h"
>> @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
>> add_stmt_to_eh_lp (vec_stmt, lp_nr);
>> }
>>
>> -/* Checks if CALL can be vectorized in type VECTYPE. Returns
>> - a function declaration if the target has a vectorized version
>> - of the function, or NULL_TREE if the function cannot be vectorized. */
>> +/* We want to vectorize a call to combined function CFN with function
>> + decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
>> + as the type of each input. Check whether this is possible using
>> + an internal function, returning its code if so or IFN_LAST if not. */
>>
>> -tree
>> -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
>> +static internal_fn
>> +vectorizable_internal_function (combined_fn cfn, tree fndecl,
>> + tree vectype_out, tree vectype_in)
>> {
>> - /* We only handle functions that do not read or clobber memory. */
>> - if (gimple_vuse (call))
>> - return NULL_TREE;
>> -
>> - combined_fn fn = gimple_call_combined_fn (call);
>> - if (fn != CFN_LAST)
>> - return targetm.vectorize.builtin_vectorized_function
>> - (fn, vectype_out, vectype_in);
>> -
>> - if (gimple_call_builtin_p (call, BUILT_IN_MD))
>> - return targetm.vectorize.builtin_md_vectorized_function
>> - (gimple_call_fndecl (call), vectype_out, vectype_in);
>> -
>> - return NULL_TREE;
>> + internal_fn ifn;
>> + if (internal_fn_p (cfn))
>> + ifn = as_internal_fn (cfn);
>> + else
>> + ifn = associated_internal_fn (fndecl);
>> + if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
>> + {
>> + const direct_internal_fn_info &info = direct_internal_fn (ifn);
>> + if (info.vectorizable)
>> + {
>> + tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
>> + tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
>> + if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1)))
>> + return ifn;
>> + }
>> + }
>> + return IFN_LAST;
>> }
>>
>>
>> @@ -2232,15 +2238,43 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>> else
>> return false;
>>
>> + /* We only handle functions that do not read or clobber memory. */
>> + if (gimple_vuse (stmt))
>> + {
>> + if (dump_enabled_p ())
>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> + "function reads from or writes to memory.\n");
>> + return false;
>> + }
>> +
>> /* For now, we only vectorize functions if a target specific builtin
>> is available. TODO -- in some cases, it might be profitable to
>> insert the calls for pieces of the vector, in order to be able
>> to vectorize other operations in the loop. */
>> - fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
>> - if (fndecl == NULL_TREE)
>> + fndecl = NULL_TREE;
>> + internal_fn ifn = IFN_LAST;
>> + combined_fn cfn = gimple_call_combined_fn (stmt);
>> + tree callee = gimple_call_fndecl (stmt);
>> +
>> + /* First try using an internal function. */
>> + if (cfn != CFN_LAST)
>> + ifn = vectorizable_internal_function (cfn, callee, vectype_out,
>> + vectype_in);
>> +
>> + /* If that fails, try asking for a target-specific built-in function. */
>> + if (ifn == IFN_LAST)
>> + {
>> + if (cfn != CFN_LAST)
>> + fndecl = targetm.vectorize.builtin_vectorized_function
>> + (cfn, vectype_out, vectype_in);
>> + else
>> + fndecl = targetm.vectorize.builtin_md_vectorized_function
>> + (callee, vectype_out, vectype_in);
>> + }
>> +
>> + if (ifn == IFN_LAST && !fndecl)
>> {
>> - if (gimple_call_internal_p (stmt)
>> - && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
>> + if (cfn == CFN_GOMP_SIMD_LANE
>> && !slp_node
>> && loop_vinfo
>> && LOOP_VINFO_LOOP (loop_vinfo)->simduid
>> @@ -2261,8 +2295,6 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>> }
>> }
>>
>> - gcc_assert (!gimple_vuse (stmt));
>> -
>> if (slp_node || PURE_SLP_STMT (stmt_info))
>> ncopies = 1;
>> else if (modifier == NARROW)
>> @@ -2324,7 +2356,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>> vec<tree> vec_oprndsk = vec_defs[k];
>> vargs[k] = vec_oprndsk[i];
>> }
>> - new_stmt = gimple_build_call_vec (fndecl, vargs);
>> + if (ifn != IFN_LAST)
>> + new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> + else
>> + new_stmt = gimple_build_call_vec (fndecl, vargs);
>> new_temp = make_ssa_name (vec_dest, new_stmt);
>> gimple_call_set_lhs (new_stmt, new_temp);
>> vect_finish_stmt_generation (stmt, new_stmt, gsi);
>> @@ -2372,7 +2407,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>> }
>> else
>> {
>> - new_stmt = gimple_build_call_vec (fndecl, vargs);
>> + if (ifn != IFN_LAST)
>> + new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> + else
>> + new_stmt = gimple_build_call_vec (fndecl, vargs);
>> new_temp = make_ssa_name (vec_dest, new_stmt);
>> gimple_call_set_lhs (new_stmt, new_temp);
>> }
>> @@ -2418,7 +2456,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>> vargs.quick_push (vec_oprndsk[i]);
>> vargs.quick_push (vec_oprndsk[i + 1]);
>> }
>> - new_stmt = gimple_build_call_vec (fndecl, vargs);
>> + if (ifn != IFN_LAST)
>> + new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> + else
>> + new_stmt = gimple_build_call_vec (fndecl, vargs);
>> new_temp = make_ssa_name (vec_dest, new_stmt);
>> gimple_call_set_lhs (new_stmt, new_temp);
>> vect_finish_stmt_generation (stmt, new_stmt, gsi);
>> @@ -2456,7 +2497,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>> vargs.quick_push (vec_oprnd1);
>> }
>>
>> - new_stmt = gimple_build_call_vec (fndecl, vargs);
>> + if (ifn != IFN_LAST)
>> + new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> + else
>> + new_stmt = gimple_build_call_vec (fndecl, vargs);
>> new_temp = make_ssa_name (vec_dest, new_stmt);
>> gimple_call_set_lhs (new_stmt, new_temp);
>> vect_finish_stmt_generation (stmt, new_stmt, gsi);
>> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
>> index 9cde091..bb1ab39 100644
>> --- a/gcc/tree-vectorizer.h
>> +++ b/gcc/tree-vectorizer.h
>> @@ -958,7 +958,6 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
>> int *, vec<tree> *);
>> extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
>> extern void free_stmt_vec_info (gimple *stmt);
>> -extern tree vectorizable_function (gcall *, tree, tree);
>> extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
>> stmt_vector_for_cost *,
>> stmt_vector_for_cost *);
>