[PATCH] (2/3) Add vectorization of builtin functions
Dorit Nuzman
DORIT@il.ibm.com
Thu Nov 16 16:13:00 GMT 2006
>
> This adds vectorization of builtin functions to the vectorizer. It does
looks good to me...
thanks,
dorit
> so by adding the target hook
>
> tree builtin_vectorized_function (enum built_in_function fn, tree type)
>
> which is supposed to return a FUNCTION_DECL for a vectorized variant of
> the builtin function FN for the vector type TYPE.
>
> Bootstrapped and regtested on x86_64-unknown-linux-gnu.
>
> Ok for mainline?
>
> Thanks,
> Richard.
>
> :ADDPATCH vectorizer:
>
> 2006-11-16 Richard Guenther <rguenther@suse.de>
> Zdenek Dvorak <dvorakz@suse.cz>
>
> * target.h (struct gcc_target): Add builtin_vectorized_function
> target hook.
> * target-def.h (TARGET_VECTORIZE): Likewise.
> * targhooks.h (default_builtin_vectorized_function): Declare.
> * targhooks.c (default_builtin_vectorized_function): Define.
> * tree-vectorizer.h (stmt_vec_info_type): Add call_vec_info_type.
> (vectorizable_call): Declare.
> * tree-vect-analyze.c (vect_analyze_operations): Call
> vectorizable_call.
> * tree-vect-transform.c (vectorizable_function): New static function.
> (build_vectorized_function_call): Likewise.
> (vectorizable_call): New function.
> (vect_transform_stmt): Handle vectorizable calls.
> (vect_transform_loop): Rename is_store to remove_stmt.
>
> Index: targhooks.c
> ===================================================================
> *** targhooks.c (revision 118884)
> --- targhooks.c (working copy)
> *************** default_invalid_within_doloop (rtx insn)
> *** 319,324 ****
> --- 319,333 ----
> return NULL;
> }
>
> + /* Mapping of builtin functions to vectorized variants. */
> +
> + tree
> + default_builtin_vectorized_function (enum built_in_function fn
> ATTRIBUTE_UNUSED,
> + tree type ATTRIBUTE_UNUSED)
> + {
> + return NULL_TREE;
> + }
> +
> bool
> hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
> CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
> Index: targhooks.h
> ===================================================================
> *** targhooks.h (revision 118884)
> --- targhooks.h (working copy)
> *************** extern const char * default_invalid_with
> *** 57,62 ****
> --- 57,64 ----
>
> extern bool default_narrow_bitfield (void);
>
> + extern tree default_builtin_vectorized_function (enum
> built_in_function, tree);
> +
> /* These are here, and not in hooks.[ch], because not all users of
> hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
>
> Index: target.h
> ===================================================================
> *** target.h (revision 118884)
> --- target.h (working copy)
> *************** struct gcc_target
> *** 370,375 ****
> --- 370,379 ----
> function. */
> tree (* builtin_mask_for_load) (void);
>
> + /* Returns a code for a builtin that realizes a vectorized version of
> + the function, or NULL_TREE if not available. */
> + tree (* builtin_vectorized_function) (unsigned, tree);
> +
> /* Target builtin that implements vector widening multiplication.
> builtin_mul_widen_eve computes the element-by-element products
> for the even elements, and builtin_mul_widen_odd computes the
> Index: tree-vectorizer.h
> ===================================================================
> *** tree-vectorizer.h (revision 118884)
> --- tree-vectorizer.h (working copy)
> *************** enum stmt_vec_info_type {
> *** 163,168 ****
> --- 163,169 ----
> load_vec_info_type,
> store_vec_info_type,
> op_vec_info_type,
> + call_vec_info_type,
> assignment_vec_info_type,
> condition_vec_info_type,
> reduc_vec_info_type,
> *************** extern bool vectorizable_operation (tree
> *** 368,373 ****
> --- 369,375 ----
> extern bool vectorizable_type_promotion (tree, block_stmt_iterator
> *, tree *);
> extern bool vectorizable_type_demotion (tree, block_stmt_iterator
> *, tree *);
> extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree
*);
> + extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
> extern bool vectorizable_condition (tree, block_stmt_iterator *, tree
*);
> extern bool vectorizable_live_operation (tree, block_stmt_iterator
> *, tree *);
> extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree
*);
> Index: tree-vect-analyze.c
> ===================================================================
> *** tree-vect-analyze.c (revision 118884)
> --- tree-vect-analyze.c (working copy)
> *************** vect_analyze_operations (loop_vec_info l
> *** 300,305 ****
> --- 300,306 ----
> || vectorizable_operation (stmt, NULL, NULL)
> || vectorizable_assignment (stmt, NULL, NULL)
> || vectorizable_load (stmt, NULL, NULL)
> + || vectorizable_call (stmt, NULL, NULL)
> || vectorizable_store (stmt, NULL, NULL)
> || vectorizable_condition (stmt, NULL, NULL));
>
> Index: target-def.h
> ===================================================================
> *** target-def.h (revision 118884)
> --- target-def.h (working copy)
> *************** Foundation, 51 Franklin Street, Fifth Fl
> *** 332,344 ****
> TARGET_SCHED_SET_SCHED_FLAGS}
>
> #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
> #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
> #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
>
> #define TARGET_VECTORIZE
\
> ! {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
> ! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,
\
> ! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
>
> #define TARGET_DEFAULT_TARGET_FLAGS 0
>
> --- 332,348 ----
> TARGET_SCHED_SET_SCHED_FLAGS}
>
> #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
> + #define TARGET_BUILTIN_VECTORIZED_FUNCTION
> default_builtin_vectorized_function
> #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
> #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
>
> #define TARGET_VECTORIZE
\
> ! { \
> ! TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
> ! TARGET_BUILTIN_VECTORIZED_FUNCTION, \
> ! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,
\
> ! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
> ! }
>
> #define TARGET_DEFAULT_TARGET_FLAGS 0
>
> Index: tree-vect-transform.c
> ===================================================================
> *** tree-vect-transform.c (revision 118884)
> --- tree-vect-transform.c (working copy)
> *************** vectorizable_reduction (tree stmt, block
> *** 1549,1554 ****
> --- 1549,1700 ----
> return true;
> }
>
> + /* Checks if CALL can be vectorized in type VECTYPE. Returns
> + true if the target has a vectorized version of the function,
> + or false if the function cannot be vectorized. */
> +
> + static bool
> + vectorizable_function (tree call, tree vectype)
> + {
> + tree fndecl = get_callee_fndecl (call);
> +
> + /* We only handle functions that do not read or clobber memory -- i.e.
> + const or novops ones. */
> + if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
> + return false;
> +
> + if (!fndecl
> + || TREE_CODE (fndecl) != FUNCTION_DECL
> + || !DECL_BUILT_IN (fndecl))
> + return false;
> +
> + if (targetm.vectorize.builtin_vectorized_function
> (DECL_FUNCTION_CODE (fndecl), vectype))
> + return true;
> +
> + return false;
> + }
> +
> + /* Returns an expression that performs a call to vectorized version
> + of FNDECL in type VECTYPE, with the arguments given by ARGS.
> + If extra statements need to be generated, they are inserted
> + before BSI. */
> +
> + static tree
> + build_vectorized_function_call (tree fndecl,
> + tree vectype, tree args)
> + {
> + tree vfndecl;
> + enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
> +
> + /* The target specific builtin should be available. */
> + vfndecl = targetm.vectorize.builtin_vectorized_function (code,
vectype);
> + gcc_assert (vfndecl != NULL_TREE);
> +
> + return build_function_call_expr (vfndecl, args);
> + }
> +
> + /* Function vectorizable_call.
> +
> + Check if STMT performs a function call that can be vectorized.
> + If VEC_STMT is also passed, vectorize the STMT: create a vectorized
> + stmt to replace it, put it in VEC_STMT, and insert it at BSI.
> + Return FALSE if not a vectorizable STMT, TRUE otherwise. */
> +
> + bool
> + vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
> + {
> + tree vec_dest;
> + tree scalar_dest;
> + tree operation;
> + tree op, args, type;
> + tree vec_oprnd, vargs, *pvargs_end;
> + stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> + tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> + tree fndecl, rhs, new_temp, def, def_stmt;
> + enum vect_def_type dt;
> +
> + /* Is STMT a vectorizable call? */
> + if (TREE_CODE (stmt) != MODIFY_EXPR)
> + return false;
> +
> + if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
> + return false;
> +
> + operation = TREE_OPERAND (stmt, 1);
> + if (TREE_CODE (operation) != CALL_EXPR)
> + return false;
> +
> + /* For now, we only vectorize functions if a target specific builtin
> + is available. TODO -- in some cases, it might be profitable to
> + insert the calls for pieces of the vector, in order to be able
> + to vectorize other operations in the loop. */
> + if (!vectorizable_function (operation, vectype))
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "function is not vectorizable.");
> +
> + return false;
> + }
> + gcc_assert (!stmt_references_memory_p (stmt));
> +
> + for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN
(args))
> + {
> + op = TREE_VALUE (args);
> +
> + if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "use not simple.");
> + return false;
> + }
> + }
> +
> + if (!vec_stmt) /* transformation not required. */
> + {
> + STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
> + return true;
> + }
> +
> + /** Transform. **/
> +
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "transform operation.");
> +
> + /* Handle def. */
> + scalar_dest = TREE_OPERAND (stmt, 0);
> + vec_dest = vect_create_destination_var (scalar_dest, vectype);
> +
> + /* Handle uses. */
> + vargs = NULL_TREE;
> + pvargs_end = &vargs;
> + for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN
(args))
> + {
> + op = TREE_VALUE (args);
> + vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
> +
> + *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
> + pvargs_end = &TREE_CHAIN (*pvargs_end);
> + }
> +
> + fndecl = get_callee_fndecl (operation);
> + rhs = build_vectorized_function_call (fndecl, vectype, vargs);
> + *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
> + new_temp = make_ssa_name (vec_dest, *vec_stmt);
> + TREE_OPERAND (*vec_stmt, 0) = new_temp;
> +
> + vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
> +
> + /* The call in STMT might prevent it from being removed in dce. We however
> + cannot remove it here, due to the way the ssa name it defines is mapped
> + to the new definition. So just replace rhs of the statement with something
> + harmless. */
> + type = TREE_TYPE (scalar_dest);
> + TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
> +
> + return true;
> + }
> +
>
> /* Function vectorizable_assignment.
>
> *************** vectorizable_condition (tree stmt, block
> *** 3011,3017 ****
> bool
> vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
> {
> ! bool is_store = false;
> tree vec_stmt = NULL_TREE;
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> tree orig_stmt_in_pattern;
> --- 3157,3163 ----
> bool
> vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
> {
> ! bool remove_stmt = false;
> tree vec_stmt = NULL_TREE;
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> tree orig_stmt_in_pattern;
> *************** vect_transform_stmt (tree stmt, block_st
> *** 3049,3055 ****
> case store_vec_info_type:
> done = vectorizable_store (stmt, bsi, &vec_stmt);
> gcc_assert (done);
> ! is_store = true;
> break;
>
> case condition_vec_info_type:
> --- 3195,3205 ----
> case store_vec_info_type:
> done = vectorizable_store (stmt, bsi, &vec_stmt);
> gcc_assert (done);
> ! remove_stmt = true;
> ! break;
> !
> ! case call_vec_info_type:
> ! done = vectorizable_call (stmt, bsi, &vec_stmt);
> break;
>
> case condition_vec_info_type:
> *************** vect_transform_stmt (tree stmt, block_st
> *** 3099,3105 ****
> }
> }
>
> ! return is_store;
> }
>
>
> --- 3249,3255 ----
> }
> }
>
> ! return remove_stmt;
> }
>
>
> *************** vect_transform_loop (loop_vec_info loop_
> *** 3907,3913 ****
> {
> tree stmt = bsi_stmt (si);
> stmt_vec_info stmt_info;
> ! bool is_store;
>
> if (vect_print_dump_info (REPORT_DETAILS))
> {
> --- 4057,4063 ----
> {
> tree stmt = bsi_stmt (si);
> stmt_vec_info stmt_info;
> ! bool remove_stmt;
>
> if (vect_print_dump_info (REPORT_DETAILS))
> {
> *************** vect_transform_loop (loop_vec_info loop_
> *** 3932,3939 ****
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "transform statement.");
>
> ! is_store = vect_transform_stmt (stmt, &si);
> ! if (is_store)
> {
> /* Free the attached stmt_vec_info and remove the stmt. */
> stmt_ann_t ann = stmt_ann (stmt);
> --- 4082,4089 ----
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "transform statement.");
>
> ! remove_stmt = vect_transform_stmt (stmt, &si);
> ! if (remove_stmt)
> {
> /* Free the attached stmt_vec_info and remove the stmt. */
> stmt_ann_t ann = stmt_ann (stmt);
>
More information about the Gcc-patches
mailing list