[PATCH] (2/3) Add vectorization of builtin functions
Richard Guenther
rguenther@suse.de
Thu Nov 16 12:50:00 GMT 2006
This adds vectorization of builtin functions to the vectorizer. It does
so by adding the target hook
tree builtin_vectorized_function (enum built_in_function fn, tree type)
which is supposed to return a FUNCTION_DECL for a vectorized variant of
the builtin function FN for the vector type TYPE.
Bootstrapped and regtested on x86_64-unknown-linux-gnu.
Ok for mainline?
Thanks,
Richard.
:ADDPATCH vectorizer:
2006-11-16 Richard Guenther <rguenther@suse.de>
Zdenek Dvorak <dvorakz@suse.cz>
* target.h (struct gcc_target): Add builtin_vectorized_function
target hook.
* target-def.h (TARGET_VECTORIZE): Likewise.
* targhooks.h (default_builtin_vectorized_function): Declare.
* targhooks.c (default_builtin_vectorized_function): Define.
* tree-vectorizer.h (stmt_vec_info_type): Add call_vec_info_type.
(vectorizable_call): Declare.
* tree-vect-analyze.c (vect_analyze_operations): Call
vectorizable_call.
* tree-vect-transform.c (vectorizable_function): New static function.
(build_vectorized_function_call): Likewise.
(vectorizable_call): New function.
(vect_transform_stmt): Handle vectorizable calls.
(vect_transform_loop): Rename is_store to remove_stmt.
Index: targhooks.c
===================================================================
*** targhooks.c (revision 118884)
--- targhooks.c (working copy)
*************** default_invalid_within_doloop (rtx insn)
*** 319,324 ****
--- 319,333 ----
return NULL;
}
+ /* Mapping of builtin functions to vectorized variants.
+ Default implementation of the builtin_vectorized_function target hook:
+ FN is the builtin function code and TYPE the vector type for which a
+ vectorized variant is requested. This default provides no vectorized
+ variants, so it ignores both arguments and always returns NULL_TREE. */
+
+ tree
+ default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED)
+ {
+ return NULL_TREE;
+ }
+
bool
hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
Index: targhooks.h
===================================================================
*** targhooks.h (revision 118884)
--- targhooks.h (working copy)
*************** extern const char * default_invalid_with
*** 57,62 ****
--- 57,64 ----
extern bool default_narrow_bitfield (void);
+ extern tree default_builtin_vectorized_function (enum built_in_function, tree);
+
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
Index: target.h
===================================================================
*** target.h (revision 118884)
--- target.h (working copy)
*************** struct gcc_target
*** 370,375 ****
--- 370,379 ----
function. */
tree (* builtin_mask_for_load) (void);
+ /* Returns the FUNCTION_DECL of a builtin that implements a vectorized
+ version of the given builtin function, or NULL_TREE if not available. */
+ tree (* builtin_vectorized_function) (unsigned, tree);
+
/* Target builtin that implements vector widening multiplication.
builtin_mul_widen_eve computes the element-by-element products
for the even elements, and builtin_mul_widen_odd computes the
Index: tree-vectorizer.h
===================================================================
*** tree-vectorizer.h (revision 118884)
--- tree-vectorizer.h (working copy)
*************** enum stmt_vec_info_type {
*** 163,168 ****
--- 163,169 ----
load_vec_info_type,
store_vec_info_type,
op_vec_info_type,
+ call_vec_info_type,
assignment_vec_info_type,
condition_vec_info_type,
reduc_vec_info_type,
*************** extern bool vectorizable_operation (tree
*** 368,373 ****
--- 369,375 ----
extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+ extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
Index: tree-vect-analyze.c
===================================================================
*** tree-vect-analyze.c (revision 118884)
--- tree-vect-analyze.c (working copy)
*************** vect_analyze_operations (loop_vec_info l
*** 300,305 ****
--- 300,306 ----
|| vectorizable_operation (stmt, NULL, NULL)
|| vectorizable_assignment (stmt, NULL, NULL)
|| vectorizable_load (stmt, NULL, NULL)
+ || vectorizable_call (stmt, NULL, NULL)
|| vectorizable_store (stmt, NULL, NULL)
|| vectorizable_condition (stmt, NULL, NULL));
Index: target-def.h
===================================================================
*** target-def.h (revision 118884)
--- target-def.h (working copy)
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 332,344 ****
TARGET_SCHED_SET_SCHED_FLAGS}
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE \
! {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
#define TARGET_DEFAULT_TARGET_FLAGS 0
--- 332,348 ----
TARGET_SCHED_SET_SCHED_FLAGS}
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION default_builtin_vectorized_function
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE \
! { \
! TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
! TARGET_BUILTIN_VECTORIZED_FUNCTION, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
! }
#define TARGET_DEFAULT_TARGET_FLAGS 0
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c (revision 118884)
--- tree-vect-transform.c (working copy)
*************** vectorizable_reduction (tree stmt, block
*** 1549,1554 ****
--- 1549,1700 ----
return true;
}
+ /* Checks if CALL can be vectorized in type VECTYPE. Returns
+ true if the target has a vectorized version of the function,
+ or false if the function cannot be vectorized. Only calls whose
+ callee is a builtin FUNCTION_DECL and whose call flags include
+ ECF_CONST or ECF_NOVOPS are considered; for those the target is
+ queried through the builtin_vectorized_function hook. */
+
+ static bool
+ vectorizable_function (tree call, tree vectype)
+ {
+ tree fndecl = get_callee_fndecl (call);
+
+ /* We only handle functions that do not read or clobber memory -- i.e.
+ const or novops ones. */
+ if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
+ return false;
+
+ if (!fndecl
+ || TREE_CODE (fndecl) != FUNCTION_DECL
+ || !DECL_BUILT_IN (fndecl))
+ return false;
+
+ if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
+ return true;
+
+ return false;
+ }
+
+ /* Returns an expression that performs a call to vectorized version
+ of FNDECL in type VECTYPE, with the arguments given by ARGS.
+ The vectorized function is obtained from the target's
+ builtin_vectorized_function hook using the builtin code of FNDECL.
+ No statements are inserted here; only the call expression is built. */
+
+ static tree
+ build_vectorized_function_call (tree fndecl,
+ tree vectype, tree args)
+ {
+ tree vfndecl;
+ enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+
+ /* The target specific builtin should be available. The caller is
+ expected to have checked this already (see vectorizable_function);
+ a NULL_TREE result from the hook trips the assert below. */
+ vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
+ gcc_assert (vfndecl != NULL_TREE);
+
+ return build_function_call_expr (vfndecl, args);
+ }
+
+ /* Function vectorizable_call.
+
+ Check if STMT performs a function call that can be vectorized.
+ If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+ stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+ Return FALSE if not a vectorizable STMT, TRUE otherwise.
+
+ STMT is accepted only if it is a MODIFY_EXPR that assigns the result
+ of a CALL_EXPR to an SSA_NAME, the call passes vectorizable_function
+ for the vector type of STMT, and every argument is a simple use.
+ When VEC_STMT is NULL only this analysis is done and the stmt info
+ is marked as call_vec_info_type. */
+
+ bool
+ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+ {
+ tree vec_dest;
+ tree scalar_dest;
+ tree operation;
+ tree op, args, type;
+ tree vec_oprnd, vargs, *pvargs_end;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ tree fndecl, rhs, new_temp, def, def_stmt;
+ enum vect_def_type dt;
+
+ /* Is STMT a vectorizable call? */
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+ return false;
+
+ operation = TREE_OPERAND (stmt, 1);
+ if (TREE_CODE (operation) != CALL_EXPR)
+ return false;
+
+ /* For now, we only vectorize functions if a target specific builtin
+ is available. TODO -- in some cases, it might be profitable to
+ insert the calls for pieces of the vector, in order to be able
+ to vectorize other operations in the loop. */
+ if (!vectorizable_function (operation, vectype))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "function is not vectorizable.");
+
+ return false;
+ }
+ gcc_assert (!stmt_references_memory_p (stmt));
+
+ for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+ {
+ op = TREE_VALUE (args);
+
+ if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ return true;
+ }
+
+ /** Transform. **/
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "transform operation.");
+
+ /* Handle def. */
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+ /* Handle uses. The vectorized argument list VARGS is built in the
+ order of the scalar arguments: PVARGS_END always points at the
+ chain slot where the next argument has to be appended. */
+ vargs = NULL_TREE;
+ pvargs_end = &vargs;
+ for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+ {
+ op = TREE_VALUE (args);
+ vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
+
+ *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
+ pvargs_end = &TREE_CHAIN (*pvargs_end);
+ }
+
+ fndecl = get_callee_fndecl (operation);
+ rhs = build_vectorized_function_call (fndecl, vectype, vargs);
+ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, *vec_stmt);
+ TREE_OPERAND (*vec_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+
+ /* The call in STMT might prevent it from being removed in dce. We however
+ cannot remove it here, due to the way the ssa name it defines is mapped
+ to the new definition. So just replace rhs of the statement with something
+ harmless, namely a zero constant converted to the type of the scalar
+ destination. */
+ type = TREE_TYPE (scalar_dest);
+ TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
+
+ return true;
+ }
+
/* Function vectorizable_assignment.
*************** vectorizable_condition (tree stmt, block
*** 3011,3017 ****
bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
! bool is_store = false;
tree vec_stmt = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree orig_stmt_in_pattern;
--- 3157,3163 ----
bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
! bool remove_stmt = false;
tree vec_stmt = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree orig_stmt_in_pattern;
*************** vect_transform_stmt (tree stmt, block_st
*** 3049,3055 ****
case store_vec_info_type:
done = vectorizable_store (stmt, bsi, &vec_stmt);
gcc_assert (done);
! is_store = true;
break;
case condition_vec_info_type:
--- 3195,3205 ----
case store_vec_info_type:
done = vectorizable_store (stmt, bsi, &vec_stmt);
gcc_assert (done);
! remove_stmt = true;
! break;
!
! case call_vec_info_type:
! done = vectorizable_call (stmt, bsi, &vec_stmt);
break;
case condition_vec_info_type:
*************** vect_transform_stmt (tree stmt, block_st
*** 3099,3105 ****
}
}
! return is_store;
}
--- 3249,3255 ----
}
}
! return remove_stmt;
}
*************** vect_transform_loop (loop_vec_info loop_
*** 3907,3913 ****
{
tree stmt = bsi_stmt (si);
stmt_vec_info stmt_info;
! bool is_store;
if (vect_print_dump_info (REPORT_DETAILS))
{
--- 4057,4063 ----
{
tree stmt = bsi_stmt (si);
stmt_vec_info stmt_info;
! bool remove_stmt;
if (vect_print_dump_info (REPORT_DETAILS))
{
*************** vect_transform_loop (loop_vec_info loop_
*** 3932,3939 ****
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform statement.");
! is_store = vect_transform_stmt (stmt, &si);
! if (is_store)
{
/* Free the attached stmt_vec_info and remove the stmt. */
stmt_ann_t ann = stmt_ann (stmt);
--- 4082,4089 ----
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform statement.");
! remove_stmt = vect_transform_stmt (stmt, &si);
! if (remove_stmt)
{
/* Free the attached stmt_vec_info and remove the stmt. */
stmt_ann_t ann = stmt_ann (stmt);
More information about the Gcc-patches
mailing list