This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][RFC] Do some vectorizer-friendly canonicalization before vectorization
On Thu, 23 Nov 2006, Richard Guenther wrote:
> On Thu, 23 Nov 2006, Dorit Nuzman wrote:
>
> > Richard Guenther <rguenther@suse.de> wrote on 23/11/2006 12:39:40:
> >
> > > The following passes for me on x86_64-unknown-linux-gnu. Can you check
> > > it works on ppc?
> > >
> Ok, so consider the pow (x, 0.5) pattern matching removed until the
> function vectorizing is approved (where we then can check for support).
Or try this one - it adds this capability.
Richard.
Index: targhooks.c
===================================================================
*** targhooks.c (revision 119115)
--- targhooks.c (working copy)
*************** default_invalid_within_doloop (rtx insn)
*** 319,324 ****
--- 319,333 ----
return NULL;
}
+ /* Default mapping of builtin functions to vectorized variants: none. */
+
+ tree
+ default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED)
+ {
+ return NULL_TREE;
+ }
+
bool
hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
Index: targhooks.h
===================================================================
*** targhooks.h (revision 119115)
--- targhooks.h (working copy)
*************** extern const char * default_invalid_with
*** 57,62 ****
--- 57,64 ----
extern bool default_narrow_bitfield (void);
+ extern tree default_builtin_vectorized_function (enum built_in_function, tree);
+
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
Index: target.h
===================================================================
*** target.h (revision 119115)
--- target.h (working copy)
*************** struct gcc_target
*** 370,375 ****
--- 370,379 ----
function. */
tree (* builtin_mask_for_load) (void);
+ /* Returns the decl of the target builtin that implements a vectorized
+ version of the given builtin function, or NULL_TREE if not available. */
+ tree (* builtin_vectorized_function) (unsigned, tree);
+
/* Target builtin that implements vector widening multiplication.
builtin_mul_widen_eve computes the element-by-element products
for the even elements, and builtin_mul_widen_odd computes the
Index: testsuite/gcc.dg/vect/vect-pow-1.c
===================================================================
*** testsuite/gcc.dg/vect/vect-pow-1.c (revision 119115)
--- testsuite/gcc.dg/vect/vect-pow-1.c (working copy)
***************
*** 1,14 ****
- /* { dg-do compile } */
- /* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details" } */
-
- double x[256];
-
- void foo(void)
- {
- int i;
- for (i=0; i<256; ++i)
- x[i] = x[i] * x[i];
- }
-
- /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
- /* { dg-final { cleanup-tree-dump "vect" } } */
--- 0 ----
Index: testsuite/gcc.dg/vect/fast-math-vect-pow-1.c
===================================================================
*** testsuite/gcc.dg/vect/fast-math-vect-pow-1.c (revision 119115)
--- testsuite/gcc.dg/vect/fast-math-vect-pow-1.c (working copy)
***************
*** 1,7 ****
/* { dg-do compile } */
! /* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details" } */
! double x[256];
void foo(void)
{
--- 1,7 ----
/* { dg-do compile } */
! /* { dg-require-effective-target vect_float } */
! float x[256];
void foo(void)
{
*************** void foo(void)
*** 10,14 ****
x[i] = x[i] * x[i];
}
! /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 10,14 ----
x[i] = x[i] * x[i];
}
! /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-pow-2.c
===================================================================
*** testsuite/gcc.dg/vect/vect-pow-2.c (revision 119115)
--- testsuite/gcc.dg/vect/vect-pow-2.c (working copy)
***************
*** 1,14 ****
- /* { dg-do compile } */
- /* { dg-options "-O2 -ftree-vectorize -fno-math-errno -fdump-tree-vect-details" } */
-
- double x[256];
-
- void foo(void)
- {
- int i;
- for (i=0; i<256; ++i)
- x[i] = __builtin_pow (x[i], 0.5);
- }
-
- /* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
- /* { dg-final { cleanup-tree-dump "vect" } } */
--- 0 ----
Index: testsuite/gcc.dg/vect/vect.exp
===================================================================
*** testsuite/gcc.dg/vect/vect.exp (revision 119115)
--- testsuite/gcc.dg/vect/vect.exp (working copy)
*************** lappend DEFAULT_VECTCFLAGS "-ffast-math"
*** 97,102 ****
--- 97,108 ----
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-vect*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
+ # -fno-math-errno tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-fno-math-errno"
+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-math-errno-vect*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
# -fwrapv tests
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fwrapv"
Index: testsuite/gcc.dg/vect/no-math-errno-vect-pow-1.c
===================================================================
*** testsuite/gcc.dg/vect/no-math-errno-vect-pow-1.c (revision 119115)
--- testsuite/gcc.dg/vect/no-math-errno-vect-pow-1.c (working copy)
***************
*** 1,13 ****
/* { dg-do compile } */
! /* { dg-options "-O2 -ftree-vectorize -fno-math-errno -fdump-tree-vect-details" } */
! double x[256];
void foo(void)
{
int i;
for (i=0; i<256; ++i)
! x[i] = __builtin_pow (x[i], 0.5);
}
/* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
--- 1,13 ----
/* { dg-do compile } */
! /* { dg-require-effective-target vect_float } */
! float x[256];
void foo(void)
{
int i;
for (i=0; i<256; ++i)
! x[i] = __builtin_powf (x[i], 0.5);
}
/* { dg-final { scan-tree-dump "pattern recognized" "vect" } } */
Index: tree-vectorizer.h
===================================================================
*** tree-vectorizer.h (revision 119115)
--- tree-vectorizer.h (working copy)
*************** enum stmt_vec_info_type {
*** 163,168 ****
--- 163,169 ----
load_vec_info_type,
store_vec_info_type,
op_vec_info_type,
+ call_vec_info_type,
assignment_vec_info_type,
condition_vec_info_type,
reduc_vec_info_type,
*************** extern bool vectorizable_operation (tree
*** 397,402 ****
--- 398,405 ----
extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+ extern bool vectorizable_function (tree, tree);
+ extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
Index: tree-vect-analyze.c
===================================================================
*** tree-vect-analyze.c (revision 119115)
--- tree-vect-analyze.c (working copy)
*************** vect_analyze_operations (loop_vec_info l
*** 301,306 ****
--- 301,307 ----
|| vectorizable_operation (stmt, NULL, NULL)
|| vectorizable_assignment (stmt, NULL, NULL)
|| vectorizable_load (stmt, NULL, NULL)
+ || vectorizable_call (stmt, NULL, NULL)
|| vectorizable_store (stmt, NULL, NULL)
|| vectorizable_condition (stmt, NULL, NULL));
Index: tree-vect-patterns.c
===================================================================
*** tree-vect-patterns.c (revision 119115)
--- tree-vect-patterns.c (working copy)
*************** vect_recog_pow_pattern (tree last_stmt,
*** 466,472 ****
/* We now have a pow or powi builtin function call with a constant
exponent. */
- *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
*type_out = NULL_TREE;
/* Catch squaring. */
--- 466,471 ----
*************** vect_recog_pow_pattern (tree last_stmt,
*** 474,480 ****
&& tree_low_cst (exp, 0) == 2)
|| (TREE_CODE (exp) == REAL_CST
&& REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
! return build2 (MULT_EXPR, TREE_TYPE (base), base, base);
/* Catch square root. */
if (TREE_CODE (exp) == REAL_CST
--- 473,482 ----
&& tree_low_cst (exp, 0) == 2)
|| (TREE_CODE (exp) == REAL_CST
&& REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
! {
! *type_in = TREE_TYPE (base);
! return build2 (MULT_EXPR, TREE_TYPE (base), base, base);
! }
/* Catch square root. */
if (TREE_CODE (exp) == REAL_CST
*************** vect_recog_pow_pattern (tree last_stmt,
*** 482,488 ****
{
tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
tree newarglist = build_tree_list (NULL_TREE, base);
! return build_function_call_expr (newfn, newarglist);
}
return NULL_TREE;
--- 484,496 ----
{
tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
tree newarglist = build_tree_list (NULL_TREE, base);
! *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
! if (*type_in)
! {
! newfn = build_function_call_expr (newfn, newarglist);
! if (vectorizable_function (newfn, *type_in))
! return newfn;
! }
}
return NULL_TREE;
Index: target-def.h
===================================================================
*** target-def.h (revision 119115)
--- target-def.h (working copy)
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 332,344 ****
TARGET_SCHED_SET_SCHED_FLAGS}
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE \
! {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
#define TARGET_DEFAULT_TARGET_FLAGS 0
--- 332,348 ----
TARGET_SCHED_SET_SCHED_FLAGS}
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
+ #define TARGET_BUILTIN_VECTORIZED_FUNCTION default_builtin_vectorized_function
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE \
! { \
! TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
! TARGET_BUILTIN_VECTORIZED_FUNCTION, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
! }
#define TARGET_DEFAULT_TARGET_FLAGS 0
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c (revision 119115)
--- tree-vect-transform.c (working copy)
*************** vectorizable_reduction (tree stmt, block
*** 1561,1566 ****
--- 1561,1712 ----
return true;
}
+ /* Checks if CALL can be vectorized in type VECTYPE. Returns true only if
+ CALL is a const or novops call to a builtin for which the target provides
+ a vectorized version; returns false otherwise. */
+
+ bool
+ vectorizable_function (tree call, tree vectype)
+ {
+ tree fndecl = get_callee_fndecl (call);
+
+ /* We only handle functions that do not read or clobber memory -- i.e.
+ const or novops ones. */
+ if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
+ return false;
+
+ if (!fndecl
+ || TREE_CODE (fndecl) != FUNCTION_DECL
+ || !DECL_BUILT_IN (fndecl))
+ return false;
+
+ if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
+ return true;
+
+ return false;
+ }
+
+ /* Returns an expression that performs a call to the vectorized version
+ of FNDECL in type VECTYPE, with the arguments given by ARGS. The
+ target is expected to provide such a builtin (see vectorizable_function);
+ this is asserted, not handled gracefully. */
+
+ static tree
+ build_vectorized_function_call (tree fndecl,
+ tree vectype, tree args)
+ {
+ tree vfndecl;
+ enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
+
+ /* The target specific builtin should be available. */
+ vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
+ gcc_assert (vfndecl != NULL_TREE);
+
+ return build_function_call_expr (vfndecl, args);
+ }
+
+ /* Function vectorizable_call.
+
+ Check if STMT performs a function call that can be vectorized.
+ If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+ stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+ Return FALSE if not a vectorizable STMT, TRUE otherwise. */
+
+ bool
+ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+ {
+ tree vec_dest;
+ tree scalar_dest;
+ tree operation;
+ tree op, args, type;
+ tree vec_oprnd, vargs, *pvargs_end;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ tree fndecl, rhs, new_temp, def, def_stmt;
+ enum vect_def_type dt;
+
+ /* Is STMT a vectorizable call? */
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+ return false;
+
+ operation = TREE_OPERAND (stmt, 1);
+ if (TREE_CODE (operation) != CALL_EXPR)
+ return false;
+
+ /* For now, we only vectorize functions if a target specific builtin
+ is available. TODO -- in some cases, it might be profitable to
+ insert the calls for pieces of the vector, in order to be able
+ to vectorize other operations in the loop. */
+ if (!vectorizable_function (operation, vectype))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "function is not vectorizable.");
+
+ return false;
+ }
+ gcc_assert (!stmt_references_memory_p (stmt));
+
+ for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+ {
+ op = TREE_VALUE (args);
+
+ if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ return true;
+ }
+
+ /** Transform. **/
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "transform operation.");
+
+ /* Handle def. */
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+ /* Handle uses. */
+ vargs = NULL_TREE;
+ pvargs_end = &vargs;
+ for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+ {
+ op = TREE_VALUE (args);
+ vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
+
+ *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
+ pvargs_end = &TREE_CHAIN (*pvargs_end);
+ }
+
+ fndecl = get_callee_fndecl (operation);
+ rhs = build_vectorized_function_call (fndecl, vectype, vargs);
+ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, *vec_stmt);
+ TREE_OPERAND (*vec_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+
+ /* The call in STMT might prevent it from being removed by DCE. However, we
+ cannot remove it here, due to the way the SSA name it defines is mapped
+ to the new definition. So just replace the rhs of the statement with a
+ harmless zero constant of the scalar type. */
+ type = TREE_TYPE (scalar_dest);
+ TREE_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
+
+ return true;
+ }
+
/* Function vectorizable_assignment.
*************** vect_transform_stmt (tree stmt, block_st
*** 3713,3718 ****
--- 3859,3868 ----
gcc_assert (done);
break;
+ case call_vec_info_type:
+ done = vectorizable_call (stmt, bsi, &vec_stmt);
+ break;
+
default:
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "stmt not supported.");