This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC PATCH] SLP vectorize calls
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Ira Rosen <ira dot rosen at linaro dot org>, Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 20 Oct 2011 23:50:07 +0200
- Subject: [RFC PATCH] SLP vectorize calls
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
While looking at *.vect dumps from Polyhedron, I've noticed the lack
of SLP vectorization of builtin calls.
This patch is an attempt to handle at least one- and two-operand builtin calls
(SLP doesn't handle ternary stmts yet either), where all the types are the
same. E.g. it can handle
/* Example input for SLP vectorization of builtin calls: the two builtins
   used below take and return only float, so all types are the same.  */
extern float copysignf (float, float);
extern float sqrtf (float);
float a[8], b[8], c[8], d[8];
/* Eight straight-line statements, one per element of a[]; there is no loop.
   Each statement has the same shape and differs only in the array index
   and in the constant that is added (1.0f through 8.0f).  */
void
foo (void)
{
a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
}
and compile it into:
vmovaps .LC0(%rip), %ymm0
vandnps b(%rip), %ymm0, %ymm1
vandps c(%rip), %ymm0, %ymm0
vorps %ymm0, %ymm1, %ymm0
vsqrtps d(%rip), %ymm1
vaddps %ymm1, %ymm0, %ymm0
vaddps .LC1(%rip), %ymm0, %ymm0
vmovaps %ymm0, a(%rip)
I've bootstrapped/regtested it on x86_64-linux and i686-linux, but
I am not 100% sure about all the changes, e.g. the
|| PURE_SLP_STMT (stmt_info) part in vectorizable_call.
2011-10-20 Jakub Jelinek <jakub@redhat.com>
* tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument.
Handle vectorization of SLP calls.
(vect_analyze_stmt): Adjust caller, add call to it for SLP too.
(vect_transform_stmt): Adjust vectorizable_call caller, remove
assertion.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Handle one
and two argument calls too.
(vect_build_slp_tree): Allow CALL_EXPR.
(vect_get_slp_defs): Handle calls.
--- gcc/tree-vect-stmts.c.jj 2011-10-20 14:13:34.000000000 +0200
+++ gcc/tree-vect-stmts.c 2011-10-20 18:02:43.000000000 +0200
@@ -1483,7 +1483,8 @@ vectorizable_function (gimple call, tree
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
static bool
-vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
+vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+ slp_tree slp_node)
{
tree vec_dest;
tree scalar_dest;
@@ -1494,6 +1495,7 @@ vectorizable_call (gimple stmt, gimple_s
int nunits_in;
int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
tree fndecl, new_temp, def, rhs_type;
gimple def_stmt;
enum vect_def_type dt[3]
@@ -1505,19 +1507,12 @@ vectorizable_call (gimple stmt, gimple_s
size_t i, nargs;
tree lhs;
- /* FORNOW: unsupported in basic block SLP. */
- gcc_assert (loop_vinfo);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
return false;
- /* FORNOW: SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
/* Is STMT a vectorizable call? */
if (!is_gimple_call (stmt))
return false;
@@ -1558,7 +1553,7 @@ vectorizable_call (gimple stmt, gimple_s
if (!rhs_type)
rhs_type = TREE_TYPE (op);
- if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
+ if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
&def_stmt, &def, &dt[i], &opvectype))
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -1620,7 +1615,13 @@ vectorizable_call (gimple stmt, gimple_s
gcc_assert (!gimple_vuse (stmt));
- if (modifier == NARROW)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
+ {
+ if (modifier != NONE)
+ return false;
+ ncopies = 1;
+ }
+ else if (modifier == NARROW)
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -1659,6 +1660,43 @@ vectorizable_call (gimple stmt, gimple_s
else
VEC_truncate (tree, vargs, 0);
+ if (slp_node)
+ {
+ VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
+
+ gcc_assert (j == 0);
+ if (nargs == 1)
+ vect_get_vec_defs (gimple_call_arg (stmt, 0), NULL_TREE, stmt,
+ &vec_oprnds0, &vec_oprnds1, slp_node);
+ else if (nargs == 2)
+ vect_get_vec_defs (gimple_call_arg (stmt, 0),
+ gimple_call_arg (stmt, 1), stmt,
+ &vec_oprnds0, &vec_oprnds1, slp_node);
+ else
+ gcc_unreachable ();
+
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
+ {
+ vec_oprnd1 = nargs == 2 ? VEC_index (tree, vec_oprnds1, i)
+ : NULL_TREE;
+ new_stmt = gimple_build_call (fndecl, nargs, vec_oprnd0,
+ vec_oprnd1);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_call_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ mark_symbols_for_renaming (new_stmt);
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+ new_stmt);
+ }
+
+ VEC_free (tree, heap, vec_oprnds0);
+ if (vec_oprnds1)
+ VEC_free (tree, heap, vec_oprnds1);
+
+ continue;
+ }
+
for (i = 0; i < nargs; i++)
{
op = gimple_call_arg (stmt, i);
@@ -5099,7 +5137,7 @@ vect_analyze_stmt (gimple stmt, bool *ne
|| vectorizable_operation (stmt, NULL, NULL, NULL)
|| vectorizable_assignment (stmt, NULL, NULL, NULL)
|| vectorizable_load (stmt, NULL, NULL, NULL, NULL)
- || vectorizable_call (stmt, NULL, NULL)
+ || vectorizable_call (stmt, NULL, NULL, NULL)
|| vectorizable_store (stmt, NULL, NULL, NULL)
|| vectorizable_reduction (stmt, NULL, NULL, NULL)
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0));
@@ -5108,10 +5146,11 @@ vect_analyze_stmt (gimple stmt, bool *ne
if (bb_vinfo)
ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
|| vectorizable_type_demotion (stmt, NULL, NULL, node)
- || vectorizable_shift (stmt, NULL, NULL, node)
+ || vectorizable_shift (stmt, NULL, NULL, node)
|| vectorizable_operation (stmt, NULL, NULL, node)
|| vectorizable_assignment (stmt, NULL, NULL, node)
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
+ || vectorizable_call (stmt, NULL, NULL, node)
|| vectorizable_store (stmt, NULL, NULL, node));
}
@@ -5234,8 +5273,7 @@ vect_transform_stmt (gimple stmt, gimple
break;
case call_vec_info_type:
- gcc_assert (!slp_node);
- done = vectorizable_call (stmt, gsi, &vec_stmt);
+ done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
stmt = gsi_stmt (*gsi);
break;
--- gcc/tree-vect-slp.c.jj 2011-10-18 23:52:07.000000000 +0200
+++ gcc/tree-vect-slp.c 2011-10-20 18:06:55.000000000 +0200
@@ -129,12 +129,30 @@ vect_get_and_check_slp_defs (loop_vec_in
if (loop_vinfo)
loop = LOOP_VINFO_LOOP (loop_vinfo);
- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
+ if (is_gimple_call (stmt))
+ {
+ number_of_oprnds = gimple_call_num_args (stmt);
+ if (number_of_oprnds != 1 && number_of_oprnds != 2)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: calls with %d "
+ "operands unhandled\n", number_of_oprnds);
+ return false;
+ }
+ rhs_class = number_of_oprnds == 1 ? GIMPLE_UNARY_RHS : GIMPLE_BINARY_RHS;
+ }
+ else
+ {
+ rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
+ number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
+ }
for (i = 0; i < number_of_oprnds; i++)
{
- oprnd = gimple_op (stmt, i + 1);
+ if (is_gimple_call (stmt))
+ oprnd = gimple_call_arg (stmt, i);
+ else
+ oprnd = gimple_op (stmt, i + 1);
if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
&dt[i])
@@ -660,7 +678,8 @@ vect_build_slp_tree (loop_vec_info loop_
/* Not memory operation. */
if (TREE_CODE_CLASS (rhs_code) != tcc_binary
- && TREE_CODE_CLASS (rhs_code) != tcc_unary)
+ && TREE_CODE_CLASS (rhs_code) != tcc_unary
+ && rhs_code != CALL_EXPR)
{
if (vect_print_dump_info (REPORT_SLP))
{
@@ -2308,9 +2327,19 @@ vect_get_slp_defs (tree op0, tree op1, s
if (reduc_index != -1)
return;
- code = gimple_assign_rhs_code (first_stmt);
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
+ if (!vec_oprnds1 || !op1)
return;
+ if (is_gimple_call (first_stmt))
+ {
+ if (gimple_call_num_args (first_stmt) < 2)
+ return;
+ }
+ else
+ {
+ code = gimple_assign_rhs_code (first_stmt);
+ if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
+ return;
+ }
/* The number of vector defs is determined by the number of vector statements
in the node from which we get those statements. */
Jakub