This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Disentangle reduction initial value compute from vect_get_vec_defs
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 28 Jun 2017 09:46:32 +0200 (CEST)
- Subject: [PATCH] Disentangle reduction initial value compute from vect_get_vec_defs
- Authentication-results: sourceware.org; auth=none
This creates a SLP variant for get_initial_def_for_reduction thereby
cleaning up the get_defs interfaces to not pass in reduc_index.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2017-06-28 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (vect_get_vec_defs): Remove.
(vect_get_slp_defs): Adjust.
* tree-vect-loop.c (get_initial_defs_for_reduction): Split
out from ...
* tree-vect-slp.c (vect_get_constant_vectors): ... here and
simplify.
* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
get_initial_defs_for_reduction instead of vect_get_vec_defs.
(vectorizable_reduction): Adjust.
* tree-vect-slp.c (vect_get_constant_vectors): Remove reduction
handling.
(vect_get_slp_defs): Likewise.
* tree-vect-stmts.c (vect_get_vec_defs): Make static and adjust.
(vectorizable_bswap): Adjust.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_assignment): Likewise.
(vectorizable_shift): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_condition): Likewise.
(vectorizable_comparison): Likewise.
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 249686)
+++ gcc/tree-vect-loop.c (working copy)
@@ -4045,6 +4045,203 @@ get_initial_def_for_reduction (gimple *s
return init_def;
}
+/* Get at the initial defs for OP in the reduction SLP_NODE.
+ NUMBER_OF_VECTORS is the number of vector defs to create.
+ REDUC_INDEX is the index of the reduction operand in the statements. */
+
+static void
+get_initial_defs_for_reduction (slp_tree slp_node,
+ vec<tree> *vec_oprnds,
+ unsigned int number_of_vectors,
+ int reduc_index, enum tree_code code)
+{
+ vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+ gimple *stmt = stmts[0];
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ unsigned nunits;
+ tree vec_cst;
+ tree *elts;
+ unsigned j, number_of_places_left_in_vector;
+ tree vector_type, scalar_type;
+ tree vop;
+ int group_size = stmts.length ();
+ unsigned int vec_num, i;
+ unsigned number_of_copies = 1;
+ vec<tree> voprnds;
+ voprnds.create (number_of_vectors);
+ bool constant_p;
+ tree neutral_op = NULL;
+ gimple *def_stmt;
+ struct loop *loop;
+ gimple_seq ctor_seq = NULL;
+
+ vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
+ scalar_type = TREE_TYPE (vector_type);
+ nunits = TYPE_VECTOR_SUBPARTS (vector_type);
+
+ gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+ && reduc_index != -1);
+
+ /* op is the reduction operand of the first stmt already. */
+ /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
+ we need either neutral operands or the original operands. See
+ get_initial_def_for_reduction() for details. */
+ switch (code)
+ {
+ case WIDEN_SUM_EXPR:
+ case DOT_PROD_EXPR:
+ case SAD_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ neutral_op = build_zero_cst (scalar_type);
+ break;
+
+ case MULT_EXPR:
+ neutral_op = build_one_cst (scalar_type);
+ break;
+
+ case BIT_AND_EXPR:
+ neutral_op = build_all_ones_cst (scalar_type);
+ break;
+
+ /* For MIN/MAX we don't have an easy neutral operand but
+ the initial values can be used fine here. Only for
+ a reduction chain we have to force a neutral element. */
+ case MAX_EXPR:
+ case MIN_EXPR:
+ if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
+ neutral_op = NULL;
+ else
+ {
+ tree op = get_reduction_op (stmts[0], reduc_index);
+ def_stmt = SSA_NAME_DEF_STMT (op);
+ loop = (gimple_bb (stmt))->loop_father;
+ neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
+ }
+ break;
+
+ default:
+ gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
+ neutral_op = NULL;
+ }
+
+ /* NUMBER_OF_COPIES is the number of times we need to use the same values in
+ created vectors. It is greater than 1 if unrolling is performed.
+
+ For example, we have two scalar operands, s1 and s2 (e.g., group of
+ strided accesses of size two), while NUNITS is four (i.e., four scalars
+ of this type can be packed in a vector). The output vector will contain
+ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
+ will be 2).
+
+ If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
+ containing the operands.
+
+ For example, NUNITS is four as before, and the group size is 8
+ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
+ {s5, s6, s7, s8}. */
+
+ number_of_copies = nunits * number_of_vectors / group_size;
+
+ number_of_places_left_in_vector = nunits;
+ constant_p = true;
+ elts = XALLOCAVEC (tree, nunits);
+ for (j = 0; j < number_of_copies; j++)
+ {
+ for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
+ {
+ tree op = get_reduction_op (stmt, reduc_index);
+ loop = (gimple_bb (stmt))->loop_father;
+ def_stmt = SSA_NAME_DEF_STMT (op);
+
+ gcc_assert (loop);
+
+ /* Get the def before the loop. In reduction chain we have only
+ one initial value. */
+ if ((j != (number_of_copies - 1)
+ || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
+ && i != 0))
+ && neutral_op)
+ op = neutral_op;
+ else
+ op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
+
+ /* Create 'vect_ = {op0,op1,...,opn}'. */
+ number_of_places_left_in_vector--;
+ elts[number_of_places_left_in_vector] = op;
+ if (!CONSTANT_CLASS_P (op))
+ constant_p = false;
+
+ if (number_of_places_left_in_vector == 0)
+ {
+ if (constant_p)
+ vec_cst = build_vector (vector_type, elts);
+ else
+ {
+ vec<constructor_elt, va_gc> *v;
+ unsigned k;
+ vec_alloc (v, nunits);
+ for (k = 0; k < nunits; ++k)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
+ vec_cst = build_constructor (vector_type, v);
+ }
+ tree init;
+ gimple_stmt_iterator gsi;
+ init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
+ if (ctor_seq != NULL)
+ {
+ gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
+ gsi_insert_seq_before_without_update (&gsi, ctor_seq,
+ GSI_SAME_STMT);
+ ctor_seq = NULL;
+ }
+ voprnds.quick_push (init);
+
+ number_of_places_left_in_vector = nunits;
+ constant_p = true;
+ }
+ }
+ }
+
+ /* Since the vectors are created in the reverse order, we should invert
+ them. */
+ vec_num = voprnds.length ();
+ for (j = vec_num; j != 0; j--)
+ {
+ vop = voprnds[j - 1];
+ vec_oprnds->quick_push (vop);
+ }
+
+ voprnds.release ();
+
+ /* In case that VF is greater than the unrolling factor needed for the SLP
+ group of stmts, NUMBER_OF_VECTORS to be created is greater than
+ NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
+ to replicate the vectors. */
+ while (number_of_vectors > vec_oprnds->length ())
+ {
+ tree neutral_vec = NULL;
+
+ if (neutral_op)
+ {
+ if (!neutral_vec)
+ neutral_vec = build_vector_from_val (vector_type, neutral_op);
+
+ vec_oprnds->quick_push (neutral_vec);
+ }
+ else
+ {
+ for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
+ vec_oprnds->quick_push (vop);
+ }
+ }
+}
+
+
/* Function vect_create_epilog_for_reduction
Create code at the loop-epilog to finalize the result of a reduction
@@ -4189,8 +4386,12 @@ vect_create_epilog_for_reduction (vec<tr
/* Get the loop-entry arguments. */
enum vect_def_type initial_def_dt = vect_unknown_def_type;
if (slp_node)
- vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
- NULL, slp_node, reduc_index);
+ {
+ unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ vec_initial_defs.reserve (vec_num);
+ get_initial_defs_for_reduction (slp_node, &vec_initial_defs,
+ vec_num, reduc_index, code);
+ }
else
{
/* Get at the scalar def before the loop, that defines the initial value
@@ -5939,7 +6140,7 @@ vectorizable_reduction (gimple *stmt, gi
if (op_type == ternary_op)
slp_ops.quick_push (reduc_index == 2 ? NULL : ops[2]);
- vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
+ vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
vec_oprnds0.safe_splice (vec_defs[reduc_index == 0 ? 1 : 0]);
vec_defs[reduc_index == 0 ? 1 : 0].release ();
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 249686)
+++ gcc/tree-vect-slp.c (working copy)
@@ -2976,8 +2976,7 @@ vect_mask_constant_operand_p (gimple *st
static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
vec<tree> *vec_oprnds,
- unsigned int op_num, unsigned int number_of_vectors,
- int reduc_index)
+ unsigned int op_num, unsigned int number_of_vectors)
{
vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
gimple *stmt = stmts[0];
@@ -2996,8 +2995,6 @@ vect_get_constant_vectors (tree op, slp_
bool constant_p, is_store;
tree neutral_op = NULL;
enum tree_code code = gimple_expr_code (stmt);
- gimple *def_stmt;
- struct loop *loop;
gimple_seq ctor_seq = NULL;
/* Check if vector type is a boolean vector. */
@@ -3009,64 +3006,6 @@ vect_get_constant_vectors (tree op, slp_
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
nunits = TYPE_VECTOR_SUBPARTS (vector_type);
- if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
- && reduc_index != -1)
- {
- op_num = reduc_index;
- op = gimple_op (stmt, op_num + 1);
- /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
- we need either neutral operands or the original operands. See
- get_initial_def_for_reduction() for details. */
- switch (code)
- {
- case WIDEN_SUM_EXPR:
- case DOT_PROD_EXPR:
- case SAD_EXPR:
- case PLUS_EXPR:
- case MINUS_EXPR:
- case BIT_IOR_EXPR:
- case BIT_XOR_EXPR:
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
- neutral_op = build_real (TREE_TYPE (op), dconst0);
- else
- neutral_op = build_int_cst (TREE_TYPE (op), 0);
-
- break;
-
- case MULT_EXPR:
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
- neutral_op = build_real (TREE_TYPE (op), dconst1);
- else
- neutral_op = build_int_cst (TREE_TYPE (op), 1);
-
- break;
-
- case BIT_AND_EXPR:
- neutral_op = build_int_cst (TREE_TYPE (op), -1);
- break;
-
- /* For MIN/MAX we don't have an easy neutral operand but
- the initial values can be used fine here. Only for
- a reduction chain we have to force a neutral element. */
- case MAX_EXPR:
- case MIN_EXPR:
- if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
- neutral_op = NULL;
- else
- {
- def_stmt = SSA_NAME_DEF_STMT (op);
- loop = (gimple_bb (stmt))->loop_father;
- neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
- loop_preheader_edge (loop));
- }
- break;
-
- default:
- gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
- neutral_op = NULL;
- }
- }
-
if (STMT_VINFO_DATA_REF (stmt_vinfo))
{
is_store = true;
@@ -3154,25 +3093,6 @@ vect_get_constant_vectors (tree op, slp_
}
}
- if (reduc_index != -1)
- {
- loop = (gimple_bb (stmt))->loop_father;
- def_stmt = SSA_NAME_DEF_STMT (op);
-
- gcc_assert (loop);
-
- /* Get the def before the loop. In reduction chain we have only
- one initial value. */
- if ((j != (number_of_copies - 1)
- || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
- && i != 0))
- && neutral_op)
- op = neutral_op;
- else
- op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
- loop_preheader_edge (loop));
- }
-
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
tree orig_op = op;
@@ -3340,7 +3260,7 @@ vect_get_slp_vect_defs (slp_tree slp_nod
void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
- vec<vec<tree> > *vec_oprnds, int reduc_index)
+ vec<vec<tree> > *vec_oprnds)
{
gimple *first_stmt;
int number_of_vects = 0, i;
@@ -3421,20 +3341,16 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
/* For reduction defs we call vect_get_constant_vectors (), since we are
looking for initial loop invariant values. */
- if (vectorized_defs && reduc_index == -1)
+ if (vectorized_defs)
/* The defs are already vectorized. */
vect_get_slp_vect_defs (child, &vec_defs);
else
/* Build vectors from scalar defs. */
vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
- number_of_vects, reduc_index);
+ number_of_vects);
vec_oprnds->quick_push (vec_defs);
- /* For reductions, we only need initial values. */
- if (reduc_index != -1)
- return;
-
first_iteration = false;
}
}
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 249686)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -1558,11 +1558,11 @@ vect_get_vec_defs_for_stmt_copy (enum ve
REDUC_INDEX is the index of reduction operand in case of reduction,
and -1 otherwise. */
-void
+static void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
vec<tree> *vec_oprnds0,
vec<tree> *vec_oprnds1,
- slp_tree slp_node, int reduc_index)
+ slp_tree slp_node)
{
if (slp_node)
{
@@ -1574,7 +1574,7 @@ vect_get_vec_defs (tree op0, tree op1, g
if (op1)
ops.quick_push (op1);
- vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
+ vect_get_slp_defs (ops, slp_node, &vec_defs);
*vec_oprnds0 = vec_defs[0];
if (op1)
@@ -2525,7 +2525,7 @@ vectorizable_bswap (gimple *stmt, gimple
{
/* Handle uses. */
if (j == 0)
- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
+ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
@@ -2860,7 +2860,7 @@ vectorizable_call (gimple *gs, gimple_st
for (i = 0; i < nargs; i++)
vargs.quick_push (gimple_call_arg (stmt, i));
- vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
+ vect_get_slp_defs (vargs, slp_node, &vec_defs);
vec_oprnds0 = vec_defs[0];
/* Arguments are ready. Create the new vector stmt. */
@@ -2990,7 +2990,7 @@ vectorizable_call (gimple *gs, gimple_st
for (i = 0; i < nargs; i++)
vargs.quick_push (gimple_call_arg (stmt, i));
- vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
+ vect_get_slp_defs (vargs, slp_node, &vec_defs);
vec_oprnds0 = vec_defs[0];
/* Arguments are ready. Create the new vector stmt. */
@@ -4401,8 +4401,7 @@ vectorizable_conversion (gimple *stmt, g
for (j = 0; j < ncopies; j++)
{
if (j == 0)
- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
- -1);
+ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
@@ -4462,11 +4461,11 @@ vectorizable_conversion (gimple *stmt, g
vec_oprnds1.quick_push (vec_oprnd1);
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
- slp_node, -1);
+ slp_node);
}
else
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
- &vec_oprnds1, slp_node, -1);
+ &vec_oprnds1, slp_node);
}
else
{
@@ -4565,7 +4564,7 @@ vectorizable_conversion (gimple *stmt, g
/* Handle uses. */
if (slp_node)
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
- slp_node, -1);
+ slp_node);
else
{
vec_oprnds0.truncate (0);
@@ -4748,7 +4747,7 @@ vectorizable_assignment (gimple *stmt, g
{
/* Handle uses. */
if (j == 0)
- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
+ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
@@ -5155,10 +5154,10 @@ vectorizable_shift (gimple *stmt, gimple
operand 1 should be of a vector type (the usual case). */
if (vec_oprnd1)
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
- slp_node, -1);
+ slp_node);
else
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
- slp_node, -1);
+ slp_node);
}
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
@@ -5505,13 +5504,13 @@ vectorizable_operation (gimple *stmt, gi
{
if (op_type == binary_op || op_type == ternary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
- slp_node, -1);
+ slp_node);
else
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
- slp_node, -1);
+ slp_node);
if (op_type == ternary_op)
vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
- slp_node, -1);
+ slp_node);
}
else
{
@@ -6076,7 +6075,7 @@ vectorizable_store (gimple *stmt, gimple
if (slp)
{
vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
- slp_node, -1);
+ slp_node);
vec_oprnd = vec_oprnds[0];
}
else
@@ -6223,7 +6222,7 @@ vectorizable_store (gimple *stmt, gimple
{
/* Get vectorized arguments for SLP_NODE. */
vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
- NULL, slp_node, -1);
+ NULL, slp_node);
vec_oprnd = vec_oprnds[0];
}
@@ -7976,7 +7975,7 @@ vectorizable_condition (gimple *stmt, gi
}
ops.safe_push (then_clause);
ops.safe_push (else_clause);
- vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
+ vect_get_slp_defs (ops, slp_node, &vec_defs);
vec_oprnds3 = vec_defs.pop ();
vec_oprnds2 = vec_defs.pop ();
if (!masked)
@@ -8314,7 +8313,7 @@ vectorizable_comparison (gimple *stmt, g
ops.safe_push (rhs1);
ops.safe_push (rhs2);
- vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
+ vect_get_slp_defs (ops, slp_node, &vec_defs);
vec_oprnds1 = vec_defs.pop ();
vec_oprnds0 = vec_defs.pop ();
}
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h (revision 249686)
+++ gcc/tree-vectorizer.h (working copy)
@@ -1094,8 +1094,6 @@ extern void vect_get_load_cost (struct d
extern void vect_get_store_cost (struct data_reference *, int,
unsigned int *, stmt_vector_for_cost *);
extern bool vect_supportable_shift (enum tree_code, tree);
-extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *,
- vec<tree> *, slp_tree, int);
extern tree vect_gen_perm_mask_any (tree, const unsigned char *);
extern tree vect_gen_perm_mask_checked (tree, const unsigned char *);
extern void optimize_mask_stores (struct loop*);
@@ -1179,8 +1177,7 @@ extern bool vect_schedule_slp (vec_info
extern bool vect_analyze_slp (vec_info *, unsigned);
extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
-extern void vect_get_slp_defs (vec<tree> , slp_tree,
- vec<vec<tree> > *, int);
+extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *);
extern bool vect_slp_bb (basic_block);
extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree);
extern bool is_simple_and_all_uses_invariant (gimple *, loop_vec_info);