res_2 = res_13 + _ifc__1;
Argument SWAP tells that arguments of conditional expression should be
swapped.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. In that case we can create a COND_OP.
Returns rhs of resulting PHI assignment. */
static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
tree cond, tree op0, tree op1, bool swap,
- bool has_nop, gimple* nop_reduc)
+ bool has_nop, gimple* nop_reduc,
+ bool loop_versioned)
{
gimple_stmt_iterator stmt_it;
gimple *new_assign;
The COND_OP will have a neutral_op else value. */
internal_fn ifn;
ifn = get_conditional_internal_fn (reduction_op);
- if (ifn != IFN_LAST
+ if (loop_versioned && ifn != IFN_LAST
&& vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
&& !swap)
{
The generated code is inserted at GSI that points to the top of
basic block's statement list.
If PHI node has more than two arguments a chain of conditional
- expression is produced. */
+ expression is produced.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
+predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
{
gimple *new_stmt = NULL, *reduc, *nop_reduc;
tree rhs, res, arg0, arg1, op0, op1, scev;
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
true_bb != gimple_bb (reduc),
- has_nop, nop_reduc);
+ has_nop, nop_reduc,
+ loop_versioned);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
else
{
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
- swap, has_nop, nop_reduc);
+ swap, has_nop, nop_reduc,
+ loop_versioned);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
new_stmt = gimple_build_assign (res, rhs);
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
- LOOP->header block with conditional modify expressions. */
+ LOOP->header block with conditional modify expressions.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-predicate_all_scalar_phis (class loop *loop)
+predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
{
basic_block bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
gsi_next (&phi_gsi);
else
{
- predicate_scalar_phi (phi, &gsi);
+ predicate_scalar_phi (phi, &gsi, loop_versioned);
remove_phi_node (&phi_gsi, false);
}
}
}
/* Combine all the basic blocks from LOOP into one or two super basic
- blocks. Replace PHI nodes with conditional modify expressions. */
+ blocks. Replace PHI nodes with conditional modify expressions.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-combine_blocks (class loop *loop)
+combine_blocks (class loop *loop, bool loop_versioned)
{
basic_block bb, exit_bb, merge_target_bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
remove_conditions_and_labels (loop);
insert_gimplified_predicates (loop);
- predicate_all_scalar_phis (loop);
+ predicate_all_scalar_phis (loop, loop_versioned);
if (need_to_predicate || need_to_rewrite_undefined)
predicate_statements (loop);
bitmap exit_bbs;
edge pe;
auto_vec<data_reference_p, 10> refs;
+ bool loop_versioned;
again:
rloop = NULL;
need_to_predicate = false;
need_to_rewrite_undefined = false;
any_complicated_phi = false;
+ loop_versioned = false;
/* Apply more aggressive if-conversion when loop or its outer loop were
marked with simd pragma. When that's the case, we try to if-convert
will re-use that for things like runtime alias versioning
whose condition can end up using those invariants. */
pe = single_pred_edge (gimple_bb (preds->last ()));
+
+ loop_versioned = true;
}
if (need_to_lower_bitfields)
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
on-the-fly. */
- combine_blocks (loop);
+ combine_blocks (loop, loop_versioned);
}
/* Perform local CSE, this esp. helps the vectorizer analysis if loads
/* Successively apply CODE to each element of VECTOR_RHS, in left-to-right
order, starting with LHS. Insert the extraction statements before GSI and
associate the new scalar SSA names with variable SCALAR_DEST.
+ If MASK is nonzero, mask the input and then operate on it unconditionally.
Return the SSA name for the result. */
static tree
vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
- tree_code code, tree lhs, tree vector_rhs)
+ tree_code code, tree lhs, tree vector_rhs,
+ tree mask)
{
tree vectype = TREE_TYPE (vector_rhs);
tree scalar_type = TREE_TYPE (vectype);
unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
+ /* Re-create a VEC_COND_EXPR to mask the input here in order to be able
+ to perform an unconditional element-wise reduction of it. */
+ if (mask)
+ {
+ tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
+ "masked_vector_rhs");
+ tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE,
+ false);
+ tree vector_identity = build_vector_from_val (vectype, neutral_op);
+ gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR,
+ mask, vector_rhs, vector_identity);
+ gsi_insert_before (gsi, select, GSI_SAME_STMT);
+ vector_rhs = masked_vector_rhs;
+ }
+
for (unsigned HOST_WIDE_INT bit_offset = 0;
bit_offset < vec_size_in_bits;
bit_offset += element_bitsize)
else
{
reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
- tree_code (code), reduc_var, def0);
+ tree_code (code), reduc_var, def0,
+ mask);
new_stmt = SSA_NAME_DEF_STMT (reduc_var);
/* Remove the statement, so that we can use the same code paths
as for statements that we've just created. */