}
}
+/* If FN is an IFN_COND_* or IFN_COND_LEN_* function, return the index of the
+ argument that is used when the condition is false. Return -1 otherwise. */
+
+int
+internal_fn_else_index (internal_fn fn)
+{
+ switch (fn)
+ {
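+ /* Unary operations: IFN_COND_<OP> (COND, A, ELSE); the IFN_COND_LEN_*
+ variants append their LEN and BIAS operands after the else value. */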
+ case IFN_COND_NEG:
+ case IFN_COND_NOT:
+ case IFN_COND_LEN_NEG:
+ case IFN_COND_LEN_NOT:
+ return 2;
+
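+ /* Binary operations: IFN_COND_<OP> (COND, A, B, ELSE). */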
+ case IFN_COND_ADD:
+ case IFN_COND_SUB:
+ case IFN_COND_MUL:
+ case IFN_COND_DIV:
+ case IFN_COND_MOD:
+ case IFN_COND_MIN:
+ case IFN_COND_MAX:
+ case IFN_COND_FMIN:
+ case IFN_COND_FMAX:
+ case IFN_COND_AND:
+ case IFN_COND_IOR:
+ case IFN_COND_XOR:
+ case IFN_COND_SHL:
+ case IFN_COND_SHR:
+ case IFN_COND_LEN_ADD:
+ case IFN_COND_LEN_SUB:
+ case IFN_COND_LEN_MUL:
+ case IFN_COND_LEN_DIV:
+ case IFN_COND_LEN_MOD:
+ case IFN_COND_LEN_MIN:
+ case IFN_COND_LEN_MAX:
+ case IFN_COND_LEN_FMIN:
+ case IFN_COND_LEN_FMAX:
+ case IFN_COND_LEN_AND:
+ case IFN_COND_LEN_IOR:
+ case IFN_COND_LEN_XOR:
+ case IFN_COND_LEN_SHL:
+ case IFN_COND_LEN_SHR:
+ return 3;
+
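+ /* Ternary operations: IFN_COND_<OP> (COND, A, B, C, ELSE). */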
+ case IFN_COND_FMA:
+ case IFN_COND_FMS:
+ case IFN_COND_FNMA:
+ case IFN_COND_FNMS:
+ case IFN_COND_LEN_FMA:
+ case IFN_COND_LEN_FMS:
+ case IFN_COND_LEN_FNMA:
+ case IFN_COND_LEN_FNMS:
+ return 4;
+
+ default:
+ return -1;
+ }
+}
+
/* If FN takes a vector mask argument, return the index of that argument,
otherwise return -1. */
extern bool internal_gather_scatter_fn_p (internal_fn);
extern int internal_fn_mask_index (internal_fn);
extern int internal_fn_len_index (internal_fn);
+extern int internal_fn_else_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int);
--- /dev/null
+/* Make sure a -0 stays -0 when we perform a conditional reduction. */
+/* { dg-do run } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-add-options ieee } */
+/* { dg-additional-options "-std=gnu99 -fno-fast-math" } */
+
+#include "tree-vect.h"
+
+#include <math.h>
+
+#define N (VECTOR_BITS * 17)
+
+double __attribute__ ((noinline, noclone))
+reduc_plus_double (double *restrict a, double init, int *cond, int n)
+{
+ double res = init;
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ res += a[i];
+ return res;
+}
+
+double __attribute__ ((noinline, noclone, optimize ("0")))
+reduc_plus_double_ref (double *restrict a, double init, int *cond, int n)
+{
+ double res = init;
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ res += a[i];
+ return res;
+}
+
+double __attribute__ ((noinline, noclone))
+reduc_minus_double (double *restrict a, double init, int *cond, int n)
+{
+ double res = init;
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ res -= a[i];
+ return res;
+}
+
+double __attribute__ ((noinline, noclone, optimize ("0")))
+reduc_minus_double_ref (double *restrict a, double init, int *cond, int n)
+{
+ double res = init;
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ res -= a[i];
+ return res;
+}
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+ int n = 19;
+ double a[N];
+ int cond1[N], cond2[N];
+
+ for (int i = 0; i < N; i++)
+ {
+ a[i] = (i * 0.1) * (i & 1 ? 1 : -1);
+ cond1[i] = 0;
+ cond2[i] = i & 4 ? 1 : 0;
+ asm volatile ("" ::: "memory");
+ }
+
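+ /* cond1 is all false, so each reduction must return its initial value
+ unchanged; in particular a -0.0 init must not become +0.0, as it would
+ if a +0.0 neutral value were folded in. cond2 enables some lanes. */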
+ double res1 = reduc_plus_double (a, -0.0, cond1, n);
+ double ref1 = reduc_plus_double_ref (a, -0.0, cond1, n);
+ double res2 = reduc_minus_double (a, -0.0, cond1, n);
+ double ref2 = reduc_minus_double_ref (a, -0.0, cond1, n);
+ double res3 = reduc_plus_double (a, -0.0, cond1, n);
+ double ref3 = reduc_plus_double_ref (a, -0.0, cond1, n);
+ double res4 = reduc_minus_double (a, -0.0, cond1, n);
+ double ref4 = reduc_minus_double_ref (a, -0.0, cond1, n);
+
+ if (res1 != ref1 || signbit (res1) != signbit (ref1))
+ __builtin_abort ();
+ if (res2 != ref2 || signbit (res2) != signbit (ref2))
+ __builtin_abort ();
+ if (res3 != ref3 || signbit (res3) != signbit (ref3))
+ __builtin_abort ();
+ if (res4 != ref4 || signbit (res4) != signbit (ref4))
+ __builtin_abort ();
+
+ res1 = reduc_plus_double (a, 0.0, cond1, n);
+ ref1 = reduc_plus_double_ref (a, 0.0, cond1, n);
+ res2 = reduc_minus_double (a, 0.0, cond1, n);
+ ref2 = reduc_minus_double_ref (a, 0.0, cond1, n);
+ res3 = reduc_plus_double (a, 0.0, cond1, n);
+ ref3 = reduc_plus_double_ref (a, 0.0, cond1, n);
+ res4 = reduc_minus_double (a, 0.0, cond1, n);
+ ref4 = reduc_minus_double_ref (a, 0.0, cond1, n);
+
+ if (res1 != ref1 || signbit (res1) != signbit (ref1))
+ __builtin_abort ();
+ if (res2 != ref2 || signbit (res2) != signbit (ref2))
+ __builtin_abort ();
+ if (res3 != ref3 || signbit (res3) != signbit (ref3))
+ __builtin_abort ();
+ if (res4 != ref4 || signbit (res4) != signbit (ref4))
+ __builtin_abort ();
+
+ res1 = reduc_plus_double (a, -0.0, cond2, n);
+ ref1 = reduc_plus_double_ref (a, -0.0, cond2, n);
+ res2 = reduc_minus_double (a, -0.0, cond2, n);
+ ref2 = reduc_minus_double_ref (a, -0.0, cond2, n);
+ res3 = reduc_plus_double (a, -0.0, cond2, n);
+ ref3 = reduc_plus_double_ref (a, -0.0, cond2, n);
+ res4 = reduc_minus_double (a, -0.0, cond2, n);
+ ref4 = reduc_minus_double_ref (a, -0.0, cond2, n);
+
+ if (res1 != ref1 || signbit (res1) != signbit (ref1))
+ __builtin_abort ();
+ if (res2 != ref2 || signbit (res2) != signbit (ref2))
+ __builtin_abort ();
+ if (res3 != ref3 || signbit (res3) != signbit (ref3))
+ __builtin_abort ();
+ if (res4 != ref4 || signbit (res4) != signbit (ref4))
+ __builtin_abort ();
+
+ res1 = reduc_plus_double (a, 0.0, cond2, n);
+ ref1 = reduc_plus_double_ref (a, 0.0, cond2, n);
+ res2 = reduc_minus_double (a, 0.0, cond2, n);
+ ref2 = reduc_minus_double_ref (a, 0.0, cond2, n);
+ res3 = reduc_plus_double (a, 0.0, cond2, n);
+ ref3 = reduc_plus_double_ref (a, 0.0, cond2, n);
+ res4 = reduc_minus_double (a, 0.0, cond2, n);
+ ref4 = reduc_minus_double_ref (a, 0.0, cond2, n);
+
+ if (res1 != ref1 || signbit (res1) != signbit (ref1))
+ __builtin_abort ();
+ if (res2 != ref2 || signbit (res2) != signbit (ref2))
+ __builtin_abort ();
+ if (res3 != ref3 || signbit (res3) != signbit (ref3))
+ __builtin_abort ();
+ if (res4 != ref4 || signbit (res4) != signbit (ref4))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+double
+__attribute__ ((noipa))
+foo2 (double *__restrict a, double init, int *__restrict cond, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ init += a[i];
+ return init;
+}
+
+double
+__attribute__ ((noipa))
+foo3 (double *__restrict a, double init, int *__restrict cond, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ init -= a[i];
+ return init;
+}
+
+double
+__attribute__ ((noipa))
+foo4 (double *__restrict a, double init, int *__restrict cond, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ init *= a[i];
+ return init;
+}
+
+int
+__attribute__ ((noipa))
+foo5 (int *__restrict a, int init, int *__restrict cond, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ init &= a[i];
+ return init;
+}
+
+int
+__attribute__ ((noipa))
+foo6 (int *__restrict a, int init, int *__restrict cond, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ init |= a[i];
+ return init;
+}
+
+int
+__attribute__ ((noipa))
+foo7 (int *__restrict a, int init, int *__restrict cond, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (cond[i])
+ init ^= a[i];
+ return init;
+}
+
+#define SZ 125
+
+int
+main ()
+{
+ double res1 = 0, res2 = 0, res3 = 0;
+ double a1[SZ], a2[SZ], a3[SZ];
+ int c1[SZ], c2[SZ], c3[SZ];
+
+ int a4[SZ], a5[SZ], a6[SZ];
+ int res4 = 0, res5 = 0, res6 = 0;
+ int c4[SZ], c5[SZ], c6[SZ];
+
+ for (int i = 0; i < SZ; i++)
+ {
+ a1[i] = i * 3 + (i & 4) - (i & 7);
+ a2[i] = i * 3 + (i & 4) - (i & 7);
+ a3[i] = i * 0.05 + (i & 4) - (i & 7);
+ a4[i] = i * 3 + (i & 4) - (i & 7);
+ a5[i] = i * 3 + (i & 4) - (i & 7);
+ a6[i] = i * 3 + (i & 4) - (i & 7);
+ c1[i] = i & 1;
+ c2[i] = i & 2;
+ c3[i] = i & 3;
+ c4[i] = i & 4;
+ c5[i] = i & 5;
+ c6[i] = i & 6;
+ __asm__ volatile ("" : : : "memory");
+ }
+
+ double init1 = 2.7, init2 = 8.2, init3 = 0.1;
+ double ref1 = init1, ref2 = init2, ref3 = init3;
+
+ int init4 = 87, init5 = 11, init6 = -123894344;
+ int ref4 = init4, ref5 = init5, ref6 = init6;
+
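+ /* Compute the scalar reference results; the novector pragma keeps this
+ loop from being vectorized. */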
+#pragma GCC novector
+ for (int i = 0; i < SZ; i++)
+ {
+ if (c1[i])
+ ref1 += a1[i];
+ if (c2[i])
+ ref2 -= a2[i];
+ if (c3[i])
+ ref3 *= a3[i];
+ if (c4[i])
+ ref4 &= a4[i];
+ if (c5[i])
+ ref5 |= a5[i];
+ if (c6[i])
+ ref6 ^= a6[i];
+ }
+
+ res1 = foo2 (a1, init1, c1, SZ);
+ res2 = foo3 (a2, init2, c2, SZ);
+ res3 = foo4 (a3, init3, c3, SZ);
+ res4 = foo5 (a4, init4, c4, SZ);
+ res5 = foo6 (a5, init5, c5, SZ);
+ res6 = foo7 (a6, init6, c6, SZ);
+
+ if (res1 != ref1)
+ __builtin_abort ();
+ if (res2 != ref2)
+ __builtin_abort ();
+ if (res3 != ref3)
+ __builtin_abort ();
+ if (res4 != ref4)
+ __builtin_abort ();
+ if (res5 != ref5)
+ __builtin_abort ();
+ if (res6 != ref6)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-not "VCOND_MASK" "vect" } } */
#include "reduc_call-1.c"
-/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
+/* { dg-final { scan-assembler-times {vfmadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
+/* { dg-final { scan-assembler-not {vmerge} } } */
#include "reduc_call-1.c"
-/* { dg-final { scan-assembler {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler {vfmadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+} } } */
+/* { dg-final { scan-assembler {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-not {vmerge} } } */
gimple *new_assign;
tree rhs;
tree rhs1 = gimple_assign_rhs1 (reduc);
+ tree lhs = gimple_assign_lhs (reduc);
tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
tree c;
enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
- tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
+ tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
+ NULL, false);
gimple_seq stmts = NULL;
if (dump_file && (dump_flags & TDF_DETAILS))
print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
}
- /* Build cond expression using COND and constant operand
- of reduction rhs. */
- c = fold_build_cond_expr (TREE_TYPE (rhs1),
- unshare_expr (cond),
- swap ? op_nochange : op1,
- swap ? op1 : op_nochange);
-
- /* Create assignment stmt and insert it at GSI. */
- new_assign = gimple_build_assign (tmp, c);
- gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
- /* Build rhs for unconditional increment/decrement/logic_operation. */
- rhs = gimple_build (&stmts, reduction_op,
- TREE_TYPE (rhs1), op0, tmp);
+ /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
+ The COND_OP uses the reduction variable itself as the else value, so
+ lanes where the condition is false are left unchanged, which has the
+ same effect as combining with the neutral op. */
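+ /* For  if (cond) r += a[i]  this emits roughly
+      tmp = .COND_ADD (cond, r, a[i], r);
+    instead of
+      tmp = cond ? a[i] : 0.0;
+      r = r + tmp;  */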
+ internal_fn ifn;
+ ifn = get_conditional_internal_fn (reduction_op);
+ if (ifn != IFN_LAST
+ && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
+ && !swap)
+ {
+ gcall *cond_call = gimple_build_call_internal (ifn, 4,
+ unshare_expr (cond),
+ op0, op1, op0);
+ gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
+ gimple_call_set_lhs (cond_call, tmp);
+ rhs = tmp;
+ }
+ else
+ {
+ /* Build cond expression using COND and constant operand
+ of reduction rhs. */
+ c = fold_build_cond_expr (TREE_TYPE (rhs1),
+ unshare_expr (cond),
+ swap ? op_nochange : op1,
+ swap ? op1 : op_nochange);
+ /* Create assignment stmt and insert it at GSI. */
+ new_assign = gimple_build_assign (tmp, c);
+ gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
+ /* Build rhs for unconditional increment/decrement/logic_operation. */
+ rhs = gimple_build (&stmts, reduction_op,
+ TREE_TYPE (rhs1), op0, tmp);
+ }
if (has_nop)
{
{
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
- swap,has_nop, nop_reduc);
+ swap, has_nop, nop_reduc);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
new_stmt = gimple_build_assign (res, rhs);
static bool
fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn)
{
- if (code == PLUS_EXPR)
+ /* We support MINUS_EXPR by negating the operand. This also preserves an
+ initial -0.0 since -0.0 - 0.0 (0.0 being the neutral op for MINUS_EXPR)
+ is -0.0 + (-0.0) = -0.0. */
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
{
*reduc_fn = IFN_FOLD_LEFT_PLUS;
return true;
by the introduction of additional X elements, return that X, otherwise
return null. CODE is the code of the reduction and SCALAR_TYPE is type
of the scalar elements. If the reduction has just a single initial value
- then INITIAL_VALUE is that value, otherwise it is null. */
+ then INITIAL_VALUE is that value, otherwise it is null.
+ If AS_INITIAL is TRUE the value is meant to be used as the initial value;
+ in that case no signed zero is returned. */
tree
neutral_op_for_reduction (tree scalar_type, code_helper code,
- tree initial_value)
+ tree initial_value, bool as_initial)
{
if (code.is_tree_code ())
switch (tree_code (code))
{
- case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR:
case SAD_EXPR:
- case PLUS_EXPR:
case MINUS_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
return build_zero_cst (scalar_type);
+ case WIDEN_SUM_EXPR:
+ case PLUS_EXPR:
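+ /* When signed zeros are honored, -0.0 is the value that leaves any
+ addend unchanged: x + (-0.0) == x even for x == -0.0, whereas
+ -0.0 + 0.0 would yield +0.0. */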
+ if (!as_initial && HONOR_SIGNED_ZEROS (scalar_type))
+ return build_real (scalar_type, dconstm0);
+ else
+ return build_zero_cst (scalar_type);
case MULT_EXPR:
return build_one_cst (scalar_type);
use_operand_p use_p;
gimple *op_use_stmt;
unsigned cnt = 0;
+ bool cond_fn_p = op.code.is_internal_fn ()
+ && (conditional_internal_fn_code (internal_fn (op.code))
+ != ERROR_MARK);
+
FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi])
- if (!is_gimple_debug (op_use_stmt)
- && (*code != ERROR_MARK
- || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
- FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
- cnt++;
+ {
+ /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
+ op1 twice (once as definition, once as else) in the same operation.
+ Allow this. */
+ if (cond_fn_p)
+ {
+ gcall *call = dyn_cast<gcall *> (use_stmt);
+ unsigned else_pos
+ = internal_fn_else_index (internal_fn (op.code));
+
+ for (unsigned int j = 0; j < gimple_call_num_args (call); ++j)
+ {
+ if (j == else_pos)
+ continue;
+ if (gimple_call_arg (call, j) == op.ops[opi])
+ cnt++;
+ }
+ }
+ else if (!is_gimple_debug (op_use_stmt)
+ && (*code != ERROR_MARK
+ || flow_bb_inside_loop_p (loop,
+ gimple_bb (op_use_stmt))))
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ cnt++;
+ }
+
if (cnt != 1)
{
fail = true;
return NULL;
}
- nphi_def_loop_uses++;
- phi_use_stmt = use_stmt;
+ /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
+ op1 twice (once as definition, once as else) in the same operation.
+ Only count it as one. */
+ if (use_stmt != phi_use_stmt)
+ {
+ nphi_def_loop_uses++;
+ phi_use_stmt = use_stmt;
+ }
}
tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
}
-
+
scalar_dest = gimple_get_lhs (orig_stmt_info->stmt);
scalar_type = TREE_TYPE (scalar_dest);
scalar_results.truncate (0);
if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
initial_value = reduc_info->reduc_initial_values[0];
neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code,
- initial_value);
+ initial_value, false);
}
if (neutral_op)
vector_identity = gimple_build_vector_from_val (&seq, vectype,
gimple_stmt_iterator *gsi,
gimple **vec_stmt, slp_tree slp_node,
gimple *reduc_def_stmt,
- tree_code code, internal_fn reduc_fn,
- tree ops[3], tree vectype_in,
+ code_helper code, internal_fn reduc_fn,
+ tree *ops, int num_ops, tree vectype_in,
int reduc_index, vec_loop_masks *masks,
vec_loop_lens *lens)
{
gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
gcc_assert (ncopies == 1);
- gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
+
+ bool is_cond_op = false;
+ if (!code.is_tree_code ())
+ {
+ code = conditional_internal_fn_code (internal_fn (code));
+ gcc_assert (code != ERROR_MARK);
+ is_cond_op = true;
+ }
+
+ gcc_assert (TREE_CODE_LENGTH (tree_code (code)) == binary_op);
if (slp_node)
- gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
- TYPE_VECTOR_SUBPARTS (vectype_in)));
+ {
+ if (is_cond_op)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "fold-left reduction on SLP not supported.\n");
+ return false;
+ }
+
+ gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
+ TYPE_VECTOR_SUBPARTS (vectype_in)));
+ }
- tree op0 = ops[1 - reduc_index];
+ /* The operands either come from a binary operation or an IFN_COND operation.
+ The former is a gimple assign with binary rhs and the latter is a
+ gimple call with four arguments. */
+ gcc_assert (num_ops == 2 || num_ops == 4);
+ tree op0, opmask;
+ if (!is_cond_op)
+ op0 = ops[1 - reduc_index];
+ else
+ {
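+ /* The IFN_COND_<OP> reduction has the reduction definition as operand 1,
+ duplicated as the else value in operand 3, so the remaining source
+ operand is ops[2] and the mask is ops[0]. */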
+ op0 = ops[2];
+ opmask = ops[0];
+ gcc_assert (!slp_node);
+ }
int group_size = 1;
stmt_vec_info scalar_dest_def_info;
- auto_vec<tree> vec_oprnds0;
+ auto_vec<tree> vec_oprnds0, vec_opmask;
if (slp_node)
{
auto_vec<vec<tree> > vec_defs (2);
vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
op0, &vec_oprnds0);
scalar_dest_def_info = stmt_info;
+
+ /* For an IFN_COND_OP we also need the vector mask operand. */
+ if (is_cond_op)
+ vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+ opmask, &vec_opmask);
}
- tree scalar_dest = gimple_assign_lhs (scalar_dest_def_info->stmt);
+ gimple *sdef = scalar_dest_def_info->stmt;
+ tree scalar_dest = gimple_get_lhs (sdef);
tree scalar_type = TREE_TYPE (scalar_dest);
tree reduc_var = gimple_phi_result (reduc_def_stmt);
tree bias = NULL_TREE;
if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+ else if (is_cond_op)
+ mask = vec_opmask[0];
if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
{
len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
i, 1);
signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
bias = build_int_cst (intQI_type_node, biasval);
- mask = build_minus_one_cst (truth_type_for (vectype_in));
+ if (!is_cond_op)
+ mask = build_minus_one_cst (truth_type_for (vectype_in));
}
/* Handle MINUS by adding the negative. */
def0 = negated;
}
- if (mask && mask_reduc_fn == IFN_LAST)
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && mask && mask_reduc_fn == IFN_LAST)
def0 = merge_with_identity (gsi, mask, vectype_out, def0,
vector_identity);
}
else
{
- reduc_var = vect_expand_fold_left (gsi, scalar_dest_var, code,
- reduc_var, def0);
+ reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
+ tree_code (code), reduc_var, def0);
new_stmt = SSA_NAME_DEF_STMT (reduc_var);
/* Remove the statement, so that we can use the same code paths
as for statements that we've just created. */
if (i == STMT_VINFO_REDUC_IDX (stmt_info))
continue;
+ /* For an IFN_COND_OP we might hit the reduction definition operand
+ twice (once as definition, once as else). */
+ if (op.ops[i] == op.ops[STMT_VINFO_REDUC_IDX (stmt_info)])
+ continue;
+
/* There should be only one cycle def in the stmt, the one
- leading to reduc_def. */
+ leading to reduc_def. */
if (VECTORIZABLE_CYCLE_DEF (dt))
return false;
when generating the code inside the loop. */
code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info);
+
+ /* If-conversion might already have created a conditional operation like
+ IFN_COND_ADD. Use the underlying tree code for the following checks. */
+ if (orig_code.is_internal_fn ())
+ {
+ tree_code new_code = conditional_internal_fn_code (internal_fn (orig_code));
+ orig_code = new_code != ERROR_MARK ? new_code : orig_code;
+ }
+
STMT_VINFO_REDUC_CODE (reduc_info) = orig_code;
vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction: not commutative/associative");
+ "reduction: not commutative/associative\n");
return false;
}
}
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
else if (reduction_type == FOLD_LEFT_REDUCTION
- && reduc_fn == IFN_LAST
+ && internal_fn_mask_index (reduc_fn) == -1
&& FLOAT_TYPE_P (vectype_in)
- && HONOR_SIGNED_ZEROS (vectype_in)
&& HONOR_SIGN_DEPENDENT_ROUNDING (vectype_in))
{
if (dump_enabled_p ())
code_helper code = canonicalize_code (op.code, op.type);
internal_fn cond_fn = get_conditional_internal_fn (code, op.type);
+
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
if (code == COND_EXPR)
gcc_assert (ncopies == 1);
+ /* A binary COND_OP reduction must use its reduction definition as the
+ else value, i.e. op1 == op3. */
+ bool cond_fn_p = code.is_internal_fn ()
+ && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK;
+ if (cond_fn_p)
+ {
+ gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
+ || code == IFN_COND_MUL || code == IFN_COND_AND
+ || code == IFN_COND_IOR || code == IFN_COND_XOR);
+ gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3]));
+ }
+
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
if (reduction_type == FOLD_LEFT_REDUCTION)
{
internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
- gcc_assert (code.is_tree_code ());
+ gcc_assert (code.is_tree_code () || cond_fn_p);
return vectorize_fold_left_reduction
(loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi,
- tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, masks,
- lens);
+ code, reduc_fn, op.ops, op.num_ops, vectype_in,
+ reduc_index, masks, lens);
}
bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ /* Get NCOPIES vector definitions for all operands except the reduction
+ definition. */
vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
single_defuse_cycle && reduc_index == 0
? NULL_TREE : op.ops[0], &vec_oprnds0,
single_defuse_cycle && reduc_index == 1
? NULL_TREE : op.ops[1], &vec_oprnds1,
- op.num_ops == 3
- && !(single_defuse_cycle && reduc_index == 2)
+ op.num_ops == 4
+ || (op.num_ops == 3
+ && !(single_defuse_cycle && reduc_index == 2))
? op.ops[2] : NULL_TREE, &vec_oprnds2);
+
+ /* For single def-use cycles get one copy of the vectorized reduction
+ definition. */
if (single_defuse_cycle)
{
gcc_assert (!slp_node);
}
else
{
- if (op.num_ops == 3)
+ if (op.num_ops >= 3)
vop[2] = vec_oprnds2[i];
if (masked_loop_p && mask_by_cond_expr)
if (emulated_mixed_dot_prod)
new_stmt = vect_emulate_mixed_dot_prod (loop_vinfo, stmt_info, gsi,
vec_dest, vop);
- else if (code.is_internal_fn ())
+
+ else if (code.is_internal_fn () && !cond_fn_p)
new_stmt = gimple_build_call_internal (internal_fn (code),
op.num_ops,
vop[0], vop[1], vop[2]);
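+ /* For a COND_OP the reduction operand vop[1] is also passed as the else
+ value so that inactive lanes keep their previous value. */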
+ else if (code.is_internal_fn () && cond_fn_p)
+ new_stmt = gimple_build_call_internal (internal_fn (code),
+ op.num_ops,
+ vop[0], vop[1], vop[2],
+ vop[1]);
else
new_stmt = gimple_build_assign (vec_dest, tree_code (op.code),
vop[0], vop[1], vop[2]);
tree);
/* In tree-vect-loop.cc. */
-extern tree neutral_op_for_reduction (tree, code_helper, tree);
+extern tree neutral_op_for_reduction (tree, code_helper, tree, bool = true);
extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *);
/* Used in tree-vect-loop-manip.cc */