This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix up vectorizable_condition for comparisons of 2 booleans (PR tree-optimization/78938)
- From: Richard Biener <rguenther at suse dot de>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 9 Jan 2017 10:22:23 +0100 (CET)
- Subject: Re: [PATCH] Fix up vectorizable_condition for comparisons of 2 booleans (PR tree-optimization/78938)
- Authentication-results: sourceware.org; auth=none
- References: <20170105211928.GE21933@tucnak>
On Thu, 5 Jan 2017, Jakub Jelinek wrote:
> Hi!
>
> As mentioned in the PR, while vectorizable_comparison has code to deal
> with comparison of 2 booleans by transorming those into one or two
> BIT_*_EXPR operations that work equally well on normal vectors as well
> as the AVX512 bitset masks, vectorizable_comparison lacks that and we
> ICE during expansion because of that.
> The following patch teaches vectorizable_condition to do that too.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
Richard.
> 2017-01-05 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/78938
> * tree-vect-stmts.c (vectorizable_condition): For non-masked COND_EXPR
> where comp_vectype is VECTOR_BOOLEAN_TYPE_P, use
> BIT_{NOT,XOR,AND,IOR}_EXPR on the comparison operands instead of
> {EQ,NE,GE,GT,LE,LT}_EXPR directly inside of VEC_COND_EXPR. Formatting
> fixes.
>
> * gcc.dg/vect/pr78938.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj 2017-01-01 12:45:39.000000000 +0100
> +++ gcc/tree-vect-stmts.c 2017-01-05 15:54:41.075218409 +0100
> @@ -7731,7 +7731,8 @@ vectorizable_condition (gimple *stmt, gi
> {
> tree scalar_dest = NULL_TREE;
> tree vec_dest = NULL_TREE;
> - tree cond_expr, then_clause, else_clause;
> + tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
> + tree then_clause, else_clause;
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> tree comp_vectype = NULL_TREE;
> tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
> @@ -7741,7 +7742,7 @@ vectorizable_condition (gimple *stmt, gi
> loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> enum vect_def_type dt, dts[4];
> int ncopies;
> - enum tree_code code;
> + enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
> stmt_vec_info prev_stmt_info = NULL;
> int i, j;
> bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> @@ -7825,11 +7826,76 @@ vectorizable_condition (gimple *stmt, gi
> if (vec_cmp_type == NULL_TREE)
> return false;
>
> + cond_code = TREE_CODE (cond_expr);
> + if (!masked)
> + {
> + cond_expr0 = TREE_OPERAND (cond_expr, 0);
> + cond_expr1 = TREE_OPERAND (cond_expr, 1);
> + }
> +
> + if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
> + {
> + /* Boolean values may have another representation in vectors
> + and therefore we prefer bit operations over comparison for
> + them (which also works for scalar masks). We store opcodes
> + to use in bitop1 and bitop2. Statement is vectorized as
> + BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
> + depending on bitop1 and bitop2 arity. */
> + switch (cond_code)
> + {
> + case GT_EXPR:
> + bitop1 = BIT_NOT_EXPR;
> + bitop2 = BIT_AND_EXPR;
> + break;
> + case GE_EXPR:
> + bitop1 = BIT_NOT_EXPR;
> + bitop2 = BIT_IOR_EXPR;
> + break;
> + case LT_EXPR:
> + bitop1 = BIT_NOT_EXPR;
> + bitop2 = BIT_AND_EXPR;
> + std::swap (cond_expr0, cond_expr1);
> + break;
> + case LE_EXPR:
> + bitop1 = BIT_NOT_EXPR;
> + bitop2 = BIT_IOR_EXPR;
> + std::swap (cond_expr0, cond_expr1);
> + break;
> + case NE_EXPR:
> + bitop1 = BIT_XOR_EXPR;
> + break;
> + case EQ_EXPR:
> + bitop1 = BIT_XOR_EXPR;
> + bitop2 = BIT_NOT_EXPR;
> + break;
> + default:
> + return false;
> + }
> + cond_code = SSA_NAME;
> + }
> +
> if (!vec_stmt)
> {
> STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
> + if (bitop1 != NOP_EXPR)
> + {
> + machine_mode mode = TYPE_MODE (comp_vectype);
> + optab optab;
> +
> + optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
> + if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
> + return false;
> +
> + if (bitop2 != NOP_EXPR)
> + {
> + optab = optab_for_tree_code (bitop2, comp_vectype,
> + optab_default);
> + if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
> + return false;
> + }
> + }
> return expand_vec_cond_expr_p (vectype, comp_vectype,
> - TREE_CODE (cond_expr));
> + cond_code);
> }
>
> /* Transform. */
> @@ -7858,11 +7924,11 @@ vectorizable_condition (gimple *stmt, gi
> auto_vec<vec<tree>, 4> vec_defs;
>
> if (masked)
> - ops.safe_push (cond_expr);
> + ops.safe_push (cond_expr);
> else
> {
> - ops.safe_push (TREE_OPERAND (cond_expr, 0));
> - ops.safe_push (TREE_OPERAND (cond_expr, 1));
> + ops.safe_push (cond_expr0);
> + ops.safe_push (cond_expr1);
> }
> ops.safe_push (then_clause);
> ops.safe_push (else_clause);
> @@ -7886,17 +7952,15 @@ vectorizable_condition (gimple *stmt, gi
> }
> else
> {
> - vec_cond_lhs =
> - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
> - stmt, comp_vectype);
> - vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
> - loop_vinfo, >emp, &dts[0]);
> -
> - vec_cond_rhs =
> - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
> - stmt, comp_vectype);
> - vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
> - loop_vinfo, >emp, &dts[1]);
> + vec_cond_lhs
> + = vect_get_vec_def_for_operand (cond_expr0,
> + stmt, comp_vectype);
> + vect_is_simple_use (cond_expr0, loop_vinfo, >emp, &dts[0]);
> +
> + vec_cond_rhs
> + = vect_get_vec_def_for_operand (cond_expr1,
> + stmt, comp_vectype);
> + vect_is_simple_use (cond_expr1, loop_vinfo, >emp, &dts[1]);
> }
> if (reduc_index == 1)
> vec_then_clause = reduc_def;
> @@ -7953,8 +8017,37 @@ vectorizable_condition (gimple *stmt, gi
> else
> {
> vec_cond_rhs = vec_oprnds1[i];
> - vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
> - vec_cond_lhs, vec_cond_rhs);
> + if (bitop1 == NOP_EXPR)
> + vec_compare = build2 (cond_code, vec_cmp_type,
> + vec_cond_lhs, vec_cond_rhs);
> + else
> + {
> + new_temp = make_ssa_name (vec_cmp_type);
> + if (bitop1 == BIT_NOT_EXPR)
> + new_stmt = gimple_build_assign (new_temp, bitop1,
> + vec_cond_rhs);
> + else
> + new_stmt
> + = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
> + vec_cond_rhs);
> + vect_finish_stmt_generation (stmt, new_stmt, gsi);
> + if (bitop2 == NOP_EXPR)
> + vec_compare = new_temp;
> + else if (bitop2 == BIT_NOT_EXPR)
> + {
> + /* Instead of doing ~x ? y : z do x ? z : y. */
> + vec_compare = new_temp;
> + std::swap (vec_then_clause, vec_else_clause);
> + }
> + else
> + {
> + vec_compare = make_ssa_name (vec_cmp_type);
> + new_stmt
> + = gimple_build_assign (vec_compare, bitop2,
> + vec_cond_lhs, new_temp);
> + vect_finish_stmt_generation (stmt, new_stmt, gsi);
> + }
> + }
> }
> new_temp = make_ssa_name (vec_dest);
> new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
> --- gcc/testsuite/gcc.dg/vect/pr78938.c.jj 2017-01-05 16:06:37.854958230 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr78938.c 2017-01-05 16:07:04.770609976 +0100
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/78938 */
> +/* { dg-do compile } */
> +/* { dg-additional-options "-mavx512bw" { target i?86-*-* x86_64-*-* } } */
> +
> +short int v;
> +
> +int
> +foo (char x, int *b)
> +{
> + int a = 1;
> + for (; x < 1; x++)
> + {
> + int c = *b != v;
> + int d = x != 0;
> + a = c == d ? 2 : 0;
> + }
> + return a;
> +}
>
> Jakub
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)