This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH 11/25] Simplify vec_merge according to the mask.
Andrew Stubbs <ams@codesourcery.com> writes:
> On 17/09/18 10:05, Richard Sandiford wrote:
>> Would be good to have self-tests for the new transforms.
> [...]
>> known_eq, since we require equality for correctness. Same for the
>> other tests.
>
> How about the attached? I've made the edits you requested and written
> some self-tests.
>
>> Doesn't simplify_merge_mask make the second two redundant? I couldn't
>> see the difference between them and the first condition tested by
>> simplify_merge_mask.
>
> Yes, I think you're right. Removed, now.
>
> Andrew
>
> Simplify vec_merge according to the mask.
>
> This patch was part of the original patch we acquired from Honza and Martin.
>
> It simplifies nested vec_merge operations using the same mask.
>
> Self-tests are included.
>
> 2018-09-20 Andrew Stubbs <ams@codesourcery.com>
> Jan Hubicka <jh@suse.cz>
> Martin Jambor <mjambor@suse.cz>
>
> * simplify-rtx.c (simplify_merge_mask): New function.
> (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
> same masks are used in op1 or op2.
> (test_vec_merge): New function.
> (test_vector_ops): Call test_vec_merge.
>
> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
> index f77e1aa..13b2882 100644
> --- a/gcc/simplify-rtx.c
> +++ b/gcc/simplify-rtx.c
> @@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
> return NULL_RTX;
> }
>
> +/* Try to simplify nested VEC_MERGE operations by comparing the masks. The
> + nested operations need not use the same vector mode, but must have the same
> + number of elements.
> +
> + X is an operand number OP of a VEC_MERGE operation with MASK.
> + Returns NULL_RTX if no simplification is possible. */
X isn't always operand OP; it can also be nested within it. How about:
/* Try to simplify X given that it appears within operand OP of a
VEC_MERGE operation whose mask is MASK. X need not use the same
vector mode as the VEC_MERGE, but it must have the same number of
elements.
Return the simplified X on success, otherwise return NULL_RTX. */
> +
> +rtx
> +simplify_merge_mask (rtx x, rtx mask, int op)
> +{
> + gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
> + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
> + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
> + {
> + if (!side_effects_p (XEXP (x, 1 - op)))
> + return XEXP (x, op);
> + }
> + if (side_effects_p (x))
> + return NULL_RTX;
> + if (UNARY_P (x)
> + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
> + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
> + {
> + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
> + if (top0)
> + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
> + GET_MODE (XEXP (x, 0)));
> + }
> + if (BINARY_P (x)
> + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
> + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
> + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
> + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
> + {
> + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
> + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
> + if (top0 || top1)
> + return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
> + top0 ? top0 : XEXP (x, 0),
> + top1 ? top1 : XEXP (x, 1));
> + }
> + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
> + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
> + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
> + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
> + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
> + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
> + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
> + {
> + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
> + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
> + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
> + if (top0 || top1)
|| top2? (Otherwise a simplification found only in operand 2 is dropped.)
> + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
> + GET_MODE (XEXP (x, 0)),
> + top0 ? top0 : XEXP (x, 0),
> + top1 ? top1 : XEXP (x, 1),
> + top2 ? top2 : XEXP (x, 2));
> + }
> + return NULL_RTX;
> +}
> +
>
> /* Simplify CODE, an operation with result mode MODE and three operands,
> OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became
> @@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
> && !side_effects_p (op2) && !side_effects_p (op1))
> return op0;
>
> + if (!side_effects_p (op2))
> + {
> + rtx top0 = simplify_merge_mask (op0, op2, 0);
> + rtx top1 = simplify_merge_mask (op1, op2, 1);
> + if (top0 || top1)
> + return simplify_gen_ternary (code, mode, mode,
> + top0 ? top0 : op0,
> + top1 ? top1 : op1, op2);
> + }
> +
> break;
>
> default:
> @@ -6932,6 +7004,71 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg)
> constm1_rtx));
> }
>
> +/* Verify simplify_merge_mask works correctly. */
> +
> +static void
> +test_vec_merge (machine_mode mode)
> +{
> + rtx op0 = make_test_reg (mode);
> + rtx op1 = make_test_reg (mode);
> + rtx op2 = make_test_reg (mode);
> + rtx op3 = make_test_reg (mode);
> + rtx op4 = make_test_reg (mode);
> + rtx op5 = make_test_reg (mode);
> + rtx mask1 = make_test_reg (SImode);
> + rtx mask2 = make_test_reg (SImode);
> + rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1);
> + rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1);
> + rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1);
> +
> + /* Simple vec_merge. */
> + ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));
> + ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));
> + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0));
> + ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1));
> +
> + /* Nested vec_merge. */
> + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);
> + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));
> + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));
I think the last two should simplify to op0 and op3, which I guess
means recursing on the result of the "return XEXP (x, op);".
> + /* Intermediate unary op. */
> + rtx unop = gen_rtx_NOT (mode, vm1);
> + ASSERT_EQ (op0, XEXP (simplify_merge_mask (unop, mask1, 0), 0));
> + ASSERT_EQ (op1, XEXP (simplify_merge_mask (unop, mask1, 1), 0));
> +
> + /* Intermediate binary op. */
> + rtx binop = gen_rtx_PLUS (mode, vm1, vm2);
> + rtx res = simplify_merge_mask (binop, mask1, 0);
> + ASSERT_EQ (op0, XEXP (res, 0));
> + ASSERT_EQ (op2, XEXP (res, 1));
> + res = simplify_merge_mask (binop, mask1, 1);
> + ASSERT_EQ (op1, XEXP (res, 0));
> + ASSERT_EQ (op3, XEXP (res, 1));
> +
> + /* Intermediate ternary op. */
> + rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3);
> + res = simplify_merge_mask (tenop, mask1, 0);
> + ASSERT_EQ (op0, XEXP (res, 0));
> + ASSERT_EQ (op2, XEXP (res, 1));
> + ASSERT_EQ (op4, XEXP (res, 2));
> + res = simplify_merge_mask (tenop, mask1, 1);
> + ASSERT_EQ (op1, XEXP (res, 0));
> + ASSERT_EQ (op3, XEXP (res, 1));
> + ASSERT_EQ (op5, XEXP (res, 2));
> [...]
> + /* Called indirectly. */
> + res = simplify_rtx (nvm);
> + ASSERT_EQ (op0, XEXP (res, 0));
> + ASSERT_EQ (op3, XEXP (res, 1));
It would probably be better to ASSERT_RTX_EQ against the full simplified rtx,
e.g. gen_rtx_NOT (mode, op0).
Thanks,
Richard