[PATCH] Fix avx512 vpermq (PR target/79812)
Uros Bizjak
ubizjak@gmail.com
Mon Mar 6 10:03:00 GMT 2017
On Fri, Mar 3, 2017 at 8:42 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> vpermq/vpermpd instructions for 512-bit vectors use bogus RTL, and if
> simplify-rtx.c happens to simplify it, we ICE.
> The problem is that for V8D[IF]mode VEC_SELECT we need to use a PARALLEL
> with 8 elements, not 4.
> The <avx512>_vec_dup<mode>_1 change is unrelated to this, spotted
> first by manual inspection and verified by the genrecog.c verify_pattern
> patch; the broadcast wants to broadcast the first element, so it should be
> a scalar vec_select in vec_duplicate, rather than a vec_select of the same
> vector mode as its operand.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-03-03 Jakub Jelinek <jakub@redhat.com>
>
> PR target/79812
> * config/i386/sse.md (VI8F_256_512): Remove mode iterator.
> (<avx2_avx512>_perm<mode>): Rename to ...
> (avx2_perm<mode>): ... this. Use VI8F_256 iterator instead
> of VI8F_256_512.
> (<avx512>_perm<mode>_mask): Rename to ...
> (avx512vl_perm<mode>_mask): ... this. Use VI8F_256 iterator instead
> of VI8F_256_512.
> (<avx2_avx512>_perm<mode>_1<mask_name>): Rename to ...
> (avx2_perm<mode>_1<mask_name>): ... this. Use VI8F_256 iterator
> instead of VI8F_256_512.
> (avx512f_perm<mode>): New define_expand.
> (avx512f_perm<mode>_mask): Likewise.
> (avx512f_perm<mode>_1<mask_name>): New define_insn.
> (<avx512>_vec_dup<mode>_1): Fix up vec_select mode.
>
> * gcc.target/i386/avx512f-vpermq-imm-3.c: New test.
LGTM.
Thanks,
Uros.
> --- gcc/config/i386/sse.md.jj 2017-03-02 10:19:07.000000000 +0100
> +++ gcc/config/i386/sse.md 2017-03-03 16:10:42.317111636 +0100
> @@ -549,8 +549,6 @@ (define_mode_iterator VI4F_128 [V4SI V4S
> (define_mode_iterator VI8F_128 [V2DI V2DF])
> (define_mode_iterator VI4F_256 [V8SI V8SF])
> (define_mode_iterator VI8F_256 [V4DI V4DF])
> -(define_mode_iterator VI8F_256_512
> - [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
> (define_mode_iterator VI48F_256_512
> [V8SI V8SF
> (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
> @@ -17306,43 +17304,43 @@ (define_insn "<avx512>_permvar<mode><mas
> (set_attr "prefix" "<mask_prefix2>")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "<avx2_avx512>_perm<mode>"
> - [(match_operand:VI8F_256_512 0 "register_operand")
> - (match_operand:VI8F_256_512 1 "nonimmediate_operand")
> +(define_expand "avx2_perm<mode>"
> + [(match_operand:VI8F_256 0 "register_operand")
> + (match_operand:VI8F_256 1 "nonimmediate_operand")
> (match_operand:SI 2 "const_0_to_255_operand")]
> "TARGET_AVX2"
> {
> int mask = INTVAL (operands[2]);
> - emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
> - GEN_INT ((mask >> 0) & 3),
> - GEN_INT ((mask >> 2) & 3),
> - GEN_INT ((mask >> 4) & 3),
> - GEN_INT ((mask >> 6) & 3)));
> + emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3)));
> DONE;
> })
>
> -(define_expand "<avx512>_perm<mode>_mask"
> - [(match_operand:VI8F_256_512 0 "register_operand")
> - (match_operand:VI8F_256_512 1 "nonimmediate_operand")
> +(define_expand "avx512vl_perm<mode>_mask"
> + [(match_operand:VI8F_256 0 "register_operand")
> + (match_operand:VI8F_256 1 "nonimmediate_operand")
> (match_operand:SI 2 "const_0_to_255_operand")
> - (match_operand:VI8F_256_512 3 "vector_move_operand")
> + (match_operand:VI8F_256 3 "vector_move_operand")
> (match_operand:<avx512fmaskmode> 4 "register_operand")]
> - "TARGET_AVX512F"
> + "TARGET_AVX512VL"
> {
> int mask = INTVAL (operands[2]);
> emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
> - GEN_INT ((mask >> 0) & 3),
> - GEN_INT ((mask >> 2) & 3),
> - GEN_INT ((mask >> 4) & 3),
> - GEN_INT ((mask >> 6) & 3),
> - operands[3], operands[4]));
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3),
> + operands[3], operands[4]));
> DONE;
> })
>
> -(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
> - [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
> - (vec_select:VI8F_256_512
> - (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
> +(define_insn "avx2_perm<mode>_1<mask_name>"
> + [(set (match_operand:VI8F_256 0 "register_operand" "=v")
> + (vec_select:VI8F_256
> + (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
> (parallel [(match_operand 2 "const_0_to_3_operand")
> (match_operand 3 "const_0_to_3_operand")
> (match_operand 4 "const_0_to_3_operand")
> @@ -17361,6 +17359,77 @@ (define_insn "<avx2_avx512>_perm<mode>_1
> (set_attr "prefix" "<mask_prefix2>")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_expand "avx512f_perm<mode>"
> + [(match_operand:V8FI 0 "register_operand")
> + (match_operand:V8FI 1 "nonimmediate_operand")
> + (match_operand:SI 2 "const_0_to_255_operand")]
> + "TARGET_AVX512F"
> +{
> + int mask = INTVAL (operands[2]);
> + emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3),
> + GEN_INT (((mask >> 0) & 3) + 4),
> + GEN_INT (((mask >> 2) & 3) + 4),
> + GEN_INT (((mask >> 4) & 3) + 4),
> + GEN_INT (((mask >> 6) & 3) + 4)));
> + DONE;
> +})
> +
> +(define_expand "avx512f_perm<mode>_mask"
> + [(match_operand:V8FI 0 "register_operand")
> + (match_operand:V8FI 1 "nonimmediate_operand")
> + (match_operand:SI 2 "const_0_to_255_operand")
> + (match_operand:V8FI 3 "vector_move_operand")
> + (match_operand:<avx512fmaskmode> 4 "register_operand")]
> + "TARGET_AVX512F"
> +{
> + int mask = INTVAL (operands[2]);
> + emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
> + GEN_INT ((mask >> 0) & 3),
> + GEN_INT ((mask >> 2) & 3),
> + GEN_INT ((mask >> 4) & 3),
> + GEN_INT ((mask >> 6) & 3),
> + GEN_INT (((mask >> 0) & 3) + 4),
> + GEN_INT (((mask >> 2) & 3) + 4),
> + GEN_INT (((mask >> 4) & 3) + 4),
> + GEN_INT (((mask >> 6) & 3) + 4),
> + operands[3], operands[4]));
> + DONE;
> +})
> +
> +(define_insn "avx512f_perm<mode>_1<mask_name>"
> + [(set (match_operand:V8FI 0 "register_operand" "=v")
> + (vec_select:V8FI
> + (match_operand:V8FI 1 "nonimmediate_operand" "vm")
> + (parallel [(match_operand 2 "const_0_to_3_operand")
> + (match_operand 3 "const_0_to_3_operand")
> + (match_operand 4 "const_0_to_3_operand")
> + (match_operand 5 "const_0_to_3_operand")
> + (match_operand 6 "const_4_to_7_operand")
> + (match_operand 7 "const_4_to_7_operand")
> + (match_operand 8 "const_4_to_7_operand")
> + (match_operand 9 "const_4_to_7_operand")])))]
> + "TARGET_AVX512F && <mask_mode512bit_condition>
> + && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
> + && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
> + && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
> + && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
> +{
> + int mask = 0;
> + mask |= INTVAL (operands[2]) << 0;
> + mask |= INTVAL (operands[3]) << 2;
> + mask |= INTVAL (operands[4]) << 4;
> + mask |= INTVAL (operands[5]) << 6;
> + operands[2] = GEN_INT (mask);
> + return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
> +}
> + [(set_attr "type" "sselog")
> + (set_attr "prefix" "<mask_prefix2>")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_insn "avx2_permv2ti"
> [(set (match_operand:V4DI 0 "register_operand" "=x")
> (unspec:V4DI
> @@ -17389,7 +17458,7 @@ (define_insn "avx2_vec_dupv4df"
> (define_insn "<avx512>_vec_dup<mode>_1"
> [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
> (vec_duplicate:VI_AVX512BW
> - (vec_select:VI_AVX512BW
> + (vec_select:<ssescalarmode>
> (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
> (parallel [(const_int 0)]))))]
> "TARGET_AVX512F"
> --- gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c.jj 2017-03-03 16:13:19.852037848 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c 2017-03-03 16:14:14.952312508 +0100
> @@ -0,0 +1,5 @@
> +/* PR target/79812 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512f" } */
> +
> +#include "avx512f-vpermq-imm-2.c"
>
> Jakub
More information about the Gcc-patches
mailing list