This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH i386 AVX512] [41/n] Extend extract insn patterns.


On Tue, Sep 16, 2014 at 7:26 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello,
> This patch extends extract insn patterns.
> It also fixes an ICE in the testsuite when F16C is switched off.
> It also fixes the condition in the old extract pattern.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
>         * config/i386/i386.c
>         (ix86_expand_vector_extract): Handle V32HI and V64QI modes.
>         * config/i386/sse.md
>         (define_mode_iterator VI48F_256): New.
>         (define_mode_attr extract_type): Ditto.
>         (define_mode_attr extract_suf): Ditto.
>         (define_mode_iterator AVX512_VEC): Ditto.
>         (define_expand
>         "<extract_type>_vextract<shuffletype><extract_suf>_mask"): Use
>         AVX512_VEC.
>         (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"): New.
>         (define_insn
>         "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"):
>         Ditto.
>         (define_mode_attr extract_type_2): Ditto.
>         (define_mode_attr extract_suf_2): Ditto.
>         (define_mode_iterator AVX512_VEC_2): Ditto.
>         (define_expand
>         "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"): Use
>         AVX512_VEC_2 mode iterator.
>         (define_insn "vec_extract_hi_<mode>_maskm"): Ditto.
>         (define_expand "avx512vl_vextractf128<mode>"): Ditto.
>         (define_insn_and_split "vec_extract_lo_<mode>"): Delete.
>         (define_insn "vec_extract_lo_<mode><mask_name>"): New.
>         (define_split for V16FI mode): Ditto.
>         (define_insn_and_split "vec_extract_lo_<mode>"): Delete.
>         (define_insn "vec_extract_lo_<mode><mask_name>"): New.
>         (define_split for VI8F_256 mode): Ditto.
>         (define_insn "vec_extract_hi_<mode><mask_name>"): Add masking.
>         (define_insn_and_split "vec_extract_lo_<mode>"): Delete.
>         (define_insn "vec_extract_lo_<mode><mask_name>"): New.
>         (define_split for VI4F_256 mode): Ditto.
>         (define_insn "vec_extract_lo_<mode>_maskm"): Ditto.
>         (define_insn "vec_extract_hi_<mode>_maskm"): Ditto.
>         (define_insn "vec_extract_hi_<mode><mask_name>"): Add masking.
>         (define_mode_iterator VEC_EXTRACT_MODE): Add V64QI and V32HI modes.
>         (define_insn "vcvtph2ps<mask_name>"): Fix pattern condition.
>         (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"): Ditto.
>
> --
> Thanks, K
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 30120a5..ccfd47d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -40979,6 +40979,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
>         }
>        break;
>
> +    case V32HImode:
> +      if (TARGET_AVX512BW)
> +       {
> +         tmp = gen_reg_rtx (V16HImode);
> +         if (elt < 16)
> +           emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
> +         else
> +           emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
> +         ix86_expand_vector_extract (false, target, tmp, elt & 15);
> +         return;
> +       }
> +      break;
> +
> +    case V64QImode:
> +      if (TARGET_AVX512BW)
> +       {
> +         tmp = gen_reg_rtx (V32QImode);
> +         if (elt < 32)
> +           emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
> +         else
> +           emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
> +         ix86_expand_vector_extract (false, target, tmp, elt & 31);
> +         return;
> +       }
> +      break;
> +
>      case V16SFmode:
>        tmp = gen_reg_rtx (V8SFmode);
>        if (elt < 8)
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index bd321fc..0e21031 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -534,6 +534,7 @@
>     (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
>     (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
>     (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
> +(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
>
>  ;; Mapping from float mode to required SSE level
>  (define_mode_attr sse
> @@ -6319,44 +6320,64 @@
>    operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
>  })
>
> -(define_expand "avx512f_vextract<shuffletype>32x4_mask"
> +(define_mode_attr extract_type
> +  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
> +
> +(define_mode_attr extract_suf
> +  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
> +
> +(define_mode_iterator AVX512_VEC
> +  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
> +
> +(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
>    [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
> -   (match_operand:V16FI 1 "register_operand")
> +   (match_operand:AVX512_VEC 1 "register_operand")
>     (match_operand:SI 2 "const_0_to_3_operand")
>     (match_operand:<ssequartermode> 3 "nonimmediate_operand")
>     (match_operand:QI 4 "register_operand")]
>    "TARGET_AVX512F"
>  {
> +  int mask;
> +  mask = INTVAL (operands[2]);
> +
>    if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
>      operands[0] = force_reg (<ssequartermode>mode, operands[0]);
> -  switch (INTVAL (operands[2]))
> -    {
> -    case 0:
> -      emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
> -          operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
> -          GEN_INT (3), operands[3], operands[4]));
> -      break;
> -    case 1:
> -      emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
> -          operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
> -          GEN_INT (7), operands[3], operands[4]));
> -      break;
> -    case 2:
> -      emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
> -          operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
> -          GEN_INT (11), operands[3], operands[4]));
> -      break;
> -    case 3:
> -      emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
> -          operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
> -          GEN_INT (15), operands[3], operands[4]));
> -      break;
> -    default:
> -      gcc_unreachable ();
> -    }
> +
> +  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
> +    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
> +        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
> +       GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
> +       operands[4]));
> +  else
> +    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
> +        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
> +       operands[4]));
>    DONE;
>  })
>
> +(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
> +  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
> +       (vec_merge:<ssequartermode>
> +         (vec_select:<ssequartermode>
> +           (match_operand:V8FI 1 "register_operand" "v")
> +           (parallel [(match_operand 2  "const_0_to_7_operand")
> +             (match_operand 3  "const_0_to_7_operand")]))
> +         (match_operand:<ssequartermode> 4 "memory_operand" "0")
> +         (match_operand:QI 5 "register_operand" "k")))]
> +  "TARGET_AVX512DQ
> +   && (INTVAL (operands[2]) % 2 == 0)
> +   && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1 )"
> +{
> +  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
> +  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "memory" "store")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
>    [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
>         (vec_merge:<ssequartermode>
> @@ -6369,7 +6390,8 @@
>           (match_operand:<ssequartermode> 6 "memory_operand" "0")
>           (match_operand:QI 7 "register_operand" "Yk")))]
>    "TARGET_AVX512F
> -   && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
> +   && ((INTVAL (operands[2]) % 4 == 0)
> +       && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
>         && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
>         && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
>  {
> @@ -6383,6 +6405,27 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
> +  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
> +       (vec_select:<ssequartermode>
> +         (match_operand:V8FI 1 "register_operand" "v")
> +         (parallel [(match_operand 2  "const_0_to_7_operand")
> +            (match_operand 3  "const_0_to_7_operand")])))]
> +  "TARGET_AVX512DQ && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)"

Ouch, you have an assignment (=) instead of a comparison (==) here!

> +{
> +  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
> +  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "length_immediate" "1")
> +   (set (attr "memory")
> +      (if_then_else (match_test "MEM_P (operands[0])")
> +       (const_string "store")
> +       (const_string "none")))

Set the type attribute to sselog1 so that the memory attribute is
calculated automatically. Please see the definition of that attribute in i386.md.

> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
>    [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
>         (vec_select:<ssequartermode>
> @@ -6409,9 +6452,18 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "avx512f_vextract<shuffletype>64x4_mask"
> +(define_mode_attr extract_type_2
> +  [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
> +
> +(define_mode_attr extract_suf_2
> +  [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
> +
> +(define_mode_iterator AVX512_VEC_2
> +  [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
> +
> +(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
>    [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
> -   (match_operand:V8FI 1 "register_operand")
> +   (match_operand:AVX512_VEC_2 1 "register_operand")
>     (match_operand:SI 2 "const_0_to_1_operand")
>     (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
>     (match_operand:QI 4 "register_operand")]
> @@ -6467,7 +6519,7 @@
>           (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
>           (match_operand:QI 3 "register_operand" "Yk")))]
>    "TARGET_AVX512F"
> -"vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
> +  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
>    [(set_attr "type" "sselog")
>     (set_attr "prefix_extra" "1")
>     (set_attr "length_immediate" "1")
> @@ -6533,6 +6585,29 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn "vec_extract_hi_<mode>_maskm"
> +   [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
> +       (vec_merge:<ssehalfvecmode>
> +         (vec_select:<ssehalfvecmode>
> +           (match_operand:V16FI 1 "register_operand" "v")
> +           (parallel [(const_int 8) (const_int 9)
> +             (const_int 10) (const_int 11)
> +             (const_int 12) (const_int 13)
> +             (const_int 14) (const_int 15)]))
> +         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
> +         (match_operand:QI 3 "register_operand" "k")))]
> +  "TARGET_AVX512DQ"
> +  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "length_immediate" "1")
> +   (set (attr "memory")
> +      (if_then_else (match_test "MEM_P (operands[0])")
> +       (const_string "store")
> +       (const_string "none")))

Set the type to sselog1 and remove the memory attribute calculation (as above).

> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "vec_extract_hi_<mode><mask_name>"
>    [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
>         (vec_select:<ssehalfvecmode>
> @@ -6541,7 +6616,7 @@
>              (const_int 10) (const_int 11)
>             (const_int 12) (const_int 13)
>             (const_int 14) (const_int 15)])))]
> -  "TARGET_AVX512F && (!<mask_applied> || TARGET_AVX512DQ)"
> +  "TARGET_AVX512F && <mask_avx512dq_condition>"
>    "@
>     vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
>     vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
> @@ -6552,6 +6627,35 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> +(define_expand "avx512vl_vextractf128<mode>"
> +  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
> +   (match_operand:VI48F_256 1 "register_operand")
> +   (match_operand:SI 2 "const_0_to_1_operand")
> +   (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
> +   (match_operand:QI 4 "register_operand")]
> +  "TARGET_AVX512DQ && TARGET_AVX512VL"
> +{
> +  rtx (*insn)(rtx, rtx, rtx, rtx);
> +
> +  if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
> +    operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
> +
> +  switch (INTVAL (operands[2]))
> +    {
> +    case 0:
> +      insn = gen_vec_extract_lo_<mode>_mask;
> +      break;
> +    case 1:
> +      insn = gen_vec_extract_hi_<mode>_mask;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
> +  DONE;
> +})
> +
>  (define_expand "avx_vextractf128<mode>"
>    [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
>     (match_operand:V_256 1 "register_operand")
> @@ -6576,7 +6680,7 @@
>    DONE;
>  })
>
> -(define_insn_and_split "vec_extract_lo_<mode>"
> +(define_insn "vec_extract_lo_<mode><mask_name>"
>    [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
>         (vec_select:<ssehalfvecmode>
>           (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
> @@ -6584,11 +6688,28 @@
>                       (const_int 2) (const_int 3)
>                       (const_int 4) (const_int 5)
>                       (const_int 6) (const_int 7)])))]
> -  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> -  "#"
> -  "&& reload_completed"
> -  [(const_int 0)]
> +  "TARGET_AVX512F
> +   && <mask_mode512bit_condition>
> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
> +  if (<mask_applied>)
> +    return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
> +  else
> +    return "#";
> +})
> +
> +(define_split
> +  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
> +       (vec_select:<ssehalfvecmode>
> +         (match_operand:V16FI 1 "nonimmediate_operand")
> +         (parallel [(const_int 0) (const_int 1)
> +            (const_int 2) (const_int 3)
> +           (const_int 4) (const_int 5)
> +           (const_int 6) (const_int 7)])))]
> +  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
> +   && reload_completed"
> +   [(const_int 0)]
> + {
>    rtx op1 = operands[1];
>    if (REG_P (op1))
>      op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
> @@ -6598,29 +6719,57 @@
>    DONE;
>  })
>
> -(define_insn_and_split "vec_extract_lo_<mode>"
> -  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
> +(define_insn "vec_extract_lo_<mode><mask_name>"
> +  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
>         (vec_select:<ssehalfvecmode>
> -         (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
> +         (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
>           (parallel [(const_int 0) (const_int 1)])))]
> -  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 0) (match_dup 1))]
> +  "TARGET_AVX
> +   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
> -  if (REG_P (operands[1]))
> -    operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
> +  if (<mask_applied>)
> +    return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
> +  else
> +    return "#";
> +}
> +   [(set_attr "type" "sselog")
> +    (set_attr "prefix_extra" "1")
> +    (set_attr "length_immediate" "1")
> +    (set_attr "memory" "none,store")

Set the type to sselog1 and remove the memory attribute calculation.

> +    (set_attr "prefix" "evex")
> +    (set_attr "mode" "XI")])
> +
> +(define_split
> +  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
> +       (vec_select:<ssehalfvecmode>
> +         (match_operand:VI8F_256 1 "nonimmediate_operand")
> +         (parallel [(const_int 0) (const_int 1)])))]
> +  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
> +  && reload_completed"
> +   [(const_int 0)]
> +{
> +  rtx op1 = operands[1];
> +  if (REG_P (op1))
> +    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
>    else
> -    operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
> +    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
> +  emit_move_insn (operands[0], op1);
> +  DONE;
>  })
>
> -(define_insn "vec_extract_hi_<mode>"
> -  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
> +(define_insn "vec_extract_hi_<mode><mask_name>"
> +  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
>         (vec_select:<ssehalfvecmode>
> -         (match_operand:VI8F_256 1 "register_operand" "x,x")
> +         (match_operand:VI8F_256 1 "register_operand" "v,v")
>           (parallel [(const_int 2) (const_int 3)])))]
>    "TARGET_AVX"
> -  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
> +{
> +  if (TARGET_AVX512DQ && TARGET_AVX512VL)
> +    return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
> +  else
> +    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
> +}
>    [(set_attr "type" "sselog")
>     (set_attr "prefix_extra" "1")
>     (set_attr "length_immediate" "1")
> @@ -6628,36 +6777,106 @@
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn_and_split "vec_extract_lo_<mode>"
> -  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
> +(define_split
> +  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
>         (vec_select:<ssehalfvecmode>
> -         (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
> +         (match_operand:VI4F_256 1 "nonimmediate_operand")
>           (parallel [(const_int 0) (const_int 1)
>                      (const_int 2) (const_int 3)])))]
> -  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 0) (match_dup 1))]
> +  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
> +   [(const_int 0)]
>  {
> -  if (REG_P (operands[1]))
> -    operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
> +  rtx op1 = operands[1];
> +  if (REG_P (op1))
> +    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
>    else
> -    operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
> +    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
> +  emit_move_insn (operands[0], op1);
> +  DONE;
>  })
>
> -(define_insn "vec_extract_hi_<mode>"
> -  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
> +
> +(define_insn "vec_extract_lo_<mode><mask_name>"
> +  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
>         (vec_select:<ssehalfvecmode>
> -         (match_operand:VI4F_256 1 "register_operand" "x,x")
> +         (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
> +         (parallel [(const_int 0) (const_int 1)
> +                    (const_int 2) (const_int 3)])))]
> +  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
> +{
> +  if (<mask_applied>)
> +    return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
> +  else
> +    return "#";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "length_immediate" "1")
> +   (set (attr "memory")
> +      (if_then_else (match_test "MEM_P (operands[0])")
> +       (const_string "store")
> +       (const_string "none")))
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "vec_extract_lo_<mode>_maskm"
> +  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
> +       (vec_merge:<ssehalfvecmode>
> +         (vec_select:<ssehalfvecmode>
> +           (match_operand:VI4F_256 1 "register_operand" "v")
> +           (parallel [(const_int 0) (const_int 1)
> +                     (const_int 2) (const_int 3)]))
> +         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
> +         (match_operand:QI 3 "register_operand" "k")))]
> +  "TARGET_AVX512VL && TARGET_AVX512F"
> +  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{3%}|%0%{%3%}, %1, 0x0}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "vec_extract_hi_<mode>_maskm"
> +  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
> +       (vec_merge:<ssehalfvecmode>
> +         (vec_select:<ssehalfvecmode>
> +           (match_operand:VI4F_256 1 "register_operand" "v")
> +           (parallel [(const_int 4) (const_int 5)
> +                     (const_int 6) (const_int 7)]))
> +         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
> +         (match_operand:<ssehalfvecmode> 3 "register_operand" "k")))]
> +  "TARGET_AVX512F && TARGET_AVX512VL"
> +{
> +  return "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "memory" "store")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "vec_extract_hi_<mode><mask_name>"
> +  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
> +       (vec_select:<ssehalfvecmode>
> +         (match_operand:VI4F_256 1 "register_operand" "v")
>           (parallel [(const_int 4) (const_int 5)
>                      (const_int 6) (const_int 7)])))]
> -  "TARGET_AVX"
> -  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
> +  "TARGET_AVX && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512F))"

Please split the pattern to avoid overly complex insn constraints.

> +{
> +  if (TARGET_AVX512VL && TARGET_AVX512F)
> +    return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
> +  else
> +    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
> +}
>    [(set_attr "type" "sselog")
>     (set_attr "prefix_extra" "1")
>     (set_attr "length_immediate" "1")
> -   (set_attr "memory" "none,store")
> -   (set_attr "prefix" "vex")
> +   (set_attr "memory" "none")
> +   (set (attr "prefix")
> +     (if_then_else
> +       (match_test "TARGET_AVX512VL")
> +     (const_string "evex")
> +     (const_string "vex")))
>     (set_attr "mode" "<sseinsnmode>")])
>
>  (define_insn_and_split "vec_extract_lo_v32hi"
> @@ -6846,8 +7065,8 @@
>
>  ;; Modes handled by vec_extract patterns.
>  (define_mode_iterator VEC_EXTRACT_MODE
> -  [(V32QI "TARGET_AVX") V16QI
> -   (V16HI "TARGET_AVX") V8HI
> +  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
> +   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
>     (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
>     (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
>     (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
> @@ -16498,7 +16717,7 @@
>                         (match_operand:SI 2 "const_0_to_255_operand" "N")]
>                        UNSPEC_VCVTPS2PH)
>           (match_operand:V4HI 3 "const0_operand")))]
> -  "TARGET_F16C && <mask_avx512vl_condition>"
> +  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
>    "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
>    [(set_attr "type" "ssecvt")
>     (set_attr "prefix" "maybe_evex")
> diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
> index b05cb17..91228c8 100644
> --- a/gcc/config/i386/subst.md
> +++ b/gcc/config/i386/subst.md
> @@ -57,6 +57,7 @@
>  (define_subst_attr "mask_mode512bit_condition" "mask" "1" "(<MODE_SIZE> == 64 || TARGET_AVX512VL)")
>  (define_subst_attr "mask_avx512vl_condition" "mask" "1" "TARGET_AVX512VL")
>  (define_subst_attr "mask_avx512bw_condition" "mask" "1" "TARGET_AVX512BW")
> +(define_subst_attr "mask_avx512dq_condition" "mask" "1" "TARGET_AVX512DQ")
>  (define_subst_attr "store_mask_constraint" "mask" "vm" "v")
>  (define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
>  (define_subst_attr "mask_prefix" "mask" "vex" "evex")

Uros.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]