[PATCH] Fix broadcast from scalar patterns (PR target/63594)

Uros Bizjak ubizjak@gmail.com
Tue Dec 9 08:49:00 GMT 2014


On Mon, Dec 8, 2014 at 10:42 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch attempts to fix
> (set (reg:V*<mode>) (vec_duplicate:V*<mode> (reg/mem:<mode>)))
> patterns.  One issue is that there were separate patterns for
> broadcast from gpr and separate patterns for broadcast from memory
> (and vector reg), that isn't a good idea for reload, which can't then
> freely choose.  Another issue is that some pre-AVX2 broadcast patterns
> were present above the avx512vl broadcast patterns, so again, reload didn't
> have the possibility to use %xmm16-31/%ymm16-31 registers.  Also, the
> splitter written for AVX2 broadcasts from gpr got in the way of AVX512VL
> broadcasts.  And finally, the avx512*intrin.h headers were using
> #ifdef TARGET_64BIT, a macro not used anywhere (probably meant to write
> __x86_64__ instead), but with the patch we actually just have one set of
> builtins.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2014-12-08  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/63594
>         * config/i386/sse.md (vec_dupv4sf): Move after
>         <mask_codefor><avx512>_vec_dup_gpr<mode><mask_name> pattern.
>         (*vec_dupv4si, *vec_dupv2di): Likewise.
>         (<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>): Merge into ...
>         (<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): ... this
>         pattern.
>         (*vec_dup<mode> AVX2_VEC_DUP_MODE splitter): Disable for
>         TARGET_AVX512VL (for QI/HI scalar modes only if TARGET_AVX512BW
>         is set too).
>         * config/i386/i386.c (enum ix86_builtins): Remove
>         IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
>         IX86_BUILTIN_PBROADCASTQ128_MEM_MASK and
>         IX86_BUILTIN_PBROADCASTQ512_MEM.
>         (bdesc_args): Use __builtin_ia32_pbroadcastq512_gpr_mask,
>         __builtin_ia32_pbroadcastq256_gpr_mask and
>         __builtin_ia32_pbroadcastq128_gpr_mask instead of *_mem_mask
>         regardless of OPTION_MASK_ISA_64BIT.
>         * config/i386/avx512fintrin.h (_mm512_set1_epi64,
>         _mm512_mask_set1_epi64, _mm512_maskz_set1_epi64): Use *_gpr_mask
>         builtins regardless of whether TARGET_64BIT is defined or not.
>         * config/i386/avx512vlintrin.h (_mm256_mask_set1_epi64,
>         _mm256_maskz_set1_epi64, _mm_mask_set1_epi64, _mm_maskz_set1_epi64):
>         Likewise.

LGTM, but please see inline comment below.

> --- gcc/config/i386/sse.md.jj   2014-12-03 11:52:41.000000000 +0100
> +++ gcc/config/i386/sse.md      2014-12-08 13:26:06.505543457 +0100
> @@ -6319,22 +6319,6 @@ (define_insn "avx512f_vec_dup<mode>_1"
>      (set_attr "prefix" "evex")
>      (set_attr "mode" "<MODE>")])
>
> -(define_insn "vec_dupv4sf"
> -  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
> -       (vec_duplicate:V4SF
> -         (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
> -  "TARGET_SSE"
> -  "@
> -   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
> -   vbroadcastss\t{%1, %0|%0, %1}
> -   shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "avx,avx,noavx")
> -   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "vex,vex,orig")
> -   (set_attr "mode" "V4SF")])
> -
>  ;; Although insertps takes register source, we prefer
>  ;; unpcklps with register source since it is shorter.
>  (define_insn "*vec_concatv2sf_sse4_1"
> @@ -12821,37 +12805,6 @@ (define_split
>    operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
>  })
>
> -(define_insn "*vec_dupv4si"
> -  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
> -       (vec_duplicate:V4SI
> -         (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
> -  "TARGET_SSE"
> -  "@
> -   %vpshufd\t{$0, %1, %0|%0, %1, 0}
> -   vbroadcastss\t{%1, %0|%0, %1}
> -   shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "sse2,avx,noavx")
> -   (set_attr "type" "sselog1,ssemov,sselog1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "maybe_vex,vex,orig")
> -   (set_attr "mode" "TI,V4SF,V4SF")])
> -
> -(define_insn "*vec_dupv2di"
> -  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
> -       (vec_duplicate:V2DI
> -         (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
> -  "TARGET_SSE"
> -  "@
> -   punpcklqdq\t%0, %0
> -   vpunpcklqdq\t{%d1, %0|%0, %d1}
> -   %vmovddup\t{%1, %0|%0, %1}
> -   movlhps\t%0, %0"
> -  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
> -   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
> -   (set_attr "prefix" "orig,vex,maybe_vex,orig")
> -   (set_attr "mode" "TI,TI,DF,V4SF")])
> -
>  (define_insn "*vec_concatv2si_sse4_1"
>    [(set (match_operand:V2SI 0 "register_operand"     "=Yr,*x,x, Yr,*x,x, x, *y,*y")
>         (vec_concat:V2SI
> @@ -16665,46 +16618,78 @@ (define_insn "<mask_codefor>avx512f_broa
>     (set_attr "mode" "<sseinsnmode>")])
>
>  (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
> -  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
> +  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
>         (vec_duplicate:VI12_AVX512VL
> -         (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
> +         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
>    "TARGET_AVX512BW"
> -  "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
> +  "@
> +   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
> +   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
>  (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
> -  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
> -       (vec_duplicate:VI48_AVX512VL
> -         (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
> -  "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
> -{
> -  return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
> -}
> -  [(set_attr "type" "ssemov")
> -   (set_attr "prefix" "evex")
> -   (set_attr "mode" "<sseinsnmode>")])
> -
> -(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
> -  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
> -       (vec_duplicate:V48_AVX512VL
> -         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
> +  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
> +       (vec_duplicate:V48_AVX512VL
> +         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
>    "TARGET_AVX512F"
>    "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
> -   (set_attr "mode" "<sseinsnmode>")])
> +   (set_attr "mode" "<sseinsnmode>")
> +   (set (attr "enabled")
> +     (if_then_else (eq_attr "alternative" "1")
> +       (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
> +                    && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
> +       (const_int 1)))])
>
> -(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
> -  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
> -       (vec_duplicate:VI12_AVX512VL
> -         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
> -  "TARGET_AVX512BW"
> -  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
> -  [(set_attr "type" "ssemov")
> -   (set_attr "prefix" "evex")
> -   (set_attr "mode" "<sseinsnmode>")])
> +(define_insn "vec_dupv4sf"
> +  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
> +       (vec_duplicate:V4SF
> +         (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
> +  "TARGET_SSE"
> +  "@
> +   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
> +   vbroadcastss\t{%1, %0|%0, %1}
> +   shufps\t{$0, %0, %0|%0, %0, 0}"
> +  [(set_attr "isa" "avx,avx,noavx")
> +   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
> +   (set_attr "length_immediate" "1,0,1")
> +   (set_attr "prefix_extra" "0,1,*")
> +   (set_attr "prefix" "vex,vex,orig")
> +   (set_attr "mode" "V4SF")])
> +
> +(define_insn "*vec_dupv4si"
> +  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
> +       (vec_duplicate:V4SI
> +         (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
> +  "TARGET_SSE"
> +  "@
> +   %vpshufd\t{$0, %1, %0|%0, %1, 0}
> +   vbroadcastss\t{%1, %0|%0, %1}
> +   shufps\t{$0, %0, %0|%0, %0, 0}"
> +  [(set_attr "isa" "sse2,avx,noavx")
> +   (set_attr "type" "sselog1,ssemov,sselog1")
> +   (set_attr "length_immediate" "1,0,1")
> +   (set_attr "prefix_extra" "0,1,*")
> +   (set_attr "prefix" "maybe_vex,vex,orig")
> +   (set_attr "mode" "TI,V4SF,V4SF")])
> +
> +(define_insn "*vec_dupv2di"
> +  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
> +       (vec_duplicate:V2DI
> +         (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
> +  "TARGET_SSE"
> +  "@
> +   punpcklqdq\t%0, %0
> +   vpunpcklqdq\t{%d1, %0|%0, %d1}
> +   %vmovddup\t{%1, %0|%0, %1}
> +   movlhps\t%0, %0"
> +  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
> +   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
> +   (set_attr "prefix" "orig,vex,maybe_vex,orig")
> +   (set_attr "mode" "TI,TI,DF,V4SF")])
>
>  (define_insn "avx2_vbroadcasti128_<mode>"
>    [(set (match_operand:VI_256 0 "register_operand" "=x")
> @@ -16759,7 +16744,10 @@ (define_split
>    [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
>         (vec_duplicate:AVX2_VEC_DUP_MODE
>           (match_operand:<ssescalarmode> 1 "register_operand")))]
> -  "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])"
> +  "TARGET_AVX2
> +   && (!TARGET_AVX512VL
> +       || (!TARGET_AVX512BW && GET_MODE_SIZE (<ssescalarmode>mode) > 2))
> +   && reload_completed && GENERAL_REG_P (operands[1])"
>    [(const_int 0)]

We would like to avoid a convoluted insn enable condition by moving the
target-related complexity to the mode iterator, even if it requires
an additional single-use mode iterator. In the ideal case, the remaining
target-dependent condition would represent the baseline target for an
insn and all other target-related conditions would be inside the mode
iterator.

>  {
>    emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> --- gcc/config/i386/i386.c.jj   2014-12-08 10:57:17.000000000 +0100
> +++ gcc/config/i386/i386.c      2014-12-08 12:18:51.377459354 +0100
> @@ -28819,7 +28819,6 @@ enum ix86_builtins
>    IX86_BUILTIN_PBROADCASTMW512,
>    IX86_BUILTIN_PBROADCASTQ512,
>    IX86_BUILTIN_PBROADCASTQ512_GPR,
> -  IX86_BUILTIN_PBROADCASTQ512_MEM,
>    IX86_BUILTIN_PCMPEQD512_MASK,
>    IX86_BUILTIN_PCMPEQQ512_MASK,
>    IX86_BUILTIN_PCMPGTD512_MASK,
> @@ -29257,10 +29256,8 @@ enum ix86_builtins
>    IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
>    IX86_BUILTIN_PBROADCASTQ256_MASK,
>    IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
> -  IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
>    IX86_BUILTIN_PBROADCASTQ128_MASK,
>    IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
> -  IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
>    IX86_BUILTIN_BROADCASTSS256,
>    IX86_BUILTIN_BROADCASTSS128,
>    IX86_BUILTIN_BROADCASTSD256,
> @@ -31799,8 +31796,7 @@ static const struct builtin_description
>    { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
>    { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
>    { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
> -  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
> -  { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
> +  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
>    { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
>    { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
>    { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
> @@ -32074,11 +32070,9 @@ static const struct builtin_description
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
> -  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
> -  { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
> +  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
> -  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
> -  { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
> +  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
>    { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
> --- gcc/config/i386/avx512fintrin.h.jj  2014-11-18 08:26:47.000000000 +0100
> +++ gcc/config/i386/avx512fintrin.h     2014-12-08 13:07:40.657521773 +0100
> @@ -3603,47 +3603,28 @@ extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_set1_epi64 (long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
>                                                            (__v8di)
>                                                            _mm512_undefined_si512 (),
>                                                            (__mmask8)(-1));
> -#else
> -  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
> -                                                          (__v8di)
> -                                                          _mm512_undefined_si512 (),
> -                                                          (__mmask8)(-1));
> -#endif
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
>                                                            __M);
> -#else
> -  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
> -                                                          __M);
> -#endif
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m512i)
>          __builtin_ia32_pbroadcastq512_gpr_mask (__A,
>                                                  (__v8di) _mm512_setzero_si512 (),
>                                                  __M);
> -#else
> -  return (__m512i)
> -        __builtin_ia32_pbroadcastq512_mem_mask (__A,
> -                                                (__v8di) _mm512_setzero_si512 (),
> -                                                __M);
> -#endif
>  }
>
>  extern __inline __m512
> --- gcc/config/i386/avx512vlintrin.h.jj 2014-11-11 00:06:22.000000000 +0100
> +++ gcc/config/i386/avx512vlintrin.h    2014-12-08 12:20:06.498102723 +0100
> @@ -2642,30 +2642,18 @@ extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
>                                                            __M);
> -#else
> -  return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
> -                                                          __M);
> -#endif
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
>                                                            (__v4di)
>                                                            _mm256_setzero_si256 (),
>                                                            __M);
> -#else
> -  return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
> -                                                          (__v4di)
> -                                                          _mm256_setzero_si256 (),
> -                                                          __M);
> -#endif
>  }
>
>  extern __inline __m128i
> @@ -2691,30 +2679,18 @@ extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
>                                                            __M);
> -#else
> -  return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
> -                                                          __M);
> -#endif
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
>  {
> -#ifdef TARGET_64BIT
>    return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
>                                                            (__v2di)
>                                                            _mm_setzero_si128 (),
>                                                            __M);
> -#else
> -  return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
> -                                                          (__v2di)
> -                                                          _mm_setzero_si128 (),
> -                                                          __M);
> -#endif
>  }
>
>  extern __inline __m256
>
>         Jakub



More information about the Gcc-patches mailing list