[PATCH] Fix up avx512vlintrin.h with -O0 (PR target/79932)

Uros Bizjak <ubizjak@gmail.com>
Thu Mar 9 08:38:00 GMT 2017


On Tue, Mar 7, 2017 at 7:54 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> I've preprocessed x86intrin.h with -O0 and -O2, both with -E -dD,
> gathered all inline function names starting with _mm at column zero
> and all #define'd _mm macro names, and compared the two sets.
> This revealed that 64 intrinsics in avx512vlintrin.h have a similar bug;
> no other problems were found.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-03-07  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/79932
>         * config/i386/avx512vlintrin.h (_mm256_cmpge_epi32_mask,
>         _mm256_cmpge_epi64_mask, _mm256_cmpge_epu32_mask,
>         _mm256_cmpge_epu64_mask, _mm256_cmple_epi32_mask,
>         _mm256_cmple_epi64_mask, _mm256_cmple_epu32_mask,
>         _mm256_cmple_epu64_mask, _mm256_cmplt_epi32_mask,
>         _mm256_cmplt_epi64_mask, _mm256_cmplt_epu32_mask,
>         _mm256_cmplt_epu64_mask, _mm256_cmpneq_epi32_mask,
>         _mm256_cmpneq_epi64_mask, _mm256_cmpneq_epu32_mask,
>         _mm256_cmpneq_epu64_mask, _mm256_mask_cmpge_epi32_mask,
>         _mm256_mask_cmpge_epi64_mask, _mm256_mask_cmpge_epu32_mask,
>         _mm256_mask_cmpge_epu64_mask, _mm256_mask_cmple_epi32_mask,
>         _mm256_mask_cmple_epi64_mask, _mm256_mask_cmple_epu32_mask,
>         _mm256_mask_cmple_epu64_mask, _mm256_mask_cmplt_epi32_mask,
>         _mm256_mask_cmplt_epi64_mask, _mm256_mask_cmplt_epu32_mask,
>         _mm256_mask_cmplt_epu64_mask, _mm256_mask_cmpneq_epi32_mask,
>         _mm256_mask_cmpneq_epi64_mask, _mm256_mask_cmpneq_epu32_mask,
>         _mm256_mask_cmpneq_epu64_mask, _mm_cmpge_epi32_mask,
>         _mm_cmpge_epi64_mask, _mm_cmpge_epu32_mask, _mm_cmpge_epu64_mask,
>         _mm_cmple_epi32_mask, _mm_cmple_epi64_mask, _mm_cmple_epu32_mask,
>         _mm_cmple_epu64_mask, _mm_cmplt_epi32_mask, _mm_cmplt_epi64_mask,
>         _mm_cmplt_epu32_mask, _mm_cmplt_epu64_mask, _mm_cmpneq_epi32_mask,
>         _mm_cmpneq_epi64_mask, _mm_cmpneq_epu32_mask, _mm_cmpneq_epu64_mask,
>         _mm_mask_cmpge_epi32_mask, _mm_mask_cmpge_epi64_mask,
>         _mm_mask_cmpge_epu32_mask, _mm_mask_cmpge_epu64_mask,
>         _mm_mask_cmple_epi32_mask, _mm_mask_cmple_epi64_mask,
>         _mm_mask_cmple_epu32_mask, _mm_mask_cmple_epu64_mask,
>         _mm_mask_cmplt_epi32_mask, _mm_mask_cmplt_epi64_mask,
>         _mm_mask_cmplt_epu32_mask, _mm_mask_cmplt_epu64_mask,
>         _mm_mask_cmpneq_epi32_mask, _mm_mask_cmpneq_epi64_mask,
>         _mm_mask_cmpneq_epu32_mask, _mm_mask_cmpneq_epu64_mask): Move
>         definitions outside of __OPTIMIZE__ guarded section.
>
>         * gcc.target/i386/pr79932-2.c: New test.

OK for trunk and backports.

Thanks,
Uros.

> --- gcc/config/i386/avx512vlintrin.h.jj 2017-01-17 18:40:59.000000000 +0100
> +++ gcc/config/i386/avx512vlintrin.h    2017-03-07 08:27:31.071641043 +0100
> @@ -9172,6 +9172,582 @@ _mm256_mask_permutexvar_epi32 (__m256i _
>                                                      __M);
>  }
>
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 4,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 4,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 1,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 1,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 5,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 5,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 2,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> +                                                 (__v8si) __Y, 2,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 4,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 4,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 1,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 1,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 5,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 5,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 2,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> +                                                 (__v4di) __Y, 2,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 4,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 4,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 1,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 1,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 5,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 5,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 2,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> +                                                (__v8si) __Y, 2,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 4,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 4,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 1,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 1,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 5,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 5,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 2,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> +                                                (__v4di) __Y, 2,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 4,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 4,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 1,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 1,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 5,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 5,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 2,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> +                                                 (__v4si) __Y, 2,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 4,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 4,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 1,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 1,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 5,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 5,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 2,
> +                                                 (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> +                                                 (__v2di) __Y, 2,
> +                                                 (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 4,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 4,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 1,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 1,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 5,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 5,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 2,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> +                                                (__v4si) __Y, 2,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 4,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 4,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 1,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 1,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 5,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 5,
> +                                                (__mmask8) -1);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 2,
> +                                                (__mmask8) __M);
> +}
> +
> +extern __inline __mmask8
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
> +{
> +  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> +                                                (__v2di) __Y, 2,
> +                                                (__mmask8) -1);
> +}
> +
>  #ifdef __OPTIMIZE__
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> @@ -11784,582 +12360,6 @@ _mm256_permutex_pd (__m256d __X, const i
>                                                   (__mmask8) -1);
>  }
>
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 4,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 4,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 1,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 1,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 5,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 5,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 2,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
> -                                                 (__v8si) __Y, 2,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 4,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 4,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 1,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 1,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 5,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 5,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 2,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
> -                                                 (__v4di) __Y, 2,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 4,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 4,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 1,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 1,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 5,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 5,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 2,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
> -                                                (__v8si) __Y, 2,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 4,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 4,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 1,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 1,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 5,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 5,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 2,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
> -                                                (__v4di) __Y, 2,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 4,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 4,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 1,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 1,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 5,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 5,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 2,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
> -                                                 (__v4si) __Y, 2,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 4,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 4,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 1,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 1,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 5,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 5,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 2,
> -                                                 (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
> -                                                 (__v2di) __Y, 2,
> -                                                 (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 4,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 4,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 1,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 1,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 5,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 5,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 2,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
> -                                                (__v4si) __Y, 2,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 4,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 4,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 1,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 1,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 5,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 5,
> -                                                (__mmask8) -1);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 2,
> -                                                (__mmask8) __M);
> -}
> -
> -extern __inline __mmask8
> -  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
> -{
> -  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
> -                                                (__v2di) __Y, 2,
> -                                                (__mmask8) -1);
> -}
> -
>  #else
>  #define _mm256_permutex_pd(X, M)                                               \
>    ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M),    \
> --- gcc/testsuite/gcc.target/i386/pr79932-2.c.jj        2017-03-07 08:37:10.835990732 +0100
> +++ gcc/testsuite/gcc.target/i386/pr79932-2.c   2017-03-07 08:36:13.000000000 +0100
> @@ -0,0 +1,78 @@
> +/* PR target/79932 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +__m256i a, b;
> +__m128i c, d;
> +__mmask32 e, f[64];
> +
> +void
> +foo (void)
> +{
> +  f[0] = _mm256_cmpge_epi32_mask (a, b);
> +  f[1] = _mm256_cmpge_epi64_mask (a, b);
> +  f[2] = _mm256_cmpge_epu32_mask (a, b);
> +  f[3] = _mm256_cmpge_epu64_mask (a, b);
> +  f[4] = _mm256_cmple_epi32_mask (a, b);
> +  f[5] = _mm256_cmple_epi64_mask (a, b);
> +  f[6] = _mm256_cmple_epu32_mask (a, b);
> +  f[7] = _mm256_cmple_epu64_mask (a, b);
> +  f[8] = _mm256_cmplt_epi32_mask (a, b);
> +  f[9] = _mm256_cmplt_epi64_mask (a, b);
> +  f[10] = _mm256_cmplt_epu32_mask (a, b);
> +  f[11] = _mm256_cmplt_epu64_mask (a, b);
> +  f[12] = _mm256_cmpneq_epi32_mask (a, b);
> +  f[13] = _mm256_cmpneq_epi64_mask (a, b);
> +  f[14] = _mm256_cmpneq_epu32_mask (a, b);
> +  f[15] = _mm256_cmpneq_epu64_mask (a, b);
> +  f[16] = _mm256_mask_cmpge_epi32_mask (e, a, b);
> +  f[17] = _mm256_mask_cmpge_epi64_mask (e, a, b);
> +  f[18] = _mm256_mask_cmpge_epu32_mask (e, a, b);
> +  f[19] = _mm256_mask_cmpge_epu64_mask (e, a, b);
> +  f[20] = _mm256_mask_cmple_epi32_mask (e, a, b);
> +  f[21] = _mm256_mask_cmple_epi64_mask (e, a, b);
> +  f[22] = _mm256_mask_cmple_epu32_mask (e, a, b);
> +  f[23] = _mm256_mask_cmple_epu64_mask (e, a, b);
> +  f[24] = _mm256_mask_cmplt_epi32_mask (e, a, b);
> +  f[25] = _mm256_mask_cmplt_epi64_mask (e, a, b);
> +  f[26] = _mm256_mask_cmplt_epu32_mask (e, a, b);
> +  f[27] = _mm256_mask_cmplt_epu64_mask (e, a, b);
> +  f[28] = _mm256_mask_cmpneq_epi32_mask (e, a, b);
> +  f[29] = _mm256_mask_cmpneq_epi64_mask (e, a, b);
> +  f[30] = _mm256_mask_cmpneq_epu32_mask (e, a, b);
> +  f[31] = _mm256_mask_cmpneq_epu64_mask (e, a, b);
> +  f[32] = _mm_cmpge_epi32_mask (c, d);
> +  f[33] = _mm_cmpge_epi64_mask (c, d);
> +  f[34] = _mm_cmpge_epu32_mask (c, d);
> +  f[35] = _mm_cmpge_epu64_mask (c, d);
> +  f[36] = _mm_cmple_epi32_mask (c, d);
> +  f[37] = _mm_cmple_epi64_mask (c, d);
> +  f[38] = _mm_cmple_epu32_mask (c, d);
> +  f[39] = _mm_cmple_epu64_mask (c, d);
> +  f[40] = _mm_cmplt_epi32_mask (c, d);
> +  f[41] = _mm_cmplt_epi64_mask (c, d);
> +  f[42] = _mm_cmplt_epu32_mask (c, d);
> +  f[43] = _mm_cmplt_epu64_mask (c, d);
> +  f[44] = _mm_cmpneq_epi32_mask (c, d);
> +  f[45] = _mm_cmpneq_epi64_mask (c, d);
> +  f[46] = _mm_cmpneq_epu32_mask (c, d);
> +  f[47] = _mm_cmpneq_epu64_mask (c, d);
> +  f[48] = _mm_mask_cmpge_epi32_mask (e, c, d);
> +  f[49] = _mm_mask_cmpge_epi64_mask (e, c, d);
> +  f[50] = _mm_mask_cmpge_epu32_mask (e, c, d);
> +  f[51] = _mm_mask_cmpge_epu64_mask (e, c, d);
> +  f[52] = _mm_mask_cmple_epi32_mask (e, c, d);
> +  f[53] = _mm_mask_cmple_epi64_mask (e, c, d);
> +  f[54] = _mm_mask_cmple_epu32_mask (e, c, d);
> +  f[55] = _mm_mask_cmple_epu64_mask (e, c, d);
> +  f[56] = _mm_mask_cmplt_epi32_mask (e, c, d);
> +  f[57] = _mm_mask_cmplt_epi64_mask (e, c, d);
> +  f[58] = _mm_mask_cmplt_epu32_mask (e, c, d);
> +  f[59] = _mm_mask_cmplt_epu64_mask (e, c, d);
> +  f[60] = _mm_mask_cmpneq_epi32_mask (e, c, d);
> +  f[61] = _mm_mask_cmpneq_epi64_mask (e, c, d);
> +  f[62] = _mm_mask_cmpneq_epu32_mask (e, c, d);
> +  f[63] = _mm_mask_cmpneq_epu64_mask (e, c, d);
> +}
>
>         Jakub



More information about the Gcc-patches mailing list