This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix __mmask* types on many AVX512 intrinsics


Hi!

On Fri, Jul 06, 2018 at 12:47:07PM +0200, Jakub Jelinek wrote:
> On Thu, Jul 05, 2018 at 11:57:26PM +0300, Grazvydas Ignotas wrote:
> > I think it would be more efficient if you took care of it. I won't
> > have time for at least a few days anyway.

Here is the complete patch, I found two further issues where
the __mmask mismatch was in between the return type and what was used
in the rest of the intrinsic, so not caught by my earlier greps.

I've added (except for the avx512bitalg which seems to have no runtime
test coverage whatsoever) tests that cover the real bugs and further
fixed the avx512*-vpcmp{,u}b-2.c test because (rel) << i triggered UB
if i could go up to 63.

I don't have AVX512* hw, so I've just bootstrapped/regtested the patch
normally on i686-linux and x86_64-linux AVX2 hw and tried the affected
tests without the config/i386/ changes and with them under SDE.
The patch should fix these FAILs:

FAIL: gcc.target/i386/avx512bw-vpcmpb-2.c execution test
FAIL: gcc.target/i386/avx512bw-vpcmpub-2.c execution test
FAIL: gcc.target/i386/avx512f-vinsertf32x4-3.c execution test
FAIL: gcc.target/i386/avx512f-vinserti32x4-3.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgeb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgeub-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgeuw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgew-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpleb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpleub-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpleuw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmplew-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltub-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltuw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpneqb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpnequb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpnequw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpneqw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpub-2.c execution test

Ok for trunk?

I guess we want to backport it soon, but would appreciate somebody testing
it on real AVX512-{BW,VL} hw before doing the backports.

Another thing to consider is whether we shouldn't add those grep/sed checks
I've been doing (at least the easy ones that don't cross-check the
i386-builtins.def against the uses in the intrin headers) to config/i386/t-*
some way.

2018-07-07  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask):
	Use __mmask64 type instead of __mmask8 for __M argument.
	* config/i386/avx512fintrin.h (_mm512_mask_xor_epi64,
	_mm512_maskz_xor_epi64): Use __mmask8 type instead of __mmask16 for
	__U argument.
	(_mm512_mask_cmpneq_epi64_mask): Use __mmask8 type instead of
	__mmask16 for __M argument.
	(_mm512_maskz_insertf32x4, _mm512_maskz_inserti32x4,
	_mm512_mask_insertf32x4, _mm512_mask_inserti32x4): Cast last argument
	to __mmask16 instead of __mmask8.
	* config/i386/avx512vlintrin.h (_mm_mask_add_ps, _mm_maskz_add_ps,
	_mm256_mask_add_ps, _mm256_maskz_add_ps, _mm_mask_sub_ps,
	_mm_maskz_sub_ps, _mm256_mask_sub_ps, _mm256_maskz_sub_ps,
	_mm256_maskz_cvtepi32_ps, _mm_maskz_cvtepi32_ps): Use __mmask8 type
	instead of __mmask16 for __U argument.
	* config/i386/avx512vlbwintrin.h (_mm_mask_cmp_epi8_mask): Use
	__mmask16 instead of __mmask8 for __U argument.
	(_mm256_mask_cmp_epi8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epi8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epu8_mask): Use __mmask16 instead of __mmask8 for __U
	argument.
	(_mm256_mask_cmp_epu8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epu8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epi16_mask): Cast last argument to __mmask8 instead
	of __mmask16.
	(_mm256_mask_cvtepi8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepi8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cvtepu8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepu8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cmpneq_epu8_mask, _mm256_mask_cmplt_epu8_mask,
	_mm256_mask_cmpge_epu8_mask, _mm256_mask_cmple_epu8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epu16_mask, _mm256_mask_cmplt_epu16_mask,
	_mm256_mask_cmpge_epu16_mask, _mm256_mask_cmple_epu16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	(_mm256_mask_cmpneq_epi8_mask, _mm256_mask_cmplt_epi8_mask,
	_mm256_mask_cmpge_epi8_mask, _mm256_mask_cmple_epi8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epi16_mask, _mm256_mask_cmplt_epi16_mask,
	_mm256_mask_cmpge_epi16_mask, _mm256_mask_cmple_epi16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	* config/i386/avx512vbmi2vlintrin.h (_mm_mask_shrdi_epi32,
	_mm_mask_shldi_epi32): Cast last argument to __mmask8 instead of
	__mmask16.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (CMP): Use SIZE macro instead
	of hardcoding size.  Cast (rel) to MASK_TYPE.
	* gcc.target/i386/avx512bw-vpcmpub-2.c (CMP): Likewise.
	* gcc.target/i386/avx512f-vinserti32x4-3.c: New test.
	* gcc.target/i386/avx512f-vinsertf32x4-3.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmplew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqw-2.c: New test.

2018-07-07  Grazvydas Ignotas  <notasas@gmail.com>

	* config/i386/avx512bwintrin.h (_mm512_mask_cmp_epi8_mask,
	_mm512_mask_cmp_epu8_mask): Use __mmask64 type instead of __mmask32
	for __U argument.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (SIZE): Define to
	(AVX512F_LEN / 8) instead of (AVX512F_LEN / 16).
	* gcc.target/i386/avx512bw-vpcmpub-2.c (SIZE): Likewise.

--- gcc/config/i386/avx512bwintrin.h.jj	2018-01-03 10:20:06.699535804 +0100
+++ gcc/config/i386/avx512bwintrin.h	2018-07-06 23:33:03.782664372 +0200
@@ -3043,7 +3043,7 @@ _mm512_cmp_epi16_mask (__m512i __X, __m5
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
 			   const int __P)
 {
   return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
@@ -3081,7 +3081,7 @@ _mm512_cmp_epu16_mask (__m512i __X, __m5
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
 			   const int __P)
 {
   return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
--- gcc/config/i386/avx512bitalgintrin.h.jj	2018-01-26 12:43:26.374922539 +0100
+++ gcc/config/i386/avx512bitalgintrin.h	2018-07-06 23:33:03.782664372 +0200
@@ -107,7 +107,7 @@ _mm512_bitshuffle_epi64_mask (__m512i __
 
 extern __inline __mmask64
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
 						 (__v64qi) __B,
--- gcc/config/i386/avx512fintrin.h.jj	2018-05-21 13:15:43.494581775 +0200
+++ gcc/config/i386/avx512fintrin.h	2018-07-06 23:33:03.786664375 +0200
@@ -7377,7 +7377,7 @@ _mm512_xor_epi64 (__m512i __A, __m512i _
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
 						 (__v8di) __B,
@@ -7387,7 +7387,7 @@ _mm512_mask_xor_epi64 (__m512i __W, __mm
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
 						 (__v8di) __B,
@@ -9615,7 +9615,7 @@ _mm512_cmpneq_epu32_mask (__m512i __X, _
 
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
+_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
 						    (__v8di) __Y, 4,
@@ -10877,22 +10877,22 @@ _mm512_mask_insertf32x4 (__m512 __A, __m
 #define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),     \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
-					     (__mmask8)(B)))
+					     (__mmask16)(B)))
 
 #define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
-					      (__mmask8)(B)))
+					      (__mmask16)(B)))
 #endif
 
 extern __inline __m512i
--- gcc/config/i386/avx512vlintrin.h.jj	2018-01-03 10:20:06.152535716 +0100
+++ gcc/config/i386/avx512vlintrin.h	2018-07-06 23:33:03.789664378 +0200
@@ -466,7 +466,7 @@ _mm256_maskz_add_pd (__mmask8 __U, __m25
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -476,7 +476,7 @@ _mm_mask_add_ps (__m128 __W, __mmask16 _
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -487,7 +487,7 @@ _mm_maskz_add_ps (__mmask16 __U, __m128
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -497,7 +497,7 @@ _mm256_mask_add_ps (__m256 __W, __mmask1
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -551,7 +551,7 @@ _mm256_maskz_sub_pd (__mmask8 __U, __m25
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -561,7 +561,7 @@ _mm_mask_sub_ps (__m128 __W, __mmask16 _
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -572,7 +572,7 @@ _mm_maskz_sub_ps (__mmask16 __U, __m128
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -582,7 +582,7 @@ _mm256_mask_sub_ps (__m256 __W, __mmask1
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -1320,7 +1320,7 @@ _mm256_mask_cvtepi32_ps (__m256 __W, __m
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
 						   (__v8sf)
@@ -1339,7 +1339,7 @@ _mm_mask_cvtepi32_ps (__m128 __W, __mmas
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
 						   (__v4sf)
--- gcc/config/i386/avx512vlbwintrin.h.jj	2018-01-03 10:20:06.598535787 +0100
+++ gcc/config/i386/avx512vlbwintrin.h	2018-07-06 23:33:03.790664378 +0200
@@ -1467,7 +1467,7 @@ _mm256_cmp_epi16_mask (__m256i __X, __m2
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
 			const int __P)
 {
   return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
@@ -1486,7 +1486,7 @@ _mm_cmp_epi8_mask (__m128i __X, __m128i
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
 			   const int __P)
 {
   return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
@@ -1494,7 +1494,7 @@ _mm256_mask_cmp_epi8_mask (__mmask16 __U
 						  (__mmask32) __U);
 }
 
-extern __inline __mmask16
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
 {
@@ -1543,7 +1543,7 @@ _mm256_cmp_epu16_mask (__m256i __X, __m2
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
 			const int __P)
 {
   return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
@@ -1562,7 +1562,7 @@ _mm_cmp_epu8_mask (__m128i __X, __m128i
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
 			   const int __P)
 {
   return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
@@ -1570,7 +1570,7 @@ _mm256_mask_cmp_epu8_mask (__mmask16 __U
 						   (__mmask32) __U);
 }
 
-extern __inline __mmask16
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
 {
@@ -1998,7 +1998,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m1
 #define _mm_mask_cmp_epi16_mask(M, X, Y, P)				\
   ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X),	\
 					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(M)))
+					    (__mmask8)(M)))
 
 #define _mm_mask_cmp_epi8_mask(M, X, Y, P)				\
   ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X),	\
@@ -2430,7 +2430,7 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
 						    (__v16hi) __W,
@@ -2449,7 +2449,7 @@ _mm256_maskz_cvtepi8_epi16 (__mmask16 __
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
 						    (__v8hi) __W,
@@ -2468,7 +2468,7 @@ _mm_maskz_cvtepi8_epi16 (__mmask8 __U, _
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
 						    (__v16hi) __W,
@@ -2487,7 +2487,7 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
 						    (__v8hi) __W,
@@ -4541,148 +4541,148 @@ _mm_mask_cmple_epi16_mask (__mmask8 __M,
 						 (__mmask8) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 4,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 4,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 1,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 1,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 5,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 5,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 2,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 2,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 4,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 4,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 1,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 1,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 5,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 5,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 2,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 2,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 4,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 4,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 1,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 1,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 5,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 5,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 2,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 2,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 4,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 4,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 1,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 1,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 5,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 5,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 2,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 2,
+						  (__mmask16) __M);
 }
 
 #ifdef __DISABLE_AVX512VLBW__
--- gcc/config/i386/avx512vbmi2vlintrin.h.jj	2018-01-03 10:20:06.085535705 +0100
+++ gcc/config/i386/avx512vbmi2vlintrin.h	2018-07-06 23:33:03.791664379 +0200
@@ -541,7 +541,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __
 	(__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shrdi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
-	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shrdi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
 	(__v4si)(__m128i)(C),(int)(D), \
@@ -601,7 +601,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __
 	(__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shldi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
-	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shldi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
 	(__v4si)(__m128i)(C),(int)(D), \
--- gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c.jj	2014-12-01 14:57:15.467700715 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c	2018-07-06 22:39:20.531825189 +0200
@@ -6,17 +6,15 @@
 #include "avx512f-helper.h"
 
 #include <math.h>
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 64; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm512_loadu_si512 (s1);		\
     source2.x = _mm512_loadu_si512 (s2);		\
     dst1 = _mm512_cmp_epi8_mask (source1.x, source2.x, imm);\
@@ -29,10 +27,8 @@
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 32; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm256_loadu_si256 ((__m256i*)s1);	\
     source2.x = _mm256_loadu_si256 ((__m256i*)s2);	\
     dst1 = _mm256_cmp_epi8_mask (source1.x, source2.x, imm);\
@@ -45,10 +41,8 @@
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 16; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm_loadu_si128 ((__m128i*)s1);		\
     source2.x = _mm_loadu_si128 ((__m128i*)s2);		\
     dst1 = _mm_cmp_epi8_mask (source1.x, source2.x, imm);\
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequb-2.c.jj	2018-07-06 21:55:30.376035400 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequb-2.c	2018-07-06 21:56:09.360078733 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmplew-2.c.jj	2018-07-06 23:26:43.442365253 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmplew-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeub-2.c.jj	2018-07-06 21:55:30.394035420 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeub-2.c	2018-07-06 21:56:45.496118894 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeub-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleb-2.c.jj	2018-07-06 21:55:30.398035424 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleb-2.c	2018-07-06 21:56:54.170128532 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeb-2.c.jj	2018-07-06 21:55:30.389035415 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeb-2.c	2018-07-06 21:56:38.217110801 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequw-2.c.jj	2018-07-06 23:26:43.442365253 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequw-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltb-2.c.jj	2018-07-06 21:55:30.380035405 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltb-2.c	2018-07-06 21:56:18.298088665 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-3.c.jj	2018-07-06 21:03:53.975540200 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-3.c	2018-07-06 21:04:11.356560385 +0200
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, union128i_d s2, int *res_ref, int imm)
+{
+  memcpy (res_ref, s1.a, SIZE * sizeof (int));
+  memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+  union128i_d s2;
+  int res_ref[SIZE];
+  int j;
+
+  MASK_TYPE mask = (MASK_TYPE) 0xa55a;
+
+  for (j = 0; j < SIZE; j++)
+    {
+      s1.a[j] = j * j;
+      res1.a[j] = DEFAULT_VALUE;
+      res2.a[j] = DEFAULT_VALUE;
+      res3.a[j] = DEFAULT_VALUE;
+    }
+
+  for (j = 0; j < 4; j++)
+    s2.a[j] = j * j * j;
+
+  res1.x = INTRINSIC (_inserti32x4) (s1.x, s2.x, 1);
+  res2.x = INTRINSIC (_mask_inserti32x4) (res2.x, mask, s1.x, s2.x, 1);
+  res3.x = INTRINSIC (_maskz_inserti32x4) (mask, s1.x, s2.x, 1);
+
+  CALC (s1, s2, res_ref, 1);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_d) (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_d) (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgew-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgew-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgew-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgew-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuw-2.c.jj	2018-07-06 23:26:43.442365253 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuw-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeuw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltub-2.c.jj	2018-07-06 21:55:30.385035410 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltub-2.c	2018-07-06 21:56:29.853101506 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltub-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltuw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleub-2.c.jj	2018-07-06 21:55:30.403035430 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleub-2.c	2018-07-06 21:57:04.198139681 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleub-2.c"
--- gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c.jj	2014-12-01 14:57:15.466700732 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c	2018-07-06 22:40:41.666912357 +0200
@@ -6,17 +6,15 @@
 #include "avx512f-helper.h"
 
 #include <math.h>
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 64; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm512_loadu_si512 (s1);		\
     source2.x = _mm512_loadu_si512 (s2);		\
     dst1 = _mm512_cmp_epu8_mask (source1.x, source2.x, imm);\
@@ -29,10 +27,8 @@
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 32; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm256_loadu_si256 ((__m256i*)s1);	\
     source2.x = _mm256_loadu_si256 ((__m256i*)s2);	\
     dst1 = _mm256_cmp_epu8_mask (source1.x, source2.x, imm);\
@@ -45,10 +41,8 @@
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 16; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm_loadu_si128 ((__m128i*)s1);		\
     source2.x = _mm_loadu_si128 ((__m128i*)s2);		\
     dst1 = _mm_cmp_epu8_mask (source1.x, source2.x, imm);\
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleuw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-3.c.jj	2018-07-06 21:00:03.880272894 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-3.c	2018-07-06 21:01:52.615399210 +0200
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN,) s1, union128 s2, float *res_ref, int imm)
+{
+  memcpy (res_ref, s1.a, SIZE * sizeof (float));
+  memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
+  union128 s2;
+  float res_ref[SIZE];
+  int j;
+
+  MASK_TYPE mask = (MASK_TYPE) 0xa55a;
+
+  for (j = 0; j < SIZE; j++)
+    {
+      s1.a[j] = j * j / 10.2;
+      res1.a[j] = DEFAULT_VALUE;
+      res2.a[j] = DEFAULT_VALUE;
+      res3.a[j] = DEFAULT_VALUE;
+    }
+
+  for (j = 0; j < 4; j++)
+    s2.a[j] = j * j * j / 2.03;
+
+  res1.x = INTRINSIC (_insertf32x4) (s1.x, s2.x, 1);
+  res2.x = INTRINSIC (_mask_insertf32x4) (res2.x, mask, s1.x, s2.x, 1);
+  res3.x = INTRINSIC (_maskz_insertf32x4) (mask, s1.x, s2.x, 1);
+
+  CALC (s1, s2, res_ref, 1);
+
+  if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE () (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqb-2.c.jj	2018-07-06 21:55:30.371035395 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqb-2.c	2018-07-06 21:55:52.639060139 +0200
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqb-2.c"


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]