This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Add AVX512 k-mask intrinsics


Hi,

this patch adds several AVX512 intrinsics for k-mask instructions.
The patch is also attached.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a87a17f..a3456f6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,46 @@
+2016-11-11  Andrew Senkevich  <andrew.senkevich@intel.com>
+
+ * config/i386/avx512bwintrin.h: Add new k-mask intrinsics.
+ * config/i386/avx512dqintrin.h: Ditto.
+ * config/i386/avx512fintrin.h: Ditto.
+ * config/i386/i386-builtin-types.def (UCHAR_FTYPE_UQI_UQI_PUCHAR,
+ UCHAR_FTYPE_UHI_UHI_PUCHAR, UCHAR_FTYPE_USI_USI_PUCHAR,
+ UCHAR_FTYPE_UDI_UDI_PUCHAR, UCHAR_FTYPE_UQI_UQI, UCHAR_FTYPE_UHI_UHI,
+ UCHAR_FTYPE_USI_USI, UCHAR_FTYPE_UDI_UDI, UQI_FTYPE_UQI_INT,
+ UHI_FTYPE_UHI_INT, USI_FTYPE_USI_INT, UDI_FTYPE_UDI_INT,
+ UQI_FTYPE_UQI, USI_FTYPE_USI, UDI_FTYPE_UDI, UQI_FTYPE_UQI_UQI): New
+ function types.
+ * config/i386/i386-builtin.def (__builtin_ia32_kortest_mask8_u8qi,
+ __builtin_ia32_kortest_mask16_u8hi,
+ __builtin_ia32_kortest_mask32_u8si,
+ __builtin_ia32_kortest_mask64_u8di,
+ __builtin_ia32_kortestz_mask8_u8qi,
+ __builtin_ia32_kortestz_mask16_u8hi,
+ __builtin_ia32_kortestz_mask32_u8si,
+ __builtin_ia32_kortestz_mask64_u8di,
+ __builtin_ia32_kortestc_mask8_u8qi,
+ __builtin_ia32_kortestc_mask16_u8hi,
+ __builtin_ia32_kortestc_mask32_u8si,
+ __builtin_ia32_kortestc_mask64_u8di,
+ __builtin_ia32_kshiftliqi, __builtin_ia32_kshiftlihi,
+ __builtin_ia32_kshiftlisi, __builtin_ia32_kshiftlidi,
+ __builtin_ia32_kshiftriqi, __builtin_ia32_kshiftrihi,
+ __builtin_ia32_kshiftrisi, __builtin_ia32_kshiftridi,
+ __builtin_ia32_knotqi, __builtin_ia32_knotsi, __builtin_ia32_knotdi,
+ __builtin_ia32_korqi, __builtin_ia32_korsi, __builtin_ia32_kordi,
+ __builtin_ia32_kxnorqi, __builtin_ia32_kxnorsi,
+ __builtin_ia32_kxnordi, __builtin_ia32_kxorqi, __builtin_ia32_kxorsi,
+ __builtin_ia32_kxordi, __builtin_ia32_kaddqi, __builtin_ia32_kaddhi,
+ __builtin_ia32_kaddsi, __builtin_ia32_kadddi, __builtin_ia32_kandqi,
+ __builtin_ia32_kandsi, __builtin_ia32_kanddi, __builtin_ia32_kandnqi,
+ __builtin_ia32_kandnsi, __builtin_ia32_kandndi, __builtin_ia32_kmov8,
+ __builtin_ia32_kmov32, __builtin_ia32_kmov64): New.
+ * config/i386/i386.c (ix86_expand_args_builtin): Handle new types.
+ * config/i386/i386.md (define_insn "kmovb"): New.
+ (define_insn "kmovd"): Ditto.
+ (define_insn "kmovq"): Ditto.
+ (define_insn "kadd<mode>"): Ditto.
+
 2016-11-10  Vladimir Makarov  <vmakarov@redhat.com>

  * target.def (additional_allocno_class_p): New.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d522e24..dfd35bf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,55 @@
+2016-11-11  Andrew Senkevich  <andrew.senkevich@intel.com>
+
+ * gcc.target/i386/avx512bw-kaddd-1.c: New test.
+ * gcc.target/i386/avx512bw-kaddq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kandd-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kandnd-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kandnq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kandq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovd-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovd-2.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovd-3.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovd-4.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovq-2.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovq-3.c: Ditto.
+ * gcc.target/i386/avx512bw-kmovq-4.c: Ditto.
+ * gcc.target/i386/avx512bw-knotd-1.c: Ditto.
+ * gcc.target/i386/avx512bw-knotq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kord-1.c: Ditto.
+ * gcc.target/i386/avx512bw-korq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftld-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kunpckdq-3.c: Ditto.
+ * gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto.
+ * gcc.target/i386/avx512bw-kxnord-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kxnorq-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kxord-1.c: Ditto.
+ * gcc.target/i386/avx512bw-kxorq-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kaddb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kandb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kandnb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kmovb-2.c: Ditto.
+ * gcc.target/i386/avx512dq-kmovb-3.c: Ditto.
+ * gcc.target/i386/avx512dq-kmovb-4.c: Ditto.
+ * gcc.target/i386/avx512dq-kmovb-5.c: Ditto.
+ * gcc.target/i386/avx512dq-knotb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-korb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kxnorb-1.c: Ditto.
+ * gcc.target/i386/avx512dq-kxorb-1.c: Ditto.
+ * gcc.target/i386/avx512f-kaddw-1.c: Ditto.
+ * gcc.target/i386/avx512f-kmovw-2.c: Ditto.
+ * gcc.target/i386/avx512f-kmovw-3.c: Ditto.
+ * gcc.target/i386/avx512f-kmovw-4.c: Ditto.
+ * gcc.target/i386/avx512f-kmovw-5.c: Ditto.
+ * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
+ * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
+ * gcc.target/i386/avx512f-kunpckbw-3.c: Ditto.
+
 2016-11-10  Jakub Jelinek  <jakub@redhat.com>

  * gfortran.dg/openmp-define-3.f90: Expect 201511 instead of
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index 8f03249..0829af3 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -40,6 +40,238 @@ typedef char __v64qi __attribute__ ((__vector_size__ (64)));

 typedef unsigned long long __mmask64;

+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__C)
+{
+  return (unsigned char) __builtin_ia32_kortest_mask32_u8si ((__mmask32) __A,
+     (__mmask32) __B,
+     (unsigned char *) __C);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestz_mask32_u8si ((__mmask32) __A,
+      (__mmask32) __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestc_mask32_u8si ((__mmask32) __A,
+      (__mmask32) __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__C)
+{
+  return (unsigned char) __builtin_ia32_kortest_mask64_u8di ((__mmask64) __A,
+     (__mmask64) __B,
+     (unsigned char *) __C);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestz_mask64_u8di ((__mmask64) __A,
+      (__mmask64) __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestc_mask64_u8di ((__mmask64) __A,
+      (__mmask64) __B);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtmask32_u32 (__mmask32 __A)
+{
+  return (unsigned int) __builtin_ia32_kmov32 ((__mmask32) __A);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtmask64_u64 (__mmask64 __A)
+{
+  return (unsigned long long) __builtin_ia32_kmov64 ((__mmask64) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtu32_mask32 (unsigned int __A)
+{
+  return (__mmask32) __builtin_ia32_kmov32 ((__mmask32) __A);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtu64_mask64 (unsigned long long __A)
+{
+  return (__mmask64) __builtin_ia32_kmov64 ((__mmask64) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_load_mask32 (__mmask32 *__A)
+{
+  return (__mmask32) __builtin_ia32_kmov32 (*(__mmask32 *) __A);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_load_mask64 (__mmask64 *__A)
+{
+  return (__mmask64) __builtin_ia32_kmov64 (*(__mmask64 *) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask32 (__mmask32 *__A, __mmask32 __B)
+{
+  *(__mmask32 *) __A = __builtin_ia32_kmov32 (__B);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask64 (__mmask64 *__A, __mmask64 __B)
+{
+  *(__mmask64 *) __A = __builtin_ia32_kmov64 (__B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask32 (__mmask32 __A, int __B)
+{
+  return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask64 (__mmask64 __A, int __B)
+{
+  return (__mmask64) __builtin_ia32_kshiftlidi ((__mmask64) __A, __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask32 (__mmask32 __A, int __B)
+{
+  return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask64 (__mmask64 __A, int __B)
+{
+  return (__mmask64) __builtin_ia32_kshiftridi ((__mmask64) __A, __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask32 (__mmask32 __A)
+{
+  return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask64 (__mmask64 __A)
+{
+  return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_setzero_qi (void)
@@ -138,6 +370,14 @@ _mm512_kunpackw (__mmask32 __A, __mmask32 __B)
       (__mmask32) __B);
 }

+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackw_mask32 (__mmask16 __A, __mmask16 __B)
+{
+  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+      (__mmask32) __B);
+}
+
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
@@ -146,6 +386,14 @@ _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
       (__mmask64) __B);
 }

+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackd_mask64 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
+      (__mmask64) __B);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 1dbb6b0..87681f7 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -34,6 +34,122 @@
 #define __DISABLE_AVX512DQ__
 #endif /* __AVX512DQ__ */

+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char* __C)
+{
+  return (unsigned char) __builtin_ia32_kortest_mask8_u8qi ((__mmask8) __A,
+    (__mmask8) __B,
+    (unsigned char *) __C);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestz_mask8_u8qi ((__mmask8) __A,
+     (__mmask8) __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestc_mask8_u8qi ((__mmask8) __A,
+     (__mmask8) __B);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtmask8_u32 (__mmask8 __A)
+{
+  return (unsigned int) __builtin_ia32_kmov8 ((__mmask8) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtu32_mask8 (unsigned int __A)
+{
+  return (__mmask8) __builtin_ia32_kmov8 ((__mmask8) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_load_mask8 (__mmask8 *__A)
+{
+  return (__mmask8) __builtin_ia32_kmov8 (*(__mmask8 *) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask8 (__mmask8 *__A, __mmask8 __B)
+{
+  *(__mmask8 *) __A = __builtin_ia32_kmov8 (__B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask8 (__mmask8 __A, int __B)
+{
+  return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask8 (__mmask8 __A, int __B)
+{
+  return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask8 (__mmask8 __A)
+{
+  return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
+}
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_f64x2 (__m128d __A)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 2372c83..8787da8 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -9977,6 +9977,62 @@ _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
 }

 /* Mask arithmetic operations */
+#define _kand_mask16 _mm512_kand
+#define _kandn_mask16 _mm512_kandn
+#define _knot_mask16 _mm512_knot
+#define _kor_mask16 _mm512_kor
+#define _kxnor_mask16 _mm512_kxnor
+#define _kxor_mask16 _mm512_kxor
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtmask16_u32 (__mmask16 __A)
+{
+  return (unsigned int) __builtin_ia32_kmov16 ((__mmask16 ) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtu32_mask16 (unsigned int __A)
+{
+  return (__mmask16) __builtin_ia32_kmov16 ((__mmask16 ) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_load_mask16 (__mmask16 *__A)
+{
+  return (__mmask16) __builtin_ia32_kmov16 (*(__mmask16 *) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask16 (__mmask16 *__A, __mmask16 __B)
+{
+  *(__mmask16 *) __A = __builtin_ia32_kmov16 (__B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask16 (__mmask16 __A, int __B)
+{
+  return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A, __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask16 (__mmask16 __A, int __B)
+{
+  return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A, __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask16 (__mmask16 __A, __mmask16 __B)
+{
+  return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
+}
+
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kand (__mmask16 __A, __mmask16 __B)
@@ -9988,7 +10044,8 @@ extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kandn (__mmask16 __A, __mmask16 __B)
 {
-  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
+  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
+     (__mmask16) __B);
 }

 extern __inline __mmask16
@@ -9998,6 +10055,31 @@ _mm512_kor (__mmask16 __A, __mmask16 __B)
   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
 }

+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__C)
+{
+  return (unsigned char) __builtin_ia32_kortest_mask16_u8hi ((__mmask16) __A,
+     (__mmask16) __B,
+     (unsigned char *) __C);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestz_mask16_u8hi ((__mmask16) __A,
+     (__mmask16) __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestc_mask16_u8hi ((__mmask16) __A,
+     (__mmask16) __B);
+}
+
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
@@ -10042,6 +10124,13 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
 }

+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
+}
+
 #ifdef __OPTIMIZE__
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index b34cfda..125fa94 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -139,6 +139,12 @@ DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)

+DEF_POINTER_TYPE (PUQI, UQI)
+DEF_POINTER_TYPE (PUHI, UHI)
+DEF_POINTER_TYPE (PUSI, USI)
+DEF_POINTER_TYPE (PUDI, UDI)
+DEF_POINTER_TYPE (PUCHAR, UCHAR)
+
 DEF_POINTER_TYPE (PV2SI, V2SI)
 DEF_POINTER_TYPE (PV2DF, V2DF)
 DEF_POINTER_TYPE (PV2DI, V2DI)
@@ -527,7 +533,23 @@ DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED, UNSIGNED)
 DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)

 # Instructions returning mask
+DEF_FUNCTION_TYPE (UCHAR, UQI, UQI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, UQI, UQI)
+DEF_FUNCTION_TYPE (UCHAR, UHI, UHI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, UHI, UHI)
+DEF_FUNCTION_TYPE (UCHAR, USI, USI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, USI, USI)
+DEF_FUNCTION_TYPE (UCHAR, UDI, UDI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, UDI, UDI)
+
+DEF_FUNCTION_TYPE (UQI, UQI, INT)
+DEF_FUNCTION_TYPE (UHI, UHI, INT)
+DEF_FUNCTION_TYPE (USI, USI, INT)
+DEF_FUNCTION_TYPE (UDI, UDI, INT)
+DEF_FUNCTION_TYPE (UQI, UQI)
 DEF_FUNCTION_TYPE (UHI, UHI)
+DEF_FUNCTION_TYPE (USI, USI)
+DEF_FUNCTION_TYPE (UDI, UDI)
 DEF_FUNCTION_TYPE (UHI, V16QI)
 DEF_FUNCTION_TYPE (USI, V32QI)
 DEF_FUNCTION_TYPE (UDI, V64QI)
@@ -540,6 +562,7 @@ DEF_FUNCTION_TYPE (UHI, V16SI)
 DEF_FUNCTION_TYPE (UQI, V2DI)
 DEF_FUNCTION_TYPE (UQI, V4DI)
 DEF_FUNCTION_TYPE (UQI, V8DI)
+DEF_FUNCTION_TYPE (UQI, UQI, UQI)
 DEF_FUNCTION_TYPE (UHI, UHI, UHI)
 DEF_FUNCTION_TYPE (USI, USI, USI)
 DEF_FUNCTION_TYPE (UDI, UDI, UDI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 227526b..5dae57d 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1436,16 +1436,75 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__bu
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)

 /* Mask arithmetic operations */
-BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_movqi, "__builtin_ia32_kortest_mask8_u8qi", IX86_BUILTIN_KORTEST8_U8, UNKNOWN, (int) UCHAR_FTYPE_UQI_UQI_PUCHAR)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kortest_mask16_u8hi", IX86_BUILTIN_KORTEST16_U8, UNKNOWN, (int) UCHAR_FTYPE_UHI_UHI_PUCHAR)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_movsi, "__builtin_ia32_kortest_mask32_u8si", IX86_BUILTIN_KORTEST32_U8, UNKNOWN, (int) UCHAR_FTYPE_USI_USI_PUCHAR)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_movdi, "__builtin_ia32_kortest_mask64_u8di", IX86_BUILTIN_KORTEST64_U8, UNKNOWN, (int) UCHAR_FTYPE_UDI_UDI_PUCHAR)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_movqi, "__builtin_ia32_kortestz_mask8_u8qi", IX86_BUILTIN_KORTESTZ8_U8, UNKNOWN, (int) UCHAR_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kortestz_mask16_u8hi", IX86_BUILTIN_KORTESTZ16_U8, UNKNOWN, (int) UCHAR_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_movsi, "__builtin_ia32_kortestz_mask32_u8si", IX86_BUILTIN_KORTESTZ32_U8, UNKNOWN, (int) UCHAR_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_movdi, "__builtin_ia32_kortestz_mask64_u8di", IX86_BUILTIN_KORTESTZ64_U8, UNKNOWN, (int) UCHAR_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_movqi, "__builtin_ia32_kortestc_mask8_u8qi", IX86_BUILTIN_KORTESTC8_U8, UNKNOWN, (int) UCHAR_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kortestc_mask16_u8hi", IX86_BUILTIN_KORTESTC16_U8, UNKNOWN, (int) UCHAR_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_movsi, "__builtin_ia32_kortestc_mask32_u8si", IX86_BUILTIN_KORTESTC32_U8, UNKNOWN, (int) UCHAR_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_movdi, "__builtin_ia32_kortestc_mask64_u8di", IX86_BUILTIN_KORTESTC64_U8, UNKNOWN, (int) UCHAR_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_shiftlqi3_1, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_shiftlhi3_1, "__builtin_ia32_kshiftlihi", IX86_BUILTIN_KSHIFTLI16, UNKNOWN, (int) UHI_FTYPE_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_shiftlsi3_1, "__builtin_ia32_kshiftlisi", IX86_BUILTIN_KSHIFTLI32, UNKNOWN, (int) USI_FTYPE_USI_INT)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_shiftldi3_1, "__builtin_ia32_kshiftlidi", IX86_BUILTIN_KSHIFTLI64, UNKNOWN, (int) UDI_FTYPE_UDI_INT)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_shiftrqi3_1, "__builtin_ia32_kshiftriqi", IX86_BUILTIN_KSHIFTRI8, UNKNOWN, (int) UQI_FTYPE_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_shiftrhi3_1, "__builtin_ia32_kshiftrihi", IX86_BUILTIN_KSHIFTRI16, UNKNOWN, (int) UHI_FTYPE_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_shiftrsi3_1, "__builtin_ia32_kshiftrisi", IX86_BUILTIN_KSHIFTRI32, UNKNOWN, (int) USI_FTYPE_USI_INT)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_shiftrdi3_1, "__builtin_ia32_kshiftridi", IX86_BUILTIN_KSHIFTRI64, UNKNOWN, (int) UDI_FTYPE_UDI_INT)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_one_cmplqi2, "__builtin_ia32_knotqi", IX86_BUILTIN_KNOT8, UNKNOWN, (int) UQI_FTYPE_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_one_cmplsi2, "__builtin_ia32_knotsi", IX86_BUILTIN_KNOT32, UNKNOWN, (int) USI_FTYPE_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_one_cmpldi2, "__builtin_ia32_knotdi", IX86_BUILTIN_KNOT64, UNKNOWN, (int) UDI_FTYPE_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorqi3, "__builtin_ia32_korqi", IX86_BUILTIN_KOR8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_iorsi3, "__builtin_ia32_korsi", IX86_BUILTIN_KOR32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_iordi3, "__builtin_ia32_kordi", IX86_BUILTIN_KOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kxnorqi, "__builtin_ia32_kxnorqi", IX86_BUILTIN_KXNOR8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxnorsi, "__builtin_ia32_kxnorsi", IX86_BUILTIN_KXNOR32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxnordi, "__builtin_ia32_kxnordi", IX86_BUILTIN_KXNOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorqi3, "__builtin_ia32_kxorqi", IX86_BUILTIN_KXOR8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_xorsi3, "__builtin_ia32_kxorsi", IX86_BUILTIN_KXOR32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_xordi3, "__builtin_ia32_kxordi", IX86_BUILTIN_KXOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kaddqi, "__builtin_ia32_kaddqi", IX86_BUILTIN_KADD8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kaddhi, "__builtin_ia32_kaddhi", IX86_BUILTIN_KADD16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kaddsi, "__builtin_ia32_kaddsi", IX86_BUILTIN_KADD32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kadddi, "__builtin_ia32_kadddi", IX86_BUILTIN_KADD64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andqi3, "__builtin_ia32_kandqi", IX86_BUILTIN_KAND8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_andsi3, "__builtin_ia32_kandsi", IX86_BUILTIN_KAND32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_anddi3, "__builtin_ia32_kanddi", IX86_BUILTIN_KAND64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kandnqi, "__builtin_ia32_kandnqi", IX86_BUILTIN_KANDN8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandnsi, "__builtin_ia32_kandnsi", IX86_BUILTIN_KANDN32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandndi, "__builtin_ia32_kandndi", IX86_BUILTIN_KANDN64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kmovb, "__builtin_ia32_kmov8", IX86_BUILTIN_KMOV8, UNKNOWN, (int) UQI_FTYPE_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kmovd, "__builtin_ia32_kmov32", IX86_BUILTIN_KMOV32, UNKNOWN, (int) USI_FTYPE_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kmovq, "__builtin_ia32_kmov64", IX86_BUILTIN_KMOV64, UNKNOWN, (int) UDI_FTYPE_UDI)

 /* SHA */
 BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a5c4ba7..fc40b86 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -34638,7 +34638,10 @@ ix86_expand_args_builtin (const struct
builtin_description *d,
     case V4DI_FTYPE_V8HI:
     case V4DI_FTYPE_V4SI:
     case V4DI_FTYPE_V2DI:
+    case UQI_FTYPE_UQI:
     case UHI_FTYPE_UHI:
+    case USI_FTYPE_USI:
+    case UDI_FTYPE_UDI:
     case UHI_FTYPE_V16QI:
     case USI_FTYPE_V32QI:
     case UDI_FTYPE_V64QI:
@@ -34772,6 +34775,7 @@ ix86_expand_args_builtin (const struct
builtin_description *d,
     case UINT_FTYPE_UINT_UCHAR:
     case UINT16_FTYPE_UINT16_INT:
     case UINT8_FTYPE_UINT8_INT:
+    case UQI_FTYPE_UQI_UQI:
     case UHI_FTYPE_UHI_UHI:
     case USI_FTYPE_USI_USI:
     case UDI_FTYPE_UDI_UDI:
@@ -34819,6 +34823,10 @@ ix86_expand_args_builtin (const struct
builtin_description *d,
     case V4DI_FTYPE_V8DI_INT:
     case QI_FTYPE_V4SF_INT:
     case QI_FTYPE_V2DF_INT:
+    case UQI_FTYPE_UQI_INT:
+    case UHI_FTYPE_UHI_INT:
+    case USI_FTYPE_USI_INT:
+    case UDI_FTYPE_UDI_INT:
       nargs = 2;
       nargs_constant = 1;
       break;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a5650a1..800450e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2497,6 +2497,46 @@
    (set_attr "type" "mskmov")
    (set_attr "prefix" "vex")])

+(define_insn "kmovb"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=k,k")
+ (unspec:QI
+  [(match_operand:QI 1 "nonimmediate_operand" "r,km")]
+  UNSPEC_KMOV))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1])) && TARGET_AVX512DQ"
+  "@
+   kmovb\t{%k1, %0|%0, %k1}
+   kmovb\t{%1, %0|%0, %1}";
+  [(set_attr "mode" "QI")
+   (set_attr "type" "mskmov")
+   (set_attr "prefix" "vex")])
+
+(define_insn "kmovd"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=k,k")
+ (unspec:SI
+  [(match_operand:SI 1 "nonimmediate_operand" "r,km")]
+  UNSPEC_KMOV))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1])) && TARGET_AVX512BW"
+  "@
+   kmovd\t{%k1, %0|%0, %k1}
+   kmovd\t{%1, %0|%0, %1}";
+  [(set_attr "mode" "SI")
+   (set_attr "type" "mskmov")
+   (set_attr "prefix" "vex")])
+
+(define_insn "kmovq" ; DImode mask move; alternative 0 must print the full DImode GPR, so no %k modifier.
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=k,k,km")
+ (unspec:DI
+  [(match_operand:DI 1 "nonimmediate_operand" "r,km,k")]
+  UNSPEC_KMOV))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1])) && TARGET_AVX512BW"
+  "@
+   kmovq\t{%1, %0|%0, %1}
+   kmovq\t{%1, %0|%0, %1}
+   kmovq\t{%1, %0|%0, %1}";
+  [(set_attr "mode" "DI")
+   (set_attr "type" "mskmov")
+   (set_attr "prefix" "vex")])
+

 (define_insn "*movhi_internal"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k, r,m")
@@ -8304,11 +8344,11 @@
    (set_attr "mode" "QI")])

 (define_insn "kandn<mode>"
-  [(set (match_operand:SWI12 0 "register_operand" "=r,&r,!k")
- (and:SWI12
-  (not:SWI12
-    (match_operand:SWI12 1 "register_operand" "r,0,k"))
-  (match_operand:SWI12 2 "register_operand" "r,r,k")))
+  [(set (match_operand:SWI1248x 0 "register_operand" "=r,&r,!k")
+ (and:SWI1248x
+  (not:SWI1248x
+    (match_operand:SWI1248x 1 "register_operand" "r,0,k"))
+  (match_operand:SWI1248x 2 "register_operand" "r,r,k")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_AVX512F"
 {
@@ -8319,10 +8359,50 @@
     case 1:
       return "#";
     case 2:
-      if (TARGET_AVX512DQ && <MODE>mode == QImode)
+      if (TARGET_AVX512BW && <MODE>mode == DImode)
+ return "kandnq\t{%2, %1, %0|%0, %1, %2}";
+      else if (TARGET_AVX512BW && <MODE>mode == SImode)
+ return "kandnd\t{%2, %1, %0|%0, %1, %2}";
+      else if (TARGET_AVX512DQ && <MODE>mode == QImode)
  return "kandnb\t{%2, %1, %0|%0, %1, %2}";
       else
  return "kandnw\t{%2, %1, %0|%0, %1, %2}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "bmi,*,avx512f")
+   (set_attr "type" "bitmanip,*,msklog")
+   (set_attr "prefix" "*,*,vex")
+   (set_attr "btver2_decode" "direct,*,*")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "kadd<mode>"
+  [(set (match_operand:SWI1248x 0 "register_operand" "=r,&r,!k")
+ (plus:SWI1248x
+  ;; Plain sum of both operands; unlike kandn, operand 1 is not inverted.
+  (match_operand:SWI1248x 1 "register_operand" "r,0,k")
+  (match_operand:SWI1248x 2 "register_operand" "r,r,k")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512F"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "add\t{%k2, %k1, %k0|%k0, %k1, %k2}"; /* NOTE(review): 3-operand "add" looks copied from andn -- confirm it assembles.  */
+    case 1:
+      return "#";
+    case 2:
+      if (TARGET_AVX512BW && <MODE>mode == DImode)
+ return "kaddq\t{%2, %1, %0|%0, %1, %2}";
+      else if (TARGET_AVX512BW && <MODE>mode == SImode)
+ return "kaddd\t{%2, %1, %0|%0, %1, %2}";
+      else if (TARGET_AVX512DQ && <MODE>mode == QImode)
+ return "kaddb\t{%2, %1, %0|%0, %1, %2}";
+      else
+ return "kaddw\t{%2, %1, %0|%0, %1, %2}";
+
     default:
       gcc_unreachable ();
     }
@@ -9687,7 +9767,7 @@
 ;; shift pair, instead using moves and sign extension for counts greater
 ;; than 31.

-(define_insn "*<mshift><mode>3"
+(define_insn "<mshift><mode>3_1"
   [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
  (any_lshift:SWI1248_AVX512BWDQ (match_operand:SWI1248_AVX512BWDQ 1
"register_operand" "k")
        (match_operand:QI 2 "immediate_operand" "i")))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kaddd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kaddd-1.c
new file mode 100644
index 0000000..0b38850
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kaddd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kaddd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kadd_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kaddq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kaddq-1.c
new file mode 100644
index 0000000..5b7b417
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kaddq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kaddq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kadd_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c
new file mode 100644
index 0000000..2a934f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kand_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c
new file mode 100644
index 0000000..6b68ab3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandnd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kandn_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c
new file mode 100644
index 0000000..35f1c12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandnq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kandn_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c
new file mode 100644
index 0000000..a1aaed6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kand_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-1.c
new file mode 100644
index 0000000..a89b2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask32 m1;
+volatile __mmask32 m2;
+
+void
+avx512bw_test ()
+{
+  m2 = _load_mask32 (&m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-2.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-2.c
new file mode 100644
index 0000000..dcb65fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask32 m1;
+extern __mmask32 m2;
+
+void
+avx512bw_test ()
+{
+  _store_mask32 (&m2, m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-3.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-3.c
new file mode 100644
index 0000000..fe5e1d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask32 m1;
+extern unsigned int m2;
+
+void
+avx512bw_test ()
+{
+  m2 = _cvtmask32_u32 (m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-4.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-4.c
new file mode 100644
index 0000000..8a085d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+unsigned int m1;
+extern __mmask32 m2;
+
+void
+avx512bw_test ()
+{
+  m2 = _cvtu32_mask32 (m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-1.c
new file mode 100644
index 0000000..51d547d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask64 m1;
+volatile __mmask64 m2;
+
+void
+avx512bw_test ()
+{
+  m2 = _load_mask64 (&m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-2.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-2.c
new file mode 100644
index 0000000..9baf200
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask64 m1;
+extern __mmask64 m2;
+
+void
+avx512bw_test ()
+{
+  _store_mask64 (&m2, m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-3.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-3.c
new file mode 100644
index 0000000..3a02d38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask64 m1;
+extern unsigned long long m2;
+
+void
+avx512bw_test ()
+{
+  m2 = _cvtmask64_u64 (m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-4.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-4.c
new file mode 100644
index 0000000..1cc16ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kmovq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+unsigned long long m1;
+extern __mmask64 m2;
+
+void
+avx512bw_test ()
+{
+  m2 = _cvtu64_mask64 (m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c
new file mode 100644
index 0000000..dd6b6e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "knotd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (45) );
+
+  k2 = _knot_mask32 (k1);
+  x = _mm512_mask_add_epi16 (x, k1, x, x);
+  x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c
new file mode 100644
index 0000000..5b94358
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "knotq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (45) );
+
+  k2 = _knot_mask64 (k1);
+  x = _mm512_mask_add_epi8 (x, k1, x, x);
+  x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c
new file mode 100644
index 0000000..163c46e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kord\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kor_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c
new file mode 100644
index 0000000..77b1b9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "korq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kor_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-1.c
new file mode 100644
index 0000000..85be9b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftld-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftld\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2;
+  int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask32 (k1, i);
+  x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-1.c
new file mode 100644
index 0000000..cd5707e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftlq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2;
+  int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask64 (k1, i);
+  x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-1.c
new file mode 100644
index 0000000..91b6313
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2;
+  int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftri_mask32 (k1, i);
+  x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-1.c
new file mode 100644
index 0000000..c10fa4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kshiftrq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2;
+  int i = 5;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftri_mask64 (k1, i);
+  x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c
new file mode 100644
index 0000000..951260f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kunpckdq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test () {
+  volatile __mmask64 k3;
+  __mmask32 k1, k2;
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kunpackd_mask64 (k1, k2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
new file mode 100644
index 0000000..c68ad8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kunpckwd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test () {
+  volatile __mmask32 k3;
+  __mmask16 k1, k2;
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kunpackw_mask32 (k1, k2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c
new file mode 100644
index 0000000..ccf4b63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxnord\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxnor_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c
new file mode 100644
index 0000000..b9c0979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxnorq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxnor_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c
new file mode 100644
index 0000000..ce03ab4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxord\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxor_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c
new file mode 100644
index 0000000..d6366dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxorq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovq" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxor_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kaddb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kaddb-1.c
new file mode 100644
index 0000000..a84d8ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kaddb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kaddb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kadd_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c
new file mode 100644
index 0000000..b5b5367
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kandb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_epi32();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kand_mask8 (k1, k2);
+  x = _mm512_mask_add_epi64 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c
new file mode 100644
index 0000000..ff50610
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kandnb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kandn_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-2.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-2.c
new file mode 100644
index 0000000..3832853
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kmovb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask8 m1;
+volatile __mmask8 m2;
+
+void
+avx512dq_test ()
+{
+  m2 = _load_mask8 (&m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-3.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-3.c
new file mode 100644
index 0000000..8d06674
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kmovb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask8 m1;
+extern __mmask8 m2;
+
+void
+avx512dq_test ()
+{
+  _store_mask8 (&m2, m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-4.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-4.c
new file mode 100644
index 0000000..2da4719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kmovb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask8 m1;
+extern unsigned int m2;
+
+void
+avx512dq_test ()
+{
+  m2 = _cvtmask8_u32 (m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c
new file mode 100644
index 0000000..d3f8c5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kmovb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+unsigned int m1;
+extern __mmask8 m2;
+
+void
+avx512dq_test ()
+{
+  m2 = _cvtu32_mask8 (m1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c
new file mode 100644
index 0000000..8bb9249
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "knotb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (45) );
+
+  k2 = _knot_mask8 (k1);
+  x = _mm512_mask_add_pd (x, k1, x, x);
+  x = _mm512_mask_add_pd (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c
new file mode 100644
index 0000000..22b727d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "korb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kor_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-1.c
new file mode 100644
index 0000000..422d0b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftlb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2;
+  int i = 5;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+
+  k2 = _kshiftli_mask8 (k1, i);
+  x = _mm512_mask_add_pd (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-1.c
new file mode 100644
index 0000000..f87cf74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kshiftrb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2;
+  int i = 5;  /* shift count handed to the intrinsic below */
+  volatile __m512d x = _mm512_setzero_pd();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );  /* k1 is an asm output ("=k"), so it is not a known constant */
+
+  k2 = _kshiftri_mask8 (k1, i);  /* operation under test; the dg-final above expects one kshiftrb */
+  x = _mm512_mask_add_pd (x, k2, x, x);  /* k2 used as the write mask, keeping the result live */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c
new file mode 100644
index 0000000..ee21aa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kxnorb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );  /* asm outputs ("=k"): operands are not known constants */
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );  /* each asm statement emits its kmovb template once */
+
+  k3 = _kxnor_mask8 (k1, k2);  /* operation under test; the dg-final above expects one kxnorb */
+  x = _mm512_mask_add_pd (x, k3, x, x);  /* k3 used as the write mask, keeping the result live */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c
new file mode 100644
index 0000000..63a1ff8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kxorb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovb" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );  /* asm outputs ("=k"): operands are not known constants */
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );  /* each asm statement emits its kmovb template once */
+
+  k3 = _kxor_mask8 (k1, k2);  /* operation under test; the dg-final above expects one kxorb */
+  x = _mm512_mask_add_pd (x, k3, x, x);  /* k3 used as the write mask, keeping the result live */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kaddw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kaddw-1.c
new file mode 100644
index 0000000..9faf4ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kaddw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kaddw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovw" 2 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+  __mmask16 k1, k2, k3;
+  volatile __m512 x = _mm512_setzero_ps();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );  /* asm outputs ("=k"): operands are not known constants */
+  __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );  /* each asm statement emits its kmovw template once */
+
+  k3 = _kadd_mask16 (k1, k2);  /* under test; NOTE(review): the SDM lists KADDW under AVX512DQ - confirm -mavx512f is sufficient */
+  x = _mm512_mask_add_ps (x, k3, x, x);  /* k3 used as the write mask, keeping the result live */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-2.c
new file mode 100644
index 0000000..77c8ddc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask16 m1;  /* source mask; its address is passed to the intrinsic */
+volatile __mmask16 m2;  /* volatile destination: the store of the result must be emitted */
+
+void
+avx512f_test ()
+{
+  m2 = _load_mask16 (&m1);  /* intrinsic under test; each dg-final pattern above expects one kmovw match */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-3.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-3.c
new file mode 100644
index 0000000..740ea9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask16 m1;  /* mask value passed to the store intrinsic */
+extern __mmask16 m2;  /* destination object, defined outside this file */
+
+void
+avx512f_test ()
+{
+  _store_mask16 (&m2, m1);  /* intrinsic under test; each dg-final pattern above expects one kmovw match */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-4.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-4.c
new file mode 100644
index 0000000..127a4ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+__mmask16 m1;  /* mask value to convert */
+extern unsigned int m2;  /* integer destination, defined outside this file */
+
+void
+avx512f_test ()
+{
+  m2 = _cvtmask16_u32 (m1);  /* intrinsic under test; each dg-final pattern above expects one kmovw match */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c
new file mode 100644
index 0000000..d729e8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\{*%k\[0-7\]" 1 } } */
+
+#include <immintrin.h>
+
+unsigned int m1;  /* integer value to convert */
+extern __mmask16 m2;  /* mask destination, defined outside this file */
+
+void
+avx512f_test ()
+{
+  m2 = _cvtu32_mask16 (m1);  /* intrinsic under test; each dg-final pattern above expects one kmovw match */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-1.c
new file mode 100644
index 0000000..7a9de12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kshiftlw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kshiftlw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+  __mmask16 k1, k2;
+  int i = 5;  /* shift count handed to the intrinsic below */
+  volatile __m512 x = _mm512_setzero_ps();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );  /* k1 is an asm output ("=k"), so it is not a known constant */
+
+  k2 = _kshiftli_mask16 (k1, i);  /* operation under test; the dg-final above expects one kshiftlw */
+  x = _mm512_mask_add_ps (x, k2, x, x);  /* k2 used as the write mask, keeping the result live */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-1.c
new file mode 100644
index 0000000..641d307
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kshiftrw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kshiftrw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+  __mmask16 k1, k2;
+  int i = 5;  /* shift count handed to the intrinsic below */
+  volatile __m512 x = _mm512_setzero_ps();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );  /* k1 is an asm output ("=k"), so it is not a known constant */
+
+  k2 = _kshiftri_mask16 (k1, i);  /* operation under test; the dg-final above expects one kshiftrw */
+  x = _mm512_mask_add_ps (x, k2, x, x);  /* k2 used as the write mask, keeping the result live */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c
new file mode 100644
index 0000000..2061f0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kunpckbw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test () {
+  __mmask8 k1, k2;
+  __mmask16 k3;
+  volatile __m512 x = _mm512_setzero_ps();  /* volatile: accesses to x below must be emitted */
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );  /* kmovw, not kmovb: kmovb needs AVX512DQ, this test enables only -mavx512f */
+  __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );  /* asm outputs ("=k"): operands are not known constants */
+
+  k3 = _kunpackb_mask16 (k1, k2);  /* operation under test; the dg-final above expects one kunpckbw */
+  x = _mm512_mask_add_ps (x, k3, x, x);  /* k3 used as the write mask, keeping the result live */
+}

Is it ok for trunk?


--
WBR,
Andrew

Attachment: add_k-mask_intrinsics.patch
Description: Binary data


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]