Re: [PATCH] Add missing avx512fintrin.h intrinsics (PR target/89602)


On Thu, Mar 07, 2019 at 08:11:53AM +0100, Uros Bizjak wrote:
> > +(define_insn "*avx512f_load<mode>_mask"
> > +  [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
> > +       (vec_merge:<ssevecmode>
> > +         (vec_merge:<ssevecmode>
> > +           (vec_duplicate:<ssevecmode>
> > +             (match_operand:MODEF 1 "memory_operand" "m"))
> > +           (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
> > +           (match_operand:QI 3 "nonmemory_operand" "Yk"))
> 
> Is there a reason to have nonmemory_operand predicate here instead of
> register_operand?

Thanks for catching that, it was left over from my earlier attempt to have
Yk,n constraints and deal with that during output.  For the store that was
possible, for the others there were some cases it couldn't handle, but
further testing revealed that the combiner already handles most of the
constant mask cases correctly.

Here is the updated patch; I've changed this in two spots.  It even improves
the constant 1 case (the only case that is still not optimized as much as it
should be):
 f4:
-	movzbl	.LC0(%rip), %eax
+	movl	$1, %eax
 	kmovw	%eax, %k1
 	vmovsd	(%rsi), %xmm0{%k1}{z}
 	ret
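
The constant 1 case is the one where the mask argument is a literal 1
rather than a variable __mmask8.  A minimal sketch of such a function (the
actual f4 testcase is the one posted in the description, so the name and
signature below are only a guess):

  #include <immintrin.h>

  __m128d
  f4 (const double *p)
  {
    /* Constant mask 1: element 0 is loaded from *p and the upper element
       is zeroed, so ideally no mask register setup would be needed.  */
    return _mm_maskz_load_sd (1, p);
  }
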
Tested so far with make check-gcc RUNTESTFLAGS=i386.exp=avx512f-vmovs*.c
and by compiling/eyeballing differences on the short testcase I've posted
in the description, also with the u, -> 1, and u, -> 0, changes; apart from
the above f4 there were no differences.

Ok for trunk if it passes another full bootstrap/regtest?

2019-03-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/89602
	* config/i386/sse.md (avx512f_mov<ssescalarmodelower>_mask,
	*avx512f_load<mode>_mask, avx512f_store<mode>_mask): New define_insns.
	(avx512f_load<mode>_mask): New define_expand.
	* config/i386/i386-builtin.def (__builtin_ia32_loadsd_mask,
	__builtin_ia32_loadss_mask, __builtin_ia32_storesd_mask,
	__builtin_ia32_storess_mask, __builtin_ia32_movesd_mask,
	__builtin_ia32_movess_mask): New builtins.
	* config/i386/avx512fintrin.h (_mm_mask_load_ss, _mm_maskz_load_ss,
	_mm_mask_load_sd, _mm_maskz_load_sd, _mm_mask_move_ss,
	_mm_maskz_move_ss, _mm_mask_move_sd, _mm_maskz_move_sd,
	_mm_mask_store_ss, _mm_mask_store_sd): New intrinsics.

	* gcc.target/i386/avx512f-vmovss-1.c: New test.
	* gcc.target/i386/avx512f-vmovss-2.c: New test.
	* gcc.target/i386/avx512f-vmovss-3.c: New test.
	* gcc.target/i386/avx512f-vmovsd-1.c: New test.
	* gcc.target/i386/avx512f-vmovsd-2.c: New test.
	* gcc.target/i386/avx512f-vmovsd-3.c: New test.

--- gcc/config/i386/sse.md.jj	2019-02-20 23:40:17.119140235 +0100
+++ gcc/config/i386/sse.md	2019-03-06 19:15:12.379749161 +0100
@@ -1151,6 +1151,67 @@ (define_insn "<avx512>_load<mode>_mask"
    (set_attr "memory" "none,load")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "avx512f_mov<ssescalarmodelower>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (match_operand:VF_128 2 "register_operand" "v")
+	    (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
+	    (match_operand:QI 4 "register_operand" "Yk"))
+	  (match_operand:VF_128 1 "register_operand" "v")
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "avx512f_load<mode>_mask"
+  [(set (match_operand:<ssevecmode> 0 "register_operand")
+	(vec_merge:<ssevecmode>
+	  (vec_merge:<ssevecmode>
+	    (vec_duplicate:<ssevecmode>
+	      (match_operand:MODEF 1 "memory_operand"))
+	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
+	    (match_operand:QI 3 "nonmemory_operand"))
+	  (match_dup 4)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "operands[4] = CONST0_RTX (<ssevecmode>mode);")
+
+(define_insn "*avx512f_load<mode>_mask"
+  [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
+	(vec_merge:<ssevecmode>
+	  (vec_merge:<ssevecmode>
+	    (vec_duplicate:<ssevecmode>
+	      (match_operand:MODEF 1 "memory_operand" "m"))
+	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
+	    (match_operand:QI 3 "register_operand" "Yk"))
+	  (match_operand:<ssevecmode> 4 "const0_operand" "C")
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{3%}%N2, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "load")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_store<mode>_mask"
+  [(set (match_operand:MODEF 0 "memory_operand" "=m")
+	(if_then_else:MODEF
+	  (and:QI (match_operand:QI 2 "register_operand" "Yk")
+		 (const_int 1))
+	  (vec_select:MODEF
+	    (match_operand:<ssevecmode> 1 "register_operand" "v")
+	    (parallel [(const_int 0)]))
+	  (match_dup 0)))]
+  "TARGET_AVX512F"
+  "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<avx512>_blendm<mode>"
   [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
 	(vec_merge:V48_AVX512VL
--- gcc/config/i386/i386-builtin.def.jj	2019-01-22 23:26:46.622213698 +0100
+++ gcc/config/i386/i386-builtin.def	2019-03-06 15:20:59.096670143 +0100
@@ -255,6 +255,10 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storesf_mask, "__builtin_ia32_storess_mask", IX86_BUILTIN_STORESS_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_UQI)
 
 BDESC (OPTION_MASK_ISA_LWP, 0, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID)
 BDESC (OPTION_MASK_ISA_LWP, 0, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID)
@@ -1470,6 +1474,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3,  "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3,  "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
--- gcc/config/i386/avx512fintrin.h.jj	2019-01-17 13:20:00.812472551 +0100
+++ gcc/config/i386/avx512fintrin.h	2019-03-06 15:22:53.662791558 +0100
@@ -6273,6 +6273,83 @@ _mm512_mask_storeu_ps (void *__P, __mmas
 				   (__mmask16) __U);
 }
 
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
+{
+  return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_ss (__mmask8 __U, const float *__P)
+{
+  return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
+					      __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
+{
+  return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sd (__mmask8 __U, const double *__P)
+{
+  return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
+					       __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
+					      (__v4sf) __W, __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
+					      (__v4sf) _mm_setzero_ps (), __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
+					       (__v2df) __W, __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
+					       (__v2df) _mm_setzero_pd (),
+					       __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
+{
+  __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
+{
+  __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
--- gcc/testsuite/gcc.target/i386/avx512f-vmovss-1.c.jj	2019-03-06 15:34:07.972734673 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vmovss-1.c	2019-03-06 15:44:46.891258107 +0100
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -masm=att" } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\\(%\[a-z0-9,]*\\), %xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\\(%\[a-z0-9,]*\\), %xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+%xmm\[0-9\]+, %xmm\[0-9\]+, %xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+%xmm\[0-9\]+, %xmm\[0-9\]+, %xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+%xmm\[0-9\]+, \\(%\[a-z0-9,]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
+float *volatile p;
+
+void extern
+avx512f_test (void)
+{
+  x1 = _mm_mask_load_ss (x1, m, p);
+  x1 = _mm_maskz_load_ss (m, p);
+  x1 = _mm_mask_move_ss (x1, m, x2, x3);
+  x1 = _mm_maskz_move_ss (m, x2, x3);
+  _mm_mask_store_ss (p, m, x1);
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vmovss-2.c.jj	2019-03-06 15:50:52.072264356 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vmovss-2.c	2019-03-06 19:08:14.933598873 +0100
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2, src3;
+  volatile __mmask8 mask = 5;
+  float val[2] = { 35.5f, 0.0f };
+  float *volatile p = &val[0];
+  float res_ref[SIZE];
+  float zero[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5f + i;
+      src2.a[i] = 7.5f + i;
+      src3.a[i] = 4.5f + i;
+      zero[i] = 0.0f;
+    }
+
+  res1.x = _mm_mask_load_ss (src1.x, mask, p);
+  res2.x = _mm_maskz_load_ss (mask, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = val[0];
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_ss (src1.x, mask, src2.x, src3.x);
+  res4.x = _mm_maskz_move_ss (mask, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src3.a[0];
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  _mm_mask_store_ss (p + 1, mask, src1.x);
+  if (val[1] != src1.a[0])
+    abort ();
+
+  mask ^= 1;
+
+  res1.x = _mm_mask_load_ss (src1.x, mask, p);
+  res2.x = _mm_maskz_load_ss (mask, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = src1.a[0];
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_ss (src1.x, mask, src2.x, src3.x);
+  res4.x = _mm_maskz_move_ss (mask, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src1.a[0];
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  val[1] = 42.0f;
+  _mm_mask_store_ss (p + 1, mask, src1.x);
+  if (val[1] != 42.0f)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vmovss-3.c.jj	2019-03-06 19:11:19.058577646 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vmovss-3.c	2019-03-06 19:11:46.815122188 +0100
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, res4, src1, src2, src3;
+  float val[2] = { 35.5f, 0.0f };
+  float *volatile p = &val[0];
+  float res_ref[SIZE];
+  float zero[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5f + i;
+      src2.a[i] = 7.5f + i;
+      src3.a[i] = 4.5f + i;
+      zero[i] = 0.0f;
+    }
+
+  res1.x = _mm_mask_load_ss (src1.x, 1, p);
+  res2.x = _mm_maskz_load_ss (1, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = val[0];
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_ss (src1.x, 1, src2.x, src3.x);
+  res4.x = _mm_maskz_move_ss (1, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src3.a[0];
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  _mm_mask_store_ss (p + 1, 1, src1.x);
+  if (val[1] != src1.a[0])
+    abort ();
+
+  res1.x = _mm_mask_load_ss (src1.x, 0, p);
+  res2.x = _mm_maskz_load_ss (0, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = src1.a[0];
+  if (check_union128 (res1, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_ss (src1.x, 0, src2.x, src3.x);
+  res4.x = _mm_maskz_move_ss (0, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src1.a[0];
+  if (check_union128 (res3, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128 (res4, res_ref))
+    abort ();
+
+  val[1] = 42.0f;
+  _mm_mask_store_ss (p + 1, 0, src1.x);
+  if (val[1] != 42.0f)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vmovsd-1.c.jj	2019-03-06 15:45:04.922962437 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vmovsd-1.c	2019-03-06 15:45:30.032550703 +0100
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -masm=att" } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\\(%\[a-z0-9,]*\\), %xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\\(%\[a-z0-9,]*\\), %xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+%xmm\[0-9\]+, %xmm\[0-9\]+, %xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+%xmm\[0-9\]+, %xmm\[0-9\]+, %xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+%xmm\[0-9\]+, \\(%\[a-z0-9,]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2, x3;
+volatile __mmask8 m;
+double *volatile p;
+
+void extern
+avx512f_test (void)
+{
+  x1 = _mm_mask_load_sd (x1, m, p);
+  x1 = _mm_maskz_load_sd (m, p);
+  x1 = _mm_mask_move_sd (x1, m, x2, x3);
+  x1 = _mm_maskz_move_sd (m, x2, x3);
+  _mm_mask_store_sd (p, m, x1);
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vmovsd-2.c.jj	2019-03-06 19:05:18.862487956 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vmovsd-2.c	2019-03-06 19:07:58.954861065 +0100
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2, src3;
+  volatile __mmask8 mask = 5;
+  double val[2] = { 35.5, 0.0 };
+  double *volatile p = &val[0];
+  double res_ref[SIZE];
+  double zero[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + i;
+      src2.a[i] = 7.5 + i;
+      src3.a[i] = 4.5 + i;
+      zero[i] = 0.0;
+    }
+
+  res1.x = _mm_mask_load_sd (src1.x, mask, p);
+  res2.x = _mm_maskz_load_sd (mask, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = val[0];
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_sd (src1.x, mask, src2.x, src3.x);
+  res4.x = _mm_maskz_move_sd (mask, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src3.a[0];
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  if (check_union128d (res4, res_ref))
+    abort ();
+
+  _mm_mask_store_sd (p + 1, mask, src1.x);
+  if (val[1] != src1.a[0])
+    abort ();
+
+  mask ^= 1;
+
+  res1.x = _mm_mask_load_sd (src1.x, mask, p);
+  res2.x = _mm_maskz_load_sd (mask, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = src1.a[0];
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_sd (src1.x, mask, src2.x, src3.x);
+  res4.x = _mm_maskz_move_sd (mask, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src1.a[0];
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128d (res4, res_ref))
+    abort ();
+
+  val[1] = 42.0;
+  _mm_mask_store_sd (p + 1, mask, src1.x);
+  if (val[1] != 42.0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512f-vmovsd-3.c.jj	2019-03-06 19:11:57.977939021 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vmovsd-3.c	2019-03-06 19:12:47.090133163 +0100
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, res4, src1, src2, src3;
+  double val[2] = { 35.5, 0.0 };
+  double *volatile p = &val[0];
+  double res_ref[SIZE];
+  double zero[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + i;
+      src2.a[i] = 7.5 + i;
+      src3.a[i] = 4.5 + i;
+      zero[i] = 0.0;
+    }
+
+  res1.x = _mm_mask_load_sd (src1.x, 1, p);
+  res2.x = _mm_maskz_load_sd (1, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = val[0];
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_sd (src1.x, 1, src2.x, src3.x);
+  res4.x = _mm_maskz_move_sd (1, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src3.a[0];
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  if (check_union128d (res4, res_ref))
+    abort ();
+
+  _mm_mask_store_sd (p + 1, 1, src1.x);
+  if (val[1] != src1.a[0])
+    abort ();
+
+  res1.x = _mm_mask_load_sd (src1.x, 0, p);
+  res2.x = _mm_maskz_load_sd (0, p);
+
+  __builtin_memcpy (res_ref, zero, sizeof (zero));
+  res_ref[0] = src1.a[0];
+  if (check_union128d (res1, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  res3.x = _mm_mask_move_sd (src1.x, 0, src2.x, src3.x);
+  res4.x = _mm_maskz_move_sd (0, src2.x, src3.x);
+
+  __builtin_memcpy (res_ref, src2.a, sizeof (src2.a));
+  res_ref[0] = src1.a[0];
+  if (check_union128d (res3, res_ref))
+    abort ();
+
+  res_ref[0] = zero[0];
+  if (check_union128d (res4, res_ref))
+    abort ();
+
+  val[1] = 42.0;
+  _mm_mask_store_sd (p + 1, 0, src1.x);
+  if (val[1] != 42.0)
+    abort ();
+}


	Jakub

