[PATCH] Don't emit AVX512DQ insns for -mavx512vl -mno-avx512dq (PR target/70927, take 2)

Jakub Jelinek jakub@redhat.com
Mon May 9 16:44:00 GMT 2016


On Tue, May 03, 2016 at 08:23:28PM +0200, Jakub Jelinek wrote:
> While working on a patch I'm going to post momentarily, I've noticed that
> we sometimes emit AVX512DQ specific instructions even when avx512dq is not
> enabled (in particular, EVEX andnps and andnpd are AVX512DQ plus if
> they have 128-bit or 256-bit arguments, also AVX512VL).
> 
> I'm not 100% happy about the patch, because (pre-existing issue)
> get_attr_mode doesn't reflect that the insn is in that not a vector float
> insn, but perhaps we'd need to use another alternative and some ugly
> conditionals in mode attribute for that case.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> after a while 6.2, or do you prefer some other fix?

Here is perhaps better variant, which handles stuff in the mode attribute.
Now also with testcases.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-09  Jakub Jelinek  <jakub@redhat.com>

	PR target/70927
	* config/i386/sse.md (<sse>_andnot<mode>3<mask_name>),
	*<code><mode>3<mask_name>): For !TARGET_AVX512DQ and EVEX encoding,
	use vp*[dq] instead of v*p[sd] instructions and adjust mode attribute
	accordingly.

	* gcc.target/i386/avx512vl-logic-1.c: New test.
	* gcc.target/i386/avx512vl-logic-2.c: New test.
	* gcc.target/i386/avx512dq-logic-2.c: New test.

--- gcc/config/i386/sse.md.jj	2016-05-09 10:20:27.280249673 +0200
+++ gcc/config/i386/sse.md	2016-05-09 10:52:44.391756028 +0200
@@ -2783,54 +2783,61 @@ (define_expand "vcond_mask_<mode><sseint
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "<sse>_andnot<mode>3<mask_name>"
-  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
+  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
 	(and:VF_128_256
 	  (not:VF_128_256
-	    (match_operand:VF_128_256 1 "register_operand" "0,v"))
-	  (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
+	    (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
+	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
   "TARGET_SSE && <mask_avx512vl_condition>"
 {
   static char buf[128];
   const char *ops;
   const char *suffix;
 
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V8SF:
-    case MODE_V4SF:
-      suffix = "ps";
-      break;
-    default:
-      suffix = "<ssemodesuffix>";
-    }
-
   switch (which_alternative)
     {
     case 0:
       ops = "andn%s\t{%%2, %%0|%%0, %%2}";
       break;
     case 1:
+    case 2:
+    case 3:
       ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
       break;
     default:
       gcc_unreachable ();
     }
 
-  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
-  if (<mask_applied> && !TARGET_AVX512DQ)
+  switch (get_attr_mode (insn))
     {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      suffix = "ps";
+      break;
+    case MODE_OI:
+    case MODE_TI:
+      /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
       suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
       ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+      break;
+    default:
+      suffix = "<ssemodesuffix>";
     }
 
   snprintf (buf, sizeof (buf), ops, suffix);
   return buf;
 }
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
    (set_attr "type" "sselog")
-   (set_attr "prefix" "orig,maybe_evex")
+   (set_attr "prefix" "orig,maybe_vex,evex,evex")
    (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
+	(cond [(and (match_test "<mask_applied>")
+		    (and (eq_attr "alternative" "1")
+			 (match_test "!TARGET_AVX512DQ")))
+		 (const_string "<sseintvecmode2>")
+	       (eq_attr "alternative" "3")
+		 (const_string "<sseintvecmode2>")
+	       (and (match_test "<MODE_SIZE> == 16")
 		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
 		 (const_string "<ssePSmode>")
 	       (match_test "TARGET_AVX")
@@ -2870,7 +2877,10 @@ (define_insn "<sse>_andnot<mode>3<mask_n
 }
   [(set_attr "type" "sselog")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set (attr "mode")
+        (if_then_else (match_test "TARGET_AVX512DQ")
+		      (const_string "<sseinsnmode>")
+		      (const_string "XI")))])
 
 (define_expand "<code><mode>3<mask_name>"
   [(set (match_operand:VF_128_256 0 "register_operand")
@@ -2889,10 +2899,10 @@ (define_expand "<code><mode>3<mask_name>
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*<code><mode>3<mask_name>"
-  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
+  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
 	(any_logic:VF_128_256
-	  (match_operand:VF_128_256 1 "vector_operand" "%0,v")
-	  (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
+	  (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
+	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
   "TARGET_SSE && <mask_avx512vl_condition>
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
@@ -2900,43 +2910,50 @@ (define_insn "*<code><mode>3<mask_name>"
   const char *ops;
   const char *suffix;
 
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V8SF:
-    case MODE_V4SF:
-      suffix = "ps";
-      break;
-    default:
-      suffix = "<ssemodesuffix>";
-    }
-
   switch (which_alternative)
     {
     case 0:
       ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
       break;
     case 1:
+    case 2:
+    case 3:
       ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
       break;
     default:
       gcc_unreachable ();
     }
 
-  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
-  if (<mask_applied> && !TARGET_AVX512DQ)
+  switch (get_attr_mode (insn))
     {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      suffix = "ps";
+      break;
+    case MODE_OI:
+    case MODE_TI:
+      /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[qd].  */
       suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
       ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+      break;
+    default:
+      suffix = "<ssemodesuffix>";
     }
 
   snprintf (buf, sizeof (buf), ops, suffix);
   return buf;
 }
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
    (set_attr "type" "sselog")
-   (set_attr "prefix" "orig,maybe_evex")
+   (set_attr "prefix" "orig,maybe_evex,evex,evex")
    (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
+	(cond [(and (match_test "<mask_applied>")
+		    (and (eq_attr "alternative" "1")
+			 (match_test "!TARGET_AVX512DQ")))
+		 (const_string "<sseintvecmode2>")
+	       (eq_attr "alternative" "3")
+		 (const_string "<sseintvecmode2>")
+	       (and (match_test "<MODE_SIZE> == 16")
 		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
 		 (const_string "<ssePSmode>")
 	       (match_test "TARGET_AVX")
@@ -2961,7 +2978,7 @@ (define_insn "*<code><mode>3<mask_name>"
   ops = "";
 
   /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
-  if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
+  if (!TARGET_AVX512DQ)
     {
       suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
       ops = "p";
@@ -2974,7 +2991,10 @@ (define_insn "*<code><mode>3<mask_name>"
 }
   [(set_attr "type" "sselog")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set (attr "mode")
+        (if_then_else (match_test "TARGET_AVX512DQ")
+		      (const_string "<sseinsnmode>")
+		      (const_string "XI")))])
 
 (define_expand "copysign<mode>3"
   [(set (match_dup 4)
--- gcc/testsuite/gcc.target/i386/avx512vl-logic-1.c.jj	2016-05-09 11:29:10.143166815 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-logic-1.c	2016-05-09 11:34:48.289612305 +0200
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */
+
+#include <x86intrin.h>
+
+__m128d
+f1 (__m128d a, __m128d b)
+{
+  return _mm_and_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f2 (__m128d a, __m128d b)
+{
+  return _mm_or_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f3 (__m128d a, __m128d b)
+{
+  return _mm_xor_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f4 (__m128d a, __m128d b)
+{
+  return _mm_andnot_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f5 (__m128 a, __m128 b)
+{
+  return _mm_and_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f6 (__m128 a, __m128 b)
+{
+  return _mm_or_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f7 (__m128 a, __m128 b)
+{
+  return _mm_xor_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f8 (__m128 a, __m128 b)
+{
+  return _mm_andnot_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m256d
+f9 (__m256d a, __m256d b)
+{
+  return _mm256_and_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f10 (__m256d a, __m256d b)
+{
+  return _mm256_or_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f11 (__m256d a, __m256d b)
+{
+  return _mm256_xor_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f12 (__m256d a, __m256d b)
+{
+  return _mm256_andnot_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f13 (__m256 a, __m256 b)
+{
+  return _mm256_and_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f14 (__m256 a, __m256 b)
+{
+  return _mm256_or_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f15 (__m256 a, __m256 b)
+{
+  return _mm256_xor_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f16 (__m256 a, __m256 b)
+{
+  return _mm256_andnot_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm\[0-9\]" 1 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-logic-2.c.jj	2016-05-09 11:31:41.282131117 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-logic-2.c	2016-05-09 11:34:56.768498103 +0200
@@ -0,0 +1,196 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */
+
+#include <x86intrin.h>
+
+__m128d
+f1 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_and_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f2 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_or_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vporq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f3 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_xor_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxorq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f4 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_andnot_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f5 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_and_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f6 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_or_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f7 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_xor_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxord\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f8 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_andnot_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m256d
+f9 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_and_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f10 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_or_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vporq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f11 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_xor_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxorq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f12 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_andnot_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f13 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_and_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f14 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_or_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f15 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_xor_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxord\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f16 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_andnot_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*ymm\[0-9\]" 1 } } */
--- gcc/testsuite/gcc.target/i386/avx512dq-logic-2.c.jj	2016-05-09 11:32:04.359820283 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-logic-2.c	2016-05-09 11:35:11.622298092 +0200
@@ -0,0 +1,196 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq" } */
+
+#include <x86intrin.h>
+
+__m128d
+f1 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_and_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f2 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_or_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f3 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_xor_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f4 (__m128d a, __m128d b)
+{
+  register __m128d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_andnot_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f5 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_and_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f6 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_or_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f7 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_xor_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f8 (__m128 a, __m128 b)
+{
+  register __m128 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm_andnot_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m256d
+f9 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_and_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f10 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_or_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f11 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_xor_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f12 (__m256d a, __m256d b)
+{
+  register __m256d c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_andnot_pd (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f13 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_and_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f14 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_or_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f15 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_xor_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f16 (__m256 a, __m256 b)
+{
+  register __m256 c __asm ("xmm16") = a;
+  asm volatile ("" : "+v" (c));
+  c = _mm256_andnot_ps (c, b);
+  asm volatile ("" : "+v" (c));
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm\[0-9\]" 1 } } */


	Jakub



More information about the Gcc-patches mailing list