This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Don't emit AVX512DQ insns for -mavx512vl -mno-avx512dq (PR target/70927, take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>, Kirill Yukhin <kirill dot yukhin at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 9 May 2016 18:43:50 +0200
- Subject: [PATCH] Don't emit AVX512DQ insns for -mavx512vl -mno-avx512dq (PR target/70927, take 2)
- Authentication-results: sourceware.org; auth=none
- References: <20160503182328 dot GF26501 at tucnak dot zalov dot cz>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Tue, May 03, 2016 at 08:23:28PM +0200, Jakub Jelinek wrote:
> While working on a patch I'm going to post momentarily, I've noticed that
> we sometimes emit AVX512DQ specific instructions even when avx512dq is not
> enabled (in particular, EVEX andnps and andnpd are AVX512DQ plus if
> they have 128-bit or 256-bit arguments, also AVX512VL).
>
> I'm not 100% happy about the patch, because (pre-existing issue)
> get_attr_mode doesn't reflect that the insn is in that case not a vector float
> insn, but perhaps we'd need to use another alternative and some ugly
> conditionals in mode attribute for that case.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> after a while 6.2, or do you prefer some other fix?
Here is perhaps a better variant, which handles stuff in the mode attribute.
Now also with testcases.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-05-09 Jakub Jelinek <jakub@redhat.com>
PR target/70927
* config/i386/sse.md (<sse>_andnot<mode>3<mask_name>,
*<code><mode>3<mask_name>): For !TARGET_AVX512DQ and EVEX encoding,
use vp*[dq] instead of v*p[sd] instructions and adjust mode attribute
accordingly.
* gcc.target/i386/avx512vl-logic-1.c: New test.
* gcc.target/i386/avx512vl-logic-2.c: New test.
* gcc.target/i386/avx512dq-logic-2.c: New test.
--- gcc/config/i386/sse.md.jj 2016-05-09 10:20:27.280249673 +0200
+++ gcc/config/i386/sse.md 2016-05-09 10:52:44.391756028 +0200
@@ -2783,54 +2783,61 @@ (define_expand "vcond_mask_<mode><sseint
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "<sse>_andnot<mode>3<mask_name>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
(and:VF_128_256
(not:VF_128_256
- (match_operand:VF_128_256 1 "register_operand" "0,v"))
- (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
+ (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
"TARGET_SSE && <mask_avx512vl_condition>"
{
static char buf[128];
const char *ops;
const char *suffix;
- switch (get_attr_mode (insn))
- {
- case MODE_V8SF:
- case MODE_V4SF:
- suffix = "ps";
- break;
- default:
- suffix = "<ssemodesuffix>";
- }
-
switch (which_alternative)
{
case 0:
ops = "andn%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
+ case 2:
+ case 3:
ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
- /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
- if (<mask_applied> && !TARGET_AVX512DQ)
+ switch (get_attr_mode (insn))
{
+ case MODE_V8SF:
+ case MODE_V4SF:
+ suffix = "ps";
+ break;
+ case MODE_OI:
+ case MODE_TI:
+ /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+ break;
+ default:
+ suffix = "<ssemodesuffix>";
}
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix" "orig,maybe_vex,evex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
+ (cond [(and (match_test "<mask_applied>")
+ (and (eq_attr "alternative" "1")
+ (match_test "!TARGET_AVX512DQ")))
+ (const_string "<sseintvecmode2>")
+ (eq_attr "alternative" "3")
+ (const_string "<sseintvecmode2>")
+ (and (match_test "<MODE_SIZE> == 16")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
@@ -2870,7 +2877,10 @@ (define_insn "<sse>_andnot<mode>3<mask_n
}
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))])
(define_expand "<code><mode>3<mask_name>"
[(set (match_operand:VF_128_256 0 "register_operand")
@@ -2889,10 +2899,10 @@ (define_expand "<code><mode>3<mask_name>
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<code><mode>3<mask_name>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
(any_logic:VF_128_256
- (match_operand:VF_128_256 1 "vector_operand" "%0,v")
- (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
+ (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
"TARGET_SSE && <mask_avx512vl_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
@@ -2900,43 +2910,50 @@ (define_insn "*<code><mode>3<mask_name>"
const char *ops;
const char *suffix;
- switch (get_attr_mode (insn))
- {
- case MODE_V8SF:
- case MODE_V4SF:
- suffix = "ps";
- break;
- default:
- suffix = "<ssemodesuffix>";
- }
-
switch (which_alternative)
{
case 0:
ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
+ case 2:
+ case 3:
ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
- /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
- if (<mask_applied> && !TARGET_AVX512DQ)
+ switch (get_attr_mode (insn))
{
+ case MODE_V8SF:
+ case MODE_V4SF:
+ suffix = "ps";
+ break;
+ case MODE_OI:
+ case MODE_TI:
+ /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+ break;
+ default:
+ suffix = "<ssemodesuffix>";
}
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix" "orig,maybe_evex,evex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
+ (cond [(and (match_test "<mask_applied>")
+ (and (eq_attr "alternative" "1")
+ (match_test "!TARGET_AVX512DQ")))
+ (const_string "<sseintvecmode2>")
+ (eq_attr "alternative" "3")
+ (const_string "<sseintvecmode2>")
+ (and (match_test "<MODE_SIZE> == 16")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
@@ -2961,7 +2978,7 @@ (define_insn "*<code><mode>3<mask_name>"
ops = "";
/* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
- if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
+ if (!TARGET_AVX512DQ)
{
suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
ops = "p";
@@ -2974,7 +2991,10 @@ (define_insn "*<code><mode>3<mask_name>"
}
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))])
(define_expand "copysign<mode>3"
[(set (match_dup 4)
--- gcc/testsuite/gcc.target/i386/avx512vl-logic-1.c.jj 2016-05-09 11:29:10.143166815 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-logic-1.c 2016-05-09 11:34:48.289612305 +0200
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */
+
+#include <x86intrin.h>
+
+__m128d
+f1 (__m128d a, __m128d b)
+{
+ return _mm_and_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f2 (__m128d a, __m128d b)
+{
+ return _mm_or_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f3 (__m128d a, __m128d b)
+{
+ return _mm_xor_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f4 (__m128d a, __m128d b)
+{
+ return _mm_andnot_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f5 (__m128 a, __m128 b)
+{
+ return _mm_and_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f6 (__m128 a, __m128 b)
+{
+ return _mm_or_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f7 (__m128 a, __m128 b)
+{
+ return _mm_xor_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f8 (__m128 a, __m128 b)
+{
+ return _mm_andnot_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m256d
+f9 (__m256d a, __m256d b)
+{
+ return _mm256_and_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f10 (__m256d a, __m256d b)
+{
+ return _mm256_or_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f11 (__m256d a, __m256d b)
+{
+ return _mm256_xor_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f12 (__m256d a, __m256d b)
+{
+ return _mm256_andnot_pd (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f13 (__m256 a, __m256 b)
+{
+ return _mm256_and_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f14 (__m256 a, __m256 b)
+{
+ return _mm256_or_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f15 (__m256 a, __m256 b)
+{
+ return _mm256_xor_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f16 (__m256 a, __m256 b)
+{
+ return _mm256_andnot_ps (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm\[0-9\]" 1 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-logic-2.c.jj 2016-05-09 11:31:41.282131117 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-logic-2.c 2016-05-09 11:34:56.768498103 +0200
@@ -0,0 +1,196 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */
+
+#include <x86intrin.h>
+
+__m128d
+f1 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_and_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f2 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_or_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vporq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f3 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_xor_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxorq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f4 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_andnot_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnq\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f5 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_and_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f6 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_or_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f7 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_xor_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxord\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f8 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_andnot_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m256d
+f9 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_and_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f10 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_or_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vporq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f11 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_xor_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxorq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f12 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_andnot_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnq\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f13 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_and_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f14 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_or_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f15 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_xor_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpxord\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f16 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_andnot_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*ymm\[0-9\]" 1 } } */
--- gcc/testsuite/gcc.target/i386/avx512dq-logic-2.c.jj 2016-05-09 11:32:04.359820283 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-logic-2.c 2016-05-09 11:35:11.622298092 +0200
@@ -0,0 +1,196 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq" } */
+
+#include <x86intrin.h>
+
+__m128d
+f1 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_and_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f2 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_or_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f3 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_xor_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128d
+f4 (__m128d a, __m128d b)
+{
+ register __m128d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_andnot_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f5 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_and_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f6 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_or_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f7 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_xor_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m128
+f8 (__m128 a, __m128 b)
+{
+ register __m128 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm_andnot_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+__m256d
+f9 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_and_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f10 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_or_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f11 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_xor_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256d
+f12 (__m256d a, __m256d b)
+{
+ register __m256d c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_andnot_pd (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f13 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_and_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f14 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_or_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f15 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_xor_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+__m256
+f16 (__m256 a, __m256 b)
+{
+ register __m256 c __asm ("xmm16") = a;
+ asm volatile ("" : "+v" (c));
+ c = _mm256_andnot_ps (c, b);
+ asm volatile ("" : "+v" (c));
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm\[0-9\]" 1 } } */
Jakub