This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Improve *pmaddwd
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>, Kirill Yukhin <kirill dot yukhin at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 4 May 2016 21:48:10 +0200
- Subject: [PATCH] Improve *pmaddwd
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
As the testcase shows, the *pmaddwd patterns unnecessarily disallow the
xmm16+ registers, even when we can use them with -mavx512bw (the testcase
is compiled with -mavx512bw -mavx512vl).
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-05-04 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (*avx2_pmaddwd, *sse2_pmaddwd): Add an avx512bw
alternative that uses v constraints instead of x and the evex prefix;
keep the existing x alternatives unchanged.
* gcc.target/i386/avx512bw-vpmaddwd-3.c: New test.
--- gcc/config/i386/sse.md.jj 2016-05-04 14:36:08.000000000 +0200
+++ gcc/config/i386/sse.md 2016-05-04 15:16:44.180894303 +0200
@@ -9803,19 +9817,19 @@ (define_expand "avx2_pmaddwd"
"ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
(define_insn "*avx2_pmaddwd"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+ [(set (match_operand:V8SI 0 "register_operand" "=x,v")
(plus:V8SI
(mult:V8SI
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -9836,7 +9850,8 @@ (define_insn "*avx2_pmaddwd"
"TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,avx512bw")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_expand "sse2_pmaddwd"
@@ -9866,17 +9881,17 @@ (define_expand "sse2_pmaddwd"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
(define_insn "*sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
(plus:V4SI
(mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "vector_operand" "%0,x")
+ (match_operand:V8HI 1 "vector_operand" "%0,x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 2 "vector_operand" "xBm,xm")
+ (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4SI
@@ -9891,12 +9906,13 @@ (define_insn "*sse2_pmaddwd"
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"@
pmaddwd\t{%2, %0|%0, %2}
+ vpmaddwd\t{%2, %1, %0|%0, %1, %2}
vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
--- gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-3.c.jj 2016-05-04 16:37:21.196223424 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-3.c 2016-05-04 16:37:51.867819502 +0200
@@ -0,0 +1,24 @@
+/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, __m128i y)
+{
+ register __m128i a __asm ("xmm16"), b __asm ("xmm17");
+ a = x; b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ a = _mm_madd_epi16 (a, b);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m256i x, __m256i y)
+{
+ register __m256i a __asm ("xmm16"), b __asm ("xmm17");
+ a = x; b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ a = _mm256_madd_epi16 (a, b);
+ asm volatile ("" : "+v" (a));
+}
Jakub