This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Improve *ssse3_palignr<mode>_perm
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>, Kirill Yukhin <kirill dot yukhin at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 23 May 2016 19:17:14 +0200
- Subject: [PATCH] Improve *ssse3_palignr<mode>_perm
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
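This pattern is used to improve __builtin_shuffle in some cases.  The
EVEX-encoded VPALIGNR on 128-bit operands requires both AVX512BW and
AVX512VL, so this patch adds a third alternative that uses the "v"
constraint and is enabled only for avx512bw.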
Bootstrapped/regtested on x86_64-linux and i686-linux; OK for trunk?
2016-05-23 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (*ssse3_palignr<mode>_perm): Add avx512bw
alternative. Formatting fix.
* gcc.target/i386/avx512bw-vpalignr-4.c: New test.
* gcc.target/i386/avx512vl-vpalignr-4.c: New test.
--- gcc/config/i386/sse.md.jj 2016-05-23 14:53:36.000000000 +0200
+++ gcc/config/i386/sse.md 2016-05-23 15:07:41.518548599 +0200
@@ -17747,33 +17747,34 @@ (define_insn "*avx_vperm2f128<mode>_noze
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_palignr<mode>_perm"
- [(set (match_operand:V_128 0 "register_operand" "=x,x")
+ [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
(vec_select:V_128
- (match_operand:V_128 1 "register_operand" "0,x")
+ (match_operand:V_128 1 "register_operand" "0,x,v")
(match_parallel 2 "palignr_operand"
- [(match_operand 3 "const_int_operand" "n, n")])))]
+ [(match_operand 3 "const_int_operand" "n,n,n")])))]
"TARGET_SSSE3"
{
- operands[2] =
- GEN_INT (INTVAL (operands[3]) * GET_MODE_UNIT_SIZE (GET_MODE (operands[0])));
+ operands[2] = (GEN_INT (INTVAL (operands[3])
+ * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
switch (which_alternative)
{
case 0:
return "palignr\t{%2, %1, %0|%0, %1, %2}";
case 1:
+ case 2:
return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,*,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")])
+ (set_attr "prefix" "orig,vex,evex")])
(define_expand "avx512vl_vinsert<mode>"
[(match_operand:VI48F_256 0 "register_operand")
--- gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-4.c.jj 2016-05-23 15:18:57.787640379 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-4.c 2016-05-23 15:18:26.000000000 +0200
@@ -0,0 +1,86 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+
+typedef char V1 __attribute__((vector_size (16)));
+
+void
+f1 (V1 x)
+{
+ register V1 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V1) { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpalignr\[^\n\r]*\\\$6\[^\n\r]*%xmm16\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef short V2 __attribute__((vector_size (16)));
+
+void
+f2 (V2 x)
+{
+ register V2 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V2) { 5, 6, 7, 0, 1, 2, 3, 4 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpalignr\[^\n\r]*\\\$10\[^\n\r]*%xmm16\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef int V3 __attribute__((vector_size (16)));
+
+void
+f3 (V3 x)
+{
+ register V3 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V3) { 3, 0, 1, 2 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpshufd\[^\n\r]*\\\$147\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef long long V4 __attribute__((vector_size (16)));
+
+void
+f4 (V4 x)
+{
+ register V4 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V4) { 1, 0 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpalignr\[^\n\r]*\\\$8\[^\n\r]*%xmm16\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef float V5 __attribute__((vector_size (16)));
+
+void
+f5 (V5 x)
+{
+ register V5 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V3) { 3, 0, 1, 2 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$147\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef double V6 __attribute__((vector_size (16)));
+
+void
+f6 (V6 x)
+{
+ register V6 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V4) { 1, 0 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpermilpd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-vpalignr-4.c.jj 2016-05-23 15:19:34.352162361 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpalignr-4.c 2016-05-23 15:20:02.570793519 +0200
@@ -0,0 +1,86 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw -masm=att" } */
+
+typedef char V1 __attribute__((vector_size (16)));
+
+void
+f1 (V1 x)
+{
+ register V1 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V1) { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpalignr\[^\n\r]*\\\$6\[^\n\r]*%xmm16\[^\n\r]*%xmm16\[^\n\r]*%xmm16" } } */
+
+typedef short V2 __attribute__((vector_size (16)));
+
+void
+f2 (V2 x)
+{
+ register V2 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V2) { 5, 6, 7, 0, 1, 2, 3, 4 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpalignr\[^\n\r]*\\\$10\[^\n\r]*%xmm16\[^\n\r]*%xmm16\[^\n\r]*%xmm16" } } */
+
+typedef int V3 __attribute__((vector_size (16)));
+
+void
+f3 (V3 x)
+{
+ register V3 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V3) { 3, 0, 1, 2 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpshufd\[^\n\r]*\\\$147\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef long long V4 __attribute__((vector_size (16)));
+
+void
+f4 (V4 x)
+{
+ register V4 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V4) { 1, 0 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpalignr\[^\n\r]*\\\$8\[^\n\r]*%xmm16\[^\n\r]*%xmm16\[^\n\r]*%xmm16" } } */
+
+typedef float V5 __attribute__((vector_size (16)));
+
+void
+f5 (V5 x)
+{
+ register V5 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V3) { 3, 0, 1, 2 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$147\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
+
+typedef double V6 __attribute__((vector_size (16)));
+
+void
+f6 (V6 x)
+{
+ register V6 a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = __builtin_shuffle (a, (V4) { 1, 0 });
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vpermilpd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
Jakub