This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Add pre-reload splitter for low part SI/DImode extraction out of vector regs (PR target/65078)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>, Yuri Rumyantsev <ysrumyan at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 17 Mar 2015 19:15:22 +0100
- Subject: [PATCH] Add pre-reload splitter for low part SI/DImode extraction out of vector regs (PR target/65078)
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch fixes a regression: since the removal of the specialized
builtin from _mm_storel_epi64, we force the extraction of the DImode (or
SImode) low value out of 16/32/64-byte vector registers into memory.
As the vector extraction is from a vector register with a different
element mode, the expander doesn't know it might be beneficial to subreg it
to a vector mode with the same size but a different element mode, and do
the vector extraction out of that. This patch adds a pre-reload splitter that
will turn it into such a vector extraction. At least for the -m32
DImode extraction directly into memory, I think teaching the register
allocator (RA) to do that would be much harder.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2015-03-17 Jakub Jelinek <jakub@redhat.com>
PR target/65078
* config/i386/sse.md (movsi/movdi -> vec_extract_*_0 splitter): New.
* gcc.target/i386/pr65078-1.c: New test.
* gcc.target/i386/pr65078-2.c: New test.
* gcc.target/i386/pr65078-3.c: New test.
* gcc.target/i386/pr65078-4.c: New test.
* gcc.target/i386/pr65078-5.c: New test.
* gcc.target/i386/pr65078-6.c: New test.
--- gcc/config/i386/sse.md.jj 2015-01-23 20:52:13.000000000 +0100
+++ gcc/config/i386/sse.md 2015-03-17 15:57:31.274655235 +0100
@@ -12805,6 +12805,65 @@ (define_split
operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
})
+;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
+;; vector modes into vec_extract*.
+(define_split
+ [(set (match_operand:SWI48x 0 "nonimmediate_operand")
+ (match_operand:SWI48x 1 "register_operand"))]
+ "can_create_pseudo_p ()
+ && GET_CODE (operands[1]) == SUBREG
+ && REG_P (SUBREG_REG (operands[1]))
+ && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
+ || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
+ == MODE_VECTOR_FLOAT))
+ && SUBREG_BYTE (operands[1]) == 0
+ && TARGET_SSE
+ && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
+ || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
+ && TARGET_AVX)
+ || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
+ && TARGET_AVX512F))
+ && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
+ [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
+ (parallel [(const_int 0)])))]
+{
+ rtx tmp;
+ operands[1] = SUBREG_REG (operands[1]);
+ switch (GET_MODE_SIZE (GET_MODE (operands[1])))
+ {
+ case 64:
+ if (<MODE>mode == SImode)
+ {
+ tmp = gen_reg_rtx (V8SImode);
+ emit_insn (gen_vec_extract_lo_v16si (tmp,
+ gen_lowpart (V16SImode,
+ operands[1])));
+ }
+ else
+ {
+ tmp = gen_reg_rtx (V4DImode);
+ emit_insn (gen_vec_extract_lo_v8di (tmp,
+ gen_lowpart (V8DImode,
+ operands[1])));
+ }
+ operands[1] = tmp;
+ /* FALLTHRU */
+ case 32:
+ tmp = gen_reg_rtx (<ssevecmode>mode);
+ if (<MODE>mode == SImode)
+ emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
+ operands[1])));
+ else
+ emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
+ operands[1])));
+ operands[1] = tmp;
+ break;
+ case 16:
+ operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
+ break;
+ }
+})
+
(define_insn "*vec_concatv2si_sse4_1"
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
(vec_concat:V2SI
--- gcc/testsuite/gcc.target/i386/pr65078-1.c.jj 2015-03-17 15:43:43.735200197 +0100
+++ gcc/testsuite/gcc.target/i386/pr65078-1.c 2015-03-17 16:08:17.022117378 +0100
@@ -0,0 +1,61 @@
+/* PR target/65078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-additional-options "-mregparm=2" { target ia32 } } */
+/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+typedef unsigned long long W __attribute__((vector_size (16)));
+typedef unsigned int T __attribute__((vector_size (16)));
+
+void
+f1 (unsigned long long *x, V y)
+{
+ *x = ((W)y)[0];
+}
+
+#if defined(__x86_64__) || defined(ALL)
+unsigned long long
+f2 (V y)
+{
+ return ((W)y)[0];
+}
+#endif
+
+void
+f3 (unsigned int *x, V y)
+{
+ *x = ((T)y)[0];
+}
+
+unsigned int
+f4 (V y)
+{
+ return ((T)y)[0];
+}
+
+void
+f5 (unsigned long long *x, W y)
+{
+ *x = ((W)y)[0];
+}
+
+#if defined(__x86_64__) || defined(ALL)
+unsigned long long
+f6 (W y)
+{
+ return ((W)y)[0];
+}
+#endif
+
+void
+f7 (unsigned int *x, T y)
+{
+ *x = ((T)y)[0];
+}
+
+unsigned int
+f8 (T y)
+{
+ return ((T)y)[0];
+}
--- gcc/testsuite/gcc.target/i386/pr65078-2.c.jj 2015-03-17 15:44:19.097620771 +0100
+++ gcc/testsuite/gcc.target/i386/pr65078-2.c 2015-03-17 16:08:09.440240908 +0100
@@ -0,0 +1,61 @@
+/* PR target/65078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-additional-options "-mregparm=2" { target ia32 } } */
+/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" } } */
+
+typedef unsigned char V __attribute__((vector_size (32)));
+typedef unsigned long long W __attribute__((vector_size (32)));
+typedef unsigned int T __attribute__((vector_size (32)));
+
+void
+f1 (unsigned long long *x, V y)
+{
+ *x = ((W)y)[0];
+}
+
+#if defined(__x86_64__) || defined(ALL)
+unsigned long long
+f2 (V y)
+{
+ return ((W)y)[0];
+}
+#endif
+
+void
+f3 (unsigned int *x, V y)
+{
+ *x = ((T)y)[0];
+}
+
+unsigned int
+f4 (V y)
+{
+ return ((T)y)[0];
+}
+
+void
+f5 (unsigned long long *x, W y)
+{
+ *x = ((W)y)[0];
+}
+
+#if defined(__x86_64__) || defined(ALL)
+unsigned long long
+f6 (W y)
+{
+ return ((W)y)[0];
+}
+#endif
+
+void
+f7 (unsigned int *x, T y)
+{
+ *x = ((T)y)[0];
+}
+
+unsigned int
+f8 (T y)
+{
+ return ((T)y)[0];
+}
--- gcc/testsuite/gcc.target/i386/pr65078-3.c.jj 2015-03-17 15:44:21.943574191 +0100
+++ gcc/testsuite/gcc.target/i386/pr65078-3.c 2015-03-17 16:08:24.930988521 +0100
@@ -0,0 +1,61 @@
+/* PR target/65078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-additional-options "-mregparm=2" { target ia32 } } */
+/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" } } */
+
+typedef unsigned char V __attribute__((vector_size (64)));
+typedef unsigned long long W __attribute__((vector_size (64)));
+typedef unsigned int T __attribute__((vector_size (64)));
+
+void
+f1 (unsigned long long *x, V y)
+{
+ *x = ((W)y)[0];
+}
+
+#if defined(__x86_64__) || defined(ALL)
+unsigned long long
+f2 (V y)
+{
+ return ((W)y)[0];
+}
+#endif
+
+void
+f3 (unsigned int *x, V y)
+{
+ *x = ((T)y)[0];
+}
+
+unsigned int
+f4 (V y)
+{
+ return ((T)y)[0];
+}
+
+void
+f5 (unsigned long long *x, W y)
+{
+ *x = ((W)y)[0];
+}
+
+#if defined(__x86_64__) || defined(ALL)
+unsigned long long
+f6 (W y)
+{
+ return ((W)y)[0];
+}
+#endif
+
+void
+f7 (unsigned int *x, T y)
+{
+ *x = ((T)y)[0];
+}
+
+unsigned int
+f8 (T y)
+{
+ return ((T)y)[0];
+}
--- gcc/testsuite/gcc.target/i386/pr65078-4.c.jj 2015-03-17 16:05:28.777858535 +0100
+++ gcc/testsuite/gcc.target/i386/pr65078-4.c 2015-03-17 16:06:41.911666986 +0100
@@ -0,0 +1,5 @@
+/* PR target/65078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -DALL" } */
+
+#include "pr65078-1.c"
--- gcc/testsuite/gcc.target/i386/pr65078-5.c.jj 2015-03-17 16:06:49.899536842 +0100
+++ gcc/testsuite/gcc.target/i386/pr65078-5.c 2015-03-17 16:06:58.916389933 +0100
@@ -0,0 +1,5 @@
+/* PR target/65078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -DALL" } */
+
+#include "pr65078-2.c"
--- gcc/testsuite/gcc.target/i386/pr65078-6.c.jj 2015-03-17 16:07:05.977274892 +0100
+++ gcc/testsuite/gcc.target/i386/pr65078-6.c 2015-03-17 16:07:13.856146524 +0100
@@ -0,0 +1,5 @@
+/* PR target/65078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -DALL" } */
+
+#include "pr65078-3.c"
Jakub