This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Improve QImode extraction without SSE4.1 (PR tree-optimization/91201)


Hi!

As mentioned in the PR, with SSE4.1 we use pextrb for vec_extractv16qiqi,
but without SSE4.1 we store the vector into memory and load the single byte
from there, even though at least for element 0 we can just use movd instead.

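The following patch does that, except that it keeps the existing code when
we know the movd would go through memory anyway (i.e. when not optimizing
for size and TARGET_INTER_UNIT_MOVES_FROM_VEC is false).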

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

For the non-Os case, perhaps we could also handle the other (elt & 3) == 0
cases; the V4SImode ix86_expand_vector_extract already handles those through
shuffles (pshufd or unpck*) before the movd.  I'm just not sure if it is a win.

2019-08-01  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/91201
	* config/i386/i386-expand.c (ix86_expand_vector_extract): For elt == 0
	V16QImode extraction without sse4.1 try to use V4SImode lowpart
	extraction.

	* gcc.target/i386/sse2-pr91201-3.c: New test.
	* gcc.target/i386/sse2-pr91201-4.c: New test.
	* gcc.target/i386/sse2-pr91201-5.c: New test.
	* gcc.target/i386/sse2-pr91201-6.c: New test.

--- gcc/config/i386/i386-expand.c.jj	2019-07-30 09:12:34.578959902 +0200
+++ gcc/config/i386/i386-expand.c	2019-08-01 10:41:22.545887899 +0200
@@ -14706,6 +14706,17 @@ ix86_expand_vector_extract (bool mmx_ok,
 
     case E_V16QImode:
       use_vec_extr = TARGET_SSE4_1;
+      if (!use_vec_extr
+	  && TARGET_SSE2
+	  && elt == 0
+	  && (optimize_insn_for_size_p () || TARGET_INTER_UNIT_MOVES_FROM_VEC))
+	{
+	  tmp = gen_reg_rtx (SImode);
+	  ix86_expand_vector_extract (false, tmp, gen_lowpart (V4SImode, vec),
+				      0);
+	  emit_insn (gen_rtx_SET (target, gen_lowpart (QImode, tmp)));
+	  return;
+	}
       break;
 
     case E_V8SFmode:
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c.jj	2019-08-01 10:59:07.508887273 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c	2019-08-01 11:07:42.925165205 +0200
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c.jj	2019-08-01 10:59:33.539497274 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c	2019-08-01 11:07:35.304279381 +0200
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse2 -mno-sse3 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c.jj	2019-08-01 11:01:07.994082143 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c	2019-08-01 11:07:19.230520198 +0200
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
+/* { dg-final { scan-assembler-not "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler "\tmov(zbl|b)\t\[^\n\r]*\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c.jj	2019-08-01 11:03:34.896881216 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c	2019-08-01 11:07:27.564395340 +0200
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]