[PATCH] tree-optimization/92645 - avoid harmful early BIT_FIELD_REF canonicalization

Richard Biener rguenther@suse.de
Wed Jan 13 13:13:52 GMT 2021


This avoids early canonicalization of BIT_FIELD_REF <T1> (a, <sz>, 0) to
(T1)a on integer-typed a, because the resulting conversion confuses the
vectorizer's SLP matching.

With the canonicalization delayed until after vector lowering, the
testcase in PR92645 (from Skia) is now finally optimized to reasonable
assembly.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2021-01-13  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92645
	* match.pd (BIT_FIELD_REF to conversion): Delay canonicalization
	until after vector lowering.

	* gcc.target/i386/pr92645-7.c: New testcase.
---
 gcc/match.pd                              |  2 ++
 gcc/testsuite/gcc.target/i386/pr92645-7.c | 24 +++++++++++++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92645-7.c

diff --git a/gcc/match.pd b/gcc/match.pd
index c286a540c4e..60c383da13b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6075,6 +6075,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	   /* Low-parts can be reduced to integral conversions.
 	      ???  The following doesn't work for PDP endian.  */
 	   || (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+	       /* But only do this after vectorization.  */
+	       && canonicalize_math_after_vectorization_p ()
 	       /* Don't even think about BITS_BIG_ENDIAN.  */
 	       && TYPE_PRECISION (TREE_TYPE (@0)) % BITS_PER_UNIT == 0
 	       && TYPE_PRECISION (type) % BITS_PER_UNIT == 0
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-7.c b/gcc/testsuite/gcc.target/i386/pr92645-7.c
new file mode 100644
index 00000000000..e4c04c2a82a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-7.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+void bar (v4si *p, __int128_t *q)
+{
+  union { __int128_t a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[0];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* The function should end up with something like
+     [v]pshufd $216, (%rsi), %xmm0
+     [v]movdqa %xmm0, (%rdi)
+     ret
+   recognized by SLP vectorization involving an existing "vector".  */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
-- 
2.26.2


More information about the Gcc-patches mailing list