This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Committed: Fix PR target/32708
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 10 Jul 2007 21:27:05 +0200
- Subject: [PATCH, i386]: Committed: Fix PR target/32708
Hello!
Currently, gcc generates rather poorly optimized code on x86_64 for the
following test:
#include <emmintrin.h>
__m128i long2vector(long long i) { return _mm_cvtsi64x_si128(i); }
gcc -O2:
long2vector:
.LFB527:
movq %rdi, -8(%rsp)
movq -8(%rsp), %mm0
movq2dq %mm0, %xmm0
ret
The problem is that the vec_concatv2di pattern doesn't have an "r" register
constraint for operand 1, its first input operand. "r" would be invalid for
32-bit targets (a DImode value does not fit in a single general register
there), so we need a separate pattern for 64-bit targets. With the patch, the
above test compiles, depending on TARGET_INTER_UNIT_MOVES, into:
k8:
long2vector:
.LFB3:
movq %rdi, -8(%rsp)
movq -8(%rsp), %xmm0
ret
core2:
long2vector:
.LFB3:
movq %rdi, %xmm0
ret
The patch was bootstrapped on x86_64 and regression-tested for all
default languages. The patch is committed to mainline SVN.
2007-07-10 Uros Bizjak <ubizjak@gmail.com>
PR target/32708
* config/i386/sse.md (vec_concatv2di): Disable for TARGET_64BIT.
(*vec_concatv2di_rex): New insn pattern.
testsuite/ChangeLog:
2007-07-10 Uros Bizjak <ubizjak@gmail.com>
PR target/32708
* gcc.target/i386/pr32708-1.c: New test.
* gcc.target/i386/pr32708-2.c: Ditto.
* gcc.target/i386/pr32708-3.c: Ditto.
Uros.
Index: testsuite/gcc.target/i386/pr32708-2.c
===================================================================
--- testsuite/gcc.target/i386/pr32708-2.c (revision 0)
+++ testsuite/gcc.target/i386/pr32708-2.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -mtune=k8" } */
+
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+__m128i long2vector(long long __i)
+{
+ return _mm_set_epi64x (0, __i);
+}
+
+/* { dg-final { scan-assembler-not "movq2dq" } } */
Index: testsuite/gcc.target/i386/pr32708-3.c
===================================================================
--- testsuite/gcc.target/i386/pr32708-3.c (revision 0)
+++ testsuite/gcc.target/i386/pr32708-3.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+__m128i long2vector(long long __i)
+{
+ return _mm_set_epi64x (0, __i);
+}
+
+/* { dg-final { scan-assembler-not "movq2dq" } } */
Index: testsuite/gcc.target/i386/pr32708-1.c
===================================================================
--- testsuite/gcc.target/i386/pr32708-1.c (revision 0)
+++ testsuite/gcc.target/i386/pr32708-1.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+__m128i long2vector(long long __i)
+{
+ return _mm_set_epi64x (0, __i);
+}
+
+/* { dg-final { scan-assembler-not "movq2dq" } } */
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 126514)
+++ config/i386/sse.md (working copy)
@@ -4717,7 +4717,7 @@
(vec_concat:V2DI
(match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
(match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
- "TARGET_SSE"
+ "!TARGET_64BIT && TARGET_SSE"
"@
movq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
@@ -4728,6 +4728,23 @@
[(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
(set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
+(define_insn "*vec_concatv2di_rex"
+ [(set (match_operand:V2DI 0 "register_operand" "=Yt,Yi,!Yt,Yt,x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
+ (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Yt,x,m,0")))]
+ "TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
+ (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+
(define_expand "vec_setv2di"
[(match_operand:V2DI 0 "register_operand" "")
(match_operand:DI 1 "register_operand" "")