This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: PR target/36992: Very strange code for _mm_move_epi64
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org, ubizjak at gmail dot com
- Cc: Joey Ye <joey dot ye at intel dot com>, Xuepeng Guo <xuepeng dot guo at intel dot com>
- Date: Tue, 5 Aug 2008 10:33:15 -0700
- Subject: PATCH: PR target/36992: Very strange code for _mm_move_epi64
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
Here is a patch to implement _mm_move_epi64 with a new builtin,
__builtin_ia32_movq128. With this patch applied, I now get:
[hjl@gnu-6 gcc]$ ./xgcc -B./ -S
/export/gnu/src/gcc-work/gcc/gcc/testsuite/gcc.target/i386/pr36992-2.c
-msse4 -m32
[hjl@gnu-6 gcc]$ cat pr36992-2.s
.file "pr36992-2.c"
.text
.globl test
.type test, @function
test:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
movdqa %xmm0, -24(%ebp)
movdqa -24(%ebp), %xmm1
movq %xmm1, %xmm0
leave
ret
.size test, .-test
.ident "GCC: (GNU) 4.4.0 20080804 (experimental) [trunk
revision 138652]"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$ ./xgcc -B./ -S
/export/gnu/src/gcc-work/gcc/gcc/testsuite/gcc.target/i386/pr36992-2.c
-msse2 -m32
[hjl@gnu-6 gcc]$ cat pr36992-2.s
.file "pr36992-2.c"
.text
.globl test
.type test, @function
test:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
movdqa %xmm0, -24(%ebp)
movdqa -24(%ebp), %xmm1
movq %xmm1, %xmm0
leave
ret
.size test, .-test
.ident "GCC: (GNU) 4.4.0 20080804 (experimental) [trunk
revision 138652]"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$ ./xgcc -B./ -S
/export/gnu/src/gcc-work/gcc/gcc/testsuite/gcc.target/i386/pr36992-2.c
-msse2 -m32 -O
[hjl@gnu-6 gcc]$ cat pr36992-2.s
.file "pr36992-2.c"
.text
.globl test
.type test, @function
test:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movq %xmm0, %xmm0
leave
ret
.size test, .-test
.ident "GCC: (GNU) 4.4.0 20080804 (experimental) [trunk
revision 138652]"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$ ./xgcc -B./ -S
/export/gnu/src/gcc-work/gcc/gcc/testsuite/gcc.target/i386/pr36992-2.c
-msse4 -m32 -O
[hjl@gnu-6 gcc]$ cat pr36992-2.s
.file "pr36992-2.c"
.text
.globl test
.type test, @function
test:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movq %xmm0, %xmm0
leave
ret
.size test, .-test
.ident "GCC: (GNU) 4.4.0 20080804 (experimental) [trunk
revision 138652]"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$
OK for trunk? Thanks.
H.J.
----
gcc/
2008-08-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/36992
* config/i386/emmintrin.h (_mm_move_epi64): Use
__builtin_ia32_movq128.
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVQ128.
(bdesc_args): Add IX86_BUILTIN_MOVQ128.
* config/i386/sse.md (sse2_movq128): New.
(*sse2_movq128): Likewise.
* doc/extend.texi: Document __builtin_ia32_movq128.
gcc/testsuite/
2008-08-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/36992
* gcc.target/i386/pr36992-1.c: Scan movq.
* gcc.target/i386/pr36992-2.c: Likewise.
* gcc.target/i386/pr36992-3.c: New.
--- gcc/config/i386/emmintrin.h.movq 2008-03-17 06:44:51.000000000 -0700
+++ gcc/config/i386/emmintrin.h 2008-08-05 09:50:34.000000000 -0700
@@ -726,7 +726,7 @@ _mm_movpi64_epi64 (__m64 __A)
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64 (__m128i __A)
{
- return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A));
+ return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
}
/* Create a vector of zeros. */
--- gcc/config/i386/i386.c.movq 2008-08-04 15:44:37.000000000 -0700
+++ gcc/config/i386/i386.c 2008-08-05 10:12:07.000000000 -0700
@@ -18729,6 +18729,8 @@ enum ix86_builtins
IX86_BUILTIN_MOVNTPD,
IX86_BUILTIN_MOVNTDQ,
+ IX86_BUILTIN_MOVQ128,
+
/* SSE2 MMX */
IX86_BUILTIN_MASKMOVDQU,
IX86_BUILTIN_MOVMSKPD,
@@ -19858,6 +19860,8 @@ static const struct builtin_description
{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
--- gcc/config/i386/sse.md.movq 2008-08-04 08:47:14.000000000 -0700
+++ gcc/config/i386/sse.md 2008-08-05 10:15:02.000000000 -0700
@@ -210,6 +210,28 @@
DONE;
})
+(define_expand "sse2_movq128"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x")
+ (parallel [(const_int 0)]))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (DImode);")
+
+(define_insn "*sse2_movq128"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x")
+ (parallel [(const_int 0)]))
+ (match_operand:DI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
(define_insn "<sse>_movup<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
(unspec:SSEMODEF2P
--- gcc/doc/extend.texi.movq 2008-07-30 12:29:33.000000000 -0700
+++ gcc/doc/extend.texi 2008-08-05 10:23:34.000000000 -0700
@@ -8071,6 +8071,7 @@ v2di __builtin_ia32_psrlqi128 (v2di, int
v8hi __builtin_ia32_psrawi128 (v8hi, int)
v4si __builtin_ia32_psradi128 (v4si, int)
v4si __builtin_ia32_pmaddwd128 (v8hi, v8hi)
+v2di __builtin_ia32_movq128 (v2di)
@end smallexample
The following built-in functions are available when @option{-msse3} is used.
--- gcc/testsuite/gcc.target/i386/pr36992-1.c.movq 2008-08-04 08:47:09.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr36992-1.c 2008-08-05 10:27:32.000000000 -0700
@@ -9,4 +9,4 @@ test (__m128i b)
return _mm_move_epi64 (b);
}
-/* { dg-final { scan-assembler-times "mov\[qd\]\[ \\t\]+.*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%xmm" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr36992-2.c.movq 2008-08-04 08:47:09.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr36992-2.c 2008-08-05 10:27:45.000000000 -0700
@@ -9,4 +9,4 @@ test (__m128i b)
return _mm_move_epi64 (b);
}
-/* { dg-final { scan-assembler-not "%mm" } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%xmm" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr36992-3.c.movq 2008-08-05 10:17:46.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr36992-3.c 2008-08-05 10:26:53.000000000 -0700
@@ -0,0 +1,12 @@
+/* { dg-do compile }
+/* { dg-options "-O2 -msse4" } */
+
+#include <emmintrin.h>
+
+__m128i
+test (__m128i b)
+{
+ return _mm_move_epi64 (b);
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%xmm" 1 } } */