This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: PR target/36992: Very stange code for _mm_move_epi64
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Joey Ye <joey dot ye at intel dot com>, Xuepeng Guo <xuepeng dot guo at intel dot com>
- Date: Wed, 6 Aug 2008 06:40:59 -0700
- Subject: Re: PATCH: PR target/36992: Very stange code for _mm_move_epi64
- References: <20080805173315.GA18259@lucon.org> <20080806011832.GA7330@lucon.org> <489999D9.20509@gmail.com>
On Wed, Aug 06, 2008 at 02:32:25PM +0200, Uros Bizjak wrote:
>
> Both patterns can be substituted with:
>
> (define_insn "sse2_movq128"
> [(set (match_operand:V2DI 0 "register_operand" "=x")
> (vec_concat:V2DI
> (vec_select:DI
> (match_operand:V2DI 1 "nonimmediate_operand" "xm")
> (parallel [(const_int 0)]))
> (const_int 0)))]
> "TARGET_SSE2"
> "movq\t{%1, %0|%0, %1}"
> [(set_attr "type" "ssemov")
> (set_attr "mode" "TI")])
>
>
> The test above was introduced to check for possible %mm usage when -O0
> was used. Do you think we should check generation of movq for -O0 or we
> could delete this test as redundant now?
>
Here is the updated patch. OK for trunk?
Thanks.
H.J.
--
gcc/
2008-08-06 H.J. Lu <hongjiu.lu@intel.com>
PR target/36992
* config/i386/emmintrin.h (_mm_move_epi64): Use
__builtin_ia32_movq128.
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVQ128.
(bdesc_args): Add IX86_BUILTIN_MOVQ128.
* config/i386/sse.md (sse2_movq128): New.
* doc/extend.texi: Document __builtin_ia32_movq128.
gcc/testsuite/
2008-08-06 H.J. Lu <hongjiu.lu@intel.com>
PR target/36992
* gcc.target/i386/pr36992-1.c: Scan movq.
* gcc.target/i386/pr36992-2.c: Use "-O2 -msse4" instead of
"-O0 -msse2". Scan movq.
--- gcc/config/i386/emmintrin.h.movq 2008-03-13 09:04:41.000000000 -0700
+++ gcc/config/i386/emmintrin.h 2008-08-06 06:19:49.000000000 -0700
@@ -726,7 +726,7 @@ _mm_movpi64_epi64 (__m64 __A)
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64 (__m128i __A)
{
- return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A));
+ return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
}
/* Create a vector of zeros. */
--- gcc/config/i386/i386.c.movq 2008-08-06 06:16:08.000000000 -0700
+++ gcc/config/i386/i386.c 2008-08-06 06:19:49.000000000 -0700
@@ -18726,6 +18726,8 @@ enum ix86_builtins
IX86_BUILTIN_MOVNTPD,
IX86_BUILTIN_MOVNTDQ,
+ IX86_BUILTIN_MOVQ128,
+
/* SSE2 MMX */
IX86_BUILTIN_MASKMOVDQU,
IX86_BUILTIN_MOVMSKPD,
@@ -19855,6 +19857,8 @@ static const struct builtin_description
{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
--- gcc/config/i386/sse.md.movq 2008-08-03 09:47:02.000000000 -0700
+++ gcc/config/i386/sse.md 2008-08-06 06:22:52.000000000 -0700
@@ -210,6 +210,18 @@
DONE;
})
+(define_insn "sse2_movq128"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (const_int 0)))]
+ "TARGET_SSE2"
+ "movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
(define_insn "<sse>_movup<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
(unspec:SSEMODEF2P
--- gcc/doc/extend.texi.movq 2008-07-30 20:53:11.000000000 -0700
+++ gcc/doc/extend.texi 2008-08-06 06:19:49.000000000 -0700
@@ -8071,6 +8071,7 @@ v2di __builtin_ia32_psrlqi128 (v2di, int
v8hi __builtin_ia32_psrawi128 (v8hi, int)
v4si __builtin_ia32_psradi128 (v4si, int)
v4si __builtin_ia32_pmaddwd128 (v8hi, v8hi)
+v2di __builtin_ia32_movq128 (v2di)
@end smallexample
The following built-in functions are available when @option{-msse3} is used.
--- gcc/testsuite/gcc.target/i386/pr36992-1.c.movq 2008-08-03 09:47:01.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr36992-1.c 2008-08-06 06:19:49.000000000 -0700
@@ -9,4 +9,4 @@ test (__m128i b)
return _mm_move_epi64 (b);
}
-/* { dg-final { scan-assembler-times "mov\[qd\]\[ \\t\]+.*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%xmm" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr36992-2.c.movq 2008-08-03 09:47:01.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr36992-2.c 2008-08-06 06:19:50.000000000 -0700
@@ -1,5 +1,5 @@
/* { dg-do compile }
-/* { dg-options "-O0 -msse2" } */
+/* { dg-options "-O2 -msse4" } */
#include <emmintrin.h>
@@ -9,4 +9,4 @@ test (__m128i b)
return _mm_move_epi64 (b);
}
-/* { dg-final { scan-assembler-not "%mm" } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%xmm" 1 } } */