This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: PR target/33944: streaming 64-bit integer stores
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Uros Bizjak <ubizjak at gmail dot com>
- Date: Fri, 18 Nov 2011 09:58:42 -0800
- Subject: PATCH: PR target/33944: streaming 64-bit integer stores
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
Hi,
_mm_stream_si64 intrinsic is missing. It is implemented in VC++:
http://msdn.microsoft.com/en-us/library/bb531466.aspx
We are adding it to Intel64 SDM. Here is a patch to implement it in GCC.
I added UNSPEC_MOVNTI so that it won't get confused with
(define_insn "sse_movntdi"
[(set (match_operand:DI 0 "memory_operand" "=m")
(unspec:DI [(match_operand:DI 1 "register_operand" "y")]
UNSPEC_MOVNT))]
"TARGET_SSE || TARGET_3DNOW_A"
"movntq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxmov")
(set_attr "mode" "DI")])
OK for trunk?
Thanks.
H.J.
---
2011-11-16 H.J. Lu <hongjiu.lu@intel.com>
PR target/33944
* doc/extend.texi: Document __builtin_ia32_movnti64.
* config/i386/emmintrin.h (_mm_stream_si64): New.
* config/i386/i386-builtin-types.def: Add VOID_FTYPE_PLONGLONG_LONGLONG.
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTI64.
(bdesc_special_args): Update __builtin_ia32_movnti. Add
__builtin_ia32_movnti64.
(ix86_expand_special_args_builtin): Handle
VOID_FTYPE_PLONGLONG_LONGLONG.
* config/i386/i386.md (UNSPEC_MOVNTI): New.
* config/i386/sse.md (sse2_movntsi): Renamed to ...
(sse2_movnti<mode>): This.
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index fe4cd6a..07ac9f3 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -1418,6 +1418,14 @@ _mm_stream_si32 (int *__A, int __B)
__builtin_ia32_movnti (__A, __B);
}
+#ifdef __x86_64__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si64 (long long int *__A, long long int __B)
+{
+ __builtin_ia32_movnti64 (__A, __B);
+}
+#endif
+
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si128 (__m128i *__A, __m128i __B)
{
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 79fb142..d00b053 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -111,6 +111,7 @@ DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
DEF_POINTER_TYPE (PFLOAT, FLOAT)
DEF_POINTER_TYPE (PUSHORT, USHORT)
DEF_POINTER_TYPE (PINT, INT)
+DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
@@ -357,6 +358,7 @@ DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF)
DEF_FUNCTION_TYPE (VOID, PINT, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, LONGLONG)
DEF_FUNCTION_TYPE (VOID, PULONGLONG, ULONGLONG)
DEF_FUNCTION_TYPE (VOID, PV2SI, V2SI)
DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 691c89a..a2a962c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25245,6 +25245,7 @@ enum ix86_builtins
IX86_BUILTIN_CVTTPS2DQ,
IX86_BUILTIN_MOVNTI,
+ IX86_BUILTIN_MOVNTI64,
IX86_BUILTIN_MOVNTPD,
IX86_BUILTIN_MOVNTDQ,
@@ -26327,7 +26328,8 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
@@ -29313,6 +29315,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PFLOAT_V4SF:
case VOID_FTYPE_PDOUBLE_V4DF:
case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PLONGLONG_LONGLONG:
case VOID_FTYPE_PULONGLONG_ULONGLONG:
case VOID_FTYPE_PINT_INT:
nargs = 1;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index bc60253..daf1387 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -116,6 +116,7 @@
UNSPEC_MASKMOV
UNSPEC_MOVMSK
UNSPEC_MOVNT
+ UNSPEC_MOVNTI
UNSPEC_MOVU
UNSPEC_RCP
UNSPEC_RSQRT
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b476752..e43310b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -573,15 +573,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "sse2_movntsi"
- [(set (match_operand:SI 0 "memory_operand" "=m")
- (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
- UNSPEC_MOVNT))]
+(define_insn "sse2_movnti<mode>"
+ [(set (match_operand:SWI48 0 "memory_operand" "=m")
+ (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
+ UNSPEC_MOVNTI))]
"TARGET_SSE2"
"movnti\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_data16" "0")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "<sse>_movnt<mode>"
[(set (match_operand:VF 0 "memory_operand" "=m")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 15238c1..de483a3 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9384,6 +9384,7 @@ v2df __builtin_ia32_loadlpd (v2df, double const *)
int __builtin_ia32_movmskpd (v2df)
int __builtin_ia32_pmovmskb128 (v16qi)
void __builtin_ia32_movnti (int *, int)
+void __builtin_ia32_movnti64 (long long int *, long long int)
void __builtin_ia32_movntpd (double *, v2df)
void __builtin_ia32_movntdq (v2df *, v2df)
v4si __builtin_ia32_pshufd (v4si, int)