PATCH: Add 256bit vmontXXX intrinsic support
H.J. Lu
hongjiu.lu@intel.com
Tue Jan 6 21:13:00 GMT 2009
AVX Programming Reference (December, 2008) includes 256bit vmontXXX
intrinsics. This patch adds them. Tested on Linux/Intel64. OK for
trunk?
Thanks.
H.J.
----
gcc/
2009-01-06 H.J. Lu <hongjiu.lu@intel.com>
AVX Programming Reference (December, 2008)
* config/i386/avxintrin.h (_mm256_stream_si256): New.
(_mm256_stream_pd): Likewise.
(_mm256_stream_ps): Likewise.
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256,
IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256.
(ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI.
(bdesc_special_args): Add __builtin_ia32_movntdq256,
__builtin_ia32_movntpd256 and __builtin_ia32_movntps256.
(ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI.
(ix86_expand_special_args_builtin): Likewise.
* config/i386/sse.md (AVXMODEDI): New.
(avx_movnt<mode>): Likewise.
(avx_movnt<mode>): Likewise.
(<sse>_movnt<mode>): Remove AVX support.
(sse2_movntv2di): Likewise.
gcc/testsuite/
2009-01-06 H.J. Lu <hongjiu.lu@intel.com>
AVX Programming Reference (December, 2008)
* gcc.target/i386/avx-vmovntdq-256-1.c: New.
* gcc.target/i386/avx-vmovntpd-256-1.c: Likewise.
* gcc.target/i386/avx-vmovntps-256-1.c: Likewise.
* gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16byte.
* gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise.
Index: gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c (revision 4868)
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+static void
+__attribute__((noinline))
+test (float *p, __m256 s)
+{
+ return _mm256_stream_ps (p, s);
+}
+
+static void
+avx_test (void)
+{
+ union256 u;
+ float e[8] __attribute__ ((aligned(32)));
+
+ u.x = _mm256_set_ps (24.43, 68.346, -43.35, 546.46,
+ 46.9, -2.78, 82.9, -0.4);
+ test (e, u.x);
+
+ if (check_union256 (u, e))
+ abort ();
+}
Index: gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c (revision 4868)
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+static void
+__attribute__((noinline))
+test (__m256i *p, __m256i s)
+{
+ return _mm256_stream_si256 (p, s);
+}
+
+static void
+avx_test (void)
+{
+ union256i_d u;
+ int e[8] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1};
+
+ u.x = _mm256_set_epi32 (2434, 6845, 3789, 4683,
+ 4623, 2236, 8295, 1084);
+
+ test ((__m256i *)e, u.x);
+
+ if (check_union256i_d (u, e))
+ abort ();
+}
Index: gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c (revision 4868)
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+static void
+__attribute__((noinline))
+test (double *p, __m256d s)
+{
+ return _mm256_stream_pd (p, s);
+}
+
+static void
+avx_test (void)
+{
+ union256d u;
+ double e[4] __attribute__ ((aligned(32))) = {1,1,1,1};
+
+ u.x = _mm256_set_pd (2134.3343, 1234.635654, -13443.35, 43.35345);
+ test (e, u.x);
+
+ if (check_union256d (u, e))
+ abort ();
+}
Index: gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c (revision 4863)
+++ gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c (working copy)
@@ -24,7 +24,7 @@ static void
TEST (void)
{
union128d u;
- double e[2] __attribute__ ((aligned(32)));
+ double e[2] __attribute__ ((aligned(16)));
u.x = _mm_set_pd (2134.3343,1234.635654);
test (e, u.x);
Index: gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c (revision 4863)
+++ gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c (working copy)
@@ -24,7 +24,7 @@ static void
TEST (void)
{
union128i_d u;
- int e[4] __attribute__ ((aligned(32)));
+ int e[4] __attribute__ ((aligned(16)));
u.x = _mm_set_epi32 (21, 34, 334, 8567);
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md (revision 4863)
+++ gcc/config/i386/sse.md (working copy)
@@ -35,6 +35,9 @@
;; All QI vector modes handled by AVX
(define_mode_iterator AVXMODEQI [V32QI V16QI])
+;; All DI vector modes handled by AVX
+(define_mode_iterator AVXMODEDI [V4DI V2DI])
+
;; All vector modes handled by AVX
(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
@@ -383,26 +386,46 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "avx_movnt<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_movnt<mode>"
[(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
- "%vmovntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_movnt<mode>"
+ [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
+ (unspec:AVXMODEDI
+ [(match_operand:AVXMODEDI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_AVX"
+ "vmovntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse2_movntv2di"
[(set (match_operand:V2DI 0 "memory_operand" "=m")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
- "%vmovntdq\t{%1, %0|%0, %1}"
+ "movntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
- (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse2_movntsi"
Index: gcc/config/i386/avxintrin.h
===================================================================
--- gcc/config/i386/avxintrin.h (revision 4863)
+++ gcc/config/i386/avxintrin.h (working copy)
@@ -968,6 +968,24 @@ _mm256_lddqu_si256 (__m256i const *__P)
return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
}
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_si256 (__m256i *__A, __m256i __B)
+{
+ __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_pd (double *__A, __m256d __B)
+{
+ __builtin_ia32_movntpd256 (__A, (__v4df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_movntps256 (__P, (__v8sf)__A);
+}
+
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 4863)
+++ gcc/config/i386/i386.c (working copy)
@@ -19790,6 +19790,9 @@ enum ix86_builtins
IX86_BUILTIN_STOREUPD256,
IX86_BUILTIN_STOREUPS256,
IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_MOVNTDQ256,
+ IX86_BUILTIN_MOVNTPD256,
+ IX86_BUILTIN_MOVNTPS256,
IX86_BUILTIN_LOADDQU256,
IX86_BUILTIN_STOREDQU256,
IX86_BUILTIN_MASKLOADPD,
@@ -20261,6 +20264,7 @@ enum ix86_special_builtin_type
V2DF_FTYPE_PCV2DF_V2DF,
V2DI_FTYPE_PV2DI,
VOID_FTYPE_PV2SF_V4SF,
+ VOID_FTYPE_PV4DI_V4DI,
VOID_FTYPE_PV2DI_V2DI,
VOID_FTYPE_PCHAR_V32QI,
VOID_FTYPE_PCHAR_V16QI,
@@ -20500,6 +20504,10 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
@@ -22031,6 +22039,11 @@ ix86_init_mmx_sse_builtins (void)
V8SI_type_node, V4SI_type_node,
integer_type_node,
NULL_TREE);
+ tree pv4di_type_node = build_pointer_type (V4DI_type_node);
+ tree void_ftype_pv4di_v4di
+ = build_function_type_list (void_type_node,
+ pv4di_type_node, V4DI_type_node,
+ NULL_TREE);
tree v8sf_ftype_v8sf_v4sf_int
= build_function_type_list (V8SF_type_node,
V8SF_type_node, V4SF_type_node,
@@ -22217,6 +22230,9 @@ ix86_init_mmx_sse_builtins (void)
case VOID_FTYPE_PV2SF_V4SF:
type = void_ftype_pv2sf_v4sf;
break;
+ case VOID_FTYPE_PV4DI_V4DI:
+ type = void_ftype_pv4di_v4di;
+ break;
case VOID_FTYPE_PV2DI_V2DI:
type = void_ftype_pv2di_v2di;
break;
@@ -24063,6 +24079,7 @@ ix86_expand_special_args_builtin (const
memory = 0;
break;
case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV4DI_V4DI:
case VOID_FTYPE_PV2DI_V2DI:
case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
More information about the Gcc-patches
mailing list