This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: Add 256-bit vmovntXXX intrinsic support


AVX Programming Reference (December, 2008) includes 256-bit vmovntXXX
intrinsics.  This patch adds them.  Tested on Linux/Intel64.  OK for
trunk?

Thanks.


H.J.
----
gcc/

2009-01-06  H.J. Lu  <hongjiu.lu@intel.com>

	AVX Programming Reference (December, 2008)
	* config/i386/avxintrin.h (_mm256_stream_si256): New.
	(_mm256_stream_pd): Likewise.
	(_mm256_stream_ps): Likewise.

	* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MOVNTDQ256,
	IX86_BUILTIN_MOVNTPD256 and IX86_BUILTIN_MOVNTPS256.
	(ix86_special_builtin_type): Add VOID_FTYPE_PV4DI_V4DI.
	(bdesc_special_args): Add __builtin_ia32_movntdq256,
	__builtin_ia32_movntpd256 and __builtin_ia32_movntps256.
	(ix86_init_mmx_sse_builtins): Handle VOID_FTYPE_PV4DI_V4DI.
	(ix86_expand_special_args_builtin): Likewise.

	* config/i386/sse.md (AVXMODEDI): New.
	(avx_movnt<mode>): Likewise.
	(avx_movnt<mode>): Likewise.
	(<sse>_movnt<mode>): Remove AVX support.
	(sse2_movntv2di): Likewise.

gcc/testsuite/

2009-01-06  H.J. Lu  <hongjiu.lu@intel.com>

	AVX Programming Reference (December, 2008)
	* gcc.target/i386/avx-vmovntdq-256-1.c: New.
	* gcc.target/i386/avx-vmovntpd-256-1.c: Likewise.
	* gcc.target/i386/avx-vmovntps-256-1.c: Likewise.

	* gcc.target/i386/sse2-movntdq-1.c (TEST): Align array to 16 bytes.
	* gcc.target/i386/sse2-movntpd-1.c (TEST): Likewise.

Index: gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/avx-vmovntps-256-1.c	(revision 4868)
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+static void 
+__attribute__((noinline))
+test (float *p, __m256 s)
+{
+  return _mm256_stream_ps (p, s); 
+}
+
+static void
+avx_test (void)
+{
+  union256 u;
+  float e[8] __attribute__ ((aligned(32)));
+
+  u.x = _mm256_set_ps (24.43, 68.346, -43.35, 546.46,
+		       46.9, -2.78, 82.9, -0.4);
+  test (e, u.x); 
+  
+  if (check_union256 (u, e))
+    abort ();
+}
Index: gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/avx-vmovntdq-256-1.c	(revision 4868)
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+static void
+__attribute__((noinline))
+test (__m256i *p, __m256i s)
+{
+  return _mm256_stream_si256 (p, s); 
+}
+
+static void
+avx_test (void)
+{
+  union256i_d u;
+  int e[8] __attribute__ ((aligned(32))) = {1,1,1,1,1,1,1,1};
+
+  u.x = _mm256_set_epi32 (2434, 6845, 3789, 4683,
+			  4623, 2236, 8295, 1084);
+
+  test ((__m256i *)e, u.x);
+  
+  if (check_union256i_d (u, e))
+    abort ();
+}
Index: gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/avx-vmovntpd-256-1.c	(revision 4868)
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+static void 
+__attribute__((noinline))
+test (double *p, __m256d s)
+{
+  return _mm256_stream_pd (p, s); 
+}
+
+static void
+avx_test (void)
+{
+  union256d u;
+  double e[4] __attribute__ ((aligned(32))) = {1,1,1,1};
+   
+  u.x = _mm256_set_pd (2134.3343, 1234.635654, -13443.35, 43.35345);
+  test (e, u.x); 
+   
+  if (check_union256d (u, e))
+    abort ();
+}
Index: gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c	(revision 4863)
+++ gcc/testsuite/gcc.target/i386/sse2-movntpd-1.c	(working copy)
@@ -24,7 +24,7 @@ static void
 TEST (void)
 {
   union128d u;
-  double e[2] __attribute__ ((aligned(32)));
+  double e[2] __attribute__ ((aligned(16)));
    
   u.x = _mm_set_pd (2134.3343,1234.635654);
   test (e, u.x); 
Index: gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c	(revision 4863)
+++ gcc/testsuite/gcc.target/i386/sse2-movntdq-1.c	(working copy)
@@ -24,7 +24,7 @@ static void
 TEST (void)
 {
   union128i_d u;
-  int e[4] __attribute__ ((aligned(32)));
+  int e[4] __attribute__ ((aligned(16)));
 
   u.x = _mm_set_epi32 (21, 34, 334, 8567);
 
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md	(revision 4863)
+++ gcc/config/i386/sse.md	(working copy)
@@ -35,6 +35,9 @@
 ;; All QI vector modes handled by AVX
 (define_mode_iterator AVXMODEQI [V32QI V16QI])
 
+;; All DI vector modes handled by AVX
+(define_mode_iterator AVXMODEDI [V4DI V2DI])
+
 ;; All vector modes handled by AVX
 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
 
@@ -383,26 +386,46 @@
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
+(define_insn "avx_movnt<mode>"
+  [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+	(unspec:AVXMODEF2P
+	  [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
+	  UNSPEC_MOVNT))]
+  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<sse>_movnt<mode>"
   [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
 	(unspec:SSEMODEF2P
 	  [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
 	  UNSPEC_MOVNT))]
   "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
-  "%vmovntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+  "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
-   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "avx_movnt<mode>"
+  [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
+	(unspec:AVXMODEDI
+	  [(match_operand:AVXMODEDI 1 "register_operand" "x")]
+	  UNSPEC_MOVNT))]
+  "TARGET_AVX"
+  "vmovntdq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<avxvecmode>")])
+
 (define_insn "sse2_movntv2di"
   [(set (match_operand:V2DI 0 "memory_operand" "=m")
 	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
 		     UNSPEC_MOVNT))]
   "TARGET_SSE2"
-  "%vmovntdq\t{%1, %0|%0, %1}"
+  "movntdq\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_data16" "1")
-   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
 (define_insn "sse2_movntsi"
Index: gcc/config/i386/avxintrin.h
===================================================================
--- gcc/config/i386/avxintrin.h	(revision 4863)
+++ gcc/config/i386/avxintrin.h	(working copy)
@@ -968,6 +968,24 @@ _mm256_lddqu_si256 (__m256i const *__P)
   return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
 }
 
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_si256 (__m256i *__A, __m256i __B)
+{
+  __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_pd (double *__A, __m256d __B)
+{
+  __builtin_ia32_movntpd256 (__A, (__v4df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_ps (float *__P, __m256 __A)
+{
+  __builtin_ia32_movntps256 (__P, (__v8sf)__A);
+}
+
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_rcp_ps (__m256 __A)
 {
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 4863)
+++ gcc/config/i386/i386.c	(working copy)
@@ -19790,6 +19790,9 @@ enum ix86_builtins
   IX86_BUILTIN_STOREUPD256,
   IX86_BUILTIN_STOREUPS256,
   IX86_BUILTIN_LDDQU256,
+  IX86_BUILTIN_MOVNTDQ256,
+  IX86_BUILTIN_MOVNTPD256,
+  IX86_BUILTIN_MOVNTPS256,
   IX86_BUILTIN_LOADDQU256,
   IX86_BUILTIN_STOREDQU256,
   IX86_BUILTIN_MASKLOADPD,
@@ -20261,6 +20264,7 @@ enum ix86_special_builtin_type
   V2DF_FTYPE_PCV2DF_V2DF,
   V2DI_FTYPE_PV2DI,
   VOID_FTYPE_PV2SF_V4SF,
+  VOID_FTYPE_PV4DI_V4DI,
   VOID_FTYPE_PV2DI_V2DI,
   VOID_FTYPE_PCHAR_V32QI,
   VOID_FTYPE_PCHAR_V16QI,
@@ -20500,6 +20504,10 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
 
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
@@ -22031,6 +22039,11 @@ ix86_init_mmx_sse_builtins (void)
 				V8SI_type_node, V4SI_type_node,
 				integer_type_node,
 				NULL_TREE);
+  tree pv4di_type_node = build_pointer_type (V4DI_type_node);
+  tree void_ftype_pv4di_v4di
+    = build_function_type_list (void_type_node,
+				pv4di_type_node, V4DI_type_node,
+				NULL_TREE);
   tree v8sf_ftype_v8sf_v4sf_int
     = build_function_type_list (V8SF_type_node,
 				V8SF_type_node, V4SF_type_node,
@@ -22217,6 +22230,9 @@ ix86_init_mmx_sse_builtins (void)
 	case VOID_FTYPE_PV2SF_V4SF:
 	  type = void_ftype_pv2sf_v4sf;
 	  break;
+	case VOID_FTYPE_PV4DI_V4DI:
+	  type = void_ftype_pv4di_v4di;
+	  break;
 	case VOID_FTYPE_PV2DI_V2DI:
 	  type = void_ftype_pv2di_v2di;
 	  break;
@@ -24063,6 +24079,7 @@ ix86_expand_special_args_builtin (const 
       memory = 0;
       break;
     case VOID_FTYPE_PV2SF_V4SF:
+    case VOID_FTYPE_PV4DI_V4DI:
     case VOID_FTYPE_PV2DI_V2DI:
     case VOID_FTYPE_PCHAR_V32QI:
     case VOID_FTYPE_PCHAR_V16QI:


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]