This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[3.3/mainline] Add 64bit SSE intrinsics


Hi,
with this patch we should have a complete set of intrinsics for
x86-64.

Bootstrapped/regtested x86-64, regtested i386.  OK?

Honza

Thu Feb 20 13:22:54 CET 2003  Jan Hubicka  <jh at suse dot cz>
	* i386.c (def_builtin):  Special case 64bit builtins.
	(MASK_SSE164, MASK_SSE264): New constants.
	(builtin_description): Add 64bit builtins.
	(ix86_init_mmx_sse_builtins): Likewise.
	* i386.h (enum ix86_builtins): Likewise.
	* i386.md (cvtss2siq, cvttss2siq, cvtsd2siq, cvttsd2siq, cvtsi2sdq,
	sse2_movq2dq_rex64, sse2_movdq2q_rex64): New.
	(sse2_movq2dq, sse2_movdq2q): Disable for 64bit.
	* mmintrin.h (_mm_cvtsi64x_si64, _mm_set_pi64x, _mm_cvtsi64_si64x): New.
	* xmmintrin.h (_mm_cvtss_si64x, _mm_cvttss_si64x, _mm_cvtsi64x_ss,
	_mm_set_epi64x, _mm_set1_epi64x, _mm_cvtsd_si64x, _mm_cvttsd_si64x,
	_mm_cvtsi64x_sd, _mm_cvtsi64x_si128, _mm_cvtsi128_si64x): New.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.490.2.24
diff -c -3 -p -r1.490.2.24 i386.c
*** i386.c	16 Feb 2003 18:50:08 -0000	1.490.2.24
--- i386.c	20 Feb 2003 12:21:05 -0000
*************** x86_initialize_trampoline (tramp, fnaddr
*** 12400,12406 ****
  
  #define def_builtin(MASK, NAME, TYPE, CODE)			\
  do {								\
!   if ((MASK) & target_flags)					\
      builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
  		      NULL, NULL_TREE);				\
  } while (0)
--- 12400,12407 ----
  
  #define def_builtin(MASK, NAME, TYPE, CODE)			\
  do {								\
!   if ((MASK) & target_flags					\
!       && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
      builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
  		      NULL, NULL_TREE);				\
  } while (0)
*************** struct builtin_description
*** 12417,12422 ****
--- 12418,12425 ----
  
  /* Used for builtins that are enabled both by -msse and -msse2.  */
  #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
+ #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
+ #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
  
  static const struct builtin_description bdesc_comi[] =
  {
*************** static const struct builtin_description 
*** 12550,12555 ****
--- 12553,12559 ----
  
    { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
    { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+   { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
  
    { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
    { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
*************** static const struct builtin_description 
*** 12700,12705 ****
--- 12704,12710 ----
    { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+   { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
    { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
    { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
  };
*************** static const struct builtin_description 
*** 12715,12722 ****
--- 12720,12729 ----
  
    { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
    { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+   { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
    { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
    { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+   { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
*************** static const struct builtin_description 
*** 12738,12743 ****
--- 12745,12752 ----
  
    { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
    { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+   { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+   { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
    { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
*************** ix86_init_mmx_sse_builtins ()
*** 12783,12793 ****
--- 12792,12809 ----
    tree int_ftype_v4sf
      = build_function_type_list (integer_type_node,
  				V4SF_type_node, NULL_TREE);
+   tree int64_ftype_v4sf
+     = build_function_type_list (long_long_integer_type_node,
+ 				V4SF_type_node, NULL_TREE);
    tree int_ftype_v8qi
      = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
    tree v4sf_ftype_v4sf_int
      = build_function_type_list (V4SF_type_node,
  				V4SF_type_node, integer_type_node, NULL_TREE);
+   tree v4sf_ftype_v4sf_int64
+     = build_function_type_list (V4SF_type_node,
+ 				V4SF_type_node, long_long_integer_type_node,
+ 				NULL_TREE);
    tree v4sf_ftype_v4sf_v2si
      = build_function_type_list (V4SF_type_node,
  				V4SF_type_node, V2SI_type_node, NULL_TREE);
*************** ix86_init_mmx_sse_builtins ()
*** 12938,12946 ****
--- 12954,12969 ----
      = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
    tree int_ftype_v2df
      = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+   tree int64_ftype_v2df
+     = build_function_type_list (long_long_integer_type_node,
+ 		    		V2DF_type_node, NULL_TREE);
    tree v2df_ftype_v2df_int
      = build_function_type_list (V2DF_type_node,
  				V2DF_type_node, integer_type_node, NULL_TREE);
+   tree v2df_ftype_v2df_int64
+     = build_function_type_list (V2DF_type_node,
+ 				V2DF_type_node, long_long_integer_type_node,
+ 				NULL_TREE);
    tree v4sf_ftype_v4sf_v2df
      = build_function_type_list (V4SF_type_node,
  				V4SF_type_node, V2DF_type_node, NULL_TREE);
*************** ix86_init_mmx_sse_builtins ()
*** 13143,13151 ****
--- 13166,13177 ----
    def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
    def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
    def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+   def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
    def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+   def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
    def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
    def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+   def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
  
    def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
    def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
*************** ix86_init_mmx_sse_builtins ()
*** 13263,13274 ****
--- 13289,13303 ----
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+   def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+   def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+   def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
    def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
  
Index: i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.305.2.11
diff -c -3 -p -r1.305.2.11 i386.h
*** i386.h	16 Feb 2003 18:50:08 -0000	1.305.2.11
--- i386.h	20 Feb 2003 12:21:05 -0000
*************** enum ix86_builtins
*** 2124,2132 ****
--- 2124,2135 ----
    IX86_BUILTIN_CVTPI2PS,
    IX86_BUILTIN_CVTPS2PI,
    IX86_BUILTIN_CVTSI2SS,
+   IX86_BUILTIN_CVTSI642SS,
    IX86_BUILTIN_CVTSS2SI,
+   IX86_BUILTIN_CVTSS2SI64,
    IX86_BUILTIN_CVTTPS2PI,
    IX86_BUILTIN_CVTTSS2SI,
+   IX86_BUILTIN_CVTTSS2SI64,
  
    IX86_BUILTIN_MAXPS,
    IX86_BUILTIN_MAXSS,
*************** enum ix86_builtins
*** 2383,2393 ****
--- 2386,2399 ----
  
    IX86_BUILTIN_CVTPI2PD,
    IX86_BUILTIN_CVTSI2SD,
+   IX86_BUILTIN_CVTSI642SD,
  
    IX86_BUILTIN_CVTSD2SI,
+   IX86_BUILTIN_CVTSD2SI64,
    IX86_BUILTIN_CVTSD2SS,
    IX86_BUILTIN_CVTSS2SD,
    IX86_BUILTIN_CVTTSD2SI,
+   IX86_BUILTIN_CVTTSD2SI64,
  
    IX86_BUILTIN_CVTPS2DQ,
    IX86_BUILTIN_CVTPS2PD,
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.401.2.18
diff -c -3 -p -r1.401.2.18 i386.md
*** i386.md	19 Feb 2003 16:18:50 -0000	1.401.2.18
--- i386.md	20 Feb 2003 12:21:07 -0000
***************
*** 20192,20197 ****
--- 20192,20208 ----
     (set_attr "athlon_decode" "double,vector")
     (set_attr "mode" "SF")])
  
+ (define_insn "cvtss2siq"
+   [(set (match_operand:DI 0 "register_operand" "=r,r")
+ 	(vec_select:DI
+ 	 (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
+ 	 (parallel [(const_int 0)])))]
+   "TARGET_SSE"
+   "cvtss2siq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sseicvt")
+    (set_attr "athlon_decode" "double,vector")
+    (set_attr "mode" "SF")])
+ 
  (define_insn "cvttss2si"
    [(set (match_operand:SI 0 "register_operand" "=r,r")
  	(vec_select:SI
***************
*** 20204,20209 ****
--- 20215,20232 ----
     (set_attr "mode" "SF")
     (set_attr "athlon_decode" "double,vector")])
  
+ (define_insn "cvttss2siq"
+   [(set (match_operand:DI 0 "register_operand" "=r,r")
+ 	(vec_select:DI
+ 	 (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
+ 		      UNSPEC_FIX)
+ 	 (parallel [(const_int 0)])))]
+   "TARGET_SSE && TARGET_64BIT"
+   "cvttss2siq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sseicvt")
+    (set_attr "mode" "SF")
+    (set_attr "athlon_decode" "double,vector")])
+ 
  
  ;; MMX insns
  
***************
*** 21782,21787 ****
--- 21805,21819 ----
    [(set_attr "type" "sseicvt")
     (set_attr "mode" "SI")])
  
+ (define_insn "cvtsd2siq"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+ 	(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ 			       (parallel [(const_int 0)]))))]
+   "TARGET_SSE2 && TARGET_64BIT"
+   "cvtsd2siq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sseicvt")
+    (set_attr "mode" "SI")])
+ 
  (define_insn "cvttsd2si"
    [(set (match_operand:SI 0 "register_operand" "=r,r")
  	(unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
***************
*** 21792,21797 ****
--- 21824,21839 ----
     (set_attr "mode" "SI")
     (set_attr "athlon_decode" "double,vector")])
  
+ (define_insn "cvttsd2siq"
+   [(set (match_operand:DI 0 "register_operand" "=r,r")
+ 	(unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
+ 				   (parallel [(const_int 0)]))] UNSPEC_FIX))]
+   "TARGET_SSE2 && TARGET_64BIT"
+   "cvttsd2siq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "sseicvt")
+    (set_attr "mode" "DI")
+    (set_attr "athlon_decode" "double,vector")])
+ 
  (define_insn "cvtsi2sd"
    [(set (match_operand:V2DF 0 "register_operand" "=x,x")
  	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
***************
*** 21805,21810 ****
--- 21847,21865 ----
     (set_attr "mode" "DF")
     (set_attr "athlon_decode" "double,direct")])
  
+ (define_insn "cvtsi2sdq"
+   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ 	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
+ 	 		(vec_duplicate:V2DF
+ 			  (float:DF
+ 			    (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ 			(const_int 2)))]
+   "TARGET_SSE2 && TARGET_64BIT"
+   "cvtsi2sdq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "sseicvt")
+    (set_attr "mode" "DF")
+    (set_attr "athlon_decode" "double,direct")])
+ 
  ;; Conversions between SF and DF
  
  (define_insn "cvtsd2ss"
***************
*** 22741,22762 ****
    [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
  	(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
  		       (parallel [(const_int 0)])))]
!   "TARGET_SSE2"
    "@
     movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movq2dq"
    [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
  	(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
  			 (const_int 0)))]
!   "TARGET_SSE2"
    "@
     movq\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt,ssemov")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movq"
--- 22796,22841 ----
    [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
  	(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
  		       (parallel [(const_int 0)])))]
!   "TARGET_SSE2 && !TARGET_64BIT"
    "@
     movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "TI")])
  
+ (define_insn "sse2_movdq2q_rex64"
+   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
+ 	(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
+ 		       (parallel [(const_int 0)])))]
+   "TARGET_SSE2 && TARGET_64BIT"
+   "@
+    movq\t{%1, %0|%0, %1}
+    movdq2q\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "ssecvt")
+    (set_attr "mode" "TI")])
+ 
  (define_insn "sse2_movq2dq"
    [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
  	(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
  			 (const_int 0)))]
!   "TARGET_SSE2 && !TARGET_64BIT"
    "@
     movq\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt,ssemov")
+    (set_attr "mode" "TI")])
+ 
+ (define_insn "sse2_movq2dq_rex64"
+   [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
+ 	(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
+ 			 (const_int 0)))]
+   "TARGET_SSE2 && TARGET_64BIT"
+   "@
+    movq\t{%1, %0|%0, %1}
+    movq2dq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}"
+   [(set_attr "type" "ssecvt,ssemov,ssecvt")
     (set_attr "mode" "TI")])
  
  (define_insn "sse2_movq"
Index: mmintrin.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/mmintrin.h,v
retrieving revision 1.4
diff -c -3 -p -r1.4 mmintrin.h
*** mmintrin.h	19 Oct 2002 08:48:37 -0000	1.4
--- mmintrin.h	20 Feb 2003 12:21:07 -0000
*************** _mm_cvtsi32_si64 (int __i)
*** 56,61 ****
--- 56,77 ----
    return (__m64) __tmp;
  }
  
+ #ifdef __x86_64__
+ /* Convert I to a __m64 object.  */
+ static __inline __m64 
+ _mm_cvtsi64x_si64 (long long __i)
+ {
+   return (__m64) __i;
+ }
+ 
+ /* Convert I to a __m64 object.  */
+ static __inline __m64 
+ _mm_set_pi64x (long long __i)
+ {
+   return (__m64) __i;
+ }
+ #endif
+ 
  /* Convert the lower 32 bits of the __m64 object into an integer.  */
  static __inline int
  _mm_cvtsi64_si32 (__m64 __i)
*************** _mm_cvtsi64_si32 (__m64 __i)
*** 63,68 ****
--- 79,93 ----
    long long __tmp = (long long)__i;
    return __tmp;
  }
+ 
+ #ifdef __x86_64__
+ /* Convert the __m64 object into a 64-bit integer.  */
+ static __inline long long
+ _mm_cvtsi64_si64x (__m64 __i)
+ {
+   return (long long)__i;
+ }
+ #endif
  
  /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
     the result, and the four 16-bit values from M2 into the upper four 8-bit
Index: xmmintrin.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/xmmintrin.h,v
retrieving revision 1.19.2.1
diff -c -3 -p -r1.19.2.1 xmmintrin.h
*** xmmintrin.h	12 Jan 2003 14:24:22 -0000	1.19.2.1
--- xmmintrin.h	20 Feb 2003 12:21:07 -0000
*************** _mm_cvtss_si32 (__m128 __A)
*** 475,480 ****
--- 475,490 ----
    return __builtin_ia32_cvtss2si ((__v4sf) __A);
  }
  
+ #ifdef __x86_64__
+ /* Convert the lower SPFP value to a 64-bit integer according to the current
+    rounding mode.  */
+ static __inline long long
+ _mm_cvtss_si64x (__m128 __A)
+ {
+   return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+ }
+ #endif
+ 
  /* Convert the two lower SPFP values to 32-bit integers according to the
     current rounding mode.  Return the integers in packed form.  */
  static __inline __m64
*************** _mm_cvttss_si32 (__m128 __A)
*** 490,495 ****
--- 500,514 ----
    return __builtin_ia32_cvttss2si ((__v4sf) __A);
  }
  
+ #ifdef __x86_64__
+ /* Truncate the lower SPFP value to a 64-bit integer.  */
+ static __inline long long
+ _mm_cvttss_si64x (__m128 __A)
+ {
+   return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+ }
+ #endif
+ 
  /* Truncate the two lower SPFP values to 32-bit integers.  Return the
     integers in packed form.  */
  static __inline __m64
*************** _mm_cvtsi32_ss (__m128 __A, int __B)
*** 505,510 ****
--- 524,538 ----
    return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
  }
  
+ #ifdef __x86_64__
+ /* Convert B to a SPFP value and insert it as element zero in A.  */
+ static __inline __m128
+ _mm_cvtsi64x_ss (__m128 __A, long long __B)
+ {
+   return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+ }
+ #endif
+ 
  /* Convert the two 32-bit values in B to SPFP form and insert them
     as the two lower elements in A.  */
  static __inline __m128
*************** _mm_set_epi32 (int __Z, int __Y, int __X
*** 1656,1661 ****
--- 1684,1707 ----
  
    return __u.__v;
  }
+ 
+ #ifdef __x86_64__
+ /* Create the vector [Z Y].  */
+ static __inline __m128i
+ _mm_set_epi64x (long long __Z, long long __Y)
+ {
+   union {
+     long __a[2];
+     __m128i __v;
+   } __u;
+ 
+   __u.__a[0] = __Y;
+   __u.__a[1] = __Z;
+ 
+   return __u.__v;
+ }
+ #endif
+ 
  /* Create the vector [S T U V Z Y X W].  */
  static __inline __m128i
  _mm_set_epi16 (short __Z, short __Y, short __X, short __W,
*************** _mm_set1_epi32 (int __A)
*** 1724,1729 ****
--- 1770,1784 ----
    return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
  }
  
+ #ifdef __x86_64__
+ static __inline __m128i
+ _mm_set1_epi64x (long long __A)
+ {
+   __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+   return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
+ }
+ #endif
+ 
  static __inline __m128i
  _mm_set1_epi16 (short __A)
  {
*************** _mm_cvtsd_si32 (__m128d __A)
*** 1893,1904 ****
--- 1948,1975 ----
    return __builtin_ia32_cvtsd2si ((__v2df) __A);
  }
  
+ #ifdef __x86_64__
+ static __inline long long
+ _mm_cvtsd_si64x (__m128d __A)
+ {
+   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+ }
+ #endif
+ 
  static __inline int
  _mm_cvttsd_si32 (__m128d __A)
  {
    return __builtin_ia32_cvttsd2si ((__v2df) __A);
  }
  
+ #ifdef __x86_64__
+ static __inline long long
+ _mm_cvttsd_si64x (__m128d __A)
+ {
+   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+ }
+ #endif
+ 
  static __inline __m128
  _mm_cvtsd_ss (__m128 __A, __m128d __B)
  {
*************** _mm_cvtsi32_sd (__m128d __A, int __B)
*** 1911,1916 ****
--- 1982,1995 ----
    return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
  }
  
+ #ifdef __x86_64__
+ static __inline __m128d
+ _mm_cvtsi64x_sd (__m128d __A, long long __B)
+ {
+   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+ }
+ #endif
+ 
  static __inline __m128d
  _mm_cvtss_sd (__m128d __A, __m128 __B)
  {
*************** _mm_cvtsi32_si128 (int __A)
*** 2459,2464 ****
--- 2538,2551 ----
    return (__m128i) __builtin_ia32_loadd (&__A);
  }
  
+ #ifdef __x86_64__
+ static __inline __m128i
+ _mm_cvtsi64x_si128 (long long __A)
+ {
+   return (__m128i) __builtin_ia32_movq2dq (__A);
+ }
+ #endif
+ 
  static __inline int
  _mm_cvtsi128_si32 (__m128i __A)
  {
*************** _mm_cvtsi128_si32 (__m128i __A)
*** 2466,2471 ****
--- 2553,2566 ----
    __builtin_ia32_stored (&__tmp, (__v4si)__A);
    return __tmp;
  }
+ 
+ #ifdef __x86_64__
+ static __inline long long
+ _mm_cvtsi128_si64x (__m128i __A)
+ {
+   return __builtin_ia32_movdq2q ((__v2di)__A);
+ }
+ #endif
  
  #endif /* __SSE2__  */
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]