From 48126a9798aace080f17f517d6eb43500471cdb6 Mon Sep 17 00:00:00 2001
From: Jan Hubicka
Date: Thu, 17 Oct 2002 19:09:17 +0200
Subject: [PATCH] re PR other/8062 (double precision loads and stores missing
 from xmmintrin.h)

	* mmintrin.h: Guard by __MMX__
	* xmmintrin.h: Guard by __SSE__

	PR other/8062
	* xmmintrin.h (_MM_SHUFFLE2): New macro.
	(_mm_load*_?d): New functions.
	(_mm_set*_?d): New functions.
	(_mm_store*_?d): New functions.

From-SVN: r58252
---
 gcc/ChangeLog               |  11 +++
 gcc/config/i386/i386.c      |   5 ++
 gcc/config/i386/i386.md     |  11 +++
 gcc/config/i386/mmintrin.h  |   4 +
 gcc/config/i386/xmmintrin.h | 157 +++++++++++++++++++++++++++++++++++-
 5 files changed, 187 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b00610de28e9..3ec27fbbd657 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+Thu Oct 17 18:40:47 CEST 2002  Jan Hubicka
+
+	* mmintrin.h: Guard by __MMX__
+	* xmmintrin.h: Guard by __SSE__
+
+	PR other/8062
+	* xmmintrin.h (_MM_SHUFFLE2): New macro.
+	(_mm_load*_?d): New functions.
+	(_mm_set*_?d): New functions.
+	(_mm_store*_?d): New functions.
+
 Wed Oct 16 15:01:29 CEST 2002  Jan Hubicka
 
 	Really commit patch announced at Oct 14
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fe4ee7cf94b0..739db6a287e3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13315,6 +13315,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     case IX86_BUILTIN_STORERPD:
       return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
 
+    case IX86_BUILTIN_CLRPD:
+      target = gen_reg_rtx (V2DFmode);
+      emit_insn (gen_sse_clrv2df (target));
+      return target;
+
     case IX86_BUILTIN_MFENCE:
       emit_insn (gen_sse2_mfence ());
       return 0;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index de6216f9d0d8..095de745e40f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -18810,6 +18810,17 @@
    (set_attr "memory" "none")
    (set_attr "mode" "V4SF")])
 
+;; Use xor, but don't show input operands so they aren't live before
+;; this insn.
+(define_insn "sse_clrv2df"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+	(unspec:V2DF [(const_int 0)] UNSPEC_NOP))]
+  "TARGET_SSE2"
+  "xorpd\t{%0, %0|%0, %0}"
+  [(set_attr "type" "sselog")
+   (set_attr "memory" "none")
+   (set_attr "mode" "V4SF")])
+
 ;; SSE mask-generating compares
 
 (define_insn "maskcmpv4sf3"
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 88e384f80117..2defd50d6d64 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -30,6 +30,9 @@
 #ifndef _MMINTRIN_H_INCLUDED
 #define _MMINTRIN_H_INCLUDED
 
+#ifndef __MMX__
+# error "MMX instruction set not enabled"
+#else
 /* The data type intended for user use.  */
 typedef unsigned long long __m64 __attribute__ ((__aligned__ (8)));
 
@@ -539,4 +542,5 @@ _mm_set1_pi8 (char __b)
   return _mm_set1_pi32 (__i);
 }
 
+#endif /* __MMX__ */
 #endif /* _MMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 627dcdf5ca34..8c4aa62acbf0 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -30,6 +30,10 @@
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
+#ifndef __SSE__
+# error "SSE instruction set not enabled"
+#else
+
 /* We need type definitions from the MMX header file.  */
 #include <mmintrin.h>
 
@@ -1082,9 +1086,159 @@ typedef int __v4si __attribute__ ((mode (V4SI)));
 typedef int __v8hi __attribute__ ((mode (V8HI)));
 typedef int __v16qi __attribute__ ((mode (V16QI)));
 
+/* Create a selector for use with the SHUFPD instruction.  */
+#define _MM_SHUFFLE2(fp1,fp0) \
+ (((fp1) << 1) | (fp0))
+
 #define __m128i __v2di
 #define __m128d __v2df
 
+/* Create a vector with element 0 as *P and the rest zero.  */
+static __inline __m128d
+_mm_load_sd (double *__P)
+{
+  return (__m128d) __builtin_ia32_loadsd (__P);
+}
+
+/* Create a vector with both elements equal to *P.  */
+static __inline __m128d
+_mm_load1_pd (double *__P)
+{
+  __v2df __tmp = __builtin_ia32_loadsd (__P);
+  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
+}
+
+static __inline __m128d
+_mm_load_pd1 (double *__P)
+{
+  return _mm_load1_pd (__P);
+}
+
+/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
+static __inline __m128d
+_mm_load_pd (double *__P)
+{
+  return (__m128d) __builtin_ia32_loadapd (__P);
+}
+
+/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
+static __inline __m128d
+_mm_loadu_pd (double *__P)
+{
+  return (__m128d) __builtin_ia32_loadupd (__P);
+}
+
+/* Load two DPFP values in reverse order.  The address must be aligned.  */
+static __inline __m128d
+_mm_loadr_pd (double *__P)
+{
+  __v2df __tmp = __builtin_ia32_loadapd (__P);
+  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
+}
+
+/* Create a vector with element 0 as F and the rest zero.  */
+static __inline __m128d
+_mm_set_sd (double __F)
+{
+  return (__m128d) __builtin_ia32_loadsd (&__F);
+}
+
+/* Create a vector with both elements equal to F.  */
+static __inline __m128d
+_mm_set1_pd (double __F)
+{
+  __v2df __tmp = __builtin_ia32_loadsd (&__F);
+  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
+}
+
+static __inline __m128d
+_mm_set_pd1 (double __F)
+{
+  return _mm_set1_pd (__F);
+}
+
+/* Create the vector [Z Y].  */
+static __inline __m128d
+_mm_set_pd (double __Z, double __Y)
+{
+  union {
+    double __a[2];
+    __m128d __v;
+  } __u;
+
+  __u.__a[0] = __Y;
+  __u.__a[1] = __Z;
+
+  return __u.__v;
+}
+
+/* Create the vector [Y Z].  */
+static __inline __m128d
+_mm_setr_pd (double __Z, double __Y)
+{
+  return _mm_set_pd (__Y, __Z);
+}
+
+/* Create a vector of zeros.  */
+static __inline __m128d
+_mm_setzero_pd (void)
+{
+  return (__m128d) __builtin_ia32_setzeropd ();
+}
+
+/* Stores the lower DPFP value.  */
+static __inline void
+_mm_store_sd (double *__P, __m128d __A)
+{
+  __builtin_ia32_storesd (__P, (__v2df)__A);
+}
+
+/* Store the lower DPFP value across two words.  */
+static __inline void
+_mm_store1_pd (double *__P, __m128d __A)
+{
+  __v2df __va = (__v2df)__A;
+  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0));
+  __builtin_ia32_storeapd (__P, __tmp);
+}
+
+static __inline void
+_mm_store_pd1 (double *__P, __m128d __A)
+{
+  _mm_store1_pd (__P, __A);
+}
+
+/* Store two DPFP values.  The address must be 16-byte aligned.  */
+static __inline void
+_mm_store_pd (double *__P, __m128d __A)
+{
+  __builtin_ia32_storeapd (__P, (__v2df)__A);
+}
+
+/* Store two DPFP values.  The address need not be 16-byte aligned.  */
+static __inline void
+_mm_storeu_pd (double *__P, __m128d __A)
+{
+  __builtin_ia32_storeupd (__P, (__v2df)__A);
+}
+
+/* Store two DPFP values in reverse order.  The address must be aligned.  */
+static __inline void
+_mm_storer_pd (double *__P, __m128d __A)
+{
+  __v2df __va = (__v2df)__A;
+  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1));
+  __builtin_ia32_storeapd (__P, __tmp);
+}
+
+/* Sets the low DPFP value of A from the low value of B.  */
+static __inline __m128d
+_mm_move_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+}
+
+
 static __inline __m128d
 _mm_add_pd (__m128d __A, __m128d __B)
 {
@@ -2013,6 +2167,7 @@ _mm_mfence (void)
   __builtin_ia32_mfence ();
 }
 
-#endif /* __SSE2_BUILTINS__ */
+#endif /* __SSE2__ */
+#endif /* __SSE__ */
 
 #endif /* _XMMINTRIN_H_INCLUDED */
-- 
2.43.5
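
For reference, a minimal usage sketch (not part of the patch) exercising the intrinsics this commit adds. The file name, buffer names and main() are illustrative assumptions; it presumes a compiler that defines __SSE2__ (for example, building with -msse2) so that the new double-precision section of xmmintrin.h is enabled.

/* sse2-use.c (illustrative name): exercise the new DPFP intrinsics.
   Assumed build command: gcc -msse2 sse2-use.c  */
#include <xmmintrin.h>
#include <stdio.h>

int
main (void)
{
  /* _mm_load_pd/_mm_store_pd require 16-byte alignment;
     the unaligned variants are _mm_loadu_pd/_mm_storeu_pd.  */
  double in[2] __attribute__ ((aligned (16))) = { 1.0, 2.0 };
  double out[2] __attribute__ ((aligned (16)));

  __m128d a = _mm_load_pd (in);           /* vector [2.0 1.0]             */
  __m128d b = _mm_set_pd (4.0, 3.0);      /* vector [4.0 3.0], i.e. [Z Y] */

  _mm_store_pd (out, _mm_add_pd (a, b));  /* out = { 4.0, 6.0 }           */
  printf ("%g %g\n", out[0], out[1]);
  return 0;
}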