This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[x86, 6/n] Replace builtins with vector extensions


Hello,

< > and == for integer vectors of size 128. I was surprised not to find _mm_cmplt_epi64 anywhere. Note that I can do the same for size 256, but not 512, there is no corresponding intrinsic, there are only _mask versions that return a mask.

For gcc-5, we should stop either after 5/n or after 7/n (avx2 version of 6/n).

Regtested with 5/n.

2014-11-10  Marc Glisse  <marc.glisse@inria.fr>

	* config/i386/emmintrin.h (_mm_cmpeq_epi8, _mm_cmpeq_epi16,
	_mm_cmpeq_epi32, _mm_cmplt_epi8, _mm_cmplt_epi16, _mm_cmplt_epi32,
	_mm_cmpgt_epi8, _mm_cmpgt_epi16, _mm_cmpgt_epi32): Use vector
	extensions instead of builtins.
	* config/i386/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64):
	Likewise.


--
Marc Glisse
Index: emmintrin.h
===================================================================
--- emmintrin.h	(revision 217263)
+++ emmintrin.h	(working copy)
@@ -1268,69 +1268,69 @@ _mm_or_si128 (__m128i __A, __m128i __B)
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_xor_si128 (__m128i __A, __m128i __B)
 {
   return (__m128i) ((__v2du)__A ^ (__v2du)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+  return (__m128i) ((__v16qi)__A == (__v16qi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+  return (__m128i) ((__v8hi)__A == (__v8hi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+  return (__m128i) ((__v4si)__A == (__v4si)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmplt_epi8 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+  return (__m128i) ((__v16qi)__A < (__v16qi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmplt_epi16 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+  return (__m128i) ((__v8hi)__A < (__v8hi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmplt_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+  return (__m128i) ((__v4si)__A < (__v4si)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+  return (__m128i) ((__v16qi)__A > (__v16qi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+  return (__m128i) ((__v8hi)__A > (__v8hi)__B);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+  return (__m128i) ((__v4si)__A > (__v4si)__B);
 }
 
 #ifdef __OPTIMIZE__
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_extract_epi16 (__m128i const __A, int const __N)
 {
   return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
Index: smmintrin.h
===================================================================
--- smmintrin.h	(revision 217259)
+++ smmintrin.h	(working copy)
@@ -260,21 +260,21 @@ _mm_dp_pd (__m128d __X, __m128d __Y, con
 #define _mm_dp_pd(X, Y, M)						\
   ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X),			\
 				  (__v2df)(__m128d)(Y), (int)(M)))
 #endif
 
 /* Packed integer 64-bit comparison, zeroing or filling with ones
    corresponding parts of result.  */
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
+  return (__m128i) ((__v2di)__X == (__v2di)__Y);
 }
 
 /*  Min/max packed integer instructions.  */
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_epi8 (__m128i __X, __m128i __Y)
 {
   return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
 }
 
@@ -788,21 +788,21 @@ _mm_cmpestrz (__m128i __X, int __LX, __m
   ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
 				       (__v16qi)(__m128i)(Y), (int)(LY), \
 				       (int)(M)))
 #endif
 
 /* Packed integer 64-bit comparison, zeroing or filling with ones
    corresponding parts of result.  */
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+  return (__m128i) ((__v2di)__X > (__v2di)__Y);
 }
 
 #ifdef __DISABLE_SSE4_2__
 #undef __DISABLE_SSE4_2__
 #pragma GCC pop_options
 #endif /* __DISABLE_SSE4_2__ */
 
 #ifdef __DISABLE_SSE4_1__
 #undef __DISABLE_SSE4_1__
 #pragma GCC pop_options

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]