This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[x86, 6/n] Replace builtins with vector extensions
- From: Marc Glisse <marc dot glisse at inria dot fr>
- To: gcc-patches at gcc dot gnu dot org
- Cc: ubizjak at gmail dot com
- Date: Sun, 9 Nov 2014 17:26:27 +0100 (CET)
- Subject: [x86, 6/n] Replace builtins with vector extensions
- Authentication-results: sourceware.org; auth=none
Hello,
This patch uses the vector-extension operators <, > and == for 128-bit
integer vectors. I was surprised not to find _mm_cmplt_epi64 anywhere.
Note that I can do the same for size 256, but not for size 512: there is
no corresponding intrinsic there, only _mask versions that return a mask.
For gcc-5, we should stop either after 5/n or after 7/n (the AVX2 version
of 6/n).
Regtested with 5/n.
2014-11-10 Marc Glisse <marc.glisse@inria.fr>
* config/i386/emmintrin.h (_mm_cmpeq_epi8, _mm_cmpeq_epi16,
_mm_cmpeq_epi32, _mm_cmplt_epi8, _mm_cmplt_epi16, _mm_cmplt_epi32,
_mm_cmpgt_epi8, _mm_cmpgt_epi16, _mm_cmpgt_epi32): Use vector
extensions instead of builtins.
* config/i386/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64):
Likewise.
--
Marc Glisse
Index: emmintrin.h
===================================================================
--- emmintrin.h (revision 217263)
+++ emmintrin.h (working copy)
@@ -1268,69 +1268,69 @@ _mm_or_si128 (__m128i __A, __m128i __B)
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
return (__m128i) ((__v2du)__A ^ (__v2du)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+ return (__m128i) ((__v16qi)__A == (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+ return (__m128i) ((__v8hi)__A == (__v8hi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i) ((__v4si)__A == (__v4si)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+ return (__m128i) ((__v16qi)__A < (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+ return (__m128i) ((__v8hi)__A < (__v8hi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+ return (__m128i) ((__v4si)__A < (__v4si)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+ return (__m128i) ((__v16qi)__A > (__v16qi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+ return (__m128i) ((__v8hi)__A > (__v8hi)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i) ((__v4si)__A > (__v4si)__B);
}
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
Index: smmintrin.h
===================================================================
--- smmintrin.h (revision 217259)
+++ smmintrin.h (working copy)
@@ -260,21 +260,21 @@ _mm_dp_pd (__m128d __X, __m128d __Y, con
#define _mm_dp_pd(X, Y, M) \
((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
- return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
+ return (__m128i) ((__v2di)__X == (__v2di)__Y);
}
/* Min/max packed integer instructions. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
}
@@ -788,21 +788,21 @@ _mm_cmpestrz (__m128i __X, int __LX, __m
((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
(__v16qi)(__m128i)(Y), (int)(LY), \
(int)(M)))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
- return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+ return (__m128i) ((__v2di)__X > (__v2di)__Y);
}
#ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */
#ifdef __DISABLE_SSE4_1__
#undef __DISABLE_SSE4_1__
#pragma GCC pop_options