[PATCH 1/2] rs6000: Support _mm_insert_epi{8,32,64}
Paul A. Clarke
pc@us.ibm.com
Fri Sep 25 14:07:46 GMT 2020
On Thu, Sep 24, 2020 at 06:22:10PM -0500, Segher Boessenkool wrote:
> On Wed, Sep 23, 2020 at 05:12:44PM -0500, Paul A. Clarke wrote:
> > +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
> > +{
> > + __v16qi result = (__v16qi)__A;
> > +
> > + result [(__N & 0b1111)] = __D;
>
> Hrm, GCC supports binary constants like this since 2007, so okay. But I
> have to wonder if this improves anything over hex (or decimal even!)
> The parens are superfluous (and only hinder legibility), fwiw.
>
> > +_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
> > +{
> > + __v2di result = (__v2di)__A;
> > +
> > + result [(__N & 0b1)] = __D;
>
> Especially single-digit numbers look really goofy (like 0x0, but even
> worse for binary somehow).
>
> Anyway, okay for trunk, with or without those things improved. Thanks!
I was trying to obviously and consistently convey the sizes of the masks,
but I really want to convey _why_ there are masks, so let me try a
different approach, below.
--
Add compatibility implementations for SSE4.1 intrinsics
_mm_insert_epi8, _mm_insert_epi32, _mm_insert_epi64.
2020-09-25 Paul A. Clarke <pc@us.ibm.com>
gcc/
* config/rs6000/smmintrin.h (_mm_insert_epi8): New.
(_mm_insert_epi32): New.
(_mm_insert_epi64): New.
---
gcc/config/rs6000/smmintrin.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index d78ddba99d9..8128c417978 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -42,6 +42,36 @@
#include <altivec.h>
#include <tmmintrin.h>
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
+{
+ __v16qi result = (__v16qi)__A;
+
+ result [__N % (sizeof result / sizeof result[0])] = __D;
+
+ return (__m128i) result;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
+{
+ __v4si result = (__v4si)__A;
+
+ result [__N % (sizeof result / sizeof result[0])] = __D;
+
+ return (__m128i) result;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
+{
+ __v2di result = (__v2di)__A;
+
+ result [__N % (sizeof result / sizeof result[0])] = __D;
+
+ return (__m128i) result;
+}
+
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
--
2.18.4
More information about the Gcc-patches
mailing list