This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
various _mm512_set* intrinsics
- From: Ulrich Drepper <drepper at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 27 Mar 2014 22:11:46 -0400
- Subject: various _mm512_set* intrinsics
- Authentication-results: sourceware.org; auth=none
Here are more intrinsics that are missing. I know that gcc currently
generates horrible code for most of them but I think it's more important
to have the API in place, albeit non-optimal. Maybe this entices
someone to add the necessary optimizations.
The code is self-contained and shouldn't interfere with any correct
code. Should this also go into 4.9?
2014-03-27 Ulrich Drepper <drepper@gmail.com>
* config/i386/avx512fintrin.h (__v32hi): Define type.
(__v64qi): Likewise.
(_mm512_set1_epi8): Define.
(_mm512_set1_epi16): Define.
(_mm512_set4_epi32): Define.
(_mm512_set4_epi64): Define.
(_mm512_set4_pd): Define.
(_mm512_set4_ps): Define.
(_mm512_setr4_epi64): Define.
(_mm512_setr4_epi32): Define.
(_mm512_setr4_pd): Define.
(_mm512_setr4_ps): Define.
(_mm512_setzero_epi32): Define.
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 9602866..314895a 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -39,6 +39,8 @@ typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi __attribute__ ((__vector_size__ (64))); /* 64-byte vector of short; used by _mm512_set1_epi16. */
+typedef char __v64qi __attribute__ ((__vector_size__ (64))); /* 64-byte vector of char; used by _mm512_set1_epi8. */
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
@@ -130,6 +132,32 @@ _mm512_undefined_si512 (void)
return __Y;
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi8 (char __A) /* Return a __m512i whose 64 char elements all equal __A. */
+{
+ return __extension__ (__m512i)(__v64qi)
+ { __A, __A, __A, __A, __A, __A, __A, __A, /* 8 rows of 8 copies = 64 elements */
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi16 (short __A) /* Return a __m512i whose 32 short elements all equal __A. */
+{
+ return __extension__ (__m512i)(__v32hi)
+ { __A, __A, __A, __A, __A, __A, __A, __A, /* 4 rows of 8 copies = 32 elements */
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
@@ -152,6 +180,54 @@ _mm512_set1_ps (float __A)
(__mmask16) -1);
}
+/* Create the vector [A B C D A B C D A B C D A B C D], written from the highest element down: element 0 is __D, element 15 is __A. */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
+{
+ return __extension__ (__m512i)(__v16si)
+ { __D, __C, __B, __A, __D, __C, __B, __A, /* initializer runs from element 0 upward, hence the reversed argument order */
+ __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi64 (long long __A, long long __B, long long __C,
+ long long __D) /* Return a __m512i of eight long long elements: the four arguments repeated twice. */
+{
+ return __extension__ (__m512i) (__v8di)
+ { __D, __C, __B, __A, __D, __C, __B, __A }; /* element 0 is __D, element 7 is __A */
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_pd (double __A, double __B, double __C, double __D) /* Return a __m512d holding the four arguments repeated twice. */
+{
+ return __extension__ (__m512d)
+ { __D, __C, __B, __A, __D, __C, __B, __A }; /* element 0 is __D, element 7 is __A */
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_ps (float __A, float __B, float __C, float __D) /* Return a __m512 holding the four arguments repeated four times. */
+{
+ return __extension__ (__m512)
+ { __D, __C, __B, __A, __D, __C, __B, __A, /* element 0 is __D, element 15 is __A */
+ __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+#define _mm512_setr4_epi64(e0,e1,e2,e3) \
+ _mm512_set4_epi64(e3,e2,e1,e0) /* "r" = reversed: forwards the arguments in opposite order, so e0 lands in element 0 */
+
+#define _mm512_setr4_epi32(e0,e1,e2,e3) \
+ _mm512_set4_epi32(e3,e2,e1,e0) /* reversed-argument form of _mm512_set4_epi32 */
+
+#define _mm512_setr4_pd(e0,e1,e2,e3) \
+ _mm512_set4_pd(e3,e2,e1,e0) /* reversed-argument form of _mm512_set4_pd */
+
+#define _mm512_setr4_ps(e0,e1,e2,e3) \
+ _mm512_set4_ps(e3,e2,e1,e0) /* reversed-argument form of _mm512_set4_ps */
+
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
@@ -169,6 +245,13 @@ _mm512_setzero_pd (void)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_epi32 (void) /* Return an all-zero __m512i; the body is the same as _mm512_setzero_si512. */
+{
+ return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; /* eight zero long long elements */
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };