[PATCH] x86: Define _mm*_undefined_*
Ulrich Drepper
drepper@gmail.com
Sun Mar 16 11:25:00 GMT 2014
[This patch is so far really only meant for comments.  I haven't tested
it at all yet.]
Intel's intrinsic specification includes one set which is currently not
defined in gcc's headers: the _mm*_undefined_* intrinsics.

The purpose of these intrinsics (currently three classes, three formats
each) is to create a pseudo-value which the compiler does not treat as
uninitialized, without emitting any code to produce it.  These
intrinsics are meant for places where it is known that the value of a
register is never actually used.  This is already important with AVX2
and becomes really crucial with AVX512.
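
To make the intent concrete: a masked builtin called with an all-ones
mask never reads its pass-through operand, so any value will do there.
A minimal sketch using the names from the patch below (my_abs_epi32 is
just a stand-in for the header code, compiled with -mavx512f):

  #include <immintrin.h>

  extern __inline __m512i
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  my_abs_epi32 (__m512i __A)
  {
    /* The second operand is only the pass-through value for masked-out
       lanes; with the (__mmask16) -1 mask it is never read, so an
       undefined value suffices and no XOR has to be emitted for it.  */
    return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                                                   (__v16si)
                                                   _mm512_undefined_si512 (),
                                                   (__mmask16) -1);
  }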
Currently three different techniques are used instead (condensed
examples follow the list):

- _mm*_setzero_*() is used.  Even though the XOR operation does not
  cost anything, it still messes with the instruction scheduling and
  more code is generated.

- another parameter is duplicated.  Most of the time this leads to one
  additional move instruction.

- uninitialized variables are used (this is in the new AVX512 code).
  The compiler should generate warnings for these headers; I haven't
  tried it.
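
Condensed from the current headers (all three are replaced in the patch
below):

  /* (1) zeroing, e.g. in _mm512_abs_epi64 -- the XOR itself is cheap
     but still occupies an issue slot and enlarges the code:  */
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) -1);

  /* (2) duplicating another parameter, e.g. in the _mm512_permute_pd
     macro, which usually costs an extra move:  */
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
                                               (__v8df)(__m512d)(X), \
                                               (__mmask8)(-1)))

  /* (3) an uninitialized variable, e.g. in _mm512_broadcastss_ps,
     which invites -Wuninitialized warnings:  */
  __v16sf __O;
  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
                                                 (__mmask16) -1);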
Using the _mm*_undefined_*() intrinsics is much cleaner and also
potentially allows the compiler to generate better code.
For now the implementation uses an inline asm to suggest to the
compiler that the variable is initialized.  This does not prevent a
real register from being allocated for the purpose, but it saves the
XOR instruction.
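
For reference, the 128-bit variants (for xmmintrin.h etc.) follow the
same pattern; a sketch of what the definition looks like:

  extern __inline __m128
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  _mm_undefined_ps (void)
  {
    __m128 __Y;
    /* The empty asm claims to produce __Y, so the compiler treats it
       as initialized without emitting any instruction for it.  */
    __asm__ ("" : "=x" (__Y));
    return __Y;
  }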
The correct and optimal implementation will require a compiler built-in
which does something different depending on how the value is used (a
sketch follows the list):

- if the value is never modified, any register can be picked.  In
  function/intrinsic calls the parameter simply need not be loaded at
  all.

- if the value is modified (and allocated to a register or memory
  location), no initialization of the variable is needed (equivalent
  to the asm now).
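
With such a built-in the header definitions would shrink to something
like the following (the __builtin_ia32_undef512 name is made up here,
just to illustrate the shape):

  extern __inline __m512i
  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  _mm512_undefined_si512 (void)
  {
    /* Hypothetical built-in: yields a value the compiler may pick
       freely, with no code emitted and, if the value is never
       modified, no register reserved for it.  */
    return (__m512i) __builtin_ia32_undef512 ();
  }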
The questions are:

- is there interest in adding the necessary compiler built-in?

- if yes, is anyone interested in working on this?

- and: is it worth adding a patch like the one here in the meantime?
As it stands now gcc's intrinsics are not complete, and programs
following Intel's manuals can fail to compile.
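
For instance, a program straight out of Intel's documentation like the
following does not compile today because _mm256_undefined_si256 is
missing:

  #include <immintrin.h>

  __m256i
  f (void)
  {
    /* Documented by Intel, currently not provided by gcc's headers.  */
    return _mm256_undefined_si256 ();
  }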
2014-03-16 Ulrich Drepper <drepper@gmail.com>
* config/i386/avxintrin.h (_mm256_undefined_si256): Define.
(_mm256_undefined_ps): Define.
(_mm256_undefined_pd): Define.
* config/i386/emmintrin.h (_mm_undefined_si128): Define.
(_mm_undefined_pd): Define.
* config/i386/xmmintrin.h (_mm_undefined_ps): Define.
* config/i386/avx512fintrin.h (_mm512_undefined_si512): Define.
(_mm512_undefined_ps): Define.
(_mm512_undefined_pd): Define.
Use _mm*_undefined_*.
* config/i386/avx2intrin.h: Use _mm*_undefined_*.
diff -up gcc/config/i386/avx2intrin.h.old gcc/config/i386/avx2intrin.h
--- gcc/config/i386/avx2intrin.h.old 2014-03-15 21:44:32.709952284 -0400
+++ gcc/config/i386/avx2intrin.h 2014-03-15 21:56:38.705674509 -0400
@@ -1233,10 +1233,10 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_pd (double const *base, __m128i index, const int scale)
{
- __v2df src = _mm_setzero_pd ();
- __v2df mask = _mm_cmpeq_pd (src, src);
+ __v2df zero = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (zero, zero);
- return (__m128d) __builtin_ia32_gathersiv2df (src,
+ return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
base,
(__v4si)index,
mask,
@@ -1259,10 +1259,10 @@ extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
{
- __v4df src = _mm256_setzero_pd ();
- __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
+ __v4df zero = _mm256_setzero_pd ();
+ __v4df mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
- return (__m256d) __builtin_ia32_gathersiv4df (src,
+ return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
base,
(__v4si)index,
mask,
diff -up gcc/config/i386/avx512fintrin.h.old gcc/config/i386/avx512fintrin.h
--- gcc/config/i386/avx512fintrin.h.old 2014-03-15 22:19:17.347734027 -0400
+++ gcc/config/i386/avx512fintrin.h 2014-03-16 00:09:54.699179008 -0400
@@ -108,6 +108,33 @@ _mm512_set_ps (float __A, float __B, flo
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ps (void)
+{
+ __m512 __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_pd (void)
+{
+ __m512d __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_si512 (void)
+{
+ __m512i __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
@@ -389,7 +416,7 @@ _mm512_mullo_epi32 (__m512i __A, __m512i
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -420,7 +447,7 @@ _mm512_sllv_epi32 (__m512i __X, __m512i
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -452,7 +479,7 @@ _mm512_srav_epi32 (__m512i __X, __m512i
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -484,7 +511,7 @@ _mm512_srlv_epi32 (__m512i __X, __m512i
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -516,7 +543,7 @@ _mm512_add_epi64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -548,7 +575,7 @@ _mm512_sub_epi64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_pd (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -580,7 +607,7 @@ _mm512_sllv_epi64 (__m512i __X, __m512i
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
- _mm512_setzero_pd (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -612,7 +639,7 @@ _mm512_srav_epi64 (__m512i __X, __m512i
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -644,7 +671,7 @@ _mm512_srlv_epi64 (__m512i __X, __m512i
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -676,7 +703,7 @@ _mm512_add_epi32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -708,7 +735,7 @@ _mm512_mul_epi32 (__m512i __X, __m512i _
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -739,7 +766,7 @@ _mm512_sub_epi32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -771,7 +798,7 @@ _mm512_mul_epu32 (__m512i __X, __m512i _
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -802,7 +829,7 @@ _mm512_slli_epi64 (__m512i __A, unsigned
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -828,7 +855,7 @@ _mm512_maskz_slli_epi64 (__mmask8 __U, _
#else
#define _mm512_slli_epi64(X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_slli_epi64(W, U, X, C) \
@@ -849,7 +876,7 @@ _mm512_sll_epi64 (__m512i __A, __m128i _
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -881,7 +908,7 @@ _mm512_srli_epi64 (__m512i __A, unsigned
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -907,7 +934,7 @@ _mm512_maskz_srli_epi64 (__mmask8 __U, _
#else
#define _mm512_srli_epi64(X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_srli_epi64(W, U, X, C) \
@@ -928,7 +955,7 @@ _mm512_srl_epi64 (__m512i __A, __m128i _
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -960,7 +987,7 @@ _mm512_srai_epi64 (__m512i __A, unsigned
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -986,7 +1013,7 @@ _mm512_maskz_srai_epi64 (__mmask8 __U, _
#else
#define _mm512_srai_epi64(X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_srai_epi64(W, U, X, C) \
@@ -1007,7 +1034,7 @@ _mm512_sra_epi64 (__m512i __A, __m128i _
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1039,7 +1066,7 @@ _mm512_slli_epi32 (__m512i __A, unsigned
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1065,7 +1092,7 @@ _mm512_maskz_slli_epi32 (__mmask16 __U,
#else
#define _mm512_slli_epi32(X, C) \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_slli_epi32(W, U, X, C) \
@@ -1086,7 +1113,7 @@ _mm512_sll_epi32 (__m512i __A, __m128i _
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1118,7 +1145,7 @@ _mm512_srli_epi32 (__m512i __A, unsigned
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1144,7 +1171,7 @@ _mm512_maskz_srli_epi32 (__mmask16 __U,
#else
#define _mm512_srli_epi32(X, C) \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_srli_epi32(W, U, X, C) \
@@ -1165,7 +1192,7 @@ _mm512_srl_epi32 (__m512i __A, __m128i _
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1197,7 +1224,7 @@ _mm512_srai_epi32 (__m512i __A, unsigned
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1223,7 +1250,7 @@ _mm512_maskz_srai_epi32 (__mmask16 __U,
#else
#define _mm512_srai_epi32(X, C) \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_srai_epi32(W, U, X, C) \
@@ -1244,7 +1271,7 @@ _mm512_sra_epi32 (__m512i __A, __m128i _
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1414,7 +1441,7 @@ _mm512_rcp14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -1443,7 +1470,7 @@ _mm512_rcp14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -1488,7 +1515,7 @@ _mm512_rsqrt14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -1517,7 +1544,7 @@ _mm512_rsqrt14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -1563,7 +1590,7 @@ _mm512_sqrt_round_pd (__m512d __A, const
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -1593,7 +1620,7 @@ _mm512_sqrt_round_ps (__m512 __A, const
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -1635,7 +1662,7 @@ _mm_sqrt_round_ss (__m128 __A, __m128 __
}
#else
#define _mm512_sqrt_round_pd(A, C) \
- (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
@@ -1644,7 +1671,7 @@ _mm_sqrt_round_ss (__m128 __A, __m128 __
(__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_sqrt_round_ps(A, C) \
- (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
@@ -1665,7 +1692,7 @@ _mm512_cvtepi8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1694,7 +1721,7 @@ _mm512_cvtepi8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1723,7 +1750,7 @@ _mm512_cvtepi16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1752,7 +1779,7 @@ _mm512_cvtepi16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1781,7 +1808,7 @@ _mm512_cvtepi32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1810,7 +1837,7 @@ _mm512_cvtepu8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1839,7 +1866,7 @@ _mm512_cvtepu8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1868,7 +1895,7 @@ _mm512_cvtepu16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1897,7 +1924,7 @@ _mm512_cvtepu16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1926,7 +1953,7 @@ _mm512_cvtepu32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1957,7 +1984,7 @@ _mm512_add_round_pd (__m512d __A, __m512
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -1991,7 +2018,7 @@ _mm512_add_round_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2024,7 +2051,7 @@ _mm512_sub_round_pd (__m512d __A, __m512
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2058,7 +2085,7 @@ _mm512_sub_round_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2085,7 +2112,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U
}
#else
#define _mm512_add_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
@@ -2094,7 +2121,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U
(__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_add_round_ps(A, B, C) \
- (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
@@ -2103,7 +2130,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U
(__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_sub_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
@@ -2112,7 +2139,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U
(__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_sub_round_ps(A, B, C) \
- (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
@@ -2129,7 +2156,7 @@ _mm512_mul_round_pd (__m512d __A, __m512
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2163,7 +2190,7 @@ _mm512_mul_round_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2196,7 +2223,7 @@ _mm512_div_round_pd (__m512d __M, __m512
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2230,7 +2257,7 @@ _mm512_div_round_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2294,7 +2321,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B
#else
#define _mm512_mul_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
@@ -2303,7 +2330,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_mul_round_ps(A, B, C) \
- (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
@@ -2312,7 +2339,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B
(__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_div_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
@@ -2321,7 +2348,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B
(__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_div_round_ps(A, B, C) \
- (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
@@ -2350,7 +2377,7 @@ _mm512_max_round_pd (__m512d __A, __m512
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2384,7 +2411,7 @@ _mm512_max_round_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2417,7 +2444,7 @@ _mm512_min_round_pd (__m512d __A, __m512
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2451,7 +2478,7 @@ _mm512_min_round_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2478,7 +2505,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U
}
#else
#define _mm512_max_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
+ (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
@@ -2487,7 +2514,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U
(__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
#define _mm512_max_round_ps(A, B, R) \
- (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_pd(), -1, R)
+ (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
@@ -2496,7 +2523,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U
(__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#define _mm512_min_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
+ (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
@@ -2505,7 +2532,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U
(__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
#define _mm512_min_round_ps(A, B, R) \
- (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R)
+ (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
@@ -2522,7 +2549,7 @@ _mm512_scalef_round_pd (__m512d __A, __m
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2556,7 +2583,7 @@ _mm512_scalef_round_ps (__m512 __A, __m5
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2602,7 +2629,7 @@ _mm_scalef_round_ss (__m128 __A, __m128
}
#else
#define _mm512_scalef_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
@@ -2611,7 +2638,7 @@ _mm_scalef_round_ss (__m128 __A, __m128
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_scalef_round_ps(A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
@@ -3294,7 +3321,7 @@ _mm512_abs_epi64 (__m512i __A)
{
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3323,7 +3350,7 @@ _mm512_abs_epi32 (__m512i __A)
{
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3350,8 +3377,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastss_ps (__m128 __A)
{
- __v16sf __O;
- return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
+ return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -3377,8 +3405,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastsd_pd (__m128d __A)
{
- __v8df __O;
- return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O,
+ return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -3404,8 +3433,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastd_epi32 (__m128i __A)
{
- __v16si __O;
- return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3431,8 +3461,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi32 (int __A)
{
- __v16si __O;
- return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
+ (__v16si)
+ _mm512_undefined_si512 (),
(__mmask16)(-1));
}
@@ -3458,8 +3489,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastq_epi64 (__m128i __A)
{
- __v8di __O;
- return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3485,12 +3517,15 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi64 (long long __A)
{
- __v8di __O;
#ifdef TARGET_64BIT
- return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8)(-1));
#else
- return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8)(-1));
#endif
}
@@ -3529,8 +3564,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f32x4 (__m128 __A)
{
- __v16sf __O;
- return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O,
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -3557,9 +3593,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i32x4 (__m128i __A)
{
- __v16si __O;
return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
- __O,
+ (__v16si)
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3586,9 +3622,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x4 (__m256d __A)
{
- __v8df __O;
return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
- __O,
+ (__v8df)
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -3615,9 +3651,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i64x4 (__m256i __A)
{
- __v8di __O;
return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
- __O,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3738,7 +3774,7 @@ _mm512_shuffle_epi32 (__m512i __A, _MM_P
return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
__mask,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3771,7 +3807,7 @@ _mm512_shuffle_i64x2 (__m512i __A, __m51
return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
(__v8di) __B, __imm,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3806,7 +3842,7 @@ _mm512_shuffle_i32x4 (__m512i __A, __m51
(__v16si) __B,
__imm,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3842,7 +3878,7 @@ _mm512_shuffle_f64x2 (__m512d __A, __m51
return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
(__v8df) __B, __imm,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -3876,7 +3912,7 @@ _mm512_shuffle_f32x4 (__m512 __A, __m512
return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
(__v16sf) __B, __imm,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -3906,7 +3942,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __
#else
#define _mm512_shuffle_epi32(X, C) \
((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_shuffle_epi32(W, U, X, C) \
@@ -3922,7 +3958,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __
#define _mm512_shuffle_i64x2(X, Y, C) \
((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
@@ -3940,7 +3976,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __
#define _mm512_shuffle_i32x4(X, Y, C) \
((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
(__v16si)(__m512i)(Y), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
@@ -3958,7 +3994,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __
#define _mm512_shuffle_f64x2(X, Y, C) \
((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(C),\
- (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
(__mmask8)-1))
#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
@@ -3976,7 +4012,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __
#define _mm512_shuffle_f32x4(X, Y, C) \
((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(C),\
- (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
(__mmask16)-1))
#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
@@ -3999,7 +4035,7 @@ _mm512_rolv_epi32 (__m512i __A, __m512i
return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -4031,7 +4067,7 @@ _mm512_rorv_epi32 (__m512i __A, __m512i
return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -4063,7 +4099,7 @@ _mm512_rolv_epi64 (__m512i __A, __m512i
return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -4095,7 +4131,7 @@ _mm512_rorv_epi64 (__m512i __A, __m512i
return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -4127,7 +4163,7 @@ _mm512_cvtt_roundpd_epi32 (__m512d __A,
{
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4157,7 +4193,7 @@ _mm512_cvtt_roundpd_epu32 (__m512d __A,
{
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4182,7 +4218,7 @@ _mm512_maskz_cvtt_roundpd_epu32 (__mmask
}
#else
#define _mm512_cvtt_roundpd_epi32(A, B) \
- ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
@@ -4191,7 +4227,7 @@ _mm512_maskz_cvtt_roundpd_epu32 (__mmask
((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#define _mm512_cvtt_roundpd_epu32(A, B) \
- ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
@@ -4207,7 +4243,7 @@ _mm512_cvt_roundpd_epi32 (__m512d __A, c
{
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4237,7 +4273,7 @@ _mm512_cvt_roundpd_epu32 (__m512d __A, c
{
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4262,7 +4298,7 @@ _mm512_maskz_cvt_roundpd_epu32 (__mmask8
}
#else
#define _mm512_cvt_roundpd_epi32(A, B) \
- ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
@@ -4271,7 +4307,7 @@ _mm512_maskz_cvt_roundpd_epu32 (__mmask8
((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#define _mm512_cvt_roundpd_epu32(A, B) \
- ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
@@ -4287,7 +4323,7 @@ _mm512_cvtt_roundps_epi32 (__m512 __A, c
{
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4317,7 +4353,7 @@ _mm512_cvtt_roundps_epu32 (__m512 __A, c
{
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4342,7 +4378,7 @@ _mm512_maskz_cvtt_roundps_epu32 (__mmask
}
#else
#define _mm512_cvtt_roundps_epi32(A, B) \
- ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
@@ -4351,7 +4387,7 @@ _mm512_maskz_cvtt_roundps_epu32 (__mmask
((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#define _mm512_cvtt_roundps_epu32(A, B) \
- ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
@@ -4367,7 +4403,7 @@ _mm512_cvt_roundps_epi32 (__m512 __A, co
{
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4397,7 +4433,7 @@ _mm512_cvt_roundps_epu32 (__m512 __A, co
{
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4422,7 +4458,7 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask1
}
#else
#define _mm512_cvt_roundps_epi32(A, B) \
- ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
@@ -4431,7 +4467,7 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask1
((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#define _mm512_cvt_roundps_epu32(A, B) \
- ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
@@ -4553,8 +4589,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, __O,
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask16) -1);
}
@@ -4587,8 +4624,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi32_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, __O,
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask16) -1);
}
@@ -4621,8 +4659,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi32_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, __O,
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask16) -1);
}
@@ -4656,8 +4695,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi16 (__m512i __A)
{
- __v16hi __O;
- return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __O,
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
(__mmask16) -1);
}
@@ -4690,8 +4730,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi32_epi16 (__m512i __A)
{
- __v16hi __O;
- return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __O,
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
(__mmask16) -1);
}
@@ -4724,8 +4765,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi32_epi16 (__m512i __A)
{
- __v16hi __O;
- return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __O,
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
(__mmask16) -1);
}
@@ -4759,8 +4801,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi32 (__m512i __A)
{
- __v8si __O;
- return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __O,
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -4794,7 +4837,8 @@ __attribute__ ((__gnu_inline__, __always
_mm512_cvtsepi64_epi32 (__m512i __A)
{
- __v8si __O;
- return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __O,
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -4827,8 +4872,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi32 (__m512i __A)
{
- __v8si __O;
- return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __O,
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -4861,8 +4907,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi16 (__m512i __A)
{
- __v8hi __O;
- return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4895,8 +4942,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi16 (__m512i __A)
{
- __v8hi __O;
- return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4929,8 +4977,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi16 (__m512i __A)
{
- __v8hi __O;
- return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4963,8 +5012,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4997,8 +5047,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -5031,8 +5082,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -5068,7 +5120,7 @@ _mm512_cvtepi32_pd (__m256i __A)
{
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5097,7 +5149,7 @@ _mm512_cvtepu32_pd (__m256i __A)
{
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5127,7 +5179,7 @@ _mm512_cvt_roundepi32_ps (__m512i __A, c
{
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -5157,7 +5209,7 @@ _mm512_cvt_roundepu32_ps (__m512i __A, c
{
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -5183,7 +5235,7 @@ _mm512_maskz_cvt_roundepu32_ps (__mmask1
#else
#define _mm512_cvt_roundepi32_ps(A, B) \
- (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
(__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
@@ -5192,7 +5244,7 @@ _mm512_maskz_cvt_roundepu32_ps (__mmask1
(__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#define _mm512_cvt_roundepu32_ps(A, B) \
- (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
(__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
@@ -5209,7 +5261,7 @@ _mm512_extractf64x4_pd (__m512d __A, con
return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
__imm,
(__v4df)
- _mm256_setzero_pd (),
+ _mm256_undefined_pd (),
(__mmask8) -1);
}
@@ -5242,7 +5294,7 @@ _mm512_extractf32x4_ps (__m512 __A, cons
return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
__imm,
(__v4sf)
- _mm_setzero_ps (),
+ _mm_undefined_ps (),
(__mmask8) -1);
}
@@ -5275,7 +5327,7 @@ _mm512_extracti64x4_epi64 (__m512i __A,
return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
__imm,
(__v4di)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -5308,7 +5360,7 @@ _mm512_extracti32x4_epi32 (__m512i __A,
return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
__imm,
(__v4si)
- _mm_setzero_si128 (),
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -5338,7 +5390,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask
#define _mm512_extractf64x4_pd(X, C) \
((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
(int) (C),\
- (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__v4df)(__m256d)_mm256_undefined_pd(),\
(__mmask8)-1))
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
@@ -5356,7 +5408,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask
#define _mm512_extractf32x4_ps(X, C) \
((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
(int) (C),\
- (__v4sf)(__m128)_mm_setzero_ps(),\
+ (__v4sf)(__m128)_mm_undefined_ps(),\
(__mmask8)-1))
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
@@ -5374,7 +5426,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask
#define _mm512_extracti64x4_epi64(X, C) \
((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
(int) (C),\
- (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__v4di)(__m256i)_mm256_undefined_si256 (),\
(__mmask8)-1))
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
@@ -5392,7 +5444,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask
#define _mm512_extracti32x4_epi32(X, C) \
((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
(int) (C),\
- (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__v4si)(__m128i)_mm_undefined_si128 (),\
(__mmask8)-1))
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
@@ -5437,7 +5489,7 @@ _mm512_inserti64x4 (__m512i __A, __m256i
(__v4di) __B,
__imm,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -5474,7 +5526,7 @@ _mm512_insertf64x4 (__m512d __A, __m256d
(__v4df) __B,
__imm,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5514,7 +5566,7 @@ _mm512_maskz_insertf64x4 (__mmask8 __U,
#define _mm512_insertf64x4(X, Y, C) \
((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
(__v4df)(__m256d) (Y), (int) (C), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)(__m512d)_mm512_undefined_pd(), \
(__mmask8)-1))
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
@@ -5532,7 +5584,7 @@ _mm512_maskz_insertf64x4 (__mmask8 __U,
#define _mm512_inserti64x4(X, Y, C) \
((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
(__v4di)(__m256i) (Y), (int) (C), \
- (__v8di)(__m512i)_mm512_setzero_si512 (), \
+ (__v8di)(__m512i)_mm512_undefined_si512 (), \
(__mmask8)-1))
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
@@ -5554,7 +5606,7 @@ _mm512_loadu_pd (void const *__P)
{
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5599,7 +5651,7 @@ _mm512_loadu_ps (void const *__P)
{
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -5717,7 +5769,7 @@ _mm512_permutevar_pd (__m512d __A, __m51
return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
(__v8di) __C,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5749,7 +5801,7 @@ _mm512_permutevar_ps (__m512 __A, __m512
return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
(__v16si) __C,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -5968,7 +6020,7 @@ _mm512_permute_pd (__m512d __X, const in
{
return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5997,7 +6049,7 @@ _mm512_permute_ps (__m512 __X, const int
{
return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6022,7 +6074,7 @@ _mm512_maskz_permute_ps (__mmask16 __U,
#else
#define _mm512_permute_pd(X, C) \
((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
- (__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
(__mmask8)(-1)))
#define _mm512_mask_permute_pd(W, U, X, C) \
@@ -6037,7 +6089,7 @@ _mm512_maskz_permute_ps (__mmask16 __U,
#define _mm512_permute_ps(X, C) \
((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
- (__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
(__mmask16)(-1)))
#define _mm512_mask_permute_ps(W, U, X, C) \
@@ -6058,7 +6110,7 @@ _mm512_permutex_epi64 (__m512i __X, cons
{
return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) (-1));
}
@@ -6088,7 +6140,7 @@ _mm512_permutex_pd (__m512d __X, const i
{
return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -6113,7 +6165,8 @@ _mm512_maskz_permutex_pd (__mmask8 __U,
#else
#define _mm512_permutex_pd(X, M) \
((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)(__m512d)(X), (__mmask8)-1))
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
+ (__mmask8)-1))
#define _mm512_mask_permutex_pd(W, U, X, M) \
((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
@@ -6127,7 +6180,8 @@ _mm512_maskz_permutex_pd (__mmask8 __U,
#define _mm512_permutex_epi64(X, I) \
((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
(int)(I), \
- (__v8di)(__m512i)(X), \
+ (__v8di)(__m512i) \
+ (_mm512_undefined_si512 ()),\
(__mmask8)(-1)))
#define _mm512_maskz_permutex_epi64(M, X, I) \
@@ -6162,7 +6216,7 @@ _mm512_permutexvar_epi64 (__m512i __X, _
return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
(__v8di) __X,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6195,7 +6249,7 @@ _mm512_permutexvar_epi32 (__m512i __X, _
return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
(__v16si) __X,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6217,7 +6271,7 @@ _mm512_permutexvar_pd (__m512i __X, __m5
return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
(__v8di) __X,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -6249,7 +6303,7 @@ _mm512_permutexvar_ps (__m512i __X, __m5
return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
(__v16si) __X,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6282,7 +6336,7 @@ _mm512_shuffle_ps (__m512 __M, __m512 __
return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
(__v16sf) __V, __imm,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6315,7 +6369,7 @@ _mm512_shuffle_pd (__m512d __M, __m512d
return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
(__v8df) __V, __imm,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -6485,7 +6539,7 @@ _mm_maskz_fixupimm_round_ss (__mmask8 __
#define _mm512_shuffle_pd(X, Y, C) \
((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(C),\
- (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
(__mmask8)-1))
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
@@ -6503,7 +6557,7 @@ _mm_maskz_fixupimm_round_ss (__mmask8 __
#define _mm512_shuffle_ps(X, Y, C) \
((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(C),\
- (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
(__mmask16)-1))
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
@@ -6585,7 +6639,7 @@ _mm512_movehdup_ps (__m512 __A)
{
return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6614,7 +6668,7 @@ _mm512_moveldup_ps (__m512 __A)
{
return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6644,7 +6698,7 @@ _mm512_or_si512 (__m512i __A, __m512i __
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6655,7 +6709,7 @@ _mm512_or_epi32 (__m512i __A, __m512i __
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6687,7 +6741,7 @@ _mm512_or_epi64 (__m512i __A, __m512i __
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6719,7 +6773,7 @@ _mm512_xor_si512 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6730,7 +6784,7 @@ _mm512_xor_epi32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6762,7 +6816,7 @@ _mm512_xor_epi64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6794,7 +6848,7 @@ _mm512_rol_epi32 (__m512i __A, const int
{
return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6823,7 +6877,7 @@ _mm512_ror_epi32 (__m512i __A, int __B)
{
return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6852,7 +6906,7 @@ _mm512_rol_epi64 (__m512i __A, const int
{
return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6881,7 +6935,7 @@ _mm512_ror_epi64 (__m512i __A, int __B)
{
return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6908,7 +6962,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __
#define _mm512_rol_epi32(A, B) \
((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
(int)(B), \
- (__v16si)_mm512_setzero_si512 (), \
+ (__v16si)_mm512_undefined_si512 (), \
(__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
@@ -6923,7 +6977,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __
#define _mm512_ror_epi32(A, B) \
((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
(int)(B), \
- (__v16si)_mm512_setzero_si512 (), \
+ (__v16si)_mm512_undefined_si512 (), \
(__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
@@ -6938,7 +6992,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __
#define _mm512_rol_epi64(A, B) \
((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
(int)(B), \
- (__v8di)_mm512_setzero_si512 (), \
+ (__v8di)_mm512_undefined_si512 (), \
(__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
@@ -6954,7 +7008,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __
#define _mm512_ror_epi64(A, B) \
((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
(int)(B), \
- (__v8di)_mm512_setzero_si512 (), \
+ (__v8di)_mm512_undefined_si512 (), \
(__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
@@ -6975,7 +7029,7 @@ _mm512_and_si512 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6986,7 +7040,7 @@ _mm512_and_epi32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7018,7 +7072,7 @@ _mm512_and_epi64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7049,7 +7103,7 @@ _mm512_andnot_si512 (__m512i __A, __m512
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7060,7 +7114,7 @@ _mm512_andnot_epi32 (__m512i __A, __m512
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7092,7 +7146,7 @@ _mm512_andnot_epi64 (__m512i __A, __m512
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7190,7 +7244,7 @@ _mm512_unpackhi_epi32 (__m512i __A, __m5
return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7223,7 +7277,7 @@ _mm512_unpackhi_epi64 (__m512i __A, __m5
return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7255,7 +7309,7 @@ _mm512_unpacklo_epi32 (__m512i __A, __m5
return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7288,7 +7342,7 @@ _mm512_unpacklo_epi64 (__m512i __A, __m5
return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7571,7 +7625,7 @@ _mm512_movedup_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -7601,7 +7655,7 @@ _mm512_unpacklo_pd (__m512d __A, __m512d
return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -7633,7 +7687,7 @@ _mm512_unpackhi_pd (__m512d __A, __m512d
return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -7665,7 +7719,7 @@ _mm512_unpackhi_ps (__m512 __A, __m512 _
return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -7697,7 +7751,7 @@ _mm512_cvt_roundps_pd (__m256 __A, const
{
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -7727,7 +7781,7 @@ _mm512_cvt_roundph_ps (__m256i __A, cons
{
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -7758,7 +7812,7 @@ _mm512_cvt_roundps_ph (__m512 __A, const
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
__I,
(__v16hi)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
-1);
}
@@ -7769,7 +7823,7 @@ _mm512_cvtps_ph (__m512 __A, const int _
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
__I,
(__v16hi)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
-1);
}
@@ -7817,7 +7871,7 @@ _mm512_maskz_cvtps_ph (__mmask16 __W, __
}
#else
#define _mm512_cvt_roundps_pd(A, B) \
- (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, B)
+ (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
(__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
@@ -7826,7 +7880,7 @@ _mm512_maskz_cvtps_ph (__mmask16 __W, __
(__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
#define _mm512_cvt_roundph_ps(A, B) \
- (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
@@ -7836,10 +7890,10 @@ _mm512_maskz_cvtps_ph (__mmask16 __W, __
#define _mm512_cvt_roundps_ph(A, I) \
((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
- (__v16hi)_mm256_setzero_si256 (), -1))
+ (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I) \
((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
- (__v16hi)_mm256_setzero_si256 (), -1))
+ (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
(__v16hi)(__m256i)(U), (__mmask16) (W)))
@@ -7861,7 +7915,7 @@ _mm512_cvt_roundpd_ps (__m512d __A, cons
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf)
- _mm256_setzero_ps (),
+ _mm256_undefined_ps (),
(__mmask8) -1, __R);
}
@@ -7904,7 +7958,7 @@ _mm_cvt_roundss_sd (__m128d __A, __m128
}
#else
#define _mm512_cvt_roundpd_ps(A, B) \
- (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), -1, B)
+ (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
@@ -7972,7 +8026,7 @@ _mm512_getexp_round_ps (__m512 __A, cons
{
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -8002,7 +8056,7 @@ _mm512_getexp_round_pd (__m512d __A, con
{
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -8049,7 +8103,7 @@ _mm512_getmant_round_pd (__m512d __A, _M
{
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
(__C << 2) | __B,
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -8085,7 +8139,7 @@ _mm512_getmant_round_ps (__m512 __A, _MM
{
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
(__C << 2) | __B,
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -8142,7 +8196,7 @@ _mm_getmant_round_ss (__m128 __A, __m128
#define _mm512_getmant_round_pd(X, B, C, R) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
(int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)(__m512d)_mm512_undefined_pd(), \
(__mmask8)-1,\
(R)))
@@ -8162,7 +8216,7 @@ _mm_getmant_round_ss (__m128 __A, __m128
#define _mm512_getmant_round_ps(X, B, C, R) \
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
(int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__v16sf)(__m512)_mm512_undefined_ps(), \
(__mmask16)-1,\
(R)))
@@ -8199,7 +8253,7 @@ _mm_getmant_round_ss (__m128 __A, __m128
#define _mm512_getexp_round_ps(A, R) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, R))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
@@ -8211,7 +8265,7 @@ _mm_getmant_round_ss (__m128 __A, __m128
#define _mm512_getexp_round_pd(A, R) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)-1, R))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
@@ -8228,7 +8282,9 @@ __attribute__ ((__gnu_inline__, __always
_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
{
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
- (__v16sf) __A, -1, __R);
+ (__v16sf)
+ _mm512_undefined_ps (),
+ -1, __R);
}
extern __inline __m512
@@ -8258,7 +8314,9 @@ __attribute__ ((__gnu_inline__, __always
_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
{
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
- (__v8df) __A, -1, __R);
+ (__v8df)
+ _mm512_undefined_pd (),
+ -1, __R);
}
extern __inline __m512d
@@ -8303,7 +8361,7 @@ _mm_roundscale_round_sd (__m128d __A, __
#else
#define _mm512_roundscale_round_ps(A, B, R) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
- (__v16sf)(__m512)(A), (__mmask16)(-1), R))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
(int)(D), \
@@ -8316,7 +8374,7 @@ _mm_roundscale_round_sd (__m128d __A, __
(__mmask16)(A), R))
#define _mm512_roundscale_round_pd(A, B, R) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
- (__v8df)(__m512d)(A), (__mmask8)(-1), R))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
(int)(D), \
@@ -8423,7 +8481,7 @@ _mm512_alignr_epi32 (__m512i __A, __m512
return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
(__v16si) __B, __imm,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -8457,7 +8515,7 @@ _mm512_alignr_epi64 (__m512i __A, __m512
return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
(__v8di) __B, __imm,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -8486,7 +8544,7 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U,
#else
#define _mm512_alignr_epi32(X, Y, C) \
((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
@@ -8496,12 +8554,13 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U,
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#define _mm512_alignr_epi64(X, Y, C) \
((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
- (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(X), (__mmask8)-1))
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \
+ (__mmask8)-1))
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
@@ -8509,7 +8568,7 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U,
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
- (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
@@ -8976,7 +9035,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
{
- __m512 v1_old = _mm512_setzero_ps ();
+ __m512 v1_old = _mm512_undefined_ps ();
__mmask16 mask = 0xFFFF;
return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
@@ -9000,7 +9059,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
{
- __m512d v1_old = _mm512_setzero_pd ();
+ __m512d v1_old = _mm512_undefined_pd ();
__mmask8 mask = 0xFF;
return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
@@ -9024,7 +9083,7 @@ extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
{
- __m256 v1_old = _mm256_setzero_ps ();
+ __m256 v1_old = _mm256_undefined_ps ();
__mmask8 mask = 0xFF;
return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
@@ -9048,7 +9107,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
{
- __m512d v1_old = _mm512_setzero_pd ();
+ __m512d v1_old = _mm512_undefined_pd ();
__mmask8 mask = 0xFF;
return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
@@ -9072,7 +9131,7 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
{
- __m512i v1_old = _mm512_setzero_si512 ();
+ __m512i v1_old = _mm512_undefined_si512 ();
__mmask16 mask = 0xFFFF;
return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
@@ -9096,7 +9155,7 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
{
- __m512i v1_old = _mm512_setzero_si512 ();
+ __m512i v1_old = _mm512_undefined_si512 ();
__mmask8 mask = 0xFF;
return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
@@ -9121,7 +9180,7 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
{
- __m256i v1_old = _mm256_setzero_si256 ();
+ __m256i v1_old = _mm256_undefined_si256 ();
__mmask8 mask = 0xFF;
return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
@@ -9145,7 +9204,7 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
{
- __m512i v1_old = _mm512_setzero_si512 ();
+ __m512i v1_old = _mm512_undefined_si512 ();
__mmask8 mask = 0xFF;
return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
@@ -9309,7 +9368,7 @@ _mm512_mask_i64scatter_epi64 (long long
}
#else
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
- (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps(), \
+ (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
(float const *)ADDR, \
(__v16si)(__m512i)INDEX, \
(__mmask16)0xFFFF, (int)SCALE)
@@ -9321,7 +9380,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask16)MASK, (int)SCALE)
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
- (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd(), \
+ (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
(double const *)ADDR, \
(__v8si)(__m256i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9333,7 +9392,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
- (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps(), \
+ (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
(float const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9345,7 +9404,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
- (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd(), \
+ (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
(double const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9357,7 +9416,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask8)MASK, (int)SCALE)
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
- (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_setzero_si512 (), \
+ (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
(int const *)ADDR, \
(__v16si)(__m512i)INDEX, \
(__mmask16)0xFFFF, (int)SCALE)
@@ -9369,7 +9428,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask16)MASK, (int)SCALE)
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
- (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_setzero_si512 (), \
+ (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
(long long const *)ADDR, \
(__v8si)(__m256i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9381,7 +9440,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
- (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_setzero_si256(), \
+ (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
(int const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9393,7 +9452,7 @@ _mm512_mask_i64scatter_epi64 (long long
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
- (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_setzero_si512 (), \
+ (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
(long long const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9889,7 +9948,7 @@ _mm512_max_epi64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -9920,7 +9979,7 @@ _mm512_min_epi64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -9951,7 +10010,7 @@ _mm512_max_epu64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -9982,7 +10041,7 @@ _mm512_min_epu64 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -10013,7 +10072,7 @@ _mm512_max_epi32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10044,7 +10103,7 @@ _mm512_min_epi32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10075,7 +10134,7 @@ _mm512_max_epu32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10106,7 +10165,7 @@ _mm512_min_epu32 (__m512i __A, __m512i _
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10137,7 +10196,7 @@ _mm512_unpacklo_ps (__m512 __A, __m512 _
return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -10382,7 +10441,7 @@ _mm512_sqrt_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10414,7 +10473,7 @@ _mm512_sqrt_ps (__m512 __A)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10447,7 +10506,7 @@ _mm512_add_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10482,7 +10541,7 @@ _mm512_add_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10517,7 +10576,7 @@ _mm512_sub_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10552,7 +10611,7 @@ _mm512_sub_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10587,7 +10646,7 @@ _mm512_mul_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10622,7 +10681,7 @@ _mm512_mul_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10657,7 +10716,7 @@ _mm512_div_pd (__m512d __M, __m512d __V)
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10692,7 +10751,7 @@ _mm512_div_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10727,7 +10786,7 @@ _mm512_max_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10762,7 +10821,7 @@ _mm512_max_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10797,7 +10856,7 @@ _mm512_min_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10832,7 +10891,7 @@ _mm512_min_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10867,7 +10926,7 @@ _mm512_scalef_pd (__m512d __A, __m512d _
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10902,7 +10961,7 @@ _mm512_scalef_ps (__m512 __A, __m512 __B
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11482,7 +11541,7 @@ _mm512_cvttpd_epi32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11514,7 +11573,7 @@ _mm512_cvttpd_epu32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11546,7 +11605,7 @@ _mm512_cvtpd_epi32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11578,7 +11637,7 @@ _mm512_cvtpd_epu32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11610,7 +11669,7 @@ _mm512_cvttps_epi32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11642,7 +11701,7 @@ _mm512_cvttps_epu32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11674,7 +11733,7 @@ _mm512_cvtps_epi32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11706,7 +11765,7 @@ _mm512_cvtps_epu32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11764,7 +11823,7 @@ _mm512_cvtepi32_ps (__m512i __A)
{
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11796,7 +11855,7 @@ _mm512_cvtepu32_ps (__m512i __A)
{
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12141,7 +12200,7 @@ _mm512_cvtps_pd (__m256 __A)
{
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12173,7 +12232,7 @@ _mm512_cvtph_ps (__m256i __A)
{
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12205,7 +12264,7 @@ _mm512_cvtpd_ps (__m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf)
- _mm256_setzero_ps (),
+ _mm256_undefined_ps (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12238,7 +12297,7 @@ _mm512_getexp_ps (__m512 __A)
{
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12270,7 +12329,7 @@ _mm512_getexp_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12321,7 +12380,7 @@ _mm512_getmant_pd (__m512d __A, _MM_MANT
{
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
(__C << 2) | __B,
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12357,7 +12416,7 @@ _mm512_getmant_ps (__m512 __A, _MM_MANTI
{
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
(__C << 2) | __B,
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12412,7 +12471,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B,
#define _mm512_getmant_pd(X, B, C) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
(int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)_mm512_undefined_pd(), \
(__mmask8)-1,\
_MM_FROUND_CUR_DIRECTION))
@@ -12426,13 +12485,13 @@ _mm_getmant_ss (__m128 __A, __m128 __B,
#define _mm512_maskz_getmant_pd(U, X, B, C) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
(int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)_mm512_setzero_pd(), \
(__mmask8)(U),\
_MM_FROUND_CUR_DIRECTION))
#define _mm512_getmant_ps(X, B, C) \
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
(int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__v16sf)_mm512_undefined_ps(), \
(__mmask16)-1,\
_MM_FROUND_CUR_DIRECTION))
@@ -12446,7 +12505,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B,
#define _mm512_maskz_getmant_ps(U, X, B, C) \
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
(int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__v16sf)_mm512_setzero_ps(), \
(__mmask16)(U),\
_MM_FROUND_CUR_DIRECTION))
#define _mm_getmant_sd(X, Y, C, D) \
@@ -12471,7 +12530,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B,
#define _mm512_getexp_ps(A) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_getexp_ps(W, U, A) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
@@ -12483,7 +12542,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B,
#define _mm512_getexp_pd(A) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_getexp_pd(W, U, A) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
@@ -12500,7 +12559,9 @@ __attribute__ ((__gnu_inline__, __always
_mm512_roundscale_ps (__m512 __A, const int __imm)
{
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
- (__v16sf) __A, -1,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12532,7 +12593,9 @@ __attribute__ ((__gnu_inline__, __always
_mm512_roundscale_pd (__m512d __A, const int __imm)
{
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
- (__v8df) __A, -1,
+ (__v8df)
+ _mm512_undefined_pd (),
+ -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12580,7 +12643,7 @@ _mm_roundscale_sd (__m128d __A, __m128d
#else
#define _mm512_roundscale_ps(A, B) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
- (__v16sf)(__m512)(A), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
(int)(D), \
@@ -12593,7 +12656,7 @@ _mm_roundscale_sd (__m128d __A, __m128d
(__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_pd(A, B) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
- (__v8df)(__m512d)(A), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
(int)(D), \
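
To illustrate what the avx512fintrin.h hunks above buy us, here is a
minimal stand-alone sketch (not part of the patch; it assumes the patch
is applied and -mavx512f, and the exact instruction sequence of course
depends on the compiler): with an all-ones mask the pass-through operand
of a masked builtin is dead, so an undefined value avoids materializing
a zero.

#include <immintrin.h>

/* With the full mask the third (pass-through) operand is never
   consulted.  Using _mm512_setzero_si512 () still forces an XOR to
   materialize the zero register; _mm512_undefined_si512 () lets the
   register allocator pick any register without emitting code for it.  */
__m512i
or_with_zero (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
						(__v16si) __B,
						(__v16si)
						_mm512_setzero_si512 (),
						(__mmask16) -1);
}

__m512i
or_with_undefined (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
						(__v16si) __B,
						(__v16si)
						_mm512_undefined_si512 (),
						(__mmask16) -1);
}

Compiled at -O2 the first variant should show the extra XOR before the
OR; the second should compile to just the OR itself.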
diff -up gcc/config/i386/avxintrin.h.old gcc/config/i386/avxintrin.h
--- gcc/config/i386/avxintrin.h.old 2014-03-15 21:16:25.687572434 -0400
+++ gcc/config/i386/avxintrin.h 2014-03-16 00:14:29.787184657 -0400
@@ -1168,6 +1168,30 @@ _mm256_movemask_ps (__m256 __A)
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_pd (void)
+{
+ __m256d __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ps (void)
+{
+ __m256 __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_si256 (void)
+{
+ __m256i __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_pd (void)
{
return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
diff -up gcc/config/i386/emmintrin.h.old gcc/config/i386/emmintrin.h
--- gcc/config/i386/emmintrin.h.old 2014-03-15 22:12:57.952326921 -0400
+++ gcc/config/i386/emmintrin.h 2014-03-15 22:18:45.936617515 -0400
@@ -86,6 +86,15 @@ _mm_setr_pd (double __W, double __X)
return __extension__ (__m128d){ __W, __X };
}
+/* Create an undefined vector. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_pd (void)
+{
+ __m128d __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
/* Create a vector of zeros. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_pd (void)
@@ -728,6 +737,15 @@ _mm_move_epi64 (__m128i __A)
return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
}
+/* Create an undefined vector. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_si128 (void)
+{
+ __m128i __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
/* Create a vector of zeros. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si128 (void)
diff -up gcc/config/i386/xmmintrin.h.old gcc/config/i386/xmmintrin.h
--- gcc/config/i386/xmmintrin.h.old 2014-03-15 22:15:36.545915083 -0400
+++ gcc/config/i386/xmmintrin.h 2014-03-15 22:16:58.536219175 -0400
@@ -102,6 +102,15 @@ typedef float __v4sf __attribute__ ((__v
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
+/* Create an undefined vector. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+ __m128 __Y;
+ __asm__ ("" : "=x" (__Y));
+ return __Y;
+}
+
/* Create a vector of zeros. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ps (void)
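
Finally, a user-level usage sketch (again not part of the patch;
assumes -mavx512f and the usual masked-gather signature taking the old
value, mask, index, base address and scale): when the caller knows the
mask selects every element, the old-value argument never reaches the
result, so the undefined intrinsic is the natural choice.

#include <immintrin.h>

/* Gather eight doubles; the 0xff mask selects all lanes, so the old
   value passed as the first argument is never used and need not be
   initialized or zeroed.  */
__m512d
gather8 (const double *base, __m256i index)
{
  return _mm512_mask_i32gather_pd (_mm512_undefined_pd (), 0xff,
				   index, base, 8);
}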