]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/avx512fintrin.h
Optimize memory broadcast for constant vector under AVX512.
[gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
8d9254fc 1/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal 64-byte vector types used to implement the intrinsics, grouped
   by element type.  Unlike the public __m512* types they are not marked
   __may_alias__.  */

/* Floating-point elements.  */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));

/* Signed integer elements.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));

/* Unsigned integer elements.  */
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* Public 64-byte vector types.  The Intel API is flexible enough that user
   code may access these through other vector types and through their
   scalar components, hence __may_alias__.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* Counterparts of the public vector types whose alignment requirement is
   lowered to a single byte (__aligned__ (1)).  */
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Unsigned integer types used for the mask operands of the intrinsics
   below.  */
typedef unsigned short __mmask16;
typedef unsigned char __mmask8;
62
dcb2c527
JJ
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
756c5857
AI
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
4e6a811f
JJ
100extern __inline __m512i
101__attribute__((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
103 short __q27, short __q26, short __q25, short __q24,
104 short __q23, short __q22, short __q21, short __q20,
105 short __q19, short __q18, short __q17, short __q16,
106 short __q15, short __q14, short __q13, short __q12,
107 short __q11, short __q10, short __q09, short __q08,
108 short __q07, short __q06, short __q05, short __q04,
109 short __q03, short __q02, short __q01, short __q00)
110{
111 return __extension__ (__m512i)(__v32hi){
112 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
113 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
114 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
115 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
116 };
117}
118
119extern __inline __m512i
120__attribute__((__gnu_inline__, __always_inline__, __artificial__))
121_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
122 char __q59, char __q58, char __q57, char __q56,
123 char __q55, char __q54, char __q53, char __q52,
124 char __q51, char __q50, char __q49, char __q48,
125 char __q47, char __q46, char __q45, char __q44,
126 char __q43, char __q42, char __q41, char __q40,
127 char __q39, char __q38, char __q37, char __q36,
128 char __q35, char __q34, char __q33, char __q32,
129 char __q31, char __q30, char __q29, char __q28,
130 char __q27, char __q26, char __q25, char __q24,
131 char __q23, char __q22, char __q21, char __q20,
132 char __q19, char __q18, char __q17, char __q16,
133 char __q15, char __q14, char __q13, char __q12,
134 char __q11, char __q10, char __q09, char __q08,
135 char __q07, char __q06, char __q05, char __q04,
136 char __q03, char __q02, char __q01, char __q00)
137{
138 return __extension__ (__m512i)(__v64qi){
139 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
140 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
141 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
142 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
143 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
144 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
145 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
146 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
147 };
148}
149
756c5857
AI
150extern __inline __m512d
151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
152_mm512_set_pd (double __A, double __B, double __C, double __D,
153 double __E, double __F, double __G, double __H)
154{
155 return __extension__ (__m512d)
156 { __H, __G, __F, __E, __D, __C, __B, __A };
157}
158
159extern __inline __m512
160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161_mm512_set_ps (float __A, float __B, float __C, float __D,
162 float __E, float __F, float __G, float __H,
163 float __I, float __J, float __K, float __L,
164 float __M, float __N, float __O, float __P)
165{
166 return __extension__ (__m512)
167 { __P, __O, __N, __M, __L, __K, __J, __I,
168 __H, __G, __F, __E, __D, __C, __B, __A };
169}
170
/* "Reversed" setters: the arguments are listed from element 0 upwards and
   are handed to the matching _mm512_set_* function in the opposite
   order.  */
#define _mm512_setr_epi64(x0, x1, x2, x3, x4, x5, x6, x7)                    \
  _mm512_set_epi64 (x7, x6, x5, x4, x3, x2, x1, x0)

#define _mm512_setr_epi32(x0, x1, x2, x3, x4, x5, x6, x7,                    \
                          x8, x9, x10, x11, x12, x13, x14, x15)              \
  _mm512_set_epi32 (x15, x14, x13, x12, x11, x10, x9, x8,                    \
                    x7, x6, x5, x4, x3, x2, x1, x0)

#define _mm512_setr_pd(x0, x1, x2, x3, x4, x5, x6, x7)                       \
  _mm512_set_pd (x7, x6, x5, x4, x3, x2, x1, x0)

#define _mm512_setr_ps(x0, x1, x2, x3, x4, x5, x6, x7,                       \
                       x8, x9, x10, x11, x12, x13, x14, x15)                 \
  _mm512_set_ps (x15, x14, x13, x12, x11, x10, x9, x8,                       \
                 x7, x6, x5, x4, x3, x2, x1, x0)
183
0b192937
UD
184extern __inline __m512
185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186_mm512_undefined_ps (void)
187{
188 __m512 __Y = __Y;
189 return __Y;
190}
191
dcb2c527
JJ
/* _mm512_undefined is another spelling of _mm512_undefined_ps.  */
#define _mm512_undefined _mm512_undefined_ps
193
0b192937
UD
194extern __inline __m512d
195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
196_mm512_undefined_pd (void)
197{
198 __m512d __Y = __Y;
199 return __Y;
200}
201
202extern __inline __m512i
203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 204_mm512_undefined_epi32 (void)
0b192937
UD
205{
206 __m512i __Y = __Y;
207 return __Y;
208}
209
4271e5cb
UB
/* _mm512_undefined_si512 is another spelling of _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
211
7d9088c2
UD
212extern __inline __m512i
213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214_mm512_set1_epi8 (char __A)
215{
216 return __extension__ (__m512i)(__v64qi)
217 { __A, __A, __A, __A, __A, __A, __A, __A,
218 __A, __A, __A, __A, __A, __A, __A, __A,
219 __A, __A, __A, __A, __A, __A, __A, __A,
220 __A, __A, __A, __A, __A, __A, __A, __A,
221 __A, __A, __A, __A, __A, __A, __A, __A,
222 __A, __A, __A, __A, __A, __A, __A, __A,
223 __A, __A, __A, __A, __A, __A, __A, __A,
224 __A, __A, __A, __A, __A, __A, __A, __A };
225}
226
227extern __inline __m512i
228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229_mm512_set1_epi16 (short __A)
230{
231 return __extension__ (__m512i)(__v32hi)
232 { __A, __A, __A, __A, __A, __A, __A, __A,
233 __A, __A, __A, __A, __A, __A, __A, __A,
234 __A, __A, __A, __A, __A, __A, __A, __A,
235 __A, __A, __A, __A, __A, __A, __A, __A };
236}
237
2b2384e8
UD
238extern __inline __m512d
239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240_mm512_set1_pd (double __A)
241{
43373412 242 return __extension__ (__m512d)(__v8df)
243 { __A, __A, __A, __A, __A, __A, __A, __A };
2b2384e8
UD
244}
245
246extern __inline __m512
247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
248_mm512_set1_ps (float __A)
249{
43373412 250 return __extension__ (__m512)(__v16sf)
251 { __A, __A, __A, __A, __A, __A, __A, __A,
252 __A, __A, __A, __A, __A, __A, __A, __A };
2b2384e8
UD
253}
254
7d9088c2
UD
255/* Create the vector [A B C D A B C D A B C D A B C D]. */
256extern __inline __m512i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
259{
260 return __extension__ (__m512i)(__v16si)
261 { __D, __C, __B, __A, __D, __C, __B, __A,
262 __D, __C, __B, __A, __D, __C, __B, __A };
263}
264
265extern __inline __m512i
266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
267_mm512_set4_epi64 (long long __A, long long __B, long long __C,
268 long long __D)
269{
270 return __extension__ (__m512i) (__v8di)
271 { __D, __C, __B, __A, __D, __C, __B, __A };
272}
273
274extern __inline __m512d
275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276_mm512_set4_pd (double __A, double __B, double __C, double __D)
277{
278 return __extension__ (__m512d)
279 { __D, __C, __B, __A, __D, __C, __B, __A };
280}
281
282extern __inline __m512
283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
284_mm512_set4_ps (float __A, float __B, float __C, float __D)
285{
286 return __extension__ (__m512)
287 { __D, __C, __B, __A, __D, __C, __B, __A,
288 __D, __C, __B, __A, __D, __C, __B, __A };
289}
290
/* "Reversed" forms of the set4 functions: the four arguments are handed to
   the matching _mm512_set4_* function in the opposite order.  */
#define _mm512_setr4_epi64(x0, x1, x2, x3)                                   \
  _mm512_set4_epi64 (x3, x2, x1, x0)

#define _mm512_setr4_epi32(x0, x1, x2, x3)                                   \
  _mm512_set4_epi32 (x3, x2, x1, x0)

#define _mm512_setr4_pd(x0, x1, x2, x3)                                      \
  _mm512_set4_pd (x3, x2, x1, x0)

#define _mm512_setr4_ps(x0, x1, x2, x3)                                      \
  _mm512_set4_ps (x3, x2, x1, x0)
302
756c5857
AI
303extern __inline __m512
304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
305_mm512_setzero_ps (void)
306{
307 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
308 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
309}
310
4e6a811f
JJ
311extern __inline __m512
312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313_mm512_setzero (void)
314{
315 return _mm512_setzero_ps ();
316}
317
756c5857
AI
318extern __inline __m512d
319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320_mm512_setzero_pd (void)
321{
322 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
323}
324
7d9088c2
UD
325extern __inline __m512i
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm512_setzero_epi32 (void)
328{
329 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
330}
331
756c5857
AI
332extern __inline __m512i
333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334_mm512_setzero_si512 (void)
335{
336 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
337}
338
339extern __inline __m512d
340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
342{
343 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
344 (__v8df) __W,
345 (__mmask8) __U);
346}
347
348extern __inline __m512d
349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
350_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
351{
352 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
353 (__v8df)
354 _mm512_setzero_pd (),
355 (__mmask8) __U);
356}
357
358extern __inline __m512
359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
361{
362 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
363 (__v16sf) __W,
364 (__mmask16) __U);
365}
366
367extern __inline __m512
368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
370{
371 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
372 (__v16sf)
373 _mm512_setzero_ps (),
374 (__mmask16) __U);
375}
376
377extern __inline __m512d
378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
379_mm512_load_pd (void const *__P)
380{
381 return *(__m512d *) __P;
382}
383
384extern __inline __m512d
385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
387{
388 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
389 (__v8df) __W,
390 (__mmask8) __U);
391}
392
393extern __inline __m512d
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
396{
397 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
398 (__v8df)
399 _mm512_setzero_pd (),
400 (__mmask8) __U);
401}
402
403extern __inline void
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm512_store_pd (void *__P, __m512d __A)
406{
407 *(__m512d *) __P = __A;
408}
409
410extern __inline void
411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
413{
414 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
415 (__mmask8) __U);
416}
417
418extern __inline __m512
419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
420_mm512_load_ps (void const *__P)
421{
422 return *(__m512 *) __P;
423}
424
425extern __inline __m512
426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
427_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
428{
429 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
430 (__v16sf) __W,
431 (__mmask16) __U);
432}
433
434extern __inline __m512
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
437{
438 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
439 (__v16sf)
440 _mm512_setzero_ps (),
441 (__mmask16) __U);
442}
443
444extern __inline void
445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446_mm512_store_ps (void *__P, __m512 __A)
447{
448 *(__m512 *) __P = __A;
449}
450
451extern __inline void
452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
454{
455 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
456 (__mmask16) __U);
457}
458
459extern __inline __m512i
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
462{
463 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
464 (__v8di) __W,
465 (__mmask8) __U);
466}
467
468extern __inline __m512i
469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
471{
472 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
473 (__v8di)
474 _mm512_setzero_si512 (),
475 (__mmask8) __U);
476}
477
478extern __inline __m512i
479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
480_mm512_load_epi64 (void const *__P)
481{
482 return *(__m512i *) __P;
483}
484
485extern __inline __m512i
486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
487_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
488{
489 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
490 (__v8di) __W,
491 (__mmask8) __U);
492}
493
494extern __inline __m512i
495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
497{
498 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
499 (__v8di)
500 _mm512_setzero_si512 (),
501 (__mmask8) __U);
502}
503
504extern __inline void
505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
506_mm512_store_epi64 (void *__P, __m512i __A)
507{
508 *(__m512i *) __P = __A;
509}
510
511extern __inline void
512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
513_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
514{
515 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
516 (__mmask8) __U);
517}
518
519extern __inline __m512i
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
522{
523 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
524 (__v16si) __W,
525 (__mmask16) __U);
526}
527
528extern __inline __m512i
529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
530_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
531{
532 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
533 (__v16si)
534 _mm512_setzero_si512 (),
535 (__mmask16) __U);
536}
537
538extern __inline __m512i
539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540_mm512_load_si512 (void const *__P)
541{
542 return *(__m512i *) __P;
543}
544
545extern __inline __m512i
546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547_mm512_load_epi32 (void const *__P)
548{
549 return *(__m512i *) __P;
550}
551
552extern __inline __m512i
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
555{
556 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
557 (__v16si) __W,
558 (__mmask16) __U);
559}
560
561extern __inline __m512i
562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
563_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
564{
565 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
566 (__v16si)
567 _mm512_setzero_si512 (),
568 (__mmask16) __U);
569}
570
571extern __inline void
572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573_mm512_store_si512 (void *__P, __m512i __A)
574{
575 *(__m512i *) __P = __A;
576}
577
578extern __inline void
579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
580_mm512_store_epi32 (void *__P, __m512i __A)
581{
582 *(__m512i *) __P = __A;
583}
584
585extern __inline void
586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
587_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
588{
589 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
590 (__mmask16) __U);
591}
592
593extern __inline __m512i
594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
595_mm512_mullo_epi32 (__m512i __A, __m512i __B)
596{
2069d6fc 597 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
598}
599
600extern __inline __m512i
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
603{
604 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
605 (__v16si) __B,
606 (__v16si)
607 _mm512_setzero_si512 (),
608 __M);
609}
610
611extern __inline __m512i
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
614{
615 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
616 (__v16si) __B,
617 (__v16si) __W, __M);
618}
619
503ac4e0
JJ
620extern __inline __m512i
621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622_mm512_mullox_epi64 (__m512i __A, __m512i __B)
623{
624 return (__m512i) ((__v8du) __A * (__v8du) __B);
625}
626
627extern __inline __m512i
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
630{
631 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
632}
633
756c5857
AI
634extern __inline __m512i
635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
637{
638 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
639 (__v16si) __Y,
640 (__v16si)
4271e5cb 641 _mm512_undefined_epi32 (),
756c5857
AI
642 (__mmask16) -1);
643}
644
645extern __inline __m512i
646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
647_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
648{
649 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
650 (__v16si) __Y,
651 (__v16si) __W,
652 (__mmask16) __U);
653}
654
655extern __inline __m512i
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
658{
659 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
660 (__v16si) __Y,
661 (__v16si)
662 _mm512_setzero_si512 (),
663 (__mmask16) __U);
664}
665
666extern __inline __m512i
667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668_mm512_srav_epi32 (__m512i __X, __m512i __Y)
669{
670 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
671 (__v16si) __Y,
672 (__v16si)
4271e5cb 673 _mm512_undefined_epi32 (),
756c5857
AI
674 (__mmask16) -1);
675}
676
677extern __inline __m512i
678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
679_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
680{
681 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
682 (__v16si) __Y,
683 (__v16si) __W,
684 (__mmask16) __U);
685}
686
687extern __inline __m512i
688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
690{
691 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
692 (__v16si) __Y,
693 (__v16si)
694 _mm512_setzero_si512 (),
695 (__mmask16) __U);
696}
697
698extern __inline __m512i
699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
700_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
701{
702 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
703 (__v16si) __Y,
704 (__v16si)
4271e5cb 705 _mm512_undefined_epi32 (),
756c5857
AI
706 (__mmask16) -1);
707}
708
709extern __inline __m512i
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
712{
713 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
714 (__v16si) __Y,
715 (__v16si) __W,
716 (__mmask16) __U);
717}
718
719extern __inline __m512i
720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
722{
723 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
724 (__v16si) __Y,
725 (__v16si)
726 _mm512_setzero_si512 (),
727 (__mmask16) __U);
728}
729
730extern __inline __m512i
731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
732_mm512_add_epi64 (__m512i __A, __m512i __B)
733{
2069d6fc 734 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
735}
736
737extern __inline __m512i
738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
740{
741 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
742 (__v8di) __B,
743 (__v8di) __W,
744 (__mmask8) __U);
745}
746
747extern __inline __m512i
748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
749_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
750{
751 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
752 (__v8di) __B,
753 (__v8di)
754 _mm512_setzero_si512 (),
755 (__mmask8) __U);
756}
757
758extern __inline __m512i
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm512_sub_epi64 (__m512i __A, __m512i __B)
761{
2069d6fc 762 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
763}
764
765extern __inline __m512i
766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
768{
769 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
770 (__v8di) __B,
771 (__v8di) __W,
772 (__mmask8) __U);
773}
774
775extern __inline __m512i
776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
778{
779 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
780 (__v8di) __B,
781 (__v8di)
782 _mm512_setzero_si512 (),
783 (__mmask8) __U);
784}
785
786extern __inline __m512i
787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
788_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
789{
790 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
791 (__v8di) __Y,
792 (__v8di)
0b192937 793 _mm512_undefined_pd (),
756c5857
AI
794 (__mmask8) -1);
795}
796
797extern __inline __m512i
798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
799_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
800{
801 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
802 (__v8di) __Y,
803 (__v8di) __W,
804 (__mmask8) __U);
805}
806
807extern __inline __m512i
808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
810{
811 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
812 (__v8di) __Y,
813 (__v8di)
814 _mm512_setzero_si512 (),
815 (__mmask8) __U);
816}
817
818extern __inline __m512i
819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
820_mm512_srav_epi64 (__m512i __X, __m512i __Y)
821{
822 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
823 (__v8di) __Y,
824 (__v8di)
4271e5cb 825 _mm512_undefined_epi32 (),
756c5857
AI
826 (__mmask8) -1);
827}
828
829extern __inline __m512i
830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
831_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
832{
833 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
834 (__v8di) __Y,
835 (__v8di) __W,
836 (__mmask8) __U);
837}
838
839extern __inline __m512i
840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
841_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
842{
843 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
844 (__v8di) __Y,
845 (__v8di)
846 _mm512_setzero_si512 (),
847 (__mmask8) __U);
848}
849
850extern __inline __m512i
851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
852_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
853{
854 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
855 (__v8di) __Y,
856 (__v8di)
4271e5cb 857 _mm512_undefined_epi32 (),
756c5857
AI
858 (__mmask8) -1);
859}
860
861extern __inline __m512i
862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
864{
865 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
866 (__v8di) __Y,
867 (__v8di) __W,
868 (__mmask8) __U);
869}
870
871extern __inline __m512i
872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
874{
875 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
876 (__v8di) __Y,
877 (__v8di)
878 _mm512_setzero_si512 (),
879 (__mmask8) __U);
880}
881
882extern __inline __m512i
883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884_mm512_add_epi32 (__m512i __A, __m512i __B)
885{
2069d6fc 886 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
887}
888
889extern __inline __m512i
890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
892{
893 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
894 (__v16si) __B,
895 (__v16si) __W,
896 (__mmask16) __U);
897}
898
899extern __inline __m512i
900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
902{
903 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
904 (__v16si) __B,
905 (__v16si)
906 _mm512_setzero_si512 (),
907 (__mmask16) __U);
908}
909
910extern __inline __m512i
911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
912_mm512_mul_epi32 (__m512i __X, __m512i __Y)
913{
914 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
915 (__v16si) __Y,
916 (__v8di)
4271e5cb 917 _mm512_undefined_epi32 (),
756c5857
AI
918 (__mmask8) -1);
919}
920
921extern __inline __m512i
922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
923_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
924{
925 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
926 (__v16si) __Y,
927 (__v8di) __W, __M);
928}
929
930extern __inline __m512i
931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
932_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
933{
934 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
935 (__v16si) __Y,
936 (__v8di)
937 _mm512_setzero_si512 (),
938 __M);
939}
940
941extern __inline __m512i
942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
943_mm512_sub_epi32 (__m512i __A, __m512i __B)
944{
2069d6fc 945 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
946}
947
948extern __inline __m512i
949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
951{
952 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
953 (__v16si) __B,
954 (__v16si) __W,
955 (__mmask16) __U);
956}
957
958extern __inline __m512i
959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
961{
962 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
963 (__v16si) __B,
964 (__v16si)
965 _mm512_setzero_si512 (),
966 (__mmask16) __U);
967}
968
969extern __inline __m512i
970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971_mm512_mul_epu32 (__m512i __X, __m512i __Y)
972{
973 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
974 (__v16si) __Y,
975 (__v8di)
4271e5cb 976 _mm512_undefined_epi32 (),
756c5857
AI
977 (__mmask8) -1);
978}
979
980extern __inline __m512i
981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
982_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
983{
984 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
985 (__v16si) __Y,
986 (__v8di) __W, __M);
987}
988
989extern __inline __m512i
990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
991_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
992{
993 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
994 (__v16si) __Y,
995 (__v8di)
996 _mm512_setzero_si512 (),
997 __M);
998}
999
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  Each hands the
   count __B unchanged to the psllqi512 mask builtin as its second
   operand.  */

/* Third operand undefined, mask (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __undef, (__mmask8) -1);
}

/* Third operand __W, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __av = (__v8di) __A;
  const __v8di __wv = (__v8di) __W;

  return (__m512i) __builtin_ia32_psllqi512_mask (__av, __B, __wv,
                                                  (__mmask8) __U);
}

/* Third operand all-zero, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
/* Macro forms of the same three names, used when __OPTIMIZE__ is not
   defined.  The count argument is cast to int.  */
#define _mm512_slli_epi64(V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask8) -1))

#define _mm512_mask_slli_epi64(S, K, V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) (S), \
                                            (__mmask8) (K)))

#define _mm512_maskz_slli_epi64(K, V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8) (K)))
#endif
1046
1047extern __inline __m512i
1048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1049_mm512_sll_epi64 (__m512i __A, __m128i __B)
1050{
1051 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1052 (__v2di) __B,
1053 (__v8di)
4271e5cb 1054 _mm512_undefined_epi32 (),
756c5857
AI
1055 (__mmask8) -1);
1056}
1057
1058extern __inline __m512i
1059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1061{
1062 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1063 (__v2di) __B,
1064 (__v8di) __W,
1065 (__mmask8) __U);
1066}
1067
1068extern __inline __m512i
1069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1070_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1071{
1072 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1073 (__v2di) __B,
1074 (__v8di)
1075 _mm512_setzero_si512 (),
1076 (__mmask8) __U);
1077}
1078
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  Each hands the
   count __B unchanged to the psrlqi512 mask builtin as its second
   operand.  */

/* Third operand undefined, mask (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __undef, (__mmask8) -1);
}

/* Third operand __W, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  const __v8di __av = (__v8di) __A;
  const __v8di __wv = (__v8di) __W;

  return (__m512i) __builtin_ia32_psrlqi512_mask (__av, __B, __wv,
                                                  (__mmask8) __U);
}

/* Third operand all-zero, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
/* Macro forms of the same three names, used when __OPTIMIZE__ is not
   defined.  The count argument is cast to int.  */
#define _mm512_srli_epi64(V, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask8) -1))

#define _mm512_mask_srli_epi64(S, K, V, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) (S), \
                                            (__mmask8) (K)))

#define _mm512_maskz_srli_epi64(K, V, N) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8) (K)))
#endif
1125
1126extern __inline __m512i
1127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1128_mm512_srl_epi64 (__m512i __A, __m128i __B)
1129{
1130 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1131 (__v2di) __B,
1132 (__v8di)
4271e5cb 1133 _mm512_undefined_epi32 (),
756c5857
AI
1134 (__mmask8) -1);
1135}
1136
1137extern __inline __m512i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1140{
1141 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1142 (__v2di) __B,
1143 (__v8di) __W,
1144 (__mmask8) __U);
1145}
1146
1147extern __inline __m512i
1148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1150{
1151 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1152 (__v2di) __B,
1153 (__v8di)
1154 _mm512_setzero_si512 (),
1155 (__mmask8) __U);
1156}
1157
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  Each hands the
   count __B unchanged to the psraqi512 mask builtin as its second
   operand.  */

/* Third operand undefined, mask (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __undef, (__mmask8) -1);
}

/* Third operand __W, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __av = (__v8di) __A;
  const __v8di __wv = (__v8di) __W;

  return (__m512i) __builtin_ia32_psraqi512_mask (__av, __B, __wv,
                                                  (__mmask8) __U);
}

/* Third operand all-zero, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
/* Macro forms of the same three names, used when __OPTIMIZE__ is not
   defined.  The count argument is cast to int.  */
#define _mm512_srai_epi64(V, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask8) -1))

#define _mm512_mask_srai_epi64(S, K, V, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) (S), \
                                            (__mmask8) (K)))

#define _mm512_maskz_srai_epi64(K, V, N) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (V), \
                                            (int) (N), \
                                            (__v8di) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8) (K)))
#endif
1204
1205extern __inline __m512i
1206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207_mm512_sra_epi64 (__m512i __A, __m128i __B)
1208{
1209 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1210 (__v2di) __B,
1211 (__v8di)
4271e5cb 1212 _mm512_undefined_epi32 (),
756c5857
AI
1213 (__mmask8) -1);
1214}
1215
1216extern __inline __m512i
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1219{
1220 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1221 (__v2di) __B,
1222 (__v8di) __W,
1223 (__mmask8) __U);
1224}
1225
1226extern __inline __m512i
1227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1229{
1230 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1231 (__v2di) __B,
1232 (__v8di)
1233 _mm512_setzero_si512 (),
1234 (__mmask8) __U);
1235}
1236
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  Each hands the
   count __B unchanged to the pslldi512 mask builtin as its second
   operand.  */

/* Third operand undefined, mask (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __undef = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  __undef, (__mmask16) -1);
}

/* Third operand __W, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __av = (__v16si) __A;
  const __v16si __wv = (__v16si) __W;

  return (__m512i) __builtin_ia32_pslldi512_mask (__av, __B, __wv,
                                                  (__mmask16) __U);
}

/* Third operand all-zero, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  __zero, (__mmask16) __U);
}
#else
/* Macro forms of the same three names, used when __OPTIMIZE__ is not
   defined.  The count argument is cast to int.  */
#define _mm512_slli_epi32(V, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16) -1))

#define _mm512_mask_slli_epi32(S, K, V, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) (S), \
                                            (__mmask16) (K)))

#define _mm512_maskz_slli_epi32(K, V, N) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16) (K)))
#endif
1283
1284extern __inline __m512i
1285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1286_mm512_sll_epi32 (__m512i __A, __m128i __B)
1287{
1288 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1289 (__v4si) __B,
1290 (__v16si)
4271e5cb 1291 _mm512_undefined_epi32 (),
756c5857
AI
1292 (__mmask16) -1);
1293}
1294
1295extern __inline __m512i
1296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1298{
1299 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1300 (__v4si) __B,
1301 (__v16si) __W,
1302 (__mmask16) __U);
1303}
1304
1305extern __inline __m512i
1306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1307_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1308{
1309 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1310 (__v4si) __B,
1311 (__v16si)
1312 _mm512_setzero_si512 (),
1313 (__mmask16) __U);
1314}
1315
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  Each hands the
   count __B unchanged to the psrldi512 mask builtin as its second
   operand.  */

/* Third operand undefined, mask (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __undef = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  __undef, (__mmask16) -1);
}

/* Third operand __W, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  const __v16si __av = (__v16si) __A;
  const __v16si __wv = (__v16si) __W;

  return (__m512i) __builtin_ia32_psrldi512_mask (__av, __B, __wv,
                                                  (__mmask16) __U);
}

/* Third operand all-zero, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  __zero, (__mmask16) __U);
}
#else
/* Macro forms of the same three names, used when __OPTIMIZE__ is not
   defined.  The count argument is cast to int.  */
#define _mm512_srli_epi32(V, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16) -1))

#define _mm512_mask_srli_epi32(S, K, V, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) (S), \
                                            (__mmask16) (K)))

#define _mm512_maskz_srli_epi32(K, V, N) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16) (K)))
#endif
1362
1363extern __inline __m512i
1364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1365_mm512_srl_epi32 (__m512i __A, __m128i __B)
1366{
1367 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1368 (__v4si) __B,
1369 (__v16si)
4271e5cb 1370 _mm512_undefined_epi32 (),
756c5857
AI
1371 (__mmask16) -1);
1372}
1373
1374extern __inline __m512i
1375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1377{
1378 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1379 (__v4si) __B,
1380 (__v16si) __W,
1381 (__mmask16) __U);
1382}
1383
1384extern __inline __m512i
1385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1386_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1387{
1388 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1389 (__v4si) __B,
1390 (__v16si)
1391 _mm512_setzero_si512 (),
1392 (__mmask16) __U);
1393}
1394
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  Each hands the
   count __B unchanged to the psradi512 mask builtin as its second
   operand.  */

/* Third operand undefined, mask (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __undef = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  __undef, (__mmask16) -1);
}

/* Third operand __W, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __av = (__v16si) __A;
  const __v16si __wv = (__v16si) __W;

  return (__m512i) __builtin_ia32_psradi512_mask (__av, __B, __wv,
                                                  (__mmask16) __U);
}

/* Third operand all-zero, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  __zero, (__mmask16) __U);
}
#else
/* Macro forms of the same three names, used when __OPTIMIZE__ is not
   defined.  The count argument is cast to int.  */
#define _mm512_srai_epi32(V, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) \
                                            _mm512_undefined_epi32 (), \
                                            (__mmask16) -1))

#define _mm512_mask_srai_epi32(S, K, V, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) (S), \
                                            (__mmask16) (K)))

#define _mm512_maskz_srai_epi32(K, V, N) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (V), \
                                            (int) (N), \
                                            (__v16si) (__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask16) (K)))
#endif
1441
1442extern __inline __m512i
1443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1444_mm512_sra_epi32 (__m512i __A, __m128i __B)
1445{
1446 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1447 (__v4si) __B,
1448 (__v16si)
4271e5cb 1449 _mm512_undefined_epi32 (),
756c5857
AI
1450 (__mmask16) -1);
1451}
1452
1453extern __inline __m512i
1454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1456{
1457 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1458 (__v4si) __B,
1459 (__v16si) __W,
1460 (__mmask16) __U);
1461}
1462
1463extern __inline __m512i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1466{
1467 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1468 (__v4si) __B,
1469 (__v16si)
1470 _mm512_setzero_si512 (),
1471 (__mmask16) __U);
1472}
1473
075691af
AI
#ifdef __OPTIMIZE__
/* Function forms of the scalar add/sub intrinsics that take an explicit
   __R argument, compiled when __OPTIMIZE__ is defined.  __R is passed
   unchanged as the last operand of each builtin.  The mask variants pass
   __W as the third operand; the maskz variants pass an all-zero vector
   there.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

#else
/* Macro forms of the same names, used when __OPTIMIZE__ is not defined.
   Each expansion is wrapped in an outer pair of parentheses so the result
   cast applies to the whole builtin call even when the macro is followed
   by a postfix operator or embedded in a larger expression.  Every
   argument is parenthesised and cast the same way the function forms
   above cast their parameters, matching the other macro fallbacks in
   this header.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (int)(C)))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(C)))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd (), \
                                              (__mmask8)(U), (int)(C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (int)(C)))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(C)))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps (), \
                                             (__mmask8)(U), (int)(C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (int)(C)))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(C)))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd (), \
                                              (__mmask8)(U), (int)(C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (int)(C)))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(C)))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps (), \
                                             (__mmask8)(U), (int)(C)))

#endif
1641
756c5857
AI
#ifdef __OPTIMIZE__
/* Function forms, compiled when __OPTIMIZE__ is defined.  __imm is handed
   unchanged to the pternlog builtins.  The plain and mask variants call
   the *_mask builtin; the maskz variants call the *_maskz builtin.  */

/* 64-bit element form, mask (__mmask8) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_mask (__a, __b, __c, __imm,
                                                     (__mmask8) -1);
}

/* 64-bit element form, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_mask (__a, __b, __c, __imm,
                                                     (__mmask8) __U);
}

/* 64-bit element form via the maskz builtin, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_maskz (__a, __b, __c, __imm,
                                                      (__mmask8) __U);
}

/* 32-bit element form, mask (__mmask16) -1.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_mask (__a, __b, __c, __imm,
                                                     (__mmask16) -1);
}

/* 32-bit element form, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_mask (__a, __b, __c, __imm,
                                                     (__mmask16) __U);
}

/* 32-bit element form via the maskz builtin, mask __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_maskz (__a, __b, __c, __imm,
                                                      (__mmask16) __U);
}
#else
/* Macro forms of the same six names, used when __OPTIMIZE__ is not
   defined.  The immediate argument is cast to int.  */
#define _mm512_ternarylogic_epi64(X, Y, Z, T) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (X), \
                                               (__v8di) (__m512i) (Y), \
                                               (__v8di) (__m512i) (Z), \
                                               (int) (T), (__mmask8) -1))
#define _mm512_mask_ternarylogic_epi64(X, K, Y, Z, T) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (X), \
                                               (__v8di) (__m512i) (Y), \
                                               (__v8di) (__m512i) (Z), \
                                               (int) (T), (__mmask8) (K)))
#define _mm512_maskz_ternarylogic_epi64(K, X, Y, Z, T) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (X), \
                                                (__v8di) (__m512i) (Y), \
                                                (__v8di) (__m512i) (Z), \
                                                (int) (T), (__mmask8) (K)))
#define _mm512_ternarylogic_epi32(X, Y, Z, T) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (X), \
                                               (__v16si) (__m512i) (Y), \
                                               (__v16si) (__m512i) (Z), \
                                               (int) (T), (__mmask16) -1))
#define _mm512_mask_ternarylogic_epi32(X, K, Y, Z, T) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (X), \
                                               (__v16si) (__m512i) (Y), \
                                               (__v16si) (__m512i) (Z), \
                                               (int) (T), (__mmask16) (K)))
#define _mm512_maskz_ternarylogic_epi32(K, X, Y, Z, T) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (X), \
                                                (__v16si) (__m512i) (Y), \
                                                (__v16si) (__m512i) (Z), \
                                                (int) (T), (__mmask16) (K)))
#endif
1731
1732extern __inline __m512d
1733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734_mm512_rcp14_pd (__m512d __A)
1735{
1736 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1737 (__v8df)
0b192937 1738 _mm512_undefined_pd (),
756c5857
AI
1739 (__mmask8) -1);
1740}
1741
1742extern __inline __m512d
1743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1745{
1746 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1747 (__v8df) __W,
1748 (__mmask8) __U);
1749}
1750
1751extern __inline __m512d
1752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1753_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1754{
1755 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1756 (__v8df)
1757 _mm512_setzero_pd (),
1758 (__mmask8) __U);
1759}
1760
1761extern __inline __m512
1762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1763_mm512_rcp14_ps (__m512 __A)
1764{
1765 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1766 (__v16sf)
0b192937 1767 _mm512_undefined_ps (),
756c5857
AI
1768 (__mmask16) -1);
1769}
1770
1771extern __inline __m512
1772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1773_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1774{
1775 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1776 (__v16sf) __W,
1777 (__mmask16) __U);
1778}
1779
1780extern __inline __m512
1781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1783{
1784 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1785 (__v16sf)
1786 _mm512_setzero_ps (),
1787 (__mmask16) __U);
1788}
1789
075691af
AI
1790extern __inline __m128d
1791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792_mm_rcp14_sd (__m128d __A, __m128d __B)
1793{
df62b4af
IT
1794 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1795 (__v2df) __A);
075691af
AI
1796}
1797
f4ee3a9e
UB
1798extern __inline __m128d
1799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1801{
1802 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1803 (__v2df) __A,
1804 (__v2df) __W,
1805 (__mmask8) __U);
1806}
1807
1808extern __inline __m128d
1809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1810_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1811{
1812 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1813 (__v2df) __A,
1814 (__v2df) _mm_setzero_ps (),
1815 (__mmask8) __U);
1816}
1817
075691af
AI
1818extern __inline __m128
1819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1820_mm_rcp14_ss (__m128 __A, __m128 __B)
1821{
df62b4af
IT
1822 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1823 (__v4sf) __A);
075691af
AI
1824}
1825
f4ee3a9e
UB
1826extern __inline __m128
1827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1828_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1829{
1830 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1831 (__v4sf) __A,
1832 (__v4sf) __W,
1833 (__mmask8) __U);
1834}
1835
1836extern __inline __m128
1837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1838_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1839{
1840 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1841 (__v4sf) __A,
1842 (__v4sf) _mm_setzero_ps (),
1843 (__mmask8) __U);
1844}
1845
756c5857
AI
1846extern __inline __m512d
1847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848_mm512_rsqrt14_pd (__m512d __A)
1849{
1850 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1851 (__v8df)
0b192937 1852 _mm512_undefined_pd (),
756c5857
AI
1853 (__mmask8) -1);
1854}
1855
1856extern __inline __m512d
1857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1859{
1860 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1861 (__v8df) __W,
1862 (__mmask8) __U);
1863}
1864
1865extern __inline __m512d
1866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1867_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1868{
1869 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1870 (__v8df)
1871 _mm512_setzero_pd (),
1872 (__mmask8) __U);
1873}
1874
1875extern __inline __m512
1876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1877_mm512_rsqrt14_ps (__m512 __A)
1878{
1879 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1880 (__v16sf)
0b192937 1881 _mm512_undefined_ps (),
756c5857
AI
1882 (__mmask16) -1);
1883}
1884
1885extern __inline __m512
1886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1887_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1888{
1889 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1890 (__v16sf) __W,
1891 (__mmask16) __U);
1892}
1893
1894extern __inline __m512
1895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1897{
1898 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1899 (__v16sf)
1900 _mm512_setzero_ps (),
1901 (__mmask16) __U);
1902}
1903
075691af
AI
1904extern __inline __m128d
1905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1907{
df62b4af
IT
1908 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1909 (__v2df) __A);
075691af
AI
1910}
1911
d7a33a4c
JK
1912extern __inline __m128d
1913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1915{
1916 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1917 (__v2df) __A,
1918 (__v2df) __W,
1919 (__mmask8) __U);
1920}
1921
1922extern __inline __m128d
1923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1925{
1926 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1927 (__v2df) __A,
1928 (__v2df) _mm_setzero_pd (),
1929 (__mmask8) __U);
1930}
1931
075691af
AI
1932extern __inline __m128
1933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1935{
df62b4af
IT
1936 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1937 (__v4sf) __A);
075691af
AI
1938}
1939
d7a33a4c
JK
1940extern __inline __m128
1941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1942_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1943{
1944 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1945 (__v4sf) __A,
1946 (__v4sf) __W,
1947 (__mmask8) __U);
1948}
1949
1950extern __inline __m128
1951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1953{
1954 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1955 (__v4sf) __A,
1956 (__v4sf) _mm_setzero_ps (),
1957 (__mmask8) __U);
1958}
1959
756c5857
AI
1960#ifdef __OPTIMIZE__
1961extern __inline __m512d
1962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1963_mm512_sqrt_round_pd (__m512d __A, const int __R)
1964{
1965 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1966 (__v8df)
0b192937 1967 _mm512_undefined_pd (),
756c5857
AI
1968 (__mmask8) -1, __R);
1969}
1970
1971extern __inline __m512d
1972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1973_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1974 const int __R)
1975{
1976 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1977 (__v8df) __W,
1978 (__mmask8) __U, __R);
1979}
1980
1981extern __inline __m512d
1982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1983_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1984{
1985 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1986 (__v8df)
1987 _mm512_setzero_pd (),
1988 (__mmask8) __U, __R);
1989}
1990
1991extern __inline __m512
1992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993_mm512_sqrt_round_ps (__m512 __A, const int __R)
1994{
1995 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1996 (__v16sf)
0b192937 1997 _mm512_undefined_ps (),
756c5857
AI
1998 (__mmask16) -1, __R);
1999}
2000
2001extern __inline __m512
2002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2003_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
2004{
2005 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2006 (__v16sf) __W,
2007 (__mmask16) __U, __R);
2008}
2009
2010extern __inline __m512
2011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
2013{
2014 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2015 (__v16sf)
2016 _mm512_setzero_ps (),
2017 (__mmask16) __U, __R);
2018}
2019
075691af
AI
2020extern __inline __m128d
2021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2022_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
2023{
b10bc0d6
OM
2024 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2025 (__v2df) __A,
2026 (__v2df)
2027 _mm_setzero_pd (),
2028 (__mmask8) -1, __R);
2029}
2030
2031extern __inline __m128d
2032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2033_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2034 const int __R)
2035{
2036 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2037 (__v2df) __A,
2038 (__v2df) __W,
2039 (__mmask8) __U, __R);
2040}
2041
2042extern __inline __m128d
2043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2044_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
2045{
2046 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2047 (__v2df) __A,
2048 (__v2df)
2049 _mm_setzero_pd (),
2050 (__mmask8) __U, __R);
075691af
AI
2051}
2052
2053extern __inline __m128
2054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2055_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
2056{
b10bc0d6
OM
2057 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2058 (__v4sf) __A,
2059 (__v4sf)
2060 _mm_setzero_ps (),
2061 (__mmask8) -1, __R);
2062}
2063
2064extern __inline __m128
2065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2066_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2067 const int __R)
2068{
2069 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2070 (__v4sf) __A,
2071 (__v4sf) __W,
2072 (__mmask8) __U, __R);
2073}
2074
2075extern __inline __m128
2076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2077_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2078{
2079 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2080 (__v4sf) __A,
2081 (__v4sf)
2082 _mm_setzero_ps (),
2083 (__mmask8) __U, __R);
075691af 2084}
756c5857
AI
2085#else
/* Macro form of _mm512_sqrt_round_pd for builds without __OPTIMIZE__.
   The expansion is fully parenthesized and A is cast explicitly, matching
   the inline version, so the result cast binds to the whole call.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (C)))
756c5857
AI
2088
/* Macro form of _mm512_mask_sqrt_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with each operand cast as in the inline version.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(W), (__mmask8)(U), (C)))
2091
/* Macro form of _mm512_maskz_sqrt_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with each operand cast as in the inline version.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (C)))
2094
/* Macro form of _mm512_sqrt_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with A cast as in the inline version.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (C)))
756c5857
AI
2097
/* Macro form of _mm512_mask_sqrt_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with each operand cast as in the inline version.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(W), (__mmask16)(U), (C)))
2100
/* Macro form of _mm512_maskz_sqrt_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with each operand cast as in the inline version.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (C)))
075691af 2103
b10bc0d6
OM
/* Macro form of _mm_sqrt_round_sd for builds without __OPTIMIZE__.  The
   builtin takes B before A.  Fully parenthesized, with operands cast as in
   the inline version.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B), \
    (__v2df)(__m128d)(A), (__v2df) _mm_setzero_pd (), (__mmask8) -1, (C)))
2107
/* Macro form of _mm_mask_sqrt_round_sd for builds without __OPTIMIZE__.
   The builtin takes B before A.  Fully parenthesized, with operands cast
   as in the inline version.  */
#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B), \
    (__v2df)(__m128d)(A), (__v2df)(__m128d)(W), (__mmask8)(U), (C)))
2110
/* Macro form of _mm_maskz_sqrt_round_sd for builds without __OPTIMIZE__.
   The builtin takes B before A.  Fully parenthesized, with operands cast
   as in the inline version.  */
#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df)(__m128d)(B), \
    (__v2df)(__m128d)(A), (__v2df) _mm_setzero_pd (), (__mmask8)(U), (C)))
2114
/* Macro form of _mm_sqrt_round_ss for builds without __OPTIMIZE__.  The
   builtin takes B before A.  Fully parenthesized, with operands cast as in
   the inline version.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B), \
    (__v4sf)(__m128)(A), (__v4sf) _mm_setzero_ps (), (__mmask8) -1, (C)))
2118
/* Macro form of _mm_mask_sqrt_round_ss for builds without __OPTIMIZE__.
   The builtin takes B before A.  Fully parenthesized, with operands cast
   as in the inline version.  */
#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B), \
    (__v4sf)(__m128)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (C)))
075691af 2121
b10bc0d6
OM
/* Macro form of _mm_maskz_sqrt_round_ss for builds without __OPTIMIZE__.
   The builtin takes B before A.  Fully parenthesized, with operands cast
   as in the inline version.  */
#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf)(__m128)(B), \
    (__v4sf)(__m128)(A), (__v4sf) _mm_setzero_ps (), (__mmask8)(U), (C)))
756c5857
AI
2125#endif
2126
2127extern __inline __m512i
2128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129_mm512_cvtepi8_epi32 (__m128i __A)
2130{
2131 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2132 (__v16si)
4271e5cb 2133 _mm512_undefined_epi32 (),
756c5857
AI
2134 (__mmask16) -1);
2135}
2136
2137extern __inline __m512i
2138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2139_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2140{
2141 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2142 (__v16si) __W,
2143 (__mmask16) __U);
2144}
2145
2146extern __inline __m512i
2147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2148_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2149{
2150 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2151 (__v16si)
2152 _mm512_setzero_si512 (),
2153 (__mmask16) __U);
2154}
2155
2156extern __inline __m512i
2157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2158_mm512_cvtepi8_epi64 (__m128i __A)
2159{
2160 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2161 (__v8di)
4271e5cb 2162 _mm512_undefined_epi32 (),
756c5857
AI
2163 (__mmask8) -1);
2164}
2165
2166extern __inline __m512i
2167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2168_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2169{
2170 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2171 (__v8di) __W,
2172 (__mmask8) __U);
2173}
2174
2175extern __inline __m512i
2176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2177_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2178{
2179 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2180 (__v8di)
2181 _mm512_setzero_si512 (),
2182 (__mmask8) __U);
2183}
2184
2185extern __inline __m512i
2186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2187_mm512_cvtepi16_epi32 (__m256i __A)
2188{
2189 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2190 (__v16si)
4271e5cb 2191 _mm512_undefined_epi32 (),
756c5857
AI
2192 (__mmask16) -1);
2193}
2194
2195extern __inline __m512i
2196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2197_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2198{
2199 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2200 (__v16si) __W,
2201 (__mmask16) __U);
2202}
2203
2204extern __inline __m512i
2205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2207{
2208 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2209 (__v16si)
2210 _mm512_setzero_si512 (),
2211 (__mmask16) __U);
2212}
2213
2214extern __inline __m512i
2215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2216_mm512_cvtepi16_epi64 (__m128i __A)
2217{
2218 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2219 (__v8di)
4271e5cb 2220 _mm512_undefined_epi32 (),
756c5857
AI
2221 (__mmask8) -1);
2222}
2223
2224extern __inline __m512i
2225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2226_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2227{
2228 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2229 (__v8di) __W,
2230 (__mmask8) __U);
2231}
2232
2233extern __inline __m512i
2234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2236{
2237 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2238 (__v8di)
2239 _mm512_setzero_si512 (),
2240 (__mmask8) __U);
2241}
2242
2243extern __inline __m512i
2244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2245_mm512_cvtepi32_epi64 (__m256i __X)
2246{
2247 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2248 (__v8di)
4271e5cb 2249 _mm512_undefined_epi32 (),
756c5857
AI
2250 (__mmask8) -1);
2251}
2252
2253extern __inline __m512i
2254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2255_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2256{
2257 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2258 (__v8di) __W,
2259 (__mmask8) __U);
2260}
2261
2262extern __inline __m512i
2263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2264_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2265{
2266 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2267 (__v8di)
2268 _mm512_setzero_si512 (),
2269 (__mmask8) __U);
2270}
2271
2272extern __inline __m512i
2273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274_mm512_cvtepu8_epi32 (__m128i __A)
2275{
2276 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2277 (__v16si)
4271e5cb 2278 _mm512_undefined_epi32 (),
756c5857
AI
2279 (__mmask16) -1);
2280}
2281
2282extern __inline __m512i
2283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2285{
2286 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2287 (__v16si) __W,
2288 (__mmask16) __U);
2289}
2290
2291extern __inline __m512i
2292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2293_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2294{
2295 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2296 (__v16si)
2297 _mm512_setzero_si512 (),
2298 (__mmask16) __U);
2299}
2300
2301extern __inline __m512i
2302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2303_mm512_cvtepu8_epi64 (__m128i __A)
2304{
2305 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2306 (__v8di)
4271e5cb 2307 _mm512_undefined_epi32 (),
756c5857
AI
2308 (__mmask8) -1);
2309}
2310
2311extern __inline __m512i
2312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2313_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2314{
2315 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2316 (__v8di) __W,
2317 (__mmask8) __U);
2318}
2319
2320extern __inline __m512i
2321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2322_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2323{
2324 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2325 (__v8di)
2326 _mm512_setzero_si512 (),
2327 (__mmask8) __U);
2328}
2329
2330extern __inline __m512i
2331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2332_mm512_cvtepu16_epi32 (__m256i __A)
2333{
2334 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2335 (__v16si)
4271e5cb 2336 _mm512_undefined_epi32 (),
756c5857
AI
2337 (__mmask16) -1);
2338}
2339
2340extern __inline __m512i
2341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2342_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2343{
2344 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2345 (__v16si) __W,
2346 (__mmask16) __U);
2347}
2348
2349extern __inline __m512i
2350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2351_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2352{
2353 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2354 (__v16si)
2355 _mm512_setzero_si512 (),
2356 (__mmask16) __U);
2357}
2358
2359extern __inline __m512i
2360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2361_mm512_cvtepu16_epi64 (__m128i __A)
2362{
2363 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2364 (__v8di)
4271e5cb 2365 _mm512_undefined_epi32 (),
756c5857
AI
2366 (__mmask8) -1);
2367}
2368
2369extern __inline __m512i
2370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2372{
2373 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2374 (__v8di) __W,
2375 (__mmask8) __U);
2376}
2377
2378extern __inline __m512i
2379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2380_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2381{
2382 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2383 (__v8di)
2384 _mm512_setzero_si512 (),
2385 (__mmask8) __U);
2386}
2387
2388extern __inline __m512i
2389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390_mm512_cvtepu32_epi64 (__m256i __X)
2391{
2392 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2393 (__v8di)
4271e5cb 2394 _mm512_undefined_epi32 (),
756c5857
AI
2395 (__mmask8) -1);
2396}
2397
2398extern __inline __m512i
2399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2400_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2401{
2402 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2403 (__v8di) __W,
2404 (__mmask8) __U);
2405}
2406
2407extern __inline __m512i
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2410{
2411 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2412 (__v8di)
2413 _mm512_setzero_si512 (),
2414 (__mmask8) __U);
2415}
2416
2417#ifdef __OPTIMIZE__
2418extern __inline __m512d
2419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2420_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2421{
2422 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2423 (__v8df) __B,
2424 (__v8df)
0b192937 2425 _mm512_undefined_pd (),
756c5857
AI
2426 (__mmask8) -1, __R);
2427}
2428
2429extern __inline __m512d
2430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2431_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2432 __m512d __B, const int __R)
2433{
2434 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2435 (__v8df) __B,
2436 (__v8df) __W,
2437 (__mmask8) __U, __R);
2438}
2439
2440extern __inline __m512d
2441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2442_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2443 const int __R)
2444{
2445 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2446 (__v8df) __B,
2447 (__v8df)
2448 _mm512_setzero_pd (),
2449 (__mmask8) __U, __R);
2450}
2451
2452extern __inline __m512
2453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2454_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2455{
2456 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2457 (__v16sf) __B,
2458 (__v16sf)
0b192937 2459 _mm512_undefined_ps (),
756c5857
AI
2460 (__mmask16) -1, __R);
2461}
2462
2463extern __inline __m512
2464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2465_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2466 __m512 __B, const int __R)
2467{
2468 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2469 (__v16sf) __B,
2470 (__v16sf) __W,
2471 (__mmask16) __U, __R);
2472}
2473
2474extern __inline __m512
2475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2476_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2477{
2478 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2479 (__v16sf) __B,
2480 (__v16sf)
2481 _mm512_setzero_ps (),
2482 (__mmask16) __U, __R);
2483}
2484
2485extern __inline __m512d
2486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2488{
2489 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2490 (__v8df) __B,
2491 (__v8df)
0b192937 2492 _mm512_undefined_pd (),
756c5857
AI
2493 (__mmask8) -1, __R);
2494}
2495
2496extern __inline __m512d
2497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2498_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2499 __m512d __B, const int __R)
2500{
2501 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2502 (__v8df) __B,
2503 (__v8df) __W,
2504 (__mmask8) __U, __R);
2505}
2506
2507extern __inline __m512d
2508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2509_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2510 const int __R)
2511{
2512 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2513 (__v8df) __B,
2514 (__v8df)
2515 _mm512_setzero_pd (),
2516 (__mmask8) __U, __R);
2517}
2518
2519extern __inline __m512
2520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2521_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2522{
2523 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2524 (__v16sf) __B,
2525 (__v16sf)
0b192937 2526 _mm512_undefined_ps (),
756c5857
AI
2527 (__mmask16) -1, __R);
2528}
2529
2530extern __inline __m512
2531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2532_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2533 __m512 __B, const int __R)
2534{
2535 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2536 (__v16sf) __B,
2537 (__v16sf) __W,
2538 (__mmask16) __U, __R);
2539}
2540
2541extern __inline __m512
2542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2543_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2544{
2545 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2546 (__v16sf) __B,
2547 (__v16sf)
2548 _mm512_setzero_ps (),
2549 (__mmask16) __U, __R);
2550}
2551#else
/* Macro form of _mm512_add_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version, so the
   result cast binds to the whole call.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_undefined_pd (), \
    (__mmask8) -1, (C)))
756c5857
AI
2554
/* Macro form of _mm512_mask_add_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (C)))
2557
/* Macro form of _mm512_maskz_add_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_setzero_pd (), \
    (__mmask8)(U), (C)))
2560
/* Macro form of _mm512_add_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf) _mm512_undefined_ps (), \
    (__mmask16) -1, (C)))
756c5857
AI
2563
/* Macro form of _mm512_mask_add_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (C)))
2566
/* Macro form of _mm512_maskz_add_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)(U), (C)))
2569
/* Macro form of _mm512_sub_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_undefined_pd (), \
    (__mmask8) -1, (C)))
756c5857
AI
2572
/* Macro form of _mm512_mask_sub_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (C)))
2575
/* Macro form of _mm512_maskz_sub_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_setzero_pd (), \
    (__mmask8)(U), (C)))
2578
/* Macro form of _mm512_sub_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf) _mm512_undefined_ps (), \
    (__mmask16) -1, (C)))
756c5857
AI
2581
/* Macro form of _mm512_mask_sub_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (C)))
2584
/* Macro form of _mm512_maskz_sub_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)(U), (C)))
2587#endif
2588
2589#ifdef __OPTIMIZE__
2590extern __inline __m512d
2591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2592_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2593{
2594 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2595 (__v8df) __B,
2596 (__v8df)
0b192937 2597 _mm512_undefined_pd (),
756c5857
AI
2598 (__mmask8) -1, __R);
2599}
2600
2601extern __inline __m512d
2602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2603_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2604 __m512d __B, const int __R)
2605{
2606 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2607 (__v8df) __B,
2608 (__v8df) __W,
2609 (__mmask8) __U, __R);
2610}
2611
2612extern __inline __m512d
2613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2614_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2615 const int __R)
2616{
2617 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2618 (__v8df) __B,
2619 (__v8df)
2620 _mm512_setzero_pd (),
2621 (__mmask8) __U, __R);
2622}
2623
2624extern __inline __m512
2625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2626_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2627{
2628 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2629 (__v16sf) __B,
2630 (__v16sf)
0b192937 2631 _mm512_undefined_ps (),
756c5857
AI
2632 (__mmask16) -1, __R);
2633}
2634
2635extern __inline __m512
2636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2637_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2638 __m512 __B, const int __R)
2639{
2640 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2641 (__v16sf) __B,
2642 (__v16sf) __W,
2643 (__mmask16) __U, __R);
2644}
2645
2646extern __inline __m512
2647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2648_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2649{
2650 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2651 (__v16sf) __B,
2652 (__v16sf)
2653 _mm512_setzero_ps (),
2654 (__mmask16) __U, __R);
2655}
2656
2657extern __inline __m512d
2658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2659_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2660{
2661 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2662 (__v8df) __V,
2663 (__v8df)
0b192937 2664 _mm512_undefined_pd (),
756c5857
AI
2665 (__mmask8) -1, __R);
2666}
2667
2668extern __inline __m512d
2669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2670_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2671 __m512d __V, const int __R)
2672{
2673 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2674 (__v8df) __V,
2675 (__v8df) __W,
2676 (__mmask8) __U, __R);
2677}
2678
2679extern __inline __m512d
2680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2681_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2682 const int __R)
2683{
2684 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2685 (__v8df) __V,
2686 (__v8df)
2687 _mm512_setzero_pd (),
2688 (__mmask8) __U, __R);
2689}
2690
2691extern __inline __m512
2692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2693_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2694{
2695 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2696 (__v16sf) __B,
2697 (__v16sf)
0b192937 2698 _mm512_undefined_ps (),
756c5857
AI
2699 (__mmask16) -1, __R);
2700}
2701
2702extern __inline __m512
2703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2704_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2705 __m512 __B, const int __R)
2706{
2707 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2708 (__v16sf) __B,
2709 (__v16sf) __W,
2710 (__mmask16) __U, __R);
2711}
2712
2713extern __inline __m512
2714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2715_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2716{
2717 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2718 (__v16sf) __B,
2719 (__v16sf)
2720 _mm512_setzero_ps (),
2721 (__mmask16) __U, __R);
2722}
2723
075691af
AI
2724extern __inline __m128d
2725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2727{
2728 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2729 (__v2df) __B,
2730 __R);
2731}
2732
f4ee3a9e
UB
2733extern __inline __m128d
2734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2735_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2736 __m128d __B, const int __R)
2737{
2738 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2739 (__v2df) __B,
2740 (__v2df) __W,
2741 (__mmask8) __U, __R);
2742}
2743
2744extern __inline __m128d
2745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2746_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2747 const int __R)
2748{
2749 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2750 (__v2df) __B,
2751 (__v2df)
2752 _mm_setzero_pd (),
2753 (__mmask8) __U, __R);
2754}
2755
075691af
AI
2756extern __inline __m128
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2759{
2760 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2761 (__v4sf) __B,
2762 __R);
2763}
2764
f4ee3a9e
UB
2765extern __inline __m128
2766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2767_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2768 __m128 __B, const int __R)
2769{
2770 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2771 (__v4sf) __B,
2772 (__v4sf) __W,
2773 (__mmask8) __U, __R);
2774}
2775
2776extern __inline __m128
2777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2778_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2779 const int __R)
2780{
2781 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2782 (__v4sf) __B,
2783 (__v4sf)
2784 _mm_setzero_ps (),
2785 (__mmask8) __U, __R);
2786}
2787
075691af
AI
2788extern __inline __m128d
2789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2790_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2791{
2792 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2793 (__v2df) __B,
2794 __R);
2795}
2796
f4ee3a9e
UB
2797extern __inline __m128d
2798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2799_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2800 __m128d __B, const int __R)
2801{
2802 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2803 (__v2df) __B,
2804 (__v2df) __W,
2805 (__mmask8) __U, __R);
2806}
2807
2808extern __inline __m128d
2809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2810_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2811 const int __R)
2812{
2813 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2814 (__v2df) __B,
2815 (__v2df)
2816 _mm_setzero_pd (),
2817 (__mmask8) __U, __R);
2818}
2819
075691af
AI
2820extern __inline __m128
2821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2823{
2824 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2825 (__v4sf) __B,
2826 __R);
2827}
2828
f4ee3a9e
UB
2829extern __inline __m128
2830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2832 __m128 __B, const int __R)
2833{
2834 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2835 (__v4sf) __B,
2836 (__v4sf) __W,
2837 (__mmask8) __U, __R);
2838}
2839
2840extern __inline __m128
2841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2842_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2843 const int __R)
2844{
2845 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2846 (__v4sf) __B,
2847 (__v4sf)
2848 _mm_setzero_ps (),
2849 (__mmask8) __U, __R);
2850}
2851
756c5857
AI
2852#else
/* Macro form of _mm512_mul_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version, so the
   result cast binds to the whole call.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_undefined_pd (), \
    (__mmask8) -1, (C)))
756c5857
AI
2855
/* Macro form of _mm512_mask_mul_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (C)))
2858
/* Macro form of _mm512_maskz_mul_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_setzero_pd (), \
    (__mmask8)(U), (C)))
2861
/* Macro form of _mm512_mul_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf) _mm512_undefined_ps (), \
    (__mmask16) -1, (C)))
756c5857
AI
2864
/* Macro form of _mm512_mask_mul_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (C)))
2867
/* Macro form of _mm512_maskz_mul_round_ps for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (__v16sf) _mm512_setzero_ps (), \
    (__mmask16)(U), (C)))
2870
/* Macro form of _mm512_div_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df) _mm512_undefined_pd (), \
    (__mmask8) -1, (C)))
756c5857
AI
2873
/* Macro form of _mm512_mask_div_round_pd for builds without __OPTIMIZE__.
   Fully parenthesized, with operands cast as in the inline version.  */
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (C)))
2876
2877#define _mm512_maskz_div_round_pd(U, A, B, C) \
2878 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2879
/* Macro forms of the divps512 rounding intrinsics, used when __OPTIMIZE__ is
   not defined.  C is forwarded as the builtin's last argument.  Arguments and
   the whole expansion are parenthesized so that the cast applies to the
   builtin's result even when the macro is followed by a postfix operator.  */
#define _mm512_div_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask((A), (B), \
	(__v16sf)_mm512_undefined_ps(), -1, (C)))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask((A), (B), \
	(__v16sf)_mm512_setzero_ps(), (U), (C)))
075691af
AI
2888
/* Macro forms of the scalar mulsd rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  C is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm_mul_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_round((A), (B), (C)))

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_mask_round((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_sd(U, A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_mask_round((A), (B), \
	(__v2df)_mm_setzero_pd(), (U), (C)))
2897
075691af
AI
/* Macro forms of the scalar mulss rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  C is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm_mul_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_mulss_round((A), (B), (C)))

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_mulss_mask_round((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_ss(U, A, B, C) \
    ((__m128)__builtin_ia32_mulss_mask_round((A), (B), \
	(__v4sf)_mm_setzero_ps(), (U), (C)))
2906
075691af
AI
/* Macro forms of the scalar divsd rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  C is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm_div_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_divsd_round((A), (B), (C)))

#define _mm_mask_div_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_divsd_mask_round((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_sd(U, A, B, C) \
    ((__m128d)__builtin_ia32_divsd_mask_round((A), (B), \
	(__v2df)_mm_setzero_pd(), (U), (C)))
2915
075691af
AI
/* Macro forms of the scalar divss rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  C is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm_div_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_divss_round((A), (B), (C)))

#define _mm_mask_div_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_divss_mask_round((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_ss(U, A, B, C) \
    ((__m128)__builtin_ia32_divss_mask_round((A), (B), \
	(__v4sf)_mm_setzero_ps(), (U), (C)))
2924
756c5857
AI
2925#endif
2926
2927#ifdef __OPTIMIZE__
2928extern __inline __m512d
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2931{
2932 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2933 (__v8df) __B,
2934 (__v8df)
0b192937 2935 _mm512_undefined_pd (),
756c5857
AI
2936 (__mmask8) -1, __R);
2937}
2938
2939extern __inline __m512d
2940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2941_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2942 __m512d __B, const int __R)
2943{
2944 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2945 (__v8df) __B,
2946 (__v8df) __W,
2947 (__mmask8) __U, __R);
2948}
2949
2950extern __inline __m512d
2951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2952_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2953 const int __R)
2954{
2955 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2956 (__v8df) __B,
2957 (__v8df)
2958 _mm512_setzero_pd (),
2959 (__mmask8) __U, __R);
2960}
2961
2962extern __inline __m512
2963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2964_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2965{
2966 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2967 (__v16sf) __B,
2968 (__v16sf)
0b192937 2969 _mm512_undefined_ps (),
756c5857
AI
2970 (__mmask16) -1, __R);
2971}
2972
2973extern __inline __m512
2974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2975_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2976 __m512 __B, const int __R)
2977{
2978 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2979 (__v16sf) __B,
2980 (__v16sf) __W,
2981 (__mmask16) __U, __R);
2982}
2983
2984extern __inline __m512
2985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2987{
2988 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2989 (__v16sf) __B,
2990 (__v16sf)
2991 _mm512_setzero_ps (),
2992 (__mmask16) __U, __R);
2993}
2994
2995extern __inline __m512d
2996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2998{
2999 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3000 (__v8df) __B,
3001 (__v8df)
0b192937 3002 _mm512_undefined_pd (),
756c5857
AI
3003 (__mmask8) -1, __R);
3004}
3005
3006extern __inline __m512d
3007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3008_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3009 __m512d __B, const int __R)
3010{
3011 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3012 (__v8df) __B,
3013 (__v8df) __W,
3014 (__mmask8) __U, __R);
3015}
3016
3017extern __inline __m512d
3018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3019_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3020 const int __R)
3021{
3022 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3023 (__v8df) __B,
3024 (__v8df)
3025 _mm512_setzero_pd (),
3026 (__mmask8) __U, __R);
3027}
3028
3029extern __inline __m512
3030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3031_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
3032{
3033 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3034 (__v16sf) __B,
3035 (__v16sf)
0b192937 3036 _mm512_undefined_ps (),
756c5857
AI
3037 (__mmask16) -1, __R);
3038}
3039
3040extern __inline __m512
3041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3042_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3043 __m512 __B, const int __R)
3044{
3045 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3046 (__v16sf) __B,
3047 (__v16sf) __W,
3048 (__mmask16) __U, __R);
3049}
3050
3051extern __inline __m512
3052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3053_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3054{
3055 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3056 (__v16sf) __B,
3057 (__v16sf)
3058 _mm512_setzero_ps (),
3059 (__mmask16) __U, __R);
3060}
3061#else
/* Macro forms of the maxpd512 intrinsics taking an extra argument R, used
   when __OPTIMIZE__ is not defined.  R is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm512_max_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask((A), (B), \
	(__v8df)_mm512_undefined_pd(), -1, (R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask((A), (B), \
	(__v8df)_mm512_setzero_pd(), (U), (R)))
3070
/* Macro form of _mm512_max_round_ps, used when __OPTIMIZE__ is not defined.
   The third operand is a __v16sf, so it is taken from the single-precision
   helper _mm512_undefined_ps (), as in the inline definition.  Arguments and
   the whole expansion are parenthesized so that the cast applies to the
   builtin's result in any surrounding expression.  */
#define _mm512_max_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask((A), (B), \
	(__v16sf)_mm512_undefined_ps(), -1, (R)))
756c5857
AI
3073
/* Masked macro forms of the maxps512 intrinsics, used when __OPTIMIZE__ is
   not defined.  R is forwarded as the builtin's last argument.  Arguments and
   the whole expansion are parenthesized so that the cast applies to the
   builtin's result in any surrounding expression.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask((A), (B), \
	(__v16sf)_mm512_setzero_ps(), (U), (R)))
3079
/* Macro forms of the minpd512 intrinsics taking an extra argument R, used
   when __OPTIMIZE__ is not defined.  R is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm512_min_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask((A), (B), \
	(__v8df)_mm512_undefined_pd(), -1, (R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask((A), (B), \
	(__v8df)_mm512_setzero_pd(), (U), (R)))
3088
/* Macro forms of the minps512 intrinsics taking an extra argument R, used
   when __OPTIMIZE__ is not defined.  R is forwarded as the builtin's last
   argument.  Arguments and the whole expansion are parenthesized so that the
   cast applies to the builtin's result in any surrounding expression.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask((A), (B), \
	(__v16sf)_mm512_undefined_ps(), -1, (R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask((A), (B), \
	(__v16sf)_mm512_setzero_ps(), (U), (R)))
3097#endif
3098
3099#ifdef __OPTIMIZE__
3100extern __inline __m512d
3101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3103{
3104 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3105 (__v8df) __B,
3106 (__v8df)
0b192937 3107 _mm512_undefined_pd (),
756c5857
AI
3108 (__mmask8) -1, __R);
3109}
3110
3111extern __inline __m512d
3112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3114 __m512d __B, const int __R)
3115{
3116 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3117 (__v8df) __B,
3118 (__v8df) __W,
3119 (__mmask8) __U, __R);
3120}
3121
3122extern __inline __m512d
3123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3124_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3125 const int __R)
3126{
3127 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3128 (__v8df) __B,
3129 (__v8df)
3130 _mm512_setzero_pd (),
3131 (__mmask8) __U, __R);
3132}
3133
3134extern __inline __m512
3135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3136_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3137{
3138 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3139 (__v16sf) __B,
3140 (__v16sf)
0b192937 3141 _mm512_undefined_ps (),
756c5857
AI
3142 (__mmask16) -1, __R);
3143}
3144
3145extern __inline __m512
3146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3147_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3148 __m512 __B, const int __R)
3149{
3150 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3151 (__v16sf) __B,
3152 (__v16sf) __W,
3153 (__mmask16) __U, __R);
3154}
3155
3156extern __inline __m512
3157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3158_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3159 const int __R)
3160{
3161 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3162 (__v16sf) __B,
3163 (__v16sf)
3164 _mm512_setzero_ps (),
3165 (__mmask16) __U, __R);
3166}
3167
075691af
AI
3168extern __inline __m128d
3169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3170_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3171{
158061a6
OM
3172 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3173 (__v2df) __B,
3174 (__v2df)
3175 _mm_setzero_pd (),
3176 (__mmask8) -1, __R);
3177}
3178
3179extern __inline __m128d
3180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3181_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3182 const int __R)
3183{
3184 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3185 (__v2df) __B,
3186 (__v2df) __W,
3187 (__mmask8) __U, __R);
3188}
3189
3190extern __inline __m128d
3191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3192_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3193 const int __R)
3194{
3195 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3196 (__v2df) __B,
3197 (__v2df)
3198 _mm_setzero_pd (),
3199 (__mmask8) __U, __R);
075691af
AI
3200}
3201
3202extern __inline __m128
3203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3204_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3205{
158061a6
OM
3206 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3207 (__v4sf) __B,
3208 (__v4sf)
3209 _mm_setzero_ps (),
3210 (__mmask8) -1, __R);
3211}
3212
3213extern __inline __m128
3214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3215_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3216 const int __R)
3217{
3218 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3219 (__v4sf) __B,
3220 (__v4sf) __W,
3221 (__mmask8) __U, __R);
3222}
3223
3224extern __inline __m128
3225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3227{
3228 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3229 (__v4sf) __B,
3230 (__v4sf)
3231 _mm_setzero_ps (),
3232 (__mmask8) __U, __R);
075691af 3233}
756c5857
AI
3234#else
/* Macro forms of the scalefpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  C is forwarded as the builtin's last argument.  Arguments
   and the whole expansion are parenthesized so that the cast applies to the
   builtin's result in any surrounding expression.  */
#define _mm512_scalef_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask((A), (B), \
	(__v8df)_mm512_undefined_pd(), -1, (C)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask((A), (B), \
	(__v8df)_mm512_setzero_pd(), (U), (C)))
3243
/* Macro forms of the scalefps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  C is forwarded as the builtin's last argument.  Arguments
   and the whole expansion are parenthesized so that the cast applies to the
   builtin's result in any surrounding expression.  */
#define _mm512_scalef_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask((A), (B), \
	(__v16sf)_mm512_undefined_ps(), -1, (C)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask((A), (B), \
	(__v16sf)_mm512_setzero_ps(), (U), (C)))
075691af
AI
3252
/* Macro forms of the scalar scalef rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  They mirror the inline definitions in the
   __OPTIMIZE__ branch, including the mask and maskz variants, so the same
   set of intrinsics exists in both configurations.  C is forwarded as the
   builtin's last argument.  Arguments and the whole expansion are
   parenthesized so that the cast applies to the builtin's result in any
   surrounding expression.  */
#define _mm_scalef_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_scalefsd_mask_round ((A), (B), \
	(__v2df)_mm_setzero_pd (), -1, (C)))

#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_sd(U, A, B, C) \
    ((__m128d)__builtin_ia32_scalefsd_mask_round ((A), (B), \
	(__v2df)_mm_setzero_pd (), (U), (C)))

#define _mm_scalef_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_scalefss_mask_round ((A), (B), \
	(__v4sf)_mm_setzero_ps (), -1, (C)))

#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_ss(U, A, B, C) \
    ((__m128)__builtin_ia32_scalefss_mask_round ((A), (B), \
	(__v4sf)_mm_setzero_ps (), (U), (C)))
756c5857
AI
3260#endif
3261
3262#ifdef __OPTIMIZE__
3263extern __inline __m512d
3264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3265_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3266{
3267 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3268 (__v8df) __B,
3269 (__v8df) __C,
3270 (__mmask8) -1, __R);
3271}
3272
3273extern __inline __m512d
3274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3276 __m512d __C, const int __R)
3277{
3278 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3279 (__v8df) __B,
3280 (__v8df) __C,
3281 (__mmask8) __U, __R);
3282}
3283
3284extern __inline __m512d
3285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3286_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3287 __mmask8 __U, const int __R)
3288{
3289 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3290 (__v8df) __B,
3291 (__v8df) __C,
3292 (__mmask8) __U, __R);
3293}
3294
3295extern __inline __m512d
3296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3297_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3298 __m512d __C, const int __R)
3299{
3300 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3301 (__v8df) __B,
3302 (__v8df) __C,
3303 (__mmask8) __U, __R);
3304}
3305
3306extern __inline __m512
3307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3308_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3309{
3310 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3311 (__v16sf) __B,
3312 (__v16sf) __C,
3313 (__mmask16) -1, __R);
3314}
3315
3316extern __inline __m512
3317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3318_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3319 __m512 __C, const int __R)
3320{
3321 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3322 (__v16sf) __B,
3323 (__v16sf) __C,
3324 (__mmask16) __U, __R);
3325}
3326
3327extern __inline __m512
3328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3329_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3330 __mmask16 __U, const int __R)
3331{
3332 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3333 (__v16sf) __B,
3334 (__v16sf) __C,
3335 (__mmask16) __U, __R);
3336}
3337
3338extern __inline __m512
3339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3340_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3341 __m512 __C, const int __R)
3342{
3343 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3344 (__v16sf) __B,
3345 (__v16sf) __C,
3346 (__mmask16) __U, __R);
3347}
3348
3349extern __inline __m512d
3350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3352{
fe7f972d 3353 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 3354 (__v8df) __B,
fe7f972d 3355 (__v8df) __C,
756c5857
AI
3356 (__mmask8) -1, __R);
3357}
3358
3359extern __inline __m512d
3360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3361_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3362 __m512d __C, const int __R)
3363{
fe7f972d 3364 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 3365 (__v8df) __B,
fe7f972d 3366 (__v8df) __C,
756c5857
AI
3367 (__mmask8) __U, __R);
3368}
3369
3370extern __inline __m512d
3371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3372_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3373 __mmask8 __U, const int __R)
3374{
3375 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3376 (__v8df) __B,
3377 (__v8df) __C,
3378 (__mmask8) __U, __R);
3379}
3380
3381extern __inline __m512d
3382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3383_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3384 __m512d __C, const int __R)
3385{
fe7f972d 3386 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
756c5857 3387 (__v8df) __B,
fe7f972d 3388 (__v8df) __C,
756c5857
AI
3389 (__mmask8) __U, __R);
3390}
3391
3392extern __inline __m512
3393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3394_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3395{
fe7f972d 3396 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 3397 (__v16sf) __B,
fe7f972d 3398 (__v16sf) __C,
756c5857
AI
3399 (__mmask16) -1, __R);
3400}
3401
3402extern __inline __m512
3403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3404_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3405 __m512 __C, const int __R)
3406{
fe7f972d 3407 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 3408 (__v16sf) __B,
fe7f972d 3409 (__v16sf) __C,
756c5857
AI
3410 (__mmask16) __U, __R);
3411}
3412
3413extern __inline __m512
3414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3416 __mmask16 __U, const int __R)
3417{
3418 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3419 (__v16sf) __B,
3420 (__v16sf) __C,
3421 (__mmask16) __U, __R);
3422}
3423
3424extern __inline __m512
3425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3426_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3427 __m512 __C, const int __R)
3428{
fe7f972d 3429 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
756c5857 3430 (__v16sf) __B,
fe7f972d 3431 (__v16sf) __C,
756c5857
AI
3432 (__mmask16) __U, __R);
3433}
3434
3435extern __inline __m512d
3436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3437_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3438{
3439 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3440 (__v8df) __B,
3441 (__v8df) __C,
3442 (__mmask8) -1, __R);
3443}
3444
3445extern __inline __m512d
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3448 __m512d __C, const int __R)
3449{
3450 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3451 (__v8df) __B,
3452 (__v8df) __C,
3453 (__mmask8) __U, __R);
3454}
3455
3456extern __inline __m512d
3457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3458_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3459 __mmask8 __U, const int __R)
3460{
3461 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3462 (__v8df) __B,
3463 (__v8df) __C,
3464 (__mmask8) __U, __R);
3465}
3466
3467extern __inline __m512d
3468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3469_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3470 __m512d __C, const int __R)
3471{
3472 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3473 (__v8df) __B,
3474 (__v8df) __C,
3475 (__mmask8) __U, __R);
3476}
3477
3478extern __inline __m512
3479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3480_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3481{
3482 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3483 (__v16sf) __B,
3484 (__v16sf) __C,
3485 (__mmask16) -1, __R);
3486}
3487
3488extern __inline __m512
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3491 __m512 __C, const int __R)
3492{
3493 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3494 (__v16sf) __B,
3495 (__v16sf) __C,
3496 (__mmask16) __U, __R);
3497}
3498
3499extern __inline __m512
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3502 __mmask16 __U, const int __R)
3503{
3504 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3505 (__v16sf) __B,
3506 (__v16sf) __C,
3507 (__mmask16) __U, __R);
3508}
3509
3510extern __inline __m512
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3513 __m512 __C, const int __R)
3514{
3515 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3516 (__v16sf) __B,
3517 (__v16sf) __C,
3518 (__mmask16) __U, __R);
3519}
3520
3521extern __inline __m512d
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3524{
3525 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3526 (__v8df) __B,
3527 -(__v8df) __C,
3528 (__mmask8) -1, __R);
3529}
3530
3531extern __inline __m512d
3532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3533_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3534 __m512d __C, const int __R)
3535{
3536 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3537 (__v8df) __B,
3538 -(__v8df) __C,
3539 (__mmask8) __U, __R);
3540}
3541
3542extern __inline __m512d
3543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3544_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3545 __mmask8 __U, const int __R)
3546{
3547 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3548 (__v8df) __B,
3549 (__v8df) __C,
3550 (__mmask8) __U, __R);
3551}
3552
3553extern __inline __m512d
3554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3555_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3556 __m512d __C, const int __R)
3557{
3558 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3559 (__v8df) __B,
3560 -(__v8df) __C,
3561 (__mmask8) __U, __R);
3562}
3563
3564extern __inline __m512
3565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3567{
3568 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3569 (__v16sf) __B,
3570 -(__v16sf) __C,
3571 (__mmask16) -1, __R);
3572}
3573
3574extern __inline __m512
3575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3577 __m512 __C, const int __R)
3578{
3579 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3580 (__v16sf) __B,
3581 -(__v16sf) __C,
3582 (__mmask16) __U, __R);
3583}
3584
3585extern __inline __m512
3586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3587_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3588 __mmask16 __U, const int __R)
3589{
3590 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3591 (__v16sf) __B,
3592 (__v16sf) __C,
3593 (__mmask16) __U, __R);
3594}
3595
3596extern __inline __m512
3597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3599 __m512 __C, const int __R)
3600{
3601 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3602 (__v16sf) __B,
3603 -(__v16sf) __C,
3604 (__mmask16) __U, __R);
3605}
3606
3607extern __inline __m512d
3608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3609_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3610{
5ca94977
L
3611 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3612 (__v8df) __B,
3613 (__v8df) __C,
3614 (__mmask8) -1, __R);
756c5857
AI
3615}
3616
3617extern __inline __m512d
3618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3619_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3620 __m512d __C, const int __R)
3621{
3622 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3623 (__v8df) __B,
3624 (__v8df) __C,
3625 (__mmask8) __U, __R);
3626}
3627
3628extern __inline __m512d
3629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3630_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3631 __mmask8 __U, const int __R)
3632{
5ca94977
L
3633 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
3634 (__v8df) __B,
3635 (__v8df) __C,
3636 (__mmask8) __U, __R);
756c5857
AI
3637}
3638
3639extern __inline __m512d
3640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3642 __m512d __C, const int __R)
3643{
5ca94977
L
3644 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
3645 (__v8df) __B,
3646 (__v8df) __C,
3647 (__mmask8) __U, __R);
756c5857
AI
3648}
3649
3650extern __inline __m512
3651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3653{
5ca94977
L
3654 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3655 (__v16sf) __B,
3656 (__v16sf) __C,
3657 (__mmask16) -1, __R);
756c5857
AI
3658}
3659
3660extern __inline __m512
3661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3663 __m512 __C, const int __R)
3664{
3665 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3666 (__v16sf) __B,
3667 (__v16sf) __C,
3668 (__mmask16) __U, __R);
3669}
3670
3671extern __inline __m512
3672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3673_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3674 __mmask16 __U, const int __R)
3675{
5ca94977
L
3676 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
3677 (__v16sf) __B,
3678 (__v16sf) __C,
3679 (__mmask16) __U, __R);
756c5857
AI
3680}
3681
3682extern __inline __m512
3683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3684_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3685 __m512 __C, const int __R)
3686{
5ca94977
L
3687 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
3688 (__v16sf) __B,
3689 (__v16sf) __C,
3690 (__mmask16) __U, __R);
756c5857
AI
3691}
3692
3693extern __inline __m512d
3694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3695_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3696{
38ef6fb1
L
3697 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3698 (__v8df) __B,
3699 (__v8df) __C,
3700 (__mmask8) -1, __R);
756c5857
AI
3701}
3702
3703extern __inline __m512d
3704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3705_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3706 __m512d __C, const int __R)
3707{
3708 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3709 (__v8df) __B,
3710 (__v8df) __C,
3711 (__mmask8) __U, __R);
3712}
3713
3714extern __inline __m512d
3715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3716_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3717 __mmask8 __U, const int __R)
3718{
3719 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3720 (__v8df) __B,
3721 (__v8df) __C,
3722 (__mmask8) __U, __R);
3723}
3724
3725extern __inline __m512d
3726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3727_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3728 __m512d __C, const int __R)
3729{
38ef6fb1
L
3730 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
3731 (__v8df) __B,
3732 (__v8df) __C,
3733 (__mmask8) __U, __R);
756c5857
AI
3734}
3735
3736extern __inline __m512
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3739{
38ef6fb1
L
3740 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3741 (__v16sf) __B,
3742 (__v16sf) __C,
3743 (__mmask16) -1, __R);
756c5857
AI
3744}
3745
3746extern __inline __m512
3747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3748_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3749 __m512 __C, const int __R)
3750{
3751 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3752 (__v16sf) __B,
3753 (__v16sf) __C,
3754 (__mmask16) __U, __R);
3755}
3756
3757extern __inline __m512
3758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3759_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3760 __mmask16 __U, const int __R)
3761{
3762 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3763 (__v16sf) __B,
3764 (__v16sf) __C,
3765 (__mmask16) __U, __R);
3766}
3767
3768extern __inline __m512
3769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3770_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3771 __m512 __C, const int __R)
3772{
38ef6fb1
L
3773 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
3774 (__v16sf) __B,
3775 (__v16sf) __C,
3776 (__mmask16) __U, __R);
756c5857
AI
3777}
3778#else
/* Macro forms of the vfmaddpd512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  R is forwarded as the builtin's last argument.  Arguments
   and the whole expansion are parenthesized so that the cast applies to the
   builtin's result in any surrounding expression.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), (C), -1, (R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
    ((__m512d)__builtin_ia32_vfmaddpd512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
    ((__m512d)__builtin_ia32_vfmaddpd512_maskz((A), (B), (C), (U), (R)))
3790
/* Macro forms of the vfmaddps512 rounding intrinsics, used when __OPTIMIZE__
   is not defined.  R is forwarded as the builtin's last argument.  Arguments
   and the whole expansion are parenthesized so that the cast applies to the
   builtin's result in any surrounding expression.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
    ((__m512)__builtin_ia32_vfmaddps512_mask((A), (B), (C), -1, (R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
    ((__m512)__builtin_ia32_vfmaddps512_mask((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
    ((__m512)__builtin_ia32_vfmaddps512_mask3((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
    ((__m512)__builtin_ia32_vfmaddps512_maskz((A), (B), (C), (U), (R)))
3802
/* Macro forms of the fmsub-with-rounding intrinsics.  A, B, C are the
   operands, U the write-mask and R the rounding argument; the unmasked
   variants pass -1 as the mask.  Every expansion is wrapped in
   parentheses so a postfix operator written after the invocation
   applies to the cast result, not to the builtin call inside it.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_maskz (A, B, C, U, R))
3826
/* Macro forms of the fmaddsub-with-rounding intrinsics.  A, B, C are the
   operands, U the write-mask and R the rounding argument; the unmasked
   variants pass -1 as the mask.  Every expansion is wrapped in
   parentheses so a postfix operator written after the invocation
   applies to the cast result, not to the builtin call inside it.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3850
/* Macro forms of the fmsubadd-with-rounding intrinsics.  The plain,
   mask and maskz variants reuse the fmaddsub builtins with the third
   operand negated (-(C)); only the mask3 variants call a dedicated
   vfmsubadd builtin with C unmodified.  U is the write-mask (-1 when
   unmasked) and R the rounding argument.  Every expansion is wrapped in
   parentheses so a postfix operator written after the invocation
   applies to the cast result, not to the builtin call inside it.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
3874
/* Macro forms of the fnmadd-with-rounding intrinsics.  A, B, C are the
   operands, U the write-mask and R the rounding argument; the unmasked
   variants pass -1 as the mask.  Every expansion is wrapped in
   parentheses so a postfix operator written after the invocation
   applies to the cast result, not to the builtin call inside it.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_maskz (A, B, C, U, R))
3898
/* Macro forms of the fnmsub-with-rounding intrinsics.  A, B, C are the
   operands, U the write-mask and R the rounding argument; the unmasked
   variants pass -1 as the mask.  Every expansion is wrapped in
   parentheses so a postfix operator written after the invocation
   applies to the cast result, not to the builtin call inside it.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_maskz (A, B, C, U, R))
3922#endif
3923
3924extern __inline __m512i
3925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3926_mm512_abs_epi64 (__m512i __A)
3927{
3928 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3929 (__v8di)
4271e5cb 3930 _mm512_undefined_epi32 (),
756c5857
AI
3931 (__mmask8) -1);
3932}
3933
3934extern __inline __m512i
3935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3937{
3938 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3939 (__v8di) __W,
3940 (__mmask8) __U);
3941}
3942
3943extern __inline __m512i
3944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3945_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3946{
3947 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3948 (__v8di)
3949 _mm512_setzero_si512 (),
3950 (__mmask8) __U);
3951}
3952
3953extern __inline __m512i
3954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3955_mm512_abs_epi32 (__m512i __A)
3956{
3957 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3958 (__v16si)
4271e5cb 3959 _mm512_undefined_epi32 (),
756c5857
AI
3960 (__mmask16) -1);
3961}
3962
3963extern __inline __m512i
3964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3966{
3967 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3968 (__v16si) __W,
3969 (__mmask16) __U);
3970}
3971
3972extern __inline __m512i
3973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3975{
3976 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3977 (__v16si)
3978 _mm512_setzero_si512 (),
3979 (__mmask16) __U);
3980}
3981
3982extern __inline __m512
3983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984_mm512_broadcastss_ps (__m128 __A)
3985{
0b192937
UD
3986 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3987 (__v16sf)
3988 _mm512_undefined_ps (),
756c5857
AI
3989 (__mmask16) -1);
3990}
3991
3992extern __inline __m512
3993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3994_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3995{
3996 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3997 (__v16sf) __O, __M);
3998}
3999
4000extern __inline __m512
4001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4002_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
4003{
4004 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4005 (__v16sf)
4006 _mm512_setzero_ps (),
4007 __M);
4008}
4009
4010extern __inline __m512d
4011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012_mm512_broadcastsd_pd (__m128d __A)
4013{
0b192937
UD
4014 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4015 (__v8df)
4016 _mm512_undefined_pd (),
756c5857
AI
4017 (__mmask8) -1);
4018}
4019
4020extern __inline __m512d
4021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4022_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
4023{
4024 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4025 (__v8df) __O, __M);
4026}
4027
4028extern __inline __m512d
4029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4030_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
4031{
4032 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4033 (__v8df)
4034 _mm512_setzero_pd (),
4035 __M);
4036}
4037
4038extern __inline __m512i
4039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040_mm512_broadcastd_epi32 (__m128i __A)
4041{
0b192937
UD
4042 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4043 (__v16si)
4271e5cb 4044 _mm512_undefined_epi32 (),
756c5857
AI
4045 (__mmask16) -1);
4046}
4047
4048extern __inline __m512i
4049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4050_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
4051{
4052 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4053 (__v16si) __O, __M);
4054}
4055
4056extern __inline __m512i
4057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
4059{
4060 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4061 (__v16si)
4062 _mm512_setzero_si512 (),
4063 __M);
4064}
4065
4066extern __inline __m512i
4067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068_mm512_set1_epi32 (int __A)
4069{
43373412 4070 return (__m512i)(__v16si)
4071 { __A, __A, __A, __A, __A, __A, __A, __A,
4072 __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
4073}
4074
4075extern __inline __m512i
4076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4077_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4078{
4079 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4080 __M);
4081}
4082
4083extern __inline __m512i
4084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4085_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4086{
4087 return (__m512i)
4088 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4089 (__v16si) _mm512_setzero_si512 (),
4090 __M);
4091}
4092
4093extern __inline __m512i
4094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4095_mm512_broadcastq_epi64 (__m128i __A)
4096{
0b192937
UD
4097 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4098 (__v8di)
4271e5cb 4099 _mm512_undefined_epi32 (),
756c5857
AI
4100 (__mmask8) -1);
4101}
4102
4103extern __inline __m512i
4104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4105_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4106{
4107 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4108 (__v8di) __O, __M);
4109}
4110
4111extern __inline __m512i
4112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4113_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4114{
4115 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4116 (__v8di)
4117 _mm512_setzero_si512 (),
4118 __M);
4119}
4120
4121extern __inline __m512i
4122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4123_mm512_set1_epi64 (long long __A)
4124{
43373412 4125 return (__m512i)(__v8di) { __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
4126}
4127
4128extern __inline __m512i
4129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4130_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4131{
756c5857
AI
4132 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4133 __M);
756c5857
AI
4134}
4135
4136extern __inline __m512i
4137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4138_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4139{
756c5857
AI
4140 return (__m512i)
4141 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4142 (__v8di) _mm512_setzero_si512 (),
4143 __M);
756c5857
AI
4144}
4145
4146extern __inline __m512
4147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4148_mm512_broadcast_f32x4 (__m128 __A)
4149{
0b192937
UD
4150 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4151 (__v16sf)
4152 _mm512_undefined_ps (),
756c5857
AI
4153 (__mmask16) -1);
4154}
4155
4156extern __inline __m512
4157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4158_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4159{
4160 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4161 (__v16sf) __O,
4162 __M);
4163}
4164
4165extern __inline __m512
4166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4167_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4168{
4169 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4170 (__v16sf)
4171 _mm512_setzero_ps (),
4172 __M);
4173}
4174
4175extern __inline __m512i
4176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4177_mm512_broadcast_i32x4 (__m128i __A)
4178{
756c5857 4179 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 4180 (__v16si)
4271e5cb 4181 _mm512_undefined_epi32 (),
756c5857
AI
4182 (__mmask16) -1);
4183}
4184
4185extern __inline __m512i
4186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4187_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4188{
4189 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4190 (__v16si) __O,
4191 __M);
4192}
4193
4194extern __inline __m512i
4195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4197{
4198 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4199 (__v16si)
4200 _mm512_setzero_si512 (),
4201 __M);
4202}
4203
4204extern __inline __m512d
4205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206_mm512_broadcast_f64x4 (__m256d __A)
4207{
756c5857 4208 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
4209 (__v8df)
4210 _mm512_undefined_pd (),
756c5857
AI
4211 (__mmask8) -1);
4212}
4213
4214extern __inline __m512d
4215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4216_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4217{
4218 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4219 (__v8df) __O,
4220 __M);
4221}
4222
4223extern __inline __m512d
4224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4225_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4226{
4227 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4228 (__v8df)
4229 _mm512_setzero_pd (),
4230 __M);
4231}
4232
4233extern __inline __m512i
4234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4235_mm512_broadcast_i64x4 (__m256i __A)
4236{
756c5857 4237 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 4238 (__v8di)
4271e5cb 4239 _mm512_undefined_epi32 (),
756c5857
AI
4240 (__mmask8) -1);
4241}
4242
4243extern __inline __m512i
4244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4245_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4246{
4247 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4248 (__v8di) __O,
4249 __M);
4250}
4251
4252extern __inline __m512i
4253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4255{
4256 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4257 (__v8di)
4258 _mm512_setzero_si512 (),
4259 __M);
4260}
4261
/* Named 8-bit selector constants, one for every value 0x00..0xFF.
   The four letters after _MM_PERM_ encode four 2-bit fields, most
   significant first, with A = 0, B = 1, C = 2 and D = 3; for example
   _MM_PERM_BAAA is 0x40 and _MM_PERM_DCBA is 0xE4.  The type is used
   as the selector parameter of the _mm512_*shuffle_epi32 functions.  */
4262typedef enum
4263{
4264 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
4265 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
4266 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
4267 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
4268 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
4269 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
4270 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
4271 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
4272 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
4273 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
4274 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
4275 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
4276 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
4277 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
4278 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
4279 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
4280 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
4281 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
4282 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
4283 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
4284 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
4285 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
4286 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
4287 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
4288 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
4289 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
4290 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
4291 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
4292 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
4293 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
4294 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
4295 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
4296 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
4297 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
4298 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
4299 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
4300 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
4301 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
4302 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
4303 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
4304 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
4305 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
4306 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
4307 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
4308 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
4309 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
4310 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
4311 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
4312 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
4313 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
4314 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
4315 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
4316 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
4317 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
4318 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
4319 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
4320 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
4321 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
4322 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
4323 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
4324 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
4325 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
4326 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
4327 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
4328 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
4329 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
4330 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
4331 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
4332 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
4333 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
4334 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
4335 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
4336 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
4337 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
4338 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
4339 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
4340 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
4341 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
4342 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
4343 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
4344 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
4345 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
4346 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
4347 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
4348 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
4349 _MM_PERM_DDDD = 0xFF
4350} _MM_PERM_ENUM;
4351
4352#ifdef __OPTIMIZE__
4353extern __inline __m512i
4354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4355_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4356{
4357 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4358 __mask,
4359 (__v16si)
4271e5cb 4360 _mm512_undefined_epi32 (),
756c5857
AI
4361 (__mmask16) -1);
4362}
4363
4364extern __inline __m512i
4365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4366_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4367 _MM_PERM_ENUM __mask)
4368{
4369 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4370 __mask,
4371 (__v16si) __W,
4372 (__mmask16) __U);
4373}
4374
4375extern __inline __m512i
4376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4377_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4378{
4379 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4380 __mask,
4381 (__v16si)
4382 _mm512_setzero_si512 (),
4383 (__mmask16) __U);
4384}
4385
4386extern __inline __m512i
4387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4388_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4389{
4390 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4391 (__v8di) __B, __imm,
4392 (__v8di)
4271e5cb 4393 _mm512_undefined_epi32 (),
756c5857
AI
4394 (__mmask8) -1);
4395}
4396
4397extern __inline __m512i
4398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4399_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4400 __m512i __B, const int __imm)
4401{
4402 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4403 (__v8di) __B, __imm,
4404 (__v8di) __W,
4405 (__mmask8) __U);
4406}
4407
4408extern __inline __m512i
4409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4410_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4411 const int __imm)
4412{
4413 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4414 (__v8di) __B, __imm,
4415 (__v8di)
4416 _mm512_setzero_si512 (),
4417 (__mmask8) __U);
4418}
4419
4420extern __inline __m512i
4421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4422_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4423{
4424 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4425 (__v16si) __B,
4426 __imm,
4427 (__v16si)
4271e5cb 4428 _mm512_undefined_epi32 (),
756c5857
AI
4429 (__mmask16) -1);
4430}
4431
4432extern __inline __m512i
4433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4434_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4435 __m512i __B, const int __imm)
4436{
4437 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4438 (__v16si) __B,
4439 __imm,
4440 (__v16si) __W,
4441 (__mmask16) __U);
4442}
4443
4444extern __inline __m512i
4445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4446_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4447 const int __imm)
4448{
4449 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4450 (__v16si) __B,
4451 __imm,
4452 (__v16si)
4453 _mm512_setzero_si512 (),
4454 (__mmask16) __U);
4455}
4456
4457extern __inline __m512d
4458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4459_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4460{
4461 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4462 (__v8df) __B, __imm,
4463 (__v8df)
0b192937 4464 _mm512_undefined_pd (),
756c5857
AI
4465 (__mmask8) -1);
4466}
4467
4468extern __inline __m512d
4469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4470_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4471 __m512d __B, const int __imm)
4472{
4473 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4474 (__v8df) __B, __imm,
4475 (__v8df) __W,
4476 (__mmask8) __U);
4477}
4478
4479extern __inline __m512d
4480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4481_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4482 const int __imm)
4483{
4484 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4485 (__v8df) __B, __imm,
4486 (__v8df)
4487 _mm512_setzero_pd (),
4488 (__mmask8) __U);
4489}
4490
4491extern __inline __m512
4492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4493_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4494{
4495 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4496 (__v16sf) __B, __imm,
4497 (__v16sf)
0b192937 4498 _mm512_undefined_ps (),
756c5857
AI
4499 (__mmask16) -1);
4500}
4501
4502extern __inline __m512
4503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4504_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4505 __m512 __B, const int __imm)
4506{
4507 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4508 (__v16sf) __B, __imm,
4509 (__v16sf) __W,
4510 (__mmask16) __U);
4511}
4512
4513extern __inline __m512
4514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4515_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4516 const int __imm)
4517{
4518 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4519 (__v16sf) __B, __imm,
4520 (__v16sf)
4521 _mm512_setzero_ps (),
4522 (__mmask16) __U);
4523}
4524
4525#else
/* Macro forms of the pshufd wrappers, used when __OPTIMIZE__ is not
   defined.  X is the source, C the selector (cast to int), W the
   pass-through operand and U the write-mask.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(X),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_undefined_epi32 (),                       \
     (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(X),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(X),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
4540
/* Macro forms of the shuf_i64x2 wrappers.  X and Y are the sources,
   C the selector (cast to int), W the pass-through operand and U the
   write-mask.  */
#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(X),                                              \
     (__v8di)(__m512i)(Y),                                              \
     (int)(C),                                                          \
     (__v8di)(__m512i)_mm512_undefined_epi32 (),                        \
     (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(X),                                              \
     (__v8di)(__m512i)(Y),                                              \
     (int)(C),                                                          \
     (__v8di)(__m512i)(W),                                              \
     (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(X),                                              \
     (__v8di)(__m512i)(Y),                                              \
     (int)(C),                                                          \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                          \
     (__mmask8)(U)))
4558
/* Macro forms of the shuf_i32x4 wrappers.  X and Y are the sources,
   C the selector (cast to int), W the pass-through operand and U the
   write-mask.  */
#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(X),                                             \
     (__v16si)(__m512i)(Y),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_undefined_epi32 (),                       \
     (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(X),                                             \
     (__v16si)(__m512i)(Y),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(X),                                             \
     (__v16si)(__m512i)(Y),                                             \
     (int)(C),                                                          \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
4576
/* Macro forms of the shuf_f64x2 wrappers.  X and Y are the sources,
   C the selector (cast to int), W the pass-through operand and U the
   write-mask.  */
#define _mm512_shuffle_f64x2(X, Y, C)                                   \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(X),                                              \
     (__v8df)(__m512d)(Y),                                              \
     (int)(C),                                                          \
     (__v8df)(__m512d)_mm512_undefined_pd(),                            \
     (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(X),                                              \
     (__v8df)(__m512d)(Y),                                              \
     (int)(C),                                                          \
     (__v8df)(__m512d)(W),                                              \
     (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(X),                                              \
     (__v8df)(__m512d)(Y),                                              \
     (int)(C),                                                          \
     (__v8df)(__m512d)_mm512_setzero_pd(),                              \
     (__mmask8)(U)))
4594
/* Macro forms of the shuf_f32x4 wrappers.  X and Y are the sources,
   C the selector (cast to int), W the pass-through operand and U the
   write-mask.  */
#define _mm512_shuffle_f32x4(X, Y, C)                                   \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(X),                                              \
     (__v16sf)(__m512)(Y),                                              \
     (int)(C),                                                          \
     (__v16sf)(__m512)_mm512_undefined_ps(),                            \
     (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                        \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(X),                                              \
     (__v16sf)(__m512)(Y),                                              \
     (int)(C),                                                          \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                          \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(X),                                              \
     (__v16sf)(__m512)(Y),                                              \
     (int)(C),                                                          \
     (__v16sf)(__m512)_mm512_setzero_ps(),                              \
     (__mmask16)(U)))
4612#endif
4613
4614extern __inline __m512i
4615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4616_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4617{
4618 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4619 (__v16si) __B,
4620 (__v16si)
4271e5cb 4621 _mm512_undefined_epi32 (),
756c5857
AI
4622 (__mmask16) -1);
4623}
4624
4625extern __inline __m512i
4626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4627_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4628{
4629 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4630 (__v16si) __B,
4631 (__v16si) __W,
4632 (__mmask16) __U);
4633}
4634
4635extern __inline __m512i
4636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4637_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4638{
4639 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4640 (__v16si) __B,
4641 (__v16si)
4642 _mm512_setzero_si512 (),
4643 (__mmask16) __U);
4644}
4645
4646extern __inline __m512i
4647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4648_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4649{
4650 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4651 (__v16si) __B,
4652 (__v16si)
4271e5cb 4653 _mm512_undefined_epi32 (),
756c5857
AI
4654 (__mmask16) -1);
4655}
4656
4657extern __inline __m512i
4658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4659_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4660{
4661 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4662 (__v16si) __B,
4663 (__v16si) __W,
4664 (__mmask16) __U);
4665}
4666
4667extern __inline __m512i
4668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4669_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4670{
4671 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4672 (__v16si) __B,
4673 (__v16si)
4674 _mm512_setzero_si512 (),
4675 (__mmask16) __U);
4676}
4677
4678extern __inline __m512i
4679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4680_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4681{
4682 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4683 (__v8di) __B,
4684 (__v8di)
4271e5cb 4685 _mm512_undefined_epi32 (),
756c5857
AI
4686 (__mmask8) -1);
4687}
4688
4689extern __inline __m512i
4690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4692{
4693 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4694 (__v8di) __B,
4695 (__v8di) __W,
4696 (__mmask8) __U);
4697}
4698
4699extern __inline __m512i
4700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4702{
4703 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4704 (__v8di) __B,
4705 (__v8di)
4706 _mm512_setzero_si512 (),
4707 (__mmask8) __U);
4708}
4709
4710extern __inline __m512i
4711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4712_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4713{
4714 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4715 (__v8di) __B,
4716 (__v8di)
4271e5cb 4717 _mm512_undefined_epi32 (),
756c5857
AI
4718 (__mmask8) -1);
4719}
4720
4721extern __inline __m512i
4722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4723_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4724{
4725 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4726 (__v8di) __B,
4727 (__v8di) __W,
4728 (__mmask8) __U);
4729}
4730
4731extern __inline __m512i
4732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4733_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4734{
4735 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4736 (__v8di) __B,
4737 (__v8di)
4738 _mm512_setzero_si512 (),
4739 (__mmask8) __U);
4740}
4741
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  Each wraps
   __builtin_ia32_cvttpd2dq512_mask (epi32) or
   __builtin_ia32_cvttpd2udq512_mask (epu32) and forwards __R unchanged.  */

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_undefined_si256 (),
      (__mmask8) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
                                const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask (
      (__v8df) __A,
      (__v8si) __W,
      (__mmask8) __U, __R);
}

/* Zero-operand: _mm256_setzero_si256 () is the second operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_setzero_si256 (),
      (__mmask8) __U, __R);
}

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_undefined_si256 (),
      (__mmask8) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
                                const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask (
      (__v8df) __A,
      (__v8si) __W,
      (__mmask8) __U, __R);
}

/* Zero-operand: _mm256_setzero_si256 () is the second operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_setzero_si256 (),
      (__mmask8) __U, __R);
}
#else
/* Macro forms, selected otherwise.  Each expands to the same builtin with
   the same operands as the inline function of the same name, without the
   casts on A and the mask.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif
4821
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  Each wraps
   __builtin_ia32_cvtpd2dq512_mask (epi32) or
   __builtin_ia32_cvtpd2udq512_mask (epu32) and forwards __R unchanged.  */

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_undefined_si256 (),
      (__mmask8) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
                               const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask (
      (__v8df) __A,
      (__v8si) __W,
      (__mmask8) __U, __R);
}

/* Zero-operand: _mm256_setzero_si256 () is the second operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_setzero_si256 (),
      (__mmask8) __U, __R);
}

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_undefined_si256 (),
      (__mmask8) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
                               const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask (
      (__v8df) __A,
      (__v8si) __W,
      (__mmask8) __U, __R);
}

/* Zero-operand: _mm256_setzero_si256 () is the second operand.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask (
      (__v8df) __A,
      (__v8si) _mm256_setzero_si256 (),
      (__mmask8) __U, __R);
}
#else
/* Macro forms, selected otherwise.  Each expands to the same builtin with
   the same operands as the inline function of the same name, without the
   casts on A and the mask.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif
4901
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  Each wraps
   __builtin_ia32_cvttps2dq512_mask (epi32) or
   __builtin_ia32_cvttps2udq512_mask (epu32) and forwards __R unchanged.  */

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_undefined_epi32 (),
      (__mmask16) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
                                const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask (
      (__v16sf) __A,
      (__v16si) __W,
      (__mmask16) __U, __R);
}

/* Zero-operand: _mm512_setzero_si512 () is the second operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_setzero_si512 (),
      (__mmask16) __U, __R);
}

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_undefined_epi32 (),
      (__mmask16) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
                                const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask (
      (__v16sf) __A,
      (__v16si) __W,
      (__mmask16) __U, __R);
}

/* Zero-operand: _mm512_setzero_si512 () is the second operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_setzero_si512 (),
      (__mmask16) __U, __R);
}
#else
/* Macro forms, selected otherwise.  Each expands to the same builtin with
   the same operands as the inline function of the same name, without the
   casts on A and the mask.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
4981
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  Each wraps
   __builtin_ia32_cvtps2dq512_mask (epi32) or
   __builtin_ia32_cvtps2udq512_mask (epu32) and forwards __R unchanged.  */

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_undefined_epi32 (),
      (__mmask16) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
                               const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask (
      (__v16sf) __A,
      (__v16si) __W,
      (__mmask16) __U, __R);
}

/* Zero-operand: _mm512_setzero_si512 () is the second operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_setzero_si512 (),
      (__mmask16) __U, __R);
}

/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_undefined_epi32 (),
      (__mmask16) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
                               const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask (
      (__v16sf) __A,
      (__v16si) __W,
      (__mmask16) __U, __R);
}

/* Zero-operand: _mm512_setzero_si512 () is the second operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask (
      (__v16sf) __A,
      (__v16si) _mm512_setzero_si512 (),
      (__mmask16) __U, __R);
}
#else
/* Macro forms, selected otherwise.  Each expands to the same builtin with
   the same operands as the inline function of the same name, without the
   casts on A and the mask.  */
#define _mm512_cvt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
5061
5062extern __inline __m128d
5063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064_mm_cvtu32_sd (__m128d __A, unsigned __B)
5065{
5066 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
5067}
5068
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  */

/* Forwards __A, the unsigned 64-bit __B and __R to
   __builtin_ia32_cvtusi2sd64.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
}

/* Forwards __A, the signed 64-bit __B and __R to
   __builtin_ia32_cvtsi2sd64.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}

/* Same body as _mm_cvt_roundi64_sd under the "si64" spelling.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}
#else
/* Macro forms, selected otherwise.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call inside it.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
#endif

#endif
5103
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  */

/* Forwards __A, the unsigned __B and __R to __builtin_ia32_cvtusi2ss32.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
}

/* Forwards __A, the signed __B and __R to __builtin_ia32_cvtsi2ss32.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}

/* Same body as _mm_cvt_roundsi32_ss under the "i32" spelling.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}
#else
/* Macro forms, selected otherwise.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call inside it.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
#endif
5135
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Inline-function forms, selected when __OPTIMIZE__ is defined.  */

/* Forwards __A, the unsigned 64-bit __B and __R to
   __builtin_ia32_cvtusi2ss64.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

/* Forwards __A, the signed 64-bit __B and __R to
   __builtin_ia32_cvtsi2ss64.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

/* Same body as _mm_cvt_roundsi64_ss under the "i64" spelling.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
/* Macro forms, selected otherwise.  The whole expansion is parenthesized so
   that a postfix operator applied to the macro call binds to the cast
   result rather than to the builtin call inside it.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
#endif

#endif
5170
5171extern __inline __m128i
5172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173_mm512_cvtepi32_epi8 (__m512i __A)
5174{
0b192937
UD
5175 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5176 (__v16qi)
5177 _mm_undefined_si128 (),
756c5857
AI
5178 (__mmask16) -1);
5179}
5180
d256b866
IT
5181extern __inline void
5182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5183_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5184{
5185 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5186}
5187
756c5857
AI
5188extern __inline __m128i
5189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5190_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5191{
5192 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5193 (__v16qi) __O, __M);
5194}
5195
5196extern __inline __m128i
5197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5199{
5200 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5201 (__v16qi)
5202 _mm_setzero_si128 (),
5203 __M);
5204}
5205
5206extern __inline __m128i
5207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208_mm512_cvtsepi32_epi8 (__m512i __A)
5209{
0b192937
UD
5210 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5211 (__v16qi)
5212 _mm_undefined_si128 (),
756c5857
AI
5213 (__mmask16) -1);
5214}
5215
d256b866
IT
5216extern __inline void
5217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5218_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5219{
5220 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5221}
5222
756c5857
AI
5223extern __inline __m128i
5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5226{
5227 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5228 (__v16qi) __O, __M);
5229}
5230
5231extern __inline __m128i
5232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5234{
5235 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5236 (__v16qi)
5237 _mm_setzero_si128 (),
5238 __M);
5239}
5240
5241extern __inline __m128i
5242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5243_mm512_cvtusepi32_epi8 (__m512i __A)
5244{
0b192937
UD
5245 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5246 (__v16qi)
5247 _mm_undefined_si128 (),
756c5857
AI
5248 (__mmask16) -1);
5249}
5250
d256b866
IT
5251extern __inline void
5252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5254{
5255 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5256}
5257
756c5857
AI
5258extern __inline __m128i
5259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5260_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5261{
5262 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5263 (__v16qi) __O,
5264 __M);
5265}
5266
5267extern __inline __m128i
5268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5270{
5271 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5272 (__v16qi)
5273 _mm_setzero_si128 (),
5274 __M);
5275}
5276
5277extern __inline __m256i
5278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5279_mm512_cvtepi32_epi16 (__m512i __A)
5280{
0b192937
UD
5281 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5282 (__v16hi)
5283 _mm256_undefined_si256 (),
756c5857
AI
5284 (__mmask16) -1);
5285}
5286
d256b866
IT
5287extern __inline void
5288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5289_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5290{
5291 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5292}
5293
756c5857
AI
5294extern __inline __m256i
5295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5297{
5298 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5299 (__v16hi) __O, __M);
5300}
5301
5302extern __inline __m256i
5303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5304_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5305{
5306 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5307 (__v16hi)
5308 _mm256_setzero_si256 (),
5309 __M);
5310}
5311
5312extern __inline __m256i
5313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5314_mm512_cvtsepi32_epi16 (__m512i __A)
5315{
0b192937
UD
5316 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5317 (__v16hi)
5318 _mm256_undefined_si256 (),
756c5857
AI
5319 (__mmask16) -1);
5320}
5321
d256b866
IT
5322extern __inline void
5323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5324_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5325{
5326 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5327}
5328
756c5857
AI
5329extern __inline __m256i
5330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5331_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5332{
5333 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5334 (__v16hi) __O, __M);
5335}
5336
5337extern __inline __m256i
5338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5339_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5340{
5341 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5342 (__v16hi)
5343 _mm256_setzero_si256 (),
5344 __M);
5345}
5346
5347extern __inline __m256i
5348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349_mm512_cvtusepi32_epi16 (__m512i __A)
5350{
0b192937
UD
5351 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5352 (__v16hi)
5353 _mm256_undefined_si256 (),
756c5857
AI
5354 (__mmask16) -1);
5355}
5356
d256b866
IT
5357extern __inline void
5358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5359_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5360{
5361 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5362}
5363
756c5857
AI
5364extern __inline __m256i
5365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5366_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5367{
5368 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5369 (__v16hi) __O,
5370 __M);
5371}
5372
5373extern __inline __m256i
5374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5376{
5377 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5378 (__v16hi)
5379 _mm256_setzero_si256 (),
5380 __M);
5381}
5382
5383extern __inline __m256i
5384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5385_mm512_cvtepi64_epi32 (__m512i __A)
5386{
0b192937
UD
5387 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5388 (__v8si)
5389 _mm256_undefined_si256 (),
756c5857
AI
5390 (__mmask8) -1);
5391}
5392
d256b866
IT
5393extern __inline void
5394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5395_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5396{
5397 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5398}
5399
756c5857
AI
5400extern __inline __m256i
5401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5402_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5403{
5404 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5405 (__v8si) __O, __M);
5406}
5407
5408extern __inline __m256i
5409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5410_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5411{
5412 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5413 (__v8si)
5414 _mm256_setzero_si256 (),
5415 __M);
5416}
5417
5418extern __inline __m256i
5419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5420_mm512_cvtsepi64_epi32 (__m512i __A)
5421{
0b192937
UD
5422 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5423 (__v8si)
5424 _mm256_undefined_si256 (),
756c5857
AI
5425 (__mmask8) -1);
5426}
5427
d256b866
IT
5428extern __inline void
5429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5430_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5431{
5432 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5433}
5434
756c5857
AI
5435extern __inline __m256i
5436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5437_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5438{
5439 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5440 (__v8si) __O, __M);
5441}
5442
5443extern __inline __m256i
5444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5445_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5446{
5447 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5448 (__v8si)
5449 _mm256_setzero_si256 (),
5450 __M);
5451}
5452
5453extern __inline __m256i
5454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5455_mm512_cvtusepi64_epi32 (__m512i __A)
5456{
0b192937
UD
5457 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5458 (__v8si)
5459 _mm256_undefined_si256 (),
756c5857
AI
5460 (__mmask8) -1);
5461}
5462
6fb82517 5463extern __inline void
d256b866
IT
5464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5465_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5466{
5467 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5468}
5469
756c5857
AI
5470extern __inline __m256i
5471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5472_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5473{
5474 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5475 (__v8si) __O, __M);
5476}
5477
5478extern __inline __m256i
5479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5480_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5481{
5482 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5483 (__v8si)
5484 _mm256_setzero_si256 (),
5485 __M);
5486}
5487
5488extern __inline __m128i
5489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5490_mm512_cvtepi64_epi16 (__m512i __A)
5491{
0b192937
UD
5492 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5493 (__v8hi)
5494 _mm_undefined_si128 (),
756c5857
AI
5495 (__mmask8) -1);
5496}
5497
d256b866
IT
5498extern __inline void
5499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5500_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5501{
5502 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5503}
5504
756c5857
AI
5505extern __inline __m128i
5506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5507_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5508{
5509 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5510 (__v8hi) __O, __M);
5511}
5512
5513extern __inline __m128i
5514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5515_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5516{
5517 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5518 (__v8hi)
5519 _mm_setzero_si128 (),
5520 __M);
5521}
5522
5523extern __inline __m128i
5524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5525_mm512_cvtsepi64_epi16 (__m512i __A)
5526{
0b192937
UD
5527 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5528 (__v8hi)
5529 _mm_undefined_si128 (),
756c5857
AI
5530 (__mmask8) -1);
5531}
5532
d256b866
IT
5533extern __inline void
5534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5535_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5536{
5537 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5538}
5539
756c5857
AI
5540extern __inline __m128i
5541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5543{
5544 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5545 (__v8hi) __O, __M);
5546}
5547
5548extern __inline __m128i
5549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5550_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5551{
5552 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5553 (__v8hi)
5554 _mm_setzero_si128 (),
5555 __M);
5556}
5557
5558extern __inline __m128i
5559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5560_mm512_cvtusepi64_epi16 (__m512i __A)
5561{
0b192937
UD
5562 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5563 (__v8hi)
5564 _mm_undefined_si128 (),
756c5857
AI
5565 (__mmask8) -1);
5566}
5567
d256b866
IT
5568extern __inline void
5569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5570_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5571{
5572 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5573}
5574
756c5857
AI
5575extern __inline __m128i
5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5578{
5579 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5580 (__v8hi) __O, __M);
5581}
5582
5583extern __inline __m128i
5584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5586{
5587 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5588 (__v8hi)
5589 _mm_setzero_si128 (),
5590 __M);
5591}
5592
5593extern __inline __m128i
5594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5595_mm512_cvtepi64_epi8 (__m512i __A)
5596{
0b192937
UD
5597 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5598 (__v16qi)
5599 _mm_undefined_si128 (),
756c5857
AI
5600 (__mmask8) -1);
5601}
5602
d256b866
IT
5603extern __inline void
5604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5605_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5606{
4a948703 5607 __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P,
5608 (__v8di) __A, __M);
d256b866
IT
5609}
5610
756c5857
AI
5611extern __inline __m128i
5612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5614{
5615 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5616 (__v16qi) __O, __M);
5617}
5618
5619extern __inline __m128i
5620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5621_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5622{
5623 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5624 (__v16qi)
5625 _mm_setzero_si128 (),
5626 __M);
5627}
5628
5629extern __inline __m128i
5630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5631_mm512_cvtsepi64_epi8 (__m512i __A)
5632{
0b192937
UD
5633 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5634 (__v16qi)
5635 _mm_undefined_si128 (),
756c5857
AI
5636 (__mmask8) -1);
5637}
5638
d256b866
IT
5639extern __inline void
5640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5641_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5642{
4a948703 5643 __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
d256b866
IT
5644}
5645
756c5857
AI
5646extern __inline __m128i
5647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5648_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5649{
5650 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5651 (__v16qi) __O, __M);
5652}
5653
5654extern __inline __m128i
5655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5657{
5658 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5659 (__v16qi)
5660 _mm_setzero_si128 (),
5661 __M);
5662}
5663
5664extern __inline __m128i
5665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5666_mm512_cvtusepi64_epi8 (__m512i __A)
5667{
0b192937
UD
5668 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5669 (__v16qi)
5670 _mm_undefined_si128 (),
756c5857
AI
5671 (__mmask8) -1);
5672}
5673
d256b866
IT
5674extern __inline void
5675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5676_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5677{
4a948703 5678 __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
d256b866
IT
5679}
5680
756c5857
AI
5681extern __inline __m128i
5682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5683_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5684{
5685 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5686 (__v16qi) __O,
5687 __M);
5688}
5689
5690extern __inline __m128i
5691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5692_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5693{
5694 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5695 (__v16qi)
5696 _mm_setzero_si128 (),
5697 __M);
5698}
5699
5700extern __inline __m512d
5701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5702_mm512_cvtepi32_pd (__m256i __A)
5703{
5704 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5705 (__v8df)
0b192937 5706 _mm512_undefined_pd (),
756c5857
AI
5707 (__mmask8) -1);
5708}
5709
5710extern __inline __m512d
5711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5713{
5714 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5715 (__v8df) __W,
5716 (__mmask8) __U);
5717}
5718
5719extern __inline __m512d
5720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5722{
5723 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5724 (__v8df)
5725 _mm512_setzero_pd (),
5726 (__mmask8) __U);
5727}
5728
5729extern __inline __m512d
5730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5731_mm512_cvtepu32_pd (__m256i __A)
5732{
5733 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5734 (__v8df)
0b192937 5735 _mm512_undefined_pd (),
756c5857
AI
5736 (__mmask8) -1);
5737}
5738
5739extern __inline __m512d
5740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5741_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5742{
5743 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5744 (__v8df) __W,
5745 (__mmask8) __U);
5746}
5747
5748extern __inline __m512d
5749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5750_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5751{
5752 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5753 (__v8df)
5754 _mm512_setzero_pd (),
5755 (__mmask8) __U);
5756}
5757
5758#ifdef __OPTIMIZE__
5759extern __inline __m512
5760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5761_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5762{
5763 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5764 (__v16sf)
0b192937 5765 _mm512_undefined_ps (),
756c5857
AI
5766 (__mmask16) -1, __R);
5767}
5768
5769extern __inline __m512
5770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5772 const int __R)
5773{
5774 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5775 (__v16sf) __W,
5776 (__mmask16) __U, __R);
5777}
5778
5779extern __inline __m512
5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5782{
5783 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5784 (__v16sf)
5785 _mm512_setzero_ps (),
5786 (__mmask16) __U, __R);
5787}
5788
5789extern __inline __m512
5790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5791_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5792{
5793 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5794 (__v16sf)
0b192937 5795 _mm512_undefined_ps (),
756c5857
AI
5796 (__mmask16) -1, __R);
5797}
5798
5799extern __inline __m512
5800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5801_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5802 const int __R)
5803{
5804 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5805 (__v16sf) __W,
5806 (__mmask16) __U, __R);
5807}
5808
5809extern __inline __m512
5810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5812{
5813 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5814 (__v16sf)
5815 _mm512_setzero_ps (),
5816 (__mmask16) __U, __R);
5817}
5818
5819#else
/* Macro form of _mm512_cvt_roundepi32_ps, used when __OPTIMIZE__ is not
   defined.  The whole expansion and the B argument are parenthesized so
   the macro stays a single operand inside larger expressions; the mask
   is spelled (__mmask16) -1 as in the inline version.  */
#define _mm512_cvt_roundepi32_ps(A, B)                                   \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),              \
                                             (__v16sf)                   \
                                             _mm512_undefined_ps (),     \
                                             (__mmask16) -1, (int) (B)))
756c5857
AI
5822
/* Macro form of _mm512_mask_cvt_roundepi32_ps, used when __OPTIMIZE__ is
   not defined.  Every argument is parenthesized and cast the same way
   the inline version casts its parameters, and the whole expansion is
   wrapped in parentheses.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)                        \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),              \
                                             (__v16sf) (__m512) (W),     \
                                             (__mmask16) (U), (int) (B)))
5825
/* Macro form of _mm512_maskz_cvt_roundepi32_ps, used when __OPTIMIZE__
   is not defined.  U and B are parenthesized and cast as in the inline
   version, and the whole expansion is wrapped in parentheses.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)                          \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),              \
                                             (__v16sf)                   \
                                             _mm512_setzero_ps (),       \
                                             (__mmask16) (U), (int) (B)))
5828
/* Macro form of _mm512_cvt_roundepu32_ps, used when __OPTIMIZE__ is not
   defined.  The whole expansion and the B argument are parenthesized so
   the macro stays a single operand inside larger expressions; the mask
   is spelled (__mmask16) -1 as in the inline version.  */
#define _mm512_cvt_roundepu32_ps(A, B)                                   \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),             \
                                              (__v16sf)                  \
                                              _mm512_undefined_ps (),    \
                                              (__mmask16) -1, (int) (B)))
756c5857
AI
5831
/* Macro form of _mm512_mask_cvt_roundepu32_ps, used when __OPTIMIZE__ is
   not defined.  Every argument is parenthesized and cast the same way
   the inline version casts its parameters, and the whole expansion is
   wrapped in parentheses.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)                        \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),             \
                                              (__v16sf) (__m512) (W),    \
                                              (__mmask16) (U), (int) (B)))
5834
/* Macro form of _mm512_maskz_cvt_roundepu32_ps, used when __OPTIMIZE__
   is not defined.  U and B are parenthesized and cast as in the inline
   version, and the whole expansion is wrapped in parentheses.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)                          \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),             \
                                              (__v16sf)                  \
                                              _mm512_setzero_ps (),      \
                                              (__mmask16) (U), (int) (B)))
5837#endif
5838
5839#ifdef __OPTIMIZE__
5840extern __inline __m256d
5841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5842_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5843{
5844 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5845 __imm,
5846 (__v4df)
0b192937 5847 _mm256_undefined_pd (),
756c5857
AI
5848 (__mmask8) -1);
5849}
5850
5851extern __inline __m256d
5852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5853_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5854 const int __imm)
5855{
5856 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5857 __imm,
5858 (__v4df) __W,
5859 (__mmask8) __U);
5860}
5861
5862extern __inline __m256d
5863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5864_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5865{
5866 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5867 __imm,
5868 (__v4df)
5869 _mm256_setzero_pd (),
5870 (__mmask8) __U);
5871}
5872
5873extern __inline __m128
5874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5875_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5876{
5877 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5878 __imm,
5879 (__v4sf)
0b192937 5880 _mm_undefined_ps (),
756c5857
AI
5881 (__mmask8) -1);
5882}
5883
5884extern __inline __m128
5885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5886_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5887 const int __imm)
5888{
5889 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5890 __imm,
5891 (__v4sf) __W,
5892 (__mmask8) __U);
5893}
5894
5895extern __inline __m128
5896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5897_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5898{
5899 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5900 __imm,
5901 (__v4sf)
5902 _mm_setzero_ps (),
5903 (__mmask8) __U);
5904}
5905
5906extern __inline __m256i
5907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5908_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5909{
5910 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5911 __imm,
5912 (__v4di)
0b192937 5913 _mm256_undefined_si256 (),
756c5857
AI
5914 (__mmask8) -1);
5915}
5916
5917extern __inline __m256i
5918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5919_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5920 const int __imm)
5921{
5922 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5923 __imm,
5924 (__v4di) __W,
5925 (__mmask8) __U);
5926}
5927
5928extern __inline __m256i
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5931{
5932 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5933 __imm,
5934 (__v4di)
5935 _mm256_setzero_si256 (),
5936 (__mmask8) __U);
5937}
5938
5939extern __inline __m128i
5940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5941_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5942{
5943 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5944 __imm,
5945 (__v4si)
0b192937 5946 _mm_undefined_si128 (),
756c5857
AI
5947 (__mmask8) -1);
5948}
5949
5950extern __inline __m128i
5951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5953 const int __imm)
5954{
5955 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5956 __imm,
5957 (__v4si) __W,
5958 (__mmask8) __U);
5959}
5960
5961extern __inline __m128i
5962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5963_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5964{
5965 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5966 __imm,
5967 (__v4si)
5968 _mm_setzero_si128 (),
5969 (__mmask8) __U);
5970}
5971#else
5972
/* Macro form of _mm512_extractf64x4_pd, used when __OPTIMIZE__ is not
   defined: all-ones mask, _mm256_undefined_pd () as third operand.  */
#define _mm512_extractf64x4_pd(X, C)                                     \
  ((__m256d)                                                             \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),   \
                                     (__v4df)(__m256d)                   \
                                     _mm256_undefined_pd(),              \
                                     (__mmask8)-1))
5978
/* Macro form of _mm512_mask_extractf64x4_pd, used when __OPTIMIZE__ is
   not defined: W is the third builtin operand, U the mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C)                          \
  ((__m256d)                                                             \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),   \
                                     (__v4df)(__m256d)(W),               \
                                     (__mmask8)(U)))
5984
/* Macro form of _mm512_maskz_extractf64x4_pd, used when __OPTIMIZE__ is
   not defined: _mm256_setzero_pd () as third operand, U as mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C)                            \
  ((__m256d)                                                             \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),   \
                                     (__v4df)(__m256d)                   \
                                     _mm256_setzero_pd(),                \
                                     (__mmask8)(U)))
5990
/* Macro form of _mm512_extractf32x4_ps, used when __OPTIMIZE__ is not
   defined: all-ones mask, _mm_undefined_ps () as third operand.  */
#define _mm512_extractf32x4_ps(X, C)                                     \
  ((__m128)                                                              \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),   \
                                     (__v4sf)(__m128)                    \
                                     _mm_undefined_ps(),                 \
                                     (__mmask8)-1))
5996
/* Macro form of _mm512_mask_extractf32x4_ps, used when __OPTIMIZE__ is
   not defined: W is the third builtin operand, U the mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C)                          \
  ((__m128)                                                              \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),   \
                                     (__v4sf)(__m128)(W),                \
                                     (__mmask8)(U)))
6002
/* Macro form of _mm512_maskz_extractf32x4_ps, used when __OPTIMIZE__ is
   not defined: _mm_setzero_ps () as third operand, U as mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C)                            \
  ((__m128)                                                              \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),   \
                                     (__v4sf)(__m128)                    \
                                     _mm_setzero_ps(),                   \
                                     (__mmask8)(U)))
6008
/* Macro form of _mm512_extracti64x4_epi64, used when __OPTIMIZE__ is not
   defined: all-ones mask, _mm256_undefined_si256 () as third operand.  */
#define _mm512_extracti64x4_epi64(X, C)                                  \
  ((__m256i)                                                             \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),   \
                                     (__v4di)(__m256i)                   \
                                     _mm256_undefined_si256 (),          \
                                     (__mmask8)-1))
6014
/* Macro form of _mm512_mask_extracti64x4_epi64, used when __OPTIMIZE__
   is not defined: W is the third builtin operand, U the mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                       \
  ((__m256i)                                                             \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),   \
                                     (__v4di)(__m256i)(W),               \
                                     (__mmask8)(U)))
6020
/* Macro form of _mm512_maskz_extracti64x4_epi64, used when __OPTIMIZE__
   is not defined: _mm256_setzero_si256 () as third operand, U as mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C)                         \
  ((__m256i)                                                             \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),   \
                                     (__v4di)(__m256i)                   \
                                     _mm256_setzero_si256 (),            \
                                     (__mmask8)(U)))
6026
/* Macro form of _mm512_extracti32x4_epi32, used when __OPTIMIZE__ is not
   defined: all-ones mask, _mm_undefined_si128 () as third operand.  */
#define _mm512_extracti32x4_epi32(X, C)                                  \
  ((__m128i)                                                             \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C),  \
                                     (__v4si)(__m128i)                   \
                                     _mm_undefined_si128 (),             \
                                     (__mmask8)-1))
6032
/* Macro form of _mm512_mask_extracti32x4_epi32, used when __OPTIMIZE__
   is not defined: W is the third builtin operand, U the mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                       \
  ((__m128i)                                                             \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C),  \
                                     (__v4si)(__m128i)(W),               \
                                     (__mmask8)(U)))
6038
/* Macro form of _mm512_maskz_extracti32x4_epi32, used when __OPTIMIZE__
   is not defined: _mm_setzero_si128 () as third operand, U as mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C)                         \
  ((__m128i)                                                             \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C),  \
                                     (__v4si)(__m128i)                   \
                                     _mm_setzero_si128 (),               \
                                     (__mmask8)(U)))
6044#endif
6045
6046#ifdef __OPTIMIZE__
6047extern __inline __m512i
6048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6049_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
6050{
6051 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
6052 (__v4si) __B,
6053 __imm,
6054 (__v16si) __A, -1);
6055}
6056
6057extern __inline __m512
6058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
6060{
6061 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
6062 (__v4sf) __B,
6063 __imm,
6064 (__v16sf) __A, -1);
6065}
6066
6067extern __inline __m512i
6068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6070{
6071 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6072 (__v4di) __B,
6073 __imm,
6074 (__v8di)
4271e5cb 6075 _mm512_undefined_epi32 (),
756c5857
AI
6076 (__mmask8) -1);
6077}
6078
6079extern __inline __m512i
6080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6081_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6082 __m256i __B, const int __imm)
6083{
6084 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6085 (__v4di) __B,
6086 __imm,
6087 (__v8di) __W,
6088 (__mmask8) __U);
6089}
6090
6091extern __inline __m512i
6092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6093_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6094 const int __imm)
6095{
6096 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6097 (__v4di) __B,
6098 __imm,
6099 (__v8di)
6100 _mm512_setzero_si512 (),
6101 (__mmask8) __U);
6102}
6103
6104extern __inline __m512d
6105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6106_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6107{
6108 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6109 (__v4df) __B,
6110 __imm,
6111 (__v8df)
0b192937 6112 _mm512_undefined_pd (),
756c5857
AI
6113 (__mmask8) -1);
6114}
6115
6116extern __inline __m512d
6117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6118_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6119 __m256d __B, const int __imm)
6120{
6121 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6122 (__v4df) __B,
6123 __imm,
6124 (__v8df) __W,
6125 (__mmask8) __U);
6126}
6127
6128extern __inline __m512d
6129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6130_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6131 const int __imm)
6132{
6133 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6134 (__v4df) __B,
6135 __imm,
6136 (__v8df)
6137 _mm512_setzero_pd (),
6138 (__mmask8) __U);
6139}
6140#else
/* Macro form of _mm512_insertf32x4, used when __OPTIMIZE__ is not
   defined.  X is expanded exactly once so an argument with side effects
   is not evaluated twice; the fourth builtin operand is an undefined
   vector, the same arrangement the _mm512_insertf64x4 macro uses with
   its all-ones mask.  */
#define _mm512_insertf32x4(X, Y, C)                                      \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),      \
    (__v4sf)(__m128) (Y), (int) (C),                                     \
    (__v16sf)(__m512) _mm512_undefined_ps (),                            \
    (__mmask16)(-1)))
6144
/* Macro form of _mm512_inserti32x4, used when __OPTIMIZE__ is not
   defined.  X is expanded exactly once so an argument with side effects
   is not evaluated twice; the fourth builtin operand is an undefined
   vector, the same arrangement the _mm512_inserti64x4 macro uses with
   its all-ones mask.  */
#define _mm512_inserti32x4(X, Y, C)                                      \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),    \
    (__v4si)(__m128i) (Y), (int) (C),                                    \
    (__v16si)(__m512i) _mm512_undefined_epi32 (),                        \
    (__mmask16)(-1)))
6148
/* Macro form of _mm512_insertf64x4, used when __OPTIMIZE__ is not
   defined: all-ones mask, _mm512_undefined_pd () as fourth operand.  */
#define _mm512_insertf64x4(X, Y, C)                                      \
  ((__m512d)                                                             \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),               \
                                    (__v4df)(__m256d) (Y), (int) (C),    \
                                    (__v8df)(__m512d)                    \
                                    _mm512_undefined_pd(),               \
                                    (__mmask8)-1))
6154
/* Macro form of _mm512_mask_insertf64x4, used when __OPTIMIZE__ is not
   defined: W is the fourth builtin operand, U the mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C)                           \
  ((__m512d)                                                             \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),               \
                                    (__v4df)(__m256d) (Y), (int) (C),    \
                                    (__v8df)(__m512d)(W),                \
                                    (__mmask8)(U)))
6160
/* Macro form of _mm512_maskz_insertf64x4, used when __OPTIMIZE__ is not
   defined: _mm512_setzero_pd () as fourth operand, U as mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C)                             \
  ((__m512d)                                                             \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),               \
                                    (__v4df)(__m256d) (Y), (int) (C),    \
                                    (__v8df)(__m512d)                    \
                                    _mm512_setzero_pd(),                 \
                                    (__mmask8)(U)))
6166
/* Macro form of _mm512_inserti64x4, used when __OPTIMIZE__ is not
   defined: all-ones mask, _mm512_undefined_epi32 () as fourth operand.  */
#define _mm512_inserti64x4(X, Y, C)                                      \
  ((__m512i)                                                             \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),               \
                                    (__v4di)(__m256i) (Y), (int) (C),    \
                                    (__v8di)(__m512i)                    \
                                    _mm512_undefined_epi32 (),           \
                                    (__mmask8)-1))
6172
/* Macro form of _mm512_mask_inserti64x4, used when __OPTIMIZE__ is not
   defined: W is the fourth builtin operand, U the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C)                           \
  ((__m512i)                                                             \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),               \
                                    (__v4di)(__m256i) (Y), (int) (C),    \
                                    (__v8di)(__m512i)(W),                \
                                    (__mmask8)(U)))
6178
/* Macro form of _mm512_maskz_inserti64x4, used when __OPTIMIZE__ is not
   defined: _mm512_setzero_si512 () as fourth operand, U as mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C)                             \
  ((__m512i)                                                             \
   __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),               \
                                    (__v4di)(__m256i) (Y), (int) (C),    \
                                    (__v8di)(__m512i)                    \
                                    _mm512_setzero_si512 (),             \
                                    (__mmask8)(U)))
6184#endif
6185
6186extern __inline __m512d
6187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188_mm512_loadu_pd (void const *__P)
6189{
c6b0037d 6190 return *(__m512d_u *)__P;
756c5857
AI
6191}
6192
6193extern __inline __m512d
6194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6195_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6196{
fc9cf6da 6197 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6198 (__v8df) __W,
6199 (__mmask8) __U);
6200}
6201
6202extern __inline __m512d
6203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6204_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6205{
fc9cf6da 6206 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6207 (__v8df)
6208 _mm512_setzero_pd (),
6209 (__mmask8) __U);
6210}
6211
6212extern __inline void
6213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6214_mm512_storeu_pd (void *__P, __m512d __A)
6215{
c6b0037d 6216 *(__m512d_u *)__P = __A;
756c5857
AI
6217}
6218
6219extern __inline void
6220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6222{
fc9cf6da 6223 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
6224 (__mmask8) __U);
6225}
6226
6227extern __inline __m512
6228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6229_mm512_loadu_ps (void const *__P)
6230{
c6b0037d 6231 return *(__m512_u *)__P;
756c5857
AI
6232}
6233
6234extern __inline __m512
6235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6236_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6237{
fc9cf6da 6238 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6239 (__v16sf) __W,
6240 (__mmask16) __U);
6241}
6242
6243extern __inline __m512
6244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6245_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6246{
fc9cf6da 6247 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6248 (__v16sf)
6249 _mm512_setzero_ps (),
6250 (__mmask16) __U);
6251}
6252
6253extern __inline void
6254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6255_mm512_storeu_ps (void *__P, __m512 __A)
6256{
c6b0037d 6257 *(__m512_u *)__P = __A;
756c5857
AI
6258}
6259
6260extern __inline void
6261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6262_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6263{
fc9cf6da 6264 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
6265 (__mmask16) __U);
6266}
6267
459d21c6
JJ
6268extern __inline __m128
6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
6271{
6272 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
6273}
6274
6275extern __inline __m128
6276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277_mm_maskz_load_ss (__mmask8 __U, const float *__P)
6278{
6279 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
6280 __U);
6281}
6282
6283extern __inline __m128d
6284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6285_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
6286{
6287 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
6288}
6289
6290extern __inline __m128d
6291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6292_mm_maskz_load_sd (__mmask8 __U, const double *__P)
6293{
6294 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
6295 __U);
6296}
6297
6298extern __inline __m128
6299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6300_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6301{
6302 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6303 (__v4sf) __W, __U);
6304}
6305
6306extern __inline __m128
6307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6308_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
6309{
6310 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6311 (__v4sf) _mm_setzero_ps (), __U);
6312}
6313
6314extern __inline __m128d
6315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6316_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6317{
6318 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6319 (__v2df) __W, __U);
6320}
6321
6322extern __inline __m128d
6323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6324_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
6325{
6326 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6327 (__v2df) _mm_setzero_pd (),
6328 __U);
6329}
6330
6331extern __inline void
6332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6333_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
6334{
6335 __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
6336}
6337
6338extern __inline void
6339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6340_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
6341{
6342 __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
6343}
6344
4c98bdad
SP
6345extern __inline __m512i
6346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6347_mm512_loadu_epi64 (void const *__P)
6348{
6349 return *(__m512i_u *) __P;
6350}
6351
756c5857
AI
6352extern __inline __m512i
6353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6354_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6355{
fc9cf6da 6356 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6357 (__v8di) __W,
6358 (__mmask8) __U);
6359}
6360
6361extern __inline __m512i
6362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6363_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6364{
fc9cf6da 6365 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6366 (__v8di)
6367 _mm512_setzero_si512 (),
6368 (__mmask8) __U);
6369}
6370
4c98bdad
SP
6371extern __inline void
6372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6373_mm512_storeu_epi64 (void *__P, __m512i __A)
6374{
6375 *(__m512i_u *) __P = (__m512i_u) __A;
6376}
6377
756c5857
AI
6378extern __inline void
6379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6380_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6381{
fc9cf6da 6382 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
6383 (__mmask8) __U);
6384}
6385
6386extern __inline __m512i
6387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6388_mm512_loadu_si512 (void const *__P)
756c5857 6389{
c6b0037d 6390 return *(__m512i_u *)__P;
756c5857
AI
6391}
6392
4c98bdad
SP
6393extern __inline __m512i
6394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6395_mm512_loadu_epi32 (void const *__P)
6396{
6397 return *(__m512i_u *) __P;
6398}
6399
756c5857
AI
6400extern __inline __m512i
6401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6402_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6403{
fc9cf6da 6404 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6405 (__v16si) __W,
6406 (__mmask16) __U);
6407}
6408
6409extern __inline __m512i
6410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6411_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6412{
fc9cf6da 6413 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6414 (__v16si)
6415 _mm512_setzero_si512 (),
6416 (__mmask16) __U);
6417}
6418
6419extern __inline void
6420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6421_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 6422{
c6b0037d 6423 *(__m512i_u *)__P = __A;
756c5857
AI
6424}
6425
4c98bdad
SP
6426extern __inline void
6427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6428_mm512_storeu_epi32 (void *__P, __m512i __A)
6429{
6430 *(__m512i_u *) __P = (__m512i_u) __A;
6431}
6432
756c5857
AI
6433extern __inline void
6434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6435_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6436{
fc9cf6da 6437 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
6438 (__mmask16) __U);
6439}
6440
6441extern __inline __m512d
6442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6443_mm512_permutevar_pd (__m512d __A, __m512i __C)
6444{
6445 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6446 (__v8di) __C,
6447 (__v8df)
0b192937 6448 _mm512_undefined_pd (),
756c5857
AI
6449 (__mmask8) -1);
6450}
6451
6452extern __inline __m512d
6453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6454_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6455{
6456 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6457 (__v8di) __C,
6458 (__v8df) __W,
6459 (__mmask8) __U);
6460}
6461
6462extern __inline __m512d
6463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6464_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6465{
6466 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6467 (__v8di) __C,
6468 (__v8df)
6469 _mm512_setzero_pd (),
6470 (__mmask8) __U);
6471}
6472
6473extern __inline __m512
6474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6475_mm512_permutevar_ps (__m512 __A, __m512i __C)
6476{
6477 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6478 (__v16si) __C,
6479 (__v16sf)
0b192937 6480 _mm512_undefined_ps (),
756c5857
AI
6481 (__mmask16) -1);
6482}
6483
6484extern __inline __m512
6485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6486_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6487{
6488 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6489 (__v16si) __C,
6490 (__v16sf) __W,
6491 (__mmask16) __U);
6492}
6493
6494extern __inline __m512
6495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6496_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6497{
6498 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6499 (__v16si) __C,
6500 (__v16sf)
6501 _mm512_setzero_ps (),
6502 (__mmask16) __U);
6503}
6504
6505extern __inline __m512i
6506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6507_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6508{
6509 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6510 /* idx */ ,
6511 (__v8di) __A,
6512 (__v8di) __B,
6513 (__mmask8) -1);
6514}
6515
6516extern __inline __m512i
6517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6518_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6519 __m512i __B)
6520{
6521 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6522 /* idx */ ,
6523 (__v8di) __A,
6524 (__v8di) __B,
6525 (__mmask8) __U);
6526}
6527
6528extern __inline __m512i
6529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6531 __mmask8 __U, __m512i __B)
6532{
6533 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6534 (__v8di) __I
6535 /* idx */ ,
6536 (__v8di) __B,
6537 (__mmask8) __U);
6538}
6539
6540extern __inline __m512i
6541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6543 __m512i __I, __m512i __B)
6544{
6545 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6546 /* idx */ ,
6547 (__v8di) __A,
6548 (__v8di) __B,
6549 (__mmask8) __U);
6550}
6551
6552extern __inline __m512i
6553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6554_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6555{
6556 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6557 /* idx */ ,
6558 (__v16si) __A,
6559 (__v16si) __B,
6560 (__mmask16) -1);
6561}
6562
6563extern __inline __m512i
6564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6565_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6566 __m512i __I, __m512i __B)
6567{
6568 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6569 /* idx */ ,
6570 (__v16si) __A,
6571 (__v16si) __B,
6572 (__mmask16) __U);
6573}
6574
6575extern __inline __m512i
6576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6577_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6578 __mmask16 __U, __m512i __B)
6579{
6580 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6581 (__v16si) __I
6582 /* idx */ ,
6583 (__v16si) __B,
6584 (__mmask16) __U);
6585}
6586
6587extern __inline __m512i
6588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6589_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6590 __m512i __I, __m512i __B)
6591{
6592 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6593 /* idx */ ,
6594 (__v16si) __A,
6595 (__v16si) __B,
6596 (__mmask16) __U);
6597}
6598
6599extern __inline __m512d
6600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6601_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6602{
6603 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6604 /* idx */ ,
6605 (__v8df) __A,
6606 (__v8df) __B,
6607 (__mmask8) -1);
6608}
6609
6610extern __inline __m512d
6611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6612_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6613 __m512d __B)
6614{
6615 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6616 /* idx */ ,
6617 (__v8df) __A,
6618 (__v8df) __B,
6619 (__mmask8) __U);
6620}
6621
6622extern __inline __m512d
6623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6624_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6625 __m512d __B)
6626{
6627 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6628 (__v8di) __I
6629 /* idx */ ,
6630 (__v8df) __B,
6631 (__mmask8) __U);
6632}
6633
6634extern __inline __m512d
6635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6636_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6637 __m512d __B)
6638{
6639 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6640 /* idx */ ,
6641 (__v8df) __A,
6642 (__v8df) __B,
6643 (__mmask8) __U);
6644}
6645
6646extern __inline __m512
6647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6648_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6649{
6650 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6651 /* idx */ ,
6652 (__v16sf) __A,
6653 (__v16sf) __B,
6654 (__mmask16) -1);
6655}
6656
6657extern __inline __m512
6658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6659_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6660{
6661 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6662 /* idx */ ,
6663 (__v16sf) __A,
6664 (__v16sf) __B,
6665 (__mmask16) __U);
6666}
6667
6668extern __inline __m512
6669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6670_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6671 __m512 __B)
6672{
6673 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6674 (__v16si) __I
6675 /* idx */ ,
6676 (__v16sf) __B,
6677 (__mmask16) __U);
6678}
6679
6680extern __inline __m512
6681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6682_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6683 __m512 __B)
6684{
6685 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6686 /* idx */ ,
6687 (__v16sf) __A,
6688 (__v16sf) __B,
6689 (__mmask16) __U);
6690}
6691
6692#ifdef __OPTIMIZE__
6693extern __inline __m512d
6694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6695_mm512_permute_pd (__m512d __X, const int __C)
6696{
6697 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6698 (__v8df)
0b192937 6699 _mm512_undefined_pd (),
756c5857
AI
6700 (__mmask8) -1);
6701}
6702
6703extern __inline __m512d
6704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6706{
6707 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6708 (__v8df) __W,
6709 (__mmask8) __U);
6710}
6711
6712extern __inline __m512d
6713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6714_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6715{
6716 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6717 (__v8df)
6718 _mm512_setzero_pd (),
6719 (__mmask8) __U);
6720}
6721
6722extern __inline __m512
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm512_permute_ps (__m512 __X, const int __C)
6725{
6726 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6727 (__v16sf)
0b192937 6728 _mm512_undefined_ps (),
756c5857
AI
6729 (__mmask16) -1);
6730}
6731
6732extern __inline __m512
6733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6735{
6736 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6737 (__v16sf) __W,
6738 (__mmask16) __U);
6739}
6740
6741extern __inline __m512
6742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6743_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6744{
6745 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6746 (__v16sf)
6747 _mm512_setzero_ps (),
6748 (__mmask16) __U);
6749}
6750#else
/* Macro form of _mm512_permute_pd, used when __OPTIMIZE__ is not
   defined.  Expands to the vpermilpd512 builtin with an undefined
   source operand and an all-ones mask.  */
#define _mm512_permute_pd(V, IMM) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df) (__m512d) (V), \
                                     (int) (IMM), \
                                     (__v8df) (__m512d) \
                                     _mm512_undefined_pd (), \
                                     (__mmask8) (-1)))
6755
/* Macro form of _mm512_mask_permute_pd, used when __OPTIMIZE__ is not
   defined.  SRC is the builtin's source operand and K its mask.  */
#define _mm512_mask_permute_pd(SRC, K, V, IMM) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df) (__m512d) (V), \
                                     (int) (IMM), \
                                     (__v8df) (__m512d) (SRC), \
                                     (__mmask8) (K)))
6760
/* Macro form of _mm512_maskz_permute_pd, used when __OPTIMIZE__ is not
   defined.  The source operand is an all-zero vector; K is the
   mask.  */
#define _mm512_maskz_permute_pd(K, V, IMM) \
  ((__m512d) \
   __builtin_ia32_vpermilpd512_mask ((__v8df) (__m512d) (V), \
                                     (int) (IMM), \
                                     (__v8df) (__m512d) \
                                     _mm512_setzero_pd (), \
                                     (__mmask8) (K)))
6765
/* Macro form of _mm512_permute_ps, used when __OPTIMIZE__ is not
   defined.  Expands to the vpermilps512 builtin with an undefined
   source operand and an all-ones mask.  */
#define _mm512_permute_ps(V, IMM) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf) (__m512) (V), \
                                     (int) (IMM), \
                                     (__v16sf) (__m512) \
                                     _mm512_undefined_ps (), \
                                     (__mmask16) (-1)))
6770
/* Macro form of _mm512_mask_permute_ps, used when __OPTIMIZE__ is not
   defined.  SRC is the builtin's source operand and K its mask.  */
#define _mm512_mask_permute_ps(SRC, K, V, IMM) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf) (__m512) (V), \
                                     (int) (IMM), \
                                     (__v16sf) (__m512) (SRC), \
                                     (__mmask16) (K)))
6775
/* Macro form of _mm512_maskz_permute_ps, used when __OPTIMIZE__ is not
   defined.  The source operand is an all-zero vector; K is the
   mask.  */
#define _mm512_maskz_permute_ps(K, V, IMM) \
  ((__m512) \
   __builtin_ia32_vpermilps512_mask ((__v16sf) (__m512) (V), \
                                     (int) (IMM), \
                                     (__v16sf) (__m512) \
                                     _mm512_setzero_ps (), \
                                     (__mmask16) (K)))
6780#endif
6781
6782#ifdef __OPTIMIZE__
6783extern __inline __m512i
6784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6785_mm512_permutex_epi64 (__m512i __X, const int __I)
6786{
6787 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6788 (__v8di)
4271e5cb 6789 _mm512_undefined_epi32 (),
756c5857
AI
6790 (__mmask8) (-1));
6791}
6792
6793extern __inline __m512i
6794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6795_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6796 __m512i __X, const int __I)
6797{
6798 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6799 (__v8di) __W,
6800 (__mmask8) __M);
6801}
6802
6803extern __inline __m512i
6804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6805_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6806{
6807 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6808 (__v8di)
6809 _mm512_setzero_si512 (),
6810 (__mmask8) __M);
6811}
6812
6813extern __inline __m512d
6814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815_mm512_permutex_pd (__m512d __X, const int __M)
6816{
6817 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6818 (__v8df)
0b192937 6819 _mm512_undefined_pd (),
756c5857
AI
6820 (__mmask8) -1);
6821}
6822
6823extern __inline __m512d
6824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6825_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6826{
6827 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6828 (__v8df) __W,
6829 (__mmask8) __U);
6830}
6831
6832extern __inline __m512d
6833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6835{
6836 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6837 (__v8df)
6838 _mm512_setzero_pd (),
6839 (__mmask8) __U);
6840}
6841#else
/* Macro form of _mm512_permutex_pd, used when __OPTIMIZE__ is not
   defined.  Expands to the permdf512 builtin with an undefined source
   operand and an all-ones mask.  */
#define _mm512_permutex_pd(V, IMM) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df) (__m512d) (V), \
                                  (int) (IMM), \
                                  (__v8df) (__m512d) \
                                  _mm512_undefined_pd (), \
                                  (__mmask8) -1))
756c5857
AI
6846
/* Macro form of _mm512_mask_permutex_pd, used when __OPTIMIZE__ is not
   defined.  SRC is the builtin's source operand and K its mask.  */
#define _mm512_mask_permutex_pd(SRC, K, V, IMM) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df) (__m512d) (V), \
                                  (int) (IMM), \
                                  (__v8df) (__m512d) (SRC), \
                                  (__mmask8) (K)))
6850
/* Macro form of _mm512_maskz_permutex_pd, used when __OPTIMIZE__ is not
   defined.  The source operand is an all-zero vector; K is the
   mask.  */
#define _mm512_maskz_permutex_pd(K, V, IMM) \
  ((__m512d) \
   __builtin_ia32_permdf512_mask ((__v8df) (__m512d) (V), \
                                  (int) (IMM), \
                                  (__v8df) (__m512d) \
                                  _mm512_setzero_pd (), \
                                  (__mmask8) (K)))
6855
/* Macro form of _mm512_permutex_epi64, used when __OPTIMIZE__ is not
   defined.  Expands to the permdi512 builtin with an undefined source
   operand and an all-ones mask.  */
#define _mm512_permutex_epi64(V, IMM) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di) (__m512i) (V), \
                                  (int) (IMM), \
                                  (__v8di) (__m512i) \
                                  (_mm512_undefined_epi32 ()), \
                                  (__mmask8) (-1)))
6862
/* Macro form of _mm512_maskz_permutex_epi64, used when __OPTIMIZE__ is
   not defined.  The source operand is an all-zero vector; K is the
   mask.  */
#define _mm512_maskz_permutex_epi64(K, V, IMM) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di) (__m512i) (V), \
                                  (int) (IMM), \
                                  (__v8di) (__m512i) \
                                  (_mm512_setzero_si512 ()), \
                                  (__mmask8) (K)))
6869
/* Macro form of _mm512_mask_permutex_epi64, used when __OPTIMIZE__ is
   not defined.  SRC is the builtin's source operand and K its mask.  */
#define _mm512_mask_permutex_epi64(SRC, K, V, IMM) \
  ((__m512i) \
   __builtin_ia32_permdi512_mask ((__v8di) (__m512i) (V), \
                                  (int) (IMM), \
                                  (__v8di) (__m512i) (SRC), \
                                  (__mmask8) (K)))
6875#endif
6876
6877extern __inline __m512i
6878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6879_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6880{
583a9919
KY
6881 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6882 (__v8di) __X,
756c5857
AI
6883 (__v8di)
6884 _mm512_setzero_si512 (),
6885 __M);
6886}
6887
6888extern __inline __m512i
6889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6891{
583a9919
KY
6892 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6893 (__v8di) __X,
756c5857 6894 (__v8di)
4271e5cb 6895 _mm512_undefined_epi32 (),
756c5857
AI
6896 (__mmask8) -1);
6897}
6898
6899extern __inline __m512i
6900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6901_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6902 __m512i __Y)
6903{
583a9919
KY
6904 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6905 (__v8di) __X,
756c5857
AI
6906 (__v8di) __W,
6907 __M);
6908}
6909
6910extern __inline __m512i
6911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6912_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6913{
583a9919
KY
6914 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6915 (__v16si) __X,
756c5857
AI
6916 (__v16si)
6917 _mm512_setzero_si512 (),
6918 __M);
6919}
6920
6921extern __inline __m512i
6922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6923_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6924{
583a9919
KY
6925 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6926 (__v16si) __X,
756c5857 6927 (__v16si)
4271e5cb 6928 _mm512_undefined_epi32 (),
756c5857
AI
6929 (__mmask16) -1);
6930}
6931
6932extern __inline __m512i
6933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6934_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6935 __m512i __Y)
6936{
583a9919
KY
6937 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6938 (__v16si) __X,
756c5857
AI
6939 (__v16si) __W,
6940 __M);
6941}
6942
6943extern __inline __m512d
6944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6945_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6946{
6947 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6948 (__v8di) __X,
6949 (__v8df)
0b192937 6950 _mm512_undefined_pd (),
756c5857
AI
6951 (__mmask8) -1);
6952}
6953
6954extern __inline __m512d
6955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6956_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6957{
6958 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6959 (__v8di) __X,
6960 (__v8df) __W,
6961 (__mmask8) __U);
6962}
6963
6964extern __inline __m512d
6965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6967{
6968 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6969 (__v8di) __X,
6970 (__v8df)
6971 _mm512_setzero_pd (),
6972 (__mmask8) __U);
6973}
6974
6975extern __inline __m512
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6978{
6979 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6980 (__v16si) __X,
6981 (__v16sf)
0b192937 6982 _mm512_undefined_ps (),
756c5857
AI
6983 (__mmask16) -1);
6984}
6985
6986extern __inline __m512
6987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6989{
6990 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6991 (__v16si) __X,
6992 (__v16sf) __W,
6993 (__mmask16) __U);
6994}
6995
6996extern __inline __m512
6997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6998_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6999{
7000 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
7001 (__v16si) __X,
7002 (__v16sf)
7003 _mm512_setzero_ps (),
7004 (__mmask16) __U);
7005}
7006
7007#ifdef __OPTIMIZE__
7008extern __inline __m512
7009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
7011{
7012 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7013 (__v16sf) __V, __imm,
7014 (__v16sf)
0b192937 7015 _mm512_undefined_ps (),
756c5857
AI
7016 (__mmask16) -1);
7017}
7018
7019extern __inline __m512
7020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
7022 __m512 __V, const int __imm)
7023{
7024 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7025 (__v16sf) __V, __imm,
7026 (__v16sf) __W,
7027 (__mmask16) __U);
7028}
7029
7030extern __inline __m512
7031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
7033{
7034 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7035 (__v16sf) __V, __imm,
7036 (__v16sf)
7037 _mm512_setzero_ps (),
7038 (__mmask16) __U);
7039}
7040
7041extern __inline __m512d
7042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
7044{
7045 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7046 (__v8df) __V, __imm,
7047 (__v8df)
0b192937 7048 _mm512_undefined_pd (),
756c5857
AI
7049 (__mmask8) -1);
7050}
7051
7052extern __inline __m512d
7053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
7055 __m512d __V, const int __imm)
7056{
7057 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7058 (__v8df) __V, __imm,
7059 (__v8df) __W,
7060 (__mmask8) __U);
7061}
7062
7063extern __inline __m512d
7064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
7066 const int __imm)
7067{
7068 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7069 (__v8df) __V, __imm,
7070 (__v8df)
7071 _mm512_setzero_pd (),
7072 (__mmask8) __U);
7073}
7074
7075extern __inline __m512d
7076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7077_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
756c5857
AI
7078 const int __imm, const int __R)
7079{
040d2bba
WX
7080 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
7081 (__v8df) __B,
7082 (__v8di) __C,
756c5857 7083 __imm,
040d2bba 7084 (__mmask8) -1, __R);
756c5857
AI
7085}
7086
7087extern __inline __m512d
7088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7089_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
7090 __m512i __C, const int __imm, const int __R)
756c5857
AI
7091{
7092 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
040d2bba
WX
7093 (__v8df) __B,
7094 (__v8di) __C,
756c5857
AI
7095 __imm,
7096 (__mmask8) __U, __R);
7097}
7098
7099extern __inline __m512d
7100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7101_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
7102 __m512i __C, const int __imm, const int __R)
756c5857
AI
7103{
7104 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
040d2bba
WX
7105 (__v8df) __B,
7106 (__v8di) __C,
756c5857
AI
7107 __imm,
7108 (__mmask8) __U, __R);
7109}
7110
7111extern __inline __m512
7112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7113_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
756c5857
AI
7114 const int __imm, const int __R)
7115{
040d2bba
WX
7116 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7117 (__v16sf) __B,
7118 (__v16si) __C,
756c5857 7119 __imm,
040d2bba 7120 (__mmask16) -1, __R);
756c5857
AI
7121}
7122
7123extern __inline __m512
7124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7125_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7126 __m512i __C, const int __imm, const int __R)
756c5857
AI
7127{
7128 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
040d2bba
WX
7129 (__v16sf) __B,
7130 (__v16si) __C,
756c5857
AI
7131 __imm,
7132 (__mmask16) __U, __R);
7133}
7134
7135extern __inline __m512
7136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7137_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7138 __m512i __C, const int __imm, const int __R)
756c5857
AI
7139{
7140 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
040d2bba
WX
7141 (__v16sf) __B,
7142 (__v16si) __C,
756c5857
AI
7143 __imm,
7144 (__mmask16) __U, __R);
7145}
7146
7147extern __inline __m128d
7148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7149_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
756c5857
AI
7150 const int __imm, const int __R)
7151{
040d2bba
WX
7152 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7153 (__v2df) __B,
7154 (__v2di) __C, __imm,
7155 (__mmask8) -1, __R);
756c5857
AI
7156}
7157
7158extern __inline __m128d
7159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7160_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
7161 __m128i __C, const int __imm, const int __R)
756c5857
AI
7162{
7163 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
040d2bba
WX
7164 (__v2df) __B,
7165 (__v2di) __C, __imm,
756c5857
AI
7166 (__mmask8) __U, __R);
7167}
7168
7169extern __inline __m128d
7170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7171_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7172 __m128i __C, const int __imm, const int __R)
756c5857
AI
7173{
7174 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
040d2bba
WX
7175 (__v2df) __B,
7176 (__v2di) __C,
756c5857
AI
7177 __imm,
7178 (__mmask8) __U, __R);
7179}
7180
7181extern __inline __m128
7182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 7183_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
756c5857
AI
7184 const int __imm, const int __R)
7185{
040d2bba
WX
7186 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7187 (__v4sf) __B,
7188 (__v4si) __C, __imm,
7189 (__mmask8) -1, __R);
756c5857
AI
7190}
7191
7192extern __inline __m128
7193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7194_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7195 __m128i __C, const int __imm, const int __R)
756c5857
AI
7196{
7197 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
040d2bba
WX
7198 (__v4sf) __B,
7199 (__v4si) __C, __imm,
756c5857
AI
7200 (__mmask8) __U, __R);
7201}
7202
7203extern __inline __m128
7204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
7205_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7206 __m128i __C, const int __imm, const int __R)
756c5857
AI
7207{
7208 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
040d2bba
WX
7209 (__v4sf) __B,
7210 (__v4si) __C, __imm,
756c5857
AI
7211 (__mmask8) __U, __R);
7212}
7213
7214#else
/* Macro form of _mm512_shuffle_pd, used when __OPTIMIZE__ is not
   defined.  Expands to the shufpd512 builtin with an undefined source
   operand and an all-ones mask.  */
#define _mm512_shuffle_pd(A, B, IMM) \
  ((__m512d) \
   __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (A), \
                                  (__v8df) (__m512d) (B), \
                                  (int) (IMM), \
                                  (__v8df) (__m512d) \
                                  _mm512_undefined_pd (), \
                                  (__mmask8) -1))
7220
/* Macro form of _mm512_mask_shuffle_pd, used when __OPTIMIZE__ is not
   defined.  SRC is the builtin's source operand and K its mask.  */
#define _mm512_mask_shuffle_pd(SRC, K, A, B, IMM) \
  ((__m512d) \
   __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (A), \
                                  (__v8df) (__m512d) (B), \
                                  (int) (IMM), \
                                  (__v8df) (__m512d) (SRC), \
                                  (__mmask8) (K)))
7226
/* Macro form of _mm512_maskz_shuffle_pd, used when __OPTIMIZE__ is not
   defined.  The source operand is an all-zero vector; K is the
   mask.  */
#define _mm512_maskz_shuffle_pd(K, A, B, IMM) \
  ((__m512d) \
   __builtin_ia32_shufpd512_mask ((__v8df) (__m512d) (A), \
                                  (__v8df) (__m512d) (B), \
                                  (int) (IMM), \
                                  (__v8df) (__m512d) \
                                  _mm512_setzero_pd (), \
                                  (__mmask8) (K)))
7232
/* Macro form of _mm512_shuffle_ps, used when __OPTIMIZE__ is not
   defined.  Expands to the shufps512 builtin with an undefined source
   operand and an all-ones mask.  */
#define _mm512_shuffle_ps(A, B, IMM) \
  ((__m512) \
   __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (A), \
                                  (__v16sf) (__m512) (B), \
                                  (int) (IMM), \
                                  (__v16sf) (__m512) \
                                  _mm512_undefined_ps (), \
                                  (__mmask16) -1))
7238
/* Macro form of _mm512_mask_shuffle_ps, used when __OPTIMIZE__ is not
   defined.  SRC is the builtin's source operand and K its mask.  */
#define _mm512_mask_shuffle_ps(SRC, K, A, B, IMM) \
  ((__m512) \
   __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (A), \
                                  (__v16sf) (__m512) (B), \
                                  (int) (IMM), \
                                  (__v16sf) (__m512) (SRC), \
                                  (__mmask16) (K)))
7244
/* Macro form of _mm512_maskz_shuffle_ps, used when __OPTIMIZE__ is not
   defined.  The source operand is an all-zero vector; K is the
   mask.  */
#define _mm512_maskz_shuffle_ps(K, A, B, IMM) \
  ((__m512) \
   __builtin_ia32_shufps512_mask ((__v16sf) (__m512) (A), \
                                  (__v16sf) (__m512) (B), \
                                  (int) (IMM), \
                                  (__v16sf) (__m512) \
                                  _mm512_setzero_ps (), \
                                  (__mmask16) (K)))
7250
040d2bba
WX
/* Macro form of _mm512_fixupimm_round_pd, used when __OPTIMIZE__ is not
   defined.  Expands to the fixupimmpd512 _mask builtin with an
   all-ones mask; RND is passed through uncast.  */
#define _mm512_fixupimm_round_pd(A, B, TBL, IMM, RND) \
  ((__m512d) \
   __builtin_ia32_fixupimmpd512_mask ((__v8df) (__m512d) (A), \
                                      (__v8df) (__m512d) (B), \
                                      (__v8di) (__m512i) (TBL), \
                                      (int) (IMM), \
                                      (__mmask8) (-1), (RND)))
756c5857 7255
/* Macro form of _mm512_mask_fixupimm_round_pd, used when __OPTIMIZE__
   is not defined.  Expands to the fixupimmpd512 _mask builtin with K
   as the mask; RND is passed through uncast.  */
#define _mm512_mask_fixupimm_round_pd(A, K, B, TBL, IMM, RND) \
  ((__m512d) \
   __builtin_ia32_fixupimmpd512_mask ((__v8df) (__m512d) (A), \
                                      (__v8df) (__m512d) (B), \
                                      (__v8di) (__m512i) (TBL), \
                                      (int) (IMM), \
                                      (__mmask8) (K), (RND)))
7260
/* Macro form of _mm512_maskz_fixupimm_round_pd, used when __OPTIMIZE__
   is not defined.  Expands to the fixupimmpd512 _maskz builtin with K
   as the mask; RND is passed through uncast.  */
#define _mm512_maskz_fixupimm_round_pd(K, A, B, TBL, IMM, RND) \
  ((__m512d) \
   __builtin_ia32_fixupimmpd512_maskz ((__v8df) (__m512d) (A), \
                                       (__v8df) (__m512d) (B), \
                                       (__v8di) (__m512i) (TBL), \
                                       (int) (IMM), \
                                       (__mmask8) (K), (RND)))
7265
040d2bba
WX
/* Macro form of _mm512_fixupimm_round_ps, used when __OPTIMIZE__ is not
   defined.  Expands to the fixupimmps512 _mask builtin with an
   all-ones mask; RND is passed through uncast.  */
#define _mm512_fixupimm_round_ps(A, B, TBL, IMM, RND) \
  ((__m512) \
   __builtin_ia32_fixupimmps512_mask ((__v16sf) (__m512) (A), \
                                      (__v16sf) (__m512) (B), \
                                      (__v16si) (__m512i) (TBL), \
                                      (int) (IMM), \
                                      (__mmask16) (-1), (RND)))
756c5857 7270
/* Macro form of _mm512_mask_fixupimm_round_ps, used when __OPTIMIZE__
   is not defined.  Expands to the fixupimmps512 _mask builtin with K
   as the mask; RND is passed through uncast.  */
#define _mm512_mask_fixupimm_round_ps(A, K, B, TBL, IMM, RND) \
  ((__m512) \
   __builtin_ia32_fixupimmps512_mask ((__v16sf) (__m512) (A), \
                                      (__v16sf) (__m512) (B), \
                                      (__v16si) (__m512i) (TBL), \
                                      (int) (IMM), \
                                      (__mmask16) (K), (RND)))
756c5857 7275
/* Macro form of _mm512_maskz_fixupimm_round_ps, used when __OPTIMIZE__
   is not defined.  Expands to the fixupimmps512 _maskz builtin with K
   as the mask; RND is passed through uncast.  */
#define _mm512_maskz_fixupimm_round_ps(K, A, B, TBL, IMM, RND) \
  ((__m512) \
   __builtin_ia32_fixupimmps512_maskz ((__v16sf) (__m512) (A), \
                                       (__v16sf) (__m512) (B), \
                                       (__v16si) (__m512i) (TBL), \
                                       (int) (IMM), \
                                       (__mmask16) (K), (RND)))
7280
040d2bba
WX
/* Macro form of _mm_fixupimm_round_sd, used when __OPTIMIZE__ is not
   defined.  Expands to the fixupimmsd _mask builtin with an all-ones
   mask; RND is passed through uncast.  */
#define _mm_fixupimm_round_sd(A, B, TBL, IMM, RND) \
  ((__m128d) \
   __builtin_ia32_fixupimmsd_mask ((__v2df) (__m128d) (A), \
                                   (__v2df) (__m128d) (B), \
                                   (__v2di) (__m128i) (TBL), \
                                   (int) (IMM), \
                                   (__mmask8) (-1), (RND)))
756c5857 7285
/* Macro form of _mm_mask_fixupimm_round_sd, used when __OPTIMIZE__ is
   not defined.  Expands to the fixupimmsd _mask builtin with K as the
   mask; RND is passed through uncast.  */
#define _mm_mask_fixupimm_round_sd(A, K, B, TBL, IMM, RND) \
  ((__m128d) \
   __builtin_ia32_fixupimmsd_mask ((__v2df) (__m128d) (A), \
                                   (__v2df) (__m128d) (B), \
                                   (__v2di) (__m128i) (TBL), \
                                   (int) (IMM), \
                                   (__mmask8) (K), (RND)))
756c5857 7290
/* Macro form of _mm_maskz_fixupimm_round_sd, used when __OPTIMIZE__ is
   not defined.  Expands to the fixupimmsd _maskz builtin with K as the
   mask; RND is passed through uncast.  */
#define _mm_maskz_fixupimm_round_sd(K, A, B, TBL, IMM, RND) \
  ((__m128d) \
   __builtin_ia32_fixupimmsd_maskz ((__v2df) (__m128d) (A), \
                                    (__v2df) (__m128d) (B), \
                                    (__v2di) (__m128i) (TBL), \
                                    (int) (IMM), \
                                    (__mmask8) (K), (RND)))
7295
040d2bba
WX
/* Macro form of _mm_fixupimm_round_ss, used when __OPTIMIZE__ is not
   defined.  Expands to the fixupimmss _mask builtin with an all-ones
   mask; RND is passed through uncast.  */
#define _mm_fixupimm_round_ss(A, B, TBL, IMM, RND) \
  ((__m128) \
   __builtin_ia32_fixupimmss_mask ((__v4sf) (__m128) (A), \
                                   (__v4sf) (__m128) (B), \
                                   (__v4si) (__m128i) (TBL), \
                                   (int) (IMM), \
                                   (__mmask8) (-1), (RND)))
756c5857 7300
/* Macro form of _mm_mask_fixupimm_round_ss, used when __OPTIMIZE__ is
   not defined.  Expands to the fixupimmss _mask builtin with K as the
   mask; RND is passed through uncast.  */
#define _mm_mask_fixupimm_round_ss(A, K, B, TBL, IMM, RND) \
  ((__m128) \
   __builtin_ia32_fixupimmss_mask ((__v4sf) (__m128) (A), \
                                   (__v4sf) (__m128) (B), \
                                   (__v4si) (__m128i) (TBL), \
                                   (int) (IMM), \
                                   (__mmask8) (K), (RND)))
756c5857 7305
/* Macro form of _mm_maskz_fixupimm_round_ss, used when __OPTIMIZE__ is
   not defined.  Expands to the fixupimmss _maskz builtin with K as the
   mask; RND is passed through uncast.  */
#define _mm_maskz_fixupimm_round_ss(K, A, B, TBL, IMM, RND) \
  ((__m128) \
   __builtin_ia32_fixupimmss_maskz ((__v4sf) (__m128) (A), \
                                    (__v4sf) (__m128) (B), \
                                    (__v4si) (__m128i) (TBL), \
                                    (int) (IMM), \
                                    (__mmask8) (K), (RND)))
7310#endif
7311
7312extern __inline __m512
7313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7314_mm512_movehdup_ps (__m512 __A)
7315{
7316 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7317 (__v16sf)
0b192937 7318 _mm512_undefined_ps (),
756c5857
AI
7319 (__mmask16) -1);
7320}
7321
7322extern __inline __m512
7323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7324_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7325{
7326 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7327 (__v16sf) __W,
7328 (__mmask16) __U);
7329}
7330
7331extern __inline __m512
7332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7333_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7334{
7335 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7336 (__v16sf)
7337 _mm512_setzero_ps (),
7338 (__mmask16) __U);
7339}
7340
7341extern __inline __m512
7342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7343_mm512_moveldup_ps (__m512 __A)
7344{
7345 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7346 (__v16sf)
0b192937 7347 _mm512_undefined_ps (),
756c5857
AI
7348 (__mmask16) -1);
7349}
7350
7351extern __inline __m512
7352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7353_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7354{
7355 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7356 (__v16sf) __W,
7357 (__mmask16) __U);
7358}
7359
7360extern __inline __m512
7361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7362_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7363{
7364 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7365 (__v16sf)
7366 _mm512_setzero_ps (),
7367 (__mmask16) __U);
7368}
7369
7370extern __inline __m512i
7371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7372_mm512_or_si512 (__m512i __A, __m512i __B)
7373{
2069d6fc 7374 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7375}
7376
7377extern __inline __m512i
7378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379_mm512_or_epi32 (__m512i __A, __m512i __B)
7380{
2069d6fc 7381 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7382}
7383
7384extern __inline __m512i
7385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7386_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7387{
7388 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7389 (__v16si) __B,
7390 (__v16si) __W,
7391 (__mmask16) __U);
7392}
7393
7394extern __inline __m512i
7395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7396_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7397{
7398 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7399 (__v16si) __B,
7400 (__v16si)
7401 _mm512_setzero_si512 (),
7402 (__mmask16) __U);
7403}
7404
7405extern __inline __m512i
7406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7407_mm512_or_epi64 (__m512i __A, __m512i __B)
7408{
2069d6fc 7409 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
7410}
7411
7412extern __inline __m512i
7413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7414_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7415{
7416 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7417 (__v8di) __B,
7418 (__v8di) __W,
7419 (__mmask8) __U);
7420}
7421
7422extern __inline __m512i
7423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7424_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7425{
7426 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7427 (__v8di) __B,
7428 (__v8di)
7429 _mm512_setzero_si512 (),
7430 (__mmask8) __U);
7431}
7432
7433extern __inline __m512i
7434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7435_mm512_xor_si512 (__m512i __A, __m512i __B)
7436{
2069d6fc 7437 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7438}
7439
7440extern __inline __m512i
7441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7442_mm512_xor_epi32 (__m512i __A, __m512i __B)
7443{
2069d6fc 7444 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7445}
7446
7447extern __inline __m512i
7448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7449_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7450{
7451 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7452 (__v16si) __B,
7453 (__v16si) __W,
7454 (__mmask16) __U);
7455}
7456
7457extern __inline __m512i
7458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7459_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7460{
7461 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7462 (__v16si) __B,
7463 (__v16si)
7464 _mm512_setzero_si512 (),
7465 (__mmask16) __U);
7466}
7467
7468extern __inline __m512i
7469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7470_mm512_xor_epi64 (__m512i __A, __m512i __B)
7471{
2069d6fc 7472 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
7473}
7474
7475extern __inline __m512i
7476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7477_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7478{
7479 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7480 (__v8di) __B,
7481 (__v8di) __W,
7482 (__mmask8) __U);
7483}
7484
7485extern __inline __m512i
7486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7487_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7488{
7489 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7490 (__v8di) __B,
7491 (__v8di)
7492 _mm512_setzero_si512 (),
7493 (__mmask8) __U);
7494}
7495
#ifdef __OPTIMIZE__
/* Rotate intrinsics for packed 32-bit (prold/prord) and 64-bit
   (prolq/prorq) elements.  The count __B is forwarded unchanged to the
   builtin, so every inline variant declares it `const int'; the ror
   family previously used plain `int', unlike its rol twins.

   Unmasked forms pass an undefined vector together with an all-ones
   mask, _mask forms pass __W and __U, and _maskz forms pass a zero
   vector and __U.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi32 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ror_epi32 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi64 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ror_epi64 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_undefined_epi32 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

#else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to the
   same builtin call as the inline function of the same name above.  */
#define _mm512_rol_epi32(A, B)                                            \
    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_undefined_epi32 (), \
                                            (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)(__m512i)(W),        \
                                            (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#define _mm512_ror_epi32(A, B)                                            \
    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_undefined_epi32 (), \
                                            (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)(__m512i)(W),        \
                                            (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),        \
                                            (int)(B),                     \
                                            (__v16si)_mm512_setzero_si512 (), \
                                            (__mmask16)(U)))
#define _mm512_rol_epi64(A, B)                                            \
    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_undefined_epi32 (), \
                                            (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)(__m512i)(W),         \
                                            (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_setzero_si512 (), \
                                            (__mmask8)(U)))

#define _mm512_ror_epi64(A, B)                                            \
    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_undefined_epi32 (), \
                                            (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B)                                 \
    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)(__m512i)(W),         \
                                            (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, A, B)                                   \
    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),         \
                                            (int)(B),                     \
                                            (__v8di)_mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
#endif
7676
7677extern __inline __m512i
7678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7679_mm512_and_si512 (__m512i __A, __m512i __B)
7680{
2069d6fc 7681 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7682}
7683
7684extern __inline __m512i
7685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686_mm512_and_epi32 (__m512i __A, __m512i __B)
7687{
2069d6fc 7688 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7689}
7690
7691extern __inline __m512i
7692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7693_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7694{
7695 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7696 (__v16si) __B,
7697 (__v16si) __W,
7698 (__mmask16) __U);
7699}
7700
7701extern __inline __m512i
7702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7703_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7704{
7705 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7706 (__v16si) __B,
7707 (__v16si)
7708 _mm512_setzero_si512 (),
7709 (__mmask16) __U);
7710}
7711
7712extern __inline __m512i
7713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7714_mm512_and_epi64 (__m512i __A, __m512i __B)
7715{
2069d6fc 7716 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7717}
7718
7719extern __inline __m512i
7720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7721_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7722{
7723 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7724 (__v8di) __B,
7725 (__v8di) __W, __U);
7726}
7727
7728extern __inline __m512i
7729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7731{
7732 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7733 (__v8di) __B,
7734 (__v8di)
7735 _mm512_setzero_pd (),
7736 __U);
7737}
7738
7739extern __inline __m512i
7740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741_mm512_andnot_si512 (__m512i __A, __m512i __B)
7742{
7743 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7744 (__v16si) __B,
7745 (__v16si)
4271e5cb 7746 _mm512_undefined_epi32 (),
756c5857
AI
7747 (__mmask16) -1);
7748}
7749
7750extern __inline __m512i
7751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7752_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7753{
7754 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7755 (__v16si) __B,
7756 (__v16si)
4271e5cb 7757 _mm512_undefined_epi32 (),
756c5857
AI
7758 (__mmask16) -1);
7759}
7760
7761extern __inline __m512i
7762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7763_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7764{
7765 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7766 (__v16si) __B,
7767 (__v16si) __W,
7768 (__mmask16) __U);
7769}
7770
7771extern __inline __m512i
7772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7773_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7774{
7775 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7776 (__v16si) __B,
7777 (__v16si)
7778 _mm512_setzero_si512 (),
7779 (__mmask16) __U);
7780}
7781
7782extern __inline __m512i
7783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7784_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7785{
7786 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7787 (__v8di) __B,
7788 (__v8di)
4271e5cb 7789 _mm512_undefined_epi32 (),
756c5857
AI
7790 (__mmask8) -1);
7791}
7792
7793extern __inline __m512i
7794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7795_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7796{
7797 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7798 (__v8di) __B,
7799 (__v8di) __W, __U);
7800}
7801
7802extern __inline __m512i
7803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7804_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7805{
7806 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7807 (__v8di) __B,
7808 (__v8di)
7809 _mm512_setzero_pd (),
7810 __U);
7811}
7812
7813extern __inline __mmask16
7814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7815_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7816{
7817 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7818 (__v16si) __B,
7819 (__mmask16) -1);
7820}
7821
7822extern __inline __mmask16
7823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7824_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7825{
7826 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7827 (__v16si) __B, __U);
7828}
7829
7830extern __inline __mmask8
7831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7832_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7833{
7834 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7835 (__v8di) __B,
7836 (__mmask8) -1);
7837}
7838
7839extern __inline __mmask8
7840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7842{
7843 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7844}
7845
260d3642
IT
7846extern __inline __mmask16
7847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7849{
7850 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7851 (__v16si) __B,
7852 (__mmask16) -1);
7853}
7854
7855extern __inline __mmask16
7856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7857_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7858{
7859 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7860 (__v16si) __B, __U);
7861}
7862
7863extern __inline __mmask8
7864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7865_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7866{
7867 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7868 (__v8di) __B,
7869 (__mmask8) -1);
7870}
7871
7872extern __inline __mmask8
7873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7874_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7875{
7876 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7877 (__v8di) __B, __U);
7878}
7879
dcb2c527
JJ
7880extern __inline __m512
7881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7882_mm512_abs_ps (__m512 __A)
7883{
7884 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7885 _mm512_set1_epi32 (0x7fffffff));
7886}
7887
7888extern __inline __m512
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7891{
7892 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7893 _mm512_set1_epi32 (0x7fffffff));
7894}
7895
7896extern __inline __m512d
7897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 7898_mm512_abs_pd (__m512d __A)
dcb2c527
JJ
7899{
7900 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7901 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7902}
7903
7904extern __inline __m512d
7905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 7906_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
dcb2c527
JJ
7907{
7908 return (__m512d)
7909 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7910 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7911}
7912
756c5857
AI
7913extern __inline __m512i
7914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7915_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7916{
7917 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7918 (__v16si) __B,
7919 (__v16si)
4271e5cb 7920 _mm512_undefined_epi32 (),
756c5857
AI
7921 (__mmask16) -1);
7922}
7923
7924extern __inline __m512i
7925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7927 __m512i __B)
7928{
7929 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7930 (__v16si) __B,
7931 (__v16si) __W,
7932 (__mmask16) __U);
7933}
7934
7935extern __inline __m512i
7936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7938{
7939 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7940 (__v16si) __B,
7941 (__v16si)
7942 _mm512_setzero_si512 (),
7943 (__mmask16) __U);
7944}
7945
7946extern __inline __m512i
7947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7948_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7949{
7950 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7951 (__v8di) __B,
7952 (__v8di)
4271e5cb 7953 _mm512_undefined_epi32 (),
756c5857
AI
7954 (__mmask8) -1);
7955}
7956
7957extern __inline __m512i
7958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7959_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7960{
7961 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7962 (__v8di) __B,
7963 (__v8di) __W,
7964 (__mmask8) __U);
7965}
7966
7967extern __inline __m512i
7968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7969_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7970{
7971 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7972 (__v8di) __B,
7973 (__v8di)
7974 _mm512_setzero_si512 (),
7975 (__mmask8) __U);
7976}
7977
7978extern __inline __m512i
7979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7980_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7981{
7982 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7983 (__v16si) __B,
7984 (__v16si)
4271e5cb 7985 _mm512_undefined_epi32 (),
756c5857
AI
7986 (__mmask16) -1);
7987}
7988
7989extern __inline __m512i
7990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7991_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7992 __m512i __B)
7993{
7994 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7995 (__v16si) __B,
7996 (__v16si) __W,
7997 (__mmask16) __U);
7998}
7999
8000extern __inline __m512i
8001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8002_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
8003{
8004 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
8005 (__v16si) __B,
8006 (__v16si)
8007 _mm512_setzero_si512 (),
8008 (__mmask16) __U);
8009}
8010
8011extern __inline __m512i
8012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8013_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
8014{
8015 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8016 (__v8di) __B,
8017 (__v8di)
4271e5cb 8018 _mm512_undefined_epi32 (),
756c5857
AI
8019 (__mmask8) -1);
8020}
8021
8022extern __inline __m512i
8023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8025{
8026 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8027 (__v8di) __B,
8028 (__v8di) __W,
8029 (__mmask8) __U);
8030}
8031
8032extern __inline __m512i
8033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8034_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
8035{
8036 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8037 (__v8di) __B,
8038 (__v8di)
8039 _mm512_setzero_si512 (),
8040 (__mmask8) __U);
8041}
8042
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar single-precision to 64-bit integer conversions taking an
   explicit rounding argument __R, which is passed straight through to the
   builtin.  The _i64 and _si64 spellings call the same builtin.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned long long) __builtin_ia32_vcvtss2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvtss2si64 (__src, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned long long) __builtin_ia32_vcvttss2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (long long) __builtin_ia32_vcvttss2si64 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundss_u64(A, B) \
    ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))

#define _mm_cvt_roundss_si64(A, B) \
    ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvt_roundss_i64(A, B) \
    ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvtt_roundss_u64(A, B) \
    ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))

#define _mm_cvtt_roundss_i64(A, B) \
    ((long long)__builtin_ia32_vcvttss2si64(A, B))

#define _mm_cvtt_roundss_si64(A, B) \
    ((long long)__builtin_ia32_vcvttss2si64(A, B))
#endif
#endif
8106
#ifdef __OPTIMIZE__
/* Scalar single-precision to 32-bit integer conversions taking an
   explicit rounding argument __R, which is passed straight through to the
   builtin.  The _i32 and _si32 spellings call the same builtin.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned) __builtin_ia32_vcvtss2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvtss2si32 (__src, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (unsigned) __builtin_ia32_vcvttss2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  const __v4sf __src = (__v4sf) __A;

  return (int) __builtin_ia32_vcvttss2si32 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundss_u32(A, B) \
    ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))

#define _mm_cvt_roundss_si32(A, B) \
    ((int)__builtin_ia32_vcvtss2si32(A, B))

#define _mm_cvt_roundss_i32(A, B) \
    ((int)__builtin_ia32_vcvtss2si32(A, B))

#define _mm_cvtt_roundss_u32(A, B) \
    ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))

#define _mm_cvtt_roundss_si32(A, B) \
    ((int)__builtin_ia32_vcvttss2si32(A, B))

#define _mm_cvtt_roundss_i32(A, B) \
    ((int)__builtin_ia32_vcvttss2si32(A, B))
#endif
8168
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar double-precision to 64-bit integer conversions taking an
   explicit rounding argument __R, which is passed straight through to the
   builtin.  The _i64 and _si64 spellings call the same builtin.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvtsd2si64 (__src, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (long long) __builtin_ia32_vcvttsd2si64 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundsd_u64(A, B) \
    ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))

#define _mm_cvt_roundsd_si64(A, B) \
    ((long long)__builtin_ia32_vcvtsd2si64(A, B))

#define _mm_cvt_roundsd_i64(A, B) \
    ((long long)__builtin_ia32_vcvtsd2si64(A, B))

#define _mm_cvtt_roundsd_u64(A, B) \
    ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
    ((long long)__builtin_ia32_vcvttsd2si64(A, B))

#define _mm_cvtt_roundsd_i64(A, B) \
    ((long long)__builtin_ia32_vcvttsd2si64(A, B))
#endif
#endif
8232
#ifdef __OPTIMIZE__
/* Scalar double-precision to 32-bit integer conversions taking an
   explicit rounding argument __R, which is passed straight through to the
   builtin.  The _i32 and _si32 spellings call the same builtin.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned) __builtin_ia32_vcvtsd2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvtsd2si32 (__src, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (unsigned) __builtin_ia32_vcvttsd2usi32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  const __v2df __src = (__v2df) __A;

  return (int) __builtin_ia32_vcvttsd2si32 (__src, __R);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined.  */
#define _mm_cvt_roundsd_u32(A, B) \
    ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))

#define _mm_cvt_roundsd_si32(A, B) \
    ((int)__builtin_ia32_vcvtsd2si32(A, B))

#define _mm_cvt_roundsd_i32(A, B) \
    ((int)__builtin_ia32_vcvtsd2si32(A, B))

#define _mm_cvtt_roundsd_u32(A, B) \
    ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))

#define _mm_cvtt_roundsd_si32(A, B) \
    ((int)__builtin_ia32_vcvttsd2si32(A, B))

#define _mm_cvtt_roundsd_i32(A, B) \
    ((int)__builtin_ia32_vcvttsd2si32(A, B))
#endif
8294
8295extern __inline __m512d
8296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8297_mm512_movedup_pd (__m512d __A)
8298{
8299 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8300 (__v8df)
0b192937 8301 _mm512_undefined_pd (),
756c5857
AI
8302 (__mmask8) -1);
8303}
8304
8305extern __inline __m512d
8306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8307_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8308{
8309 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8310 (__v8df) __W,
8311 (__mmask8) __U);
8312}
8313
8314extern __inline __m512d
8315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8316_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8317{
8318 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8319 (__v8df)
8320 _mm512_setzero_pd (),
8321 (__mmask8) __U);
8322}
8323
8324extern __inline __m512d
8325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8326_mm512_unpacklo_pd (__m512d __A, __m512d __B)
8327{
8328 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8329 (__v8df) __B,
8330 (__v8df)
0b192937 8331 _mm512_undefined_pd (),
756c5857
AI
8332 (__mmask8) -1);
8333}
8334
8335extern __inline __m512d
8336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8337_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8338{
8339 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8340 (__v8df) __B,
8341 (__v8df) __W,
8342 (__mmask8) __U);
8343}
8344
8345extern __inline __m512d
8346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8347_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8348{
8349 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8350 (__v8df) __B,
8351 (__v8df)
8352 _mm512_setzero_pd (),
8353 (__mmask8) __U);
8354}
8355
8356extern __inline __m512d
8357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8358_mm512_unpackhi_pd (__m512d __A, __m512d __B)
8359{
8360 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8361 (__v8df) __B,
8362 (__v8df)
0b192937 8363 _mm512_undefined_pd (),
756c5857
AI
8364 (__mmask8) -1);
8365}
8366
8367extern __inline __m512d
8368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8370{
8371 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8372 (__v8df) __B,
8373 (__v8df) __W,
8374 (__mmask8) __U);
8375}
8376
8377extern __inline __m512d
8378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8379_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8380{
8381 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8382 (__v8df) __B,
8383 (__v8df)
8384 _mm512_setzero_pd (),
8385 (__mmask8) __U);
8386}
8387
8388extern __inline __m512
8389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390_mm512_unpackhi_ps (__m512 __A, __m512 __B)
8391{
8392 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8393 (__v16sf) __B,
8394 (__v16sf)
0b192937 8395 _mm512_undefined_ps (),
756c5857
AI
8396 (__mmask16) -1);
8397}
8398
8399extern __inline __m512
8400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8401_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8402{
8403 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8404 (__v16sf) __B,
8405 (__v16sf) __W,
8406 (__mmask16) __U);
8407}
8408
8409extern __inline __m512
8410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8411_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8412{
8413 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8414 (__v16sf) __B,
8415 (__v16sf)
8416 _mm512_setzero_ps (),
8417 (__mmask16) __U);
8418}
8419
8420#ifdef __OPTIMIZE__
8421extern __inline __m512d
8422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8423_mm512_cvt_roundps_pd (__m256 __A, const int __R)
8424{
8425 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8426 (__v8df)
0b192937 8427 _mm512_undefined_pd (),
756c5857
AI
8428 (__mmask8) -1, __R);
8429}
8430
8431extern __inline __m512d
8432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8433_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8434 const int __R)
8435{
8436 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8437 (__v8df) __W,
8438 (__mmask8) __U, __R);
8439}
8440
8441extern __inline __m512d
8442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8443_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8444{
8445 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8446 (__v8df)
8447 _mm512_setzero_pd (),
8448 (__mmask8) __U, __R);
8449}
8450
8451extern __inline __m512
8452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8453_mm512_cvt_roundph_ps (__m256i __A, const int __R)
8454{
8455 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8456 (__v16sf)
0b192937 8457 _mm512_undefined_ps (),
756c5857
AI
8458 (__mmask16) -1, __R);
8459}
8460
8461extern __inline __m512
8462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8463_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8464 const int __R)
8465{
8466 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8467 (__v16sf) __W,
8468 (__mmask16) __U, __R);
8469}
8470
8471extern __inline __m512
8472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8473_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8474{
8475 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8476 (__v16sf)
8477 _mm512_setzero_ps (),
8478 (__mmask16) __U, __R);
8479}
8480
8481extern __inline __m256i
8482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8483_mm512_cvt_roundps_ph (__m512 __A, const int __I)
8484{
8485 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8486 __I,
8487 (__v16hi)
0b192937 8488 _mm256_undefined_si256 (),
756c5857
AI
8489 -1);
8490}
8491
8492extern __inline __m256i
8493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8494_mm512_cvtps_ph (__m512 __A, const int __I)
8495{
8496 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8497 __I,
8498 (__v16hi)
0b192937 8499 _mm256_undefined_si256 (),
756c5857
AI
8500 -1);
8501}
8502
8503extern __inline __m256i
8504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8505_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8506 const int __I)
8507{
8508 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8509 __I,
8510 (__v16hi) __U,
8511 (__mmask16) __W);
8512}
8513
8514extern __inline __m256i
8515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8516_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8517{
8518 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8519 __I,
8520 (__v16hi) __U,
8521 (__mmask16) __W);
8522}
8523
8524extern __inline __m256i
8525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8526_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8527{
8528 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8529 __I,
8530 (__v16hi)
8531 _mm256_setzero_si256 (),
8532 (__mmask16) __W);
8533}
8534
8535extern __inline __m256i
8536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8537_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8538{
8539 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8540 __I,
8541 (__v16hi)
8542 _mm256_setzero_si256 (),
8543 (__mmask16) __W);
8544}
8545#else
/* Macro forms of the cvtps2pd512 wrappers, used when __OPTIMIZE__ is not
   defined.  Each expansion is wrapped in parentheses so that the leading
   cast cannot bind to less than the whole call (e.g. under sizeof or a
   postfix operator), and the operands carry the same casts as the inline
   function versions.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
                                              (__v8df)(__m512d)(W), \
                                              (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256)(A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8)(U), (B)))
8554
/* Macro forms of the vcvtph2ps512 wrappers, used when __OPTIMIZE__ is not
   defined.  Each expansion is wrapped in parentheses so that the leading
   cast applies to the whole call, and the mask operand is cast to
   __mmask16 as in the inline function versions.  */
#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
                                              (__v16sf)(__m512)(W), \
                                              (__mmask16)(U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i)(A), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (__mmask16)(U), (B)))
8563
/* Macro forms of the vcvtps2ph512 wrappers, used when __OPTIMIZE__ is not
   defined.  The cvtps_ph spellings expand to exactly the same call as the
   cvt_roundps_ph ones.  In the mask forms the first macro parameter (U) is
   the __m256i third operand and the second (W) is the mask.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)_mm256_undefined_si256 (), \
                                               -1))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)_mm256_undefined_si256 (), \
                                               -1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)(__m256i)(U), \
                                               (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)(__m256i)(U), \
                                               (__mmask16) (W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)_mm256_setzero_si256 (), \
                                               (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), \
                                               (int) (I), \
                                               (__v16hi)_mm256_setzero_si256 (), \
                                               (__mmask16) (W)))
8582#endif
8583
8584#ifdef __OPTIMIZE__
8585extern __inline __m256
8586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8587_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8588{
8589 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8590 (__v8sf)
0b192937 8591 _mm256_undefined_ps (),
756c5857
AI
8592 (__mmask8) -1, __R);
8593}
8594
8595extern __inline __m256
8596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8597_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8598 const int __R)
8599{
8600 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8601 (__v8sf) __W,
8602 (__mmask8) __U, __R);
8603}
8604
8605extern __inline __m256
8606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8607_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8608{
8609 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8610 (__v8sf)
8611 _mm256_setzero_ps (),
8612 (__mmask8) __U, __R);
8613}
8614
075691af
AI
8615extern __inline __m128
8616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8617_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8618{
8619 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8620 (__v2df) __B,
8621 __R);
8622}
8623
8624extern __inline __m128d
8625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8626_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8627{
8628 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8629 (__v4sf) __B,
8630 __R);
8631}
756c5857
AI
8632#else
/* Macro forms of the cvtpd2ps512 wrappers, used when __OPTIMIZE__ is not
   defined.  Each expansion is wrapped in parentheses so that the leading
   cast applies to the whole call, and the operands carry the same casts
   as the inline function versions.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
                                             (__v8sf) _mm256_undefined_ps (), \
                                             (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
                                             (__v8sf)(__m256)(W), \
                                             (__mmask8)(U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d)(A), \
                                             (__v8sf) _mm256_setzero_ps (), \
                                             (__mmask8)(U), (B)))
075691af
AI
8641
/* Macro forms of the scalar sd<->ss rounding conversions, used when
   __OPTIMIZE__ is not defined.  The expansions are fully parenthesized and
   cast their vector operands exactly as the inline function versions do.  */
#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128)(A), \
                                           (__v2df)(__m128d)(B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d)(A), \
                                            (__v4sf)(__m128)(B), (C)))
756c5857
AI
8647#endif
8648
8649extern __inline void
8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651_mm512_stream_si512 (__m512i * __P, __m512i __A)
8652{
8653 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8654}
8655
8656extern __inline void
8657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8658_mm512_stream_ps (float *__P, __m512 __A)
8659{
8660 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8661}
8662
8663extern __inline void
8664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8665_mm512_stream_pd (double *__P, __m512d __A)
8666{
8667 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8668}
8669
c56a42b9
KY
8670extern __inline __m512i
8671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672_mm512_stream_load_si512 (void *__P)
8673{
8674 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8675}
8676
9c3c2608
UB
/* Constants for mantissa extraction: normalization interval selector.
   The getmant wrappers in this file OR this value into the low bits of
   the immediate, below the sign selector shifted left by two.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,      /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,     /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,     /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8685
/* Constants for mantissa extraction: sign-control selector.  The getmant
   wrappers in this file shift this value left by two before OR-ing it
   with the normalization selector.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,    /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,   /* sign = 0 */
  _MM_MANT_SIGN_nan = 2     /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8692
756c5857 8693#ifdef __OPTIMIZE__
075691af
AI
8694extern __inline __m128
8695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8696_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8697{
8698 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8699 (__v4sf) __B,
8700 __R);
8701}
8702
68d872d7
SP
8703extern __inline __m128
8704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8705_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8706 __m128 __B, const int __R)
8707{
8708 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8709 (__v4sf) __B,
8710 (__v4sf) __W,
8711 (__mmask8) __U, __R);
8712}
8713
8714extern __inline __m128
8715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8716_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8717 const int __R)
8718{
8719 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8720 (__v4sf) __B,
8721 (__v4sf)
8722 _mm_setzero_ps (),
8723 (__mmask8) __U, __R);
8724}
8725
075691af
AI
8726extern __inline __m128d
8727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8728_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8729{
8730 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8731 (__v2df) __B,
8732 __R);
8733}
8734
68d872d7
SP
8735extern __inline __m128d
8736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8737_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8738 __m128d __B, const int __R)
8739{
8740 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8741 (__v2df) __B,
8742 (__v2df) __W,
8743 (__mmask8) __U, __R);
8744}
8745
8746extern __inline __m128d
8747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8749 const int __R)
8750{
8751 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8752 (__v2df) __B,
8753 (__v2df)
8754 _mm_setzero_pd (),
8755 (__mmask8) __U, __R);
8756}
8757
756c5857
AI
8758extern __inline __m512
8759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8760_mm512_getexp_round_ps (__m512 __A, const int __R)
8761{
8762 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8763 (__v16sf)
0b192937 8764 _mm512_undefined_ps (),
756c5857
AI
8765 (__mmask16) -1, __R);
8766}
8767
8768extern __inline __m512
8769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8770_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8771 const int __R)
8772{
8773 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8774 (__v16sf) __W,
8775 (__mmask16) __U, __R);
8776}
8777
8778extern __inline __m512
8779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8781{
8782 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8783 (__v16sf)
8784 _mm512_setzero_ps (),
8785 (__mmask16) __U, __R);
8786}
8787
8788extern __inline __m512d
8789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8790_mm512_getexp_round_pd (__m512d __A, const int __R)
8791{
8792 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8793 (__v8df)
0b192937 8794 _mm512_undefined_pd (),
756c5857
AI
8795 (__mmask8) -1, __R);
8796}
8797
8798extern __inline __m512d
8799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8801 const int __R)
8802{
8803 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8804 (__v8df) __W,
8805 (__mmask8) __U, __R);
8806}
8807
8808extern __inline __m512d
8809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8810_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8811{
8812 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8813 (__v8df)
8814 _mm512_setzero_pd (),
8815 (__mmask8) __U, __R);
8816}
8817
756c5857
AI
8818extern __inline __m512d
8819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8821 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8822{
8823 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8824 (__C << 2) | __B,
0b192937 8825 _mm512_undefined_pd (),
756c5857
AI
8826 (__mmask8) -1, __R);
8827}
8828
8829extern __inline __m512d
8830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8831_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8832 _MM_MANTISSA_NORM_ENUM __B,
8833 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8834{
8835 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8836 (__C << 2) | __B,
8837 (__v8df) __W, __U,
8838 __R);
8839}
8840
8841extern __inline __m512d
8842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8843_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8844 _MM_MANTISSA_NORM_ENUM __B,
8845 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8846{
8847 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8848 (__C << 2) | __B,
8849 (__v8df)
8850 _mm512_setzero_pd (),
8851 __U, __R);
8852}
8853
8854extern __inline __m512
8855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8857 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8858{
8859 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8860 (__C << 2) | __B,
0b192937 8861 _mm512_undefined_ps (),
756c5857
AI
8862 (__mmask16) -1, __R);
8863}
8864
8865extern __inline __m512
8866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8867_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8868 _MM_MANTISSA_NORM_ENUM __B,
8869 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8870{
8871 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8872 (__C << 2) | __B,
8873 (__v16sf) __W, __U,
8874 __R);
8875}
8876
8877extern __inline __m512
8878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8880 _MM_MANTISSA_NORM_ENUM __B,
8881 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8882{
8883 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8884 (__C << 2) | __B,
8885 (__v16sf)
8886 _mm512_setzero_ps (),
8887 __U, __R);
8888}
8889
075691af
AI
8890extern __inline __m128d
8891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892_mm_getmant_round_sd (__m128d __A, __m128d __B,
8893 _MM_MANTISSA_NORM_ENUM __C,
8894 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8895{
8896 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8897 (__v2df) __B,
8898 (__D << 2) | __C,
8899 __R);
8900}
8901
68d872d7
SP
8902extern __inline __m128d
8903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8904_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8905 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8906 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8907{
8908 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8909 (__v2df) __B,
8910 (__D << 2) | __C,
8911 (__v2df) __W,
8912 __U, __R);
8913}
8914
8915extern __inline __m128d
8916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8917_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8918 _MM_MANTISSA_NORM_ENUM __C,
8919 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8920{
8921 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8922 (__v2df) __B,
8923 (__D << 2) | __C,
8924 (__v2df)
8925 _mm_setzero_pd(),
8926 __U, __R);
8927}
8928
075691af
AI
8929extern __inline __m128
8930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8931_mm_getmant_round_ss (__m128 __A, __m128 __B,
8932 _MM_MANTISSA_NORM_ENUM __C,
8933 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8934{
8935 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8936 (__v4sf) __B,
8937 (__D << 2) | __C,
8938 __R);
8939}
8940
68d872d7
SP
8941extern __inline __m128
8942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8944 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8945 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8946{
8947 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8948 (__v4sf) __B,
8949 (__D << 2) | __C,
8950 (__v4sf) __W,
8951 __U, __R);
8952}
8953
8954extern __inline __m128
8955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8956_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8957 _MM_MANTISSA_NORM_ENUM __C,
8958 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8959{
8960 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8961 (__v4sf) __B,
8962 (__D << 2) | __C,
8963 (__v4sf)
8964 _mm_setzero_ps(),
8965 __U, __R);
8966}
8967
756c5857
AI
8968#else
/* Macro forms of the getmantpd512 wrappers, used when __OPTIMIZE__ is not
   defined.  The immediate is built as ((C) << 2) | (B).  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(((C)<<2) | (B)), \
     (__v8df)(__m512d)_mm512_undefined_pd(), \
     (__mmask8)-1, \
     (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(((C)<<2) | (B)), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(((C)<<2) | (B)), \
     (__v8df)(__m512d)_mm512_setzero_pd(), \
     (__mmask8)(U), \
     (R)))
/* Macro forms of the getmantps512 wrappers, used when __OPTIMIZE__ is not
   defined.  The immediate is built as ((C) << 2) | (B).  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(((C)<<2) | (B)), \
     (__v16sf)(__m512)_mm512_undefined_ps(), \
     (__mmask16)-1, \
     (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(((C)<<2) | (B)), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U), \
     (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), \
     (int)(((C)<<2) | (B)), \
     (__v16sf)(__m512)_mm512_setzero_ps(), \
     (__mmask16)(U), \
     (R)))
075691af
AI
/* Macro forms of the scalar double getmant wrappers, used when
   __OPTIMIZE__ is not defined.  The immediate is built as
   ((D) << 2) | (C).  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(((D)<<2) | (C)), \
     (R)))

#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_mask_round ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(((D)<<2) | (C)), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U), \
     (R)))

#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_mask_round ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(((D)<<2) | (C)), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)(U), \
     (R)))
9030
075691af
AI
/* Macro forms of the scalar float getmant wrappers, used when
   __OPTIMIZE__ is not defined.  The immediate is built as
   ((D) << 2) | (C).  */
#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(((D)<<2) | (C)), \
     (R)))

#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_mask_round ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(((D)<<2) | (C)), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U), \
     (R)))

#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_mask_round ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(((D)<<2) | (C)), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)(U), \
     (R)))
9052
075691af
AI
/* Macro forms of the scalar float getexp wrappers, used when __OPTIMIZE__
   is not defined.  The mask and maskz forms are fully parenthesized and
   cast their operands the same way the inline function versions do, so
   the leading (__m128) cast always applies to the whole call.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), (R)))

#define _mm_mask_getexp_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (C)))

#define _mm_maskz_getexp_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf) _mm_setzero_ps (), \
                                                (__mmask8)(U), (C)))
9061
075691af
AI
/* Macro forms of the scalar double getexp wrappers, used when
   __OPTIMIZE__ is not defined.  The mask and maskz forms are fully
   parenthesized and cast their operands the same way the inline function
   versions do, so the leading (__m128d) cast always applies to the whole
   call.  */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), (R)))

#define _mm_mask_getexp_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (C)))

#define _mm_maskz_getexp_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df) _mm_setzero_pd (), \
                                                 (__mmask8)(U), (C)))
9070
9071
756c5857
AI
/* Macro forms of the getexpps512 wrappers, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
     (__v16sf)(__m512)(A), \
     (__v16sf)_mm512_undefined_ps(), \
     (__mmask16)-1, \
     R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U), \
     R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
     (__v16sf)(__m512)(A), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(U), \
     R))
9083
/* Macro forms of the getexppd512 wrappers, used when __OPTIMIZE__ is not
   defined.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
     (__v8df)(__m512d)(A), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)-1, \
     R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
     (__v8df)(__m512d)(A), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(U), \
     R))
9095#endif
9096
9097#ifdef __OPTIMIZE__
9098extern __inline __m512
9099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9100_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
9101{
9102 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
9103 (__v16sf)
9104 _mm512_undefined_ps (),
9105 -1, __R);
756c5857
AI
9106}
9107
9108extern __inline __m512
9109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9110_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
9111 const int __imm, const int __R)
9112{
9113 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
9114 (__v16sf) __A,
9115 (__mmask16) __B, __R);
9116}
9117
9118extern __inline __m512
9119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9120_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
9121 const int __imm, const int __R)
9122{
9123 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
9124 __imm,
9125 (__v16sf)
9126 _mm512_setzero_ps (),
9127 (__mmask16) __A, __R);
9128}
9129
9130extern __inline __m512d
9131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9132_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
9133{
9134 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
9135 (__v8df)
9136 _mm512_undefined_pd (),
9137 -1, __R);
756c5857
AI
9138}
9139
9140extern __inline __m512d
9141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9142_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
9143 __m512d __C, const int __imm, const int __R)
9144{
9145 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
9146 (__v8df) __A,
9147 (__mmask8) __B, __R);
9148}
9149
9150extern __inline __m512d
9151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9152_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
9153 const int __imm, const int __R)
9154{
9155 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
9156 __imm,
9157 (__v8df)
9158 _mm512_setzero_pd (),
9159 (__mmask8) __A, __R);
9160}
075691af
AI
9161
9162extern __inline __m128
9163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
a7c4d6d1
HL
9164_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
9165 const int __R)
9166{
9167 return (__m128)
9168 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
9169 (__v4sf) __B, __imm,
9170 (__v4sf)
9171 _mm_setzero_ps (),
9172 (__mmask8) -1,
9173 __R);
9174}
9175
9176extern __inline __m128
9177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9178_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
9179 __m128 __D, const int __imm, const int __R)
075691af 9180{
a7c4d6d1
HL
9181 return (__m128)
9182 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
9183 (__v4sf) __D, __imm,
9184 (__v4sf) __A,
9185 (__mmask8) __B,
9186 __R);
9187}
9188
9189extern __inline __m128
9190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9191_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
9192 const int __imm, const int __R)
9193{
9194 return (__m128)
9195 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
9196 (__v4sf) __C, __imm,
9197 (__v4sf)
9198 _mm_setzero_ps (),
9199 (__mmask8) __A,
9200 __R);
075691af
AI
9201}
9202
9203extern __inline __m128d
9204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9205_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9206 const int __R)
9207{
a7c4d6d1
HL
9208 return (__m128d)
9209 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
9210 (__v2df) __B, __imm,
9211 (__v2df)
9212 _mm_setzero_pd (),
9213 (__mmask8) -1,
9214 __R);
9215}
9216
9217extern __inline __m128d
9218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9219_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
9220 __m128d __D, const int __imm, const int __R)
9221{
9222 return (__m128d)
9223 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
9224 (__v2df) __D, __imm,
9225 (__v2df) __A,
9226 (__mmask8) __B,
9227 __R);
9228}
9229
9230extern __inline __m128d
9231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9232_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
9233 const int __imm, const int __R)
9234{
9235 return (__m128d)
9236 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
9237 (__v2df) __C, __imm,
9238 (__v2df)
9239 _mm_setzero_pd (),
9240 (__mmask8) __A,
9241 __R);
075691af
AI
9242}
9243
756c5857
AI
9244#else
/* Macro forms of the rndscaleps wrappers, used when __OPTIMIZE__ is not
   defined.  In the mask form A is the third operand, B the mask, C the
   input and D the immediate; in the maskz form A is the mask, B the input
   and C the immediate.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf)_mm512_undefined_ps(), \
     (__mmask16)(-1), \
     R))

#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(C), \
     (int)(D), \
     (__v16sf)(__m512)(A), \
     (__mmask16)(B), \
     R))

#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(A), \
     R))
/* Macro forms of the rndscalepd wrappers, used when __OPTIMIZE__ is not
   defined.  In the mask form A is the third operand, B the mask, C the
   input and D the immediate; in the maskz form A is the mask, B the input
   and C the immediate.  */
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)(-1), \
     R))

#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(C), \
     (int)(D), \
     (__v8df)(__m512d)(A), \
     (__mmask8)(B), \
     R))

#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(A), \
     R))
a7c4d6d1
HL
/* Macro forms of the scalar float rndscale wrappers, used when
   __OPTIMIZE__ is not defined.  The unmasked form passes an all-zero
   fourth operand with mask -1.  */
#define _mm_roundscale_round_ss(A, B, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     (int) (I), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) (-1), \
     (int) (R)))

#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (B), \
     (__v4sf) (__m128) (C), \
     (int) (I), \
     (__v4sf) (__m128) (A), \
     (__mmask8) (U), \
     (int) (R)))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), \
     (__v4sf) (__m128) (B), \
     (int) (I), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) (U), \
     (int) (R)))
/* Macro forms of the scalar double rndscale wrappers, used when
   __OPTIMIZE__ is not defined.  The unmasked form passes an all-zero
   fourth operand with mask -1.  */
#define _mm_roundscale_round_sd(A, B, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     (int) (I), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) (-1), \
     (int) (R)))

#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (B), \
     (__v2df) (__m128d) (C), \
     (int) (I), \
     (__v2df) (__m128d) (A), \
     (__mmask8) (U), \
     (int) (R)))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), \
     (__v2df) (__m128d) (B), \
     (int) (I), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) (U), \
     (int) (R)))
756c5857
AI
9319#endif
9320
9321extern __inline __m512
9322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9323_mm512_floor_ps (__m512 __A)
9324{
9325 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9326 _MM_FROUND_FLOOR,
9327 (__v16sf) __A, -1,
9328 _MM_FROUND_CUR_DIRECTION);
9329}
9330
9331extern __inline __m512d
9332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9333_mm512_floor_pd (__m512d __A)
9334{
9335 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9336 _MM_FROUND_FLOOR,
9337 (__v8df) __A, -1,
9338 _MM_FROUND_CUR_DIRECTION);
9339}
9340
9341extern __inline __m512
9342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9343_mm512_ceil_ps (__m512 __A)
9344{
9345 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9346 _MM_FROUND_CEIL,
9347 (__v16sf) __A, -1,
9348 _MM_FROUND_CUR_DIRECTION);
9349}
9350
9351extern __inline __m512d
9352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9353_mm512_ceil_pd (__m512d __A)
9354{
9355 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9356 _MM_FROUND_CEIL,
9357 (__v8df) __A, -1,
9358 _MM_FROUND_CUR_DIRECTION);
9359}
9360
9361extern __inline __m512
9362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9363_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9364{
9365 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9366 _MM_FROUND_FLOOR,
9367 (__v16sf) __W, __U,
9368 _MM_FROUND_CUR_DIRECTION);
9369}
9370
9371extern __inline __m512d
9372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9373_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9374{
9375 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9376 _MM_FROUND_FLOOR,
9377 (__v8df) __W, __U,
9378 _MM_FROUND_CUR_DIRECTION);
9379}
9380
9381extern __inline __m512
9382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9383_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9384{
9385 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9386 _MM_FROUND_CEIL,
9387 (__v16sf) __W, __U,
9388 _MM_FROUND_CUR_DIRECTION);
9389}
9390
9391extern __inline __m512d
9392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9394{
9395 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9396 _MM_FROUND_CEIL,
9397 (__v8df) __W, __U,
9398 _MM_FROUND_CUR_DIRECTION);
9399}
9400
756c5857 9401#ifdef __OPTIMIZE__
756c5857
AI
9402extern __inline __m512i
9403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9404_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9405{
9406 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9407 (__v16si) __B, __imm,
9408 (__v16si)
4271e5cb 9409 _mm512_undefined_epi32 (),
756c5857
AI
9410 (__mmask16) -1);
9411}
9412
9413extern __inline __m512i
9414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9415_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9416 __m512i __B, const int __imm)
9417{
9418 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9419 (__v16si) __B, __imm,
9420 (__v16si) __W,
9421 (__mmask16) __U);
9422}
9423
9424extern __inline __m512i
9425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9426_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9427 const int __imm)
9428{
9429 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9430 (__v16si) __B, __imm,
9431 (__v16si)
9432 _mm512_setzero_si512 (),
9433 (__mmask16) __U);
9434}
9435
9436extern __inline __m512i
9437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9439{
9440 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9441 (__v8di) __B, __imm,
9442 (__v8di)
4271e5cb 9443 _mm512_undefined_epi32 (),
756c5857
AI
9444 (__mmask8) -1);
9445}
9446
9447extern __inline __m512i
9448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9449_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9450 __m512i __B, const int __imm)
9451{
9452 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9453 (__v8di) __B, __imm,
9454 (__v8di) __W,
9455 (__mmask8) __U);
9456}
9457
9458extern __inline __m512i
9459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9460_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9461 const int __imm)
9462{
9463 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9464 (__v8di) __B, __imm,
9465 (__v8di)
9466 _mm512_setzero_si512 (),
9467 (__mmask8) __U);
9468}
9469#else
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_alignd512_mask on X and Y with C cast to int,
   _mm512_undefined_epi32 () as the fourth operand and (__mmask16) -1
   as the mask operand.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) -1))
9474
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_alignd512_mask on X and Y with C cast to int, W as
   the fourth operand and U as the mask operand.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
9479
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_alignd512_mask on X and Y with C cast to int,
   _mm512_setzero_si512 () as the fourth operand and U as the mask
   operand.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
9484
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_alignq512_mask on X and Y with C cast to int,
   _mm512_undefined_epi32 () (cast to __v8di) as the fourth operand and
   (__mmask8) -1 as the mask operand.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) -1))
756c5857
AI
9489
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_alignq512_mask on X and Y with C cast to int, W as
   the fourth operand and U as the mask operand.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
9493
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_alignq512_mask on X and Y with C cast to int,
   _mm512_setzero_si512 () as the fourth operand and U as the mask
   operand.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
9498#endif
9499
9500extern __inline __mmask16
9501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9503{
9504 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9505 (__v16si) __B,
9506 (__mmask16) -1);
9507}
9508
9509extern __inline __mmask16
9510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9512{
9513 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9514 (__v16si) __B, __U);
9515}
9516
9517extern __inline __mmask8
9518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9519_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9520{
9521 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9522 (__v8di) __B, __U);
9523}
9524
9525extern __inline __mmask8
9526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9527_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9528{
9529 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9530 (__v8di) __B,
9531 (__mmask8) -1);
9532}
9533
9534extern __inline __mmask16
9535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9537{
9538 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9539 (__v16si) __B,
9540 (__mmask16) -1);
9541}
9542
9543extern __inline __mmask16
9544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9545_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9546{
9547 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9548 (__v16si) __B, __U);
9549}
9550
9551extern __inline __mmask8
9552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9553_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9554{
9555 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9556 (__v8di) __B, __U);
9557}
9558
9559extern __inline __mmask8
9560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9562{
9563 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9564 (__v8di) __B,
9565 (__mmask8) -1);
9566}
9567
d256b866
IT
9568extern __inline __mmask16
9569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9570_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9571{
9572 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9573 (__v16si) __Y, 5,
9574 (__mmask16) -1);
9575}
9576
275be1da
IT
9577extern __inline __mmask16
9578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9579_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9580{
9581 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9582 (__v16si) __Y, 5,
9583 (__mmask16) __M);
9584}
9585
9586extern __inline __mmask16
9587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9588_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9589{
9590 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9591 (__v16si) __Y, 5,
9592 (__mmask16) __M);
9593}
9594
d256b866
IT
9595extern __inline __mmask16
9596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9597_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9598{
9599 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9600 (__v16si) __Y, 5,
9601 (__mmask16) -1);
9602}
9603
275be1da
IT
9604extern __inline __mmask8
9605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9606_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9607{
9608 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9609 (__v8di) __Y, 5,
9610 (__mmask8) __M);
9611}
9612
d256b866
IT
9613extern __inline __mmask8
9614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9615_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9616{
9617 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9618 (__v8di) __Y, 5,
9619 (__mmask8) -1);
9620}
9621
275be1da
IT
9622extern __inline __mmask8
9623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9624_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9625{
9626 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9627 (__v8di) __Y, 5,
9628 (__mmask8) __M);
9629}
9630
d256b866
IT
9631extern __inline __mmask8
9632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9633_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9634{
9635 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9636 (__v8di) __Y, 5,
9637 (__mmask8) -1);
9638}
9639
275be1da
IT
9640extern __inline __mmask16
9641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9642_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9643{
9644 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9645 (__v16si) __Y, 2,
9646 (__mmask16) __M);
9647}
9648
d256b866
IT
9649extern __inline __mmask16
9650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9651_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9652{
9653 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9654 (__v16si) __Y, 2,
9655 (__mmask16) -1);
9656}
9657
275be1da
IT
9658extern __inline __mmask16
9659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9660_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9661{
9662 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9663 (__v16si) __Y, 2,
9664 (__mmask16) __M);
9665}
9666
d256b866
IT
9667extern __inline __mmask16
9668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9669_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9670{
9671 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9672 (__v16si) __Y, 2,
9673 (__mmask16) -1);
9674}
9675
275be1da
IT
9676extern __inline __mmask8
9677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9678_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9679{
9680 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9681 (__v8di) __Y, 2,
9682 (__mmask8) __M);
9683}
9684
d256b866
IT
9685extern __inline __mmask8
9686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9687_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9688{
9689 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9690 (__v8di) __Y, 2,
9691 (__mmask8) -1);
9692}
9693
275be1da
IT
9694extern __inline __mmask8
9695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9696_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9697{
9698 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9699 (__v8di) __Y, 2,
9700 (__mmask8) __M);
9701}
9702
d256b866
IT
9703extern __inline __mmask8
9704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9706{
9707 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9708 (__v8di) __Y, 2,
9709 (__mmask8) -1);
9710}
9711
275be1da
IT
9712extern __inline __mmask16
9713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9714_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9715{
9716 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9717 (__v16si) __Y, 1,
9718 (__mmask16) __M);
9719}
9720
d256b866
IT
9721extern __inline __mmask16
9722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9723_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9724{
9725 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9726 (__v16si) __Y, 1,
9727 (__mmask16) -1);
9728}
9729
275be1da
IT
9730extern __inline __mmask16
9731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9732_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9733{
9734 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9735 (__v16si) __Y, 1,
9736 (__mmask16) __M);
9737}
9738
d256b866
IT
9739extern __inline __mmask16
9740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9741_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9742{
9743 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9744 (__v16si) __Y, 1,
9745 (__mmask16) -1);
9746}
9747
275be1da
IT
9748extern __inline __mmask8
9749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9751{
9752 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9753 (__v8di) __Y, 1,
9754 (__mmask8) __M);
9755}
9756
d256b866
IT
9757extern __inline __mmask8
9758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9760{
9761 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9762 (__v8di) __Y, 1,
9763 (__mmask8) -1);
9764}
9765
275be1da
IT
9766extern __inline __mmask8
9767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9768_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9769{
9770 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9771 (__v8di) __Y, 1,
9772 (__mmask8) __M);
9773}
9774
d256b866
IT
9775extern __inline __mmask8
9776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9777_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9778{
9779 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9780 (__v8di) __Y, 1,
9781 (__mmask8) -1);
9782}
9783
9784extern __inline __mmask16
9785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9786_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9787{
9788 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9789 (__v16si) __Y, 4,
9790 (__mmask16) -1);
9791}
9792
275be1da
IT
9793extern __inline __mmask16
9794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9795_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9796{
9797 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9798 (__v16si) __Y, 4,
9799 (__mmask16) __M);
9800}
9801
9802extern __inline __mmask16
9803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9804_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9805{
9806 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9807 (__v16si) __Y, 4,
9808 (__mmask16) __M);
9809}
9810
d256b866
IT
9811extern __inline __mmask16
9812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9813_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9814{
9815 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9816 (__v16si) __Y, 4,
9817 (__mmask16) -1);
9818}
9819
275be1da
IT
9820extern __inline __mmask8
9821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 9822_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
275be1da
IT
9823{
9824 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9825 (__v8di) __Y, 4,
9826 (__mmask8) __M);
9827}
9828
d256b866
IT
9829extern __inline __mmask8
9830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9831_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9832{
9833 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9834 (__v8di) __Y, 4,
9835 (__mmask8) -1);
9836}
9837
275be1da
IT
9838extern __inline __mmask8
9839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9840_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9841{
9842 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9843 (__v8di) __Y, 4,
9844 (__mmask8) __M);
9845}
9846
d256b866
IT
9847extern __inline __mmask8
9848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9849_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9850{
9851 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9852 (__v8di) __Y, 4,
9853 (__mmask8) -1);
9854}
9855
756c5857
AI
/* Named predicate values.  The literal predicates 1, 2, 4 and 5 passed
   by the cmplt/cmple/cmpneq/cmpge wrappers earlier in this header equal
   LT, LE, NE and GE.  NLT/GE share 0x5 and NLE/GT share 0x6.  */
#define _MM_CMPINT_EQ      0x0
#define _MM_CMPINT_LT      0x1
#define _MM_CMPINT_LE      0x2
#define _MM_CMPINT_UNUSED  0x3
#define _MM_CMPINT_NE      0x4
#define _MM_CMPINT_NLT     0x5
#define _MM_CMPINT_GE      0x5
#define _MM_CMPINT_NLE     0x6
#define _MM_CMPINT_GT      0x6
9865
9866#ifdef __OPTIMIZE__
d8ea3e7c
AS
9867extern __inline __mmask16
9868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9869_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9870{
9871 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9872 (__mmask8) __B);
9873}
9874
9875extern __inline __mmask16
9876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9877_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9878{
9879 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9880 (__mmask8) __B);
9881}
9882
756c5857
AI
9883extern __inline __mmask8
9884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9885_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9886{
9887 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9888 (__v8di) __Y, __P,
9889 (__mmask8) -1);
9890}
9891
9892extern __inline __mmask16
9893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9894_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9895{
9896 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9897 (__v16si) __Y, __P,
9898 (__mmask16) -1);
9899}
9900
9901extern __inline __mmask8
9902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9904{
9905 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9906 (__v8di) __Y, __P,
9907 (__mmask8) -1);
9908}
9909
9910extern __inline __mmask16
9911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9912_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9913{
9914 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9915 (__v16si) __Y, __P,
9916 (__mmask16) -1);
9917}
9918
9919extern __inline __mmask8
9920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9921_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9922 const int __R)
9923{
9924 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9925 (__v8df) __Y, __P,
9926 (__mmask8) -1, __R);
9927}
9928
9929extern __inline __mmask16
9930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9931_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9932{
9933 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9934 (__v16sf) __Y, __P,
9935 (__mmask16) -1, __R);
9936}
9937
9938extern __inline __mmask8
9939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9940_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9941 const int __P)
9942{
9943 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9944 (__v8di) __Y, __P,
9945 (__mmask8) __U);
9946}
9947
9948extern __inline __mmask16
9949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9950_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9951 const int __P)
9952{
9953 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9954 (__v16si) __Y, __P,
9955 (__mmask16) __U);
9956}
9957
9958extern __inline __mmask8
9959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9961 const int __P)
9962{
9963 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9964 (__v8di) __Y, __P,
9965 (__mmask8) __U);
9966}
9967
9968extern __inline __mmask16
9969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9970_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9971 const int __P)
9972{
9973 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9974 (__v16si) __Y, __P,
9975 (__mmask16) __U);
9976}
9977
9978extern __inline __mmask8
9979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9980_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9981 const int __P, const int __R)
9982{
9983 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9984 (__v8df) __Y, __P,
9985 (__mmask8) __U, __R);
9986}
9987
9988extern __inline __mmask16
9989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9990_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9991 const int __P, const int __R)
9992{
9993 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9994 (__v16sf) __Y, __P,
9995 (__mmask16) __U, __R);
9996}
9997
9998extern __inline __mmask8
9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
10001{
10002 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
10003 (__v2df) __Y, __P,
10004 (__mmask8) -1, __R);
10005}
10006
10007extern __inline __mmask8
10008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10009_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
10010 const int __P, const int __R)
10011{
10012 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
10013 (__v2df) __Y, __P,
10014 (__mmask8) __M, __R);
10015}
10016
10017extern __inline __mmask8
10018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10019_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
10020{
10021 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
10022 (__v4sf) __Y, __P,
10023 (__mmask8) -1, __R);
10024}
10025
10026extern __inline __mmask8
10027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10028_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
10029 const int __P, const int __R)
10030{
10031 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
10032 (__v4sf) __Y, __P,
10033 (__mmask8) __M, __R);
10034}
10035
10036#else
d8ea3e7c
AS
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_kshiftlihi with X cast to __mmask16 and Y cast to
   __mmask8.  */
#define _kshiftli_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
10039
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_kshiftrihi with X cast to __mmask16 and Y cast to
   __mmask8.  */
#define _kshiftri_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
10042
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpq512_mask on X and Y with P cast to int and
   (__mmask8) -1 as the mask operand.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
10047
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpd512_mask on X and Y with P cast to int and
   (__mmask16) -1 as the mask operand.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) -1))
756c5857
AI
10052
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpq512_mask on X and Y with P cast to int and
   (__mmask8) -1 as the mask operand.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
10057
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpd512_mask on X and Y with P cast to int and
   (__mmask16) -1 as the mask operand.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) -1))
756c5857 10062
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with P cast to int,
   (__mmask8) -1 as the mask operand and R as the last operand.
   R is parenthesized and cast to int like P, so every macro
   parameter is protected the same way.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     (int) (R)))
10067
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with P cast to int,
   (__mmask16) -1 as the mask operand and R as the last operand.
   R is parenthesized and cast to int like P, so every macro
   parameter is protected the same way.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) -1, \
     (int) (R)))
10072
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpq512_mask on X and Y with P cast to int and M
   cast to __mmask8 as the mask operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) (M)))
756c5857 10077
383321ec
UB
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpd512_mask on X and Y with P cast to int and M
   cast to __mmask16 as the mask operand.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) (M)))
756c5857 10082
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpq512_mask on X and Y with P cast to int and M
   cast to __mmask8 as the mask operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) (M)))
756c5857 10087
383321ec
UB
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_ucmpd512_mask on X and Y with P cast to int and M
   cast to __mmask16 as the mask operand.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (P), \
     (__mmask16) (M)))
756c5857 10092
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with P cast to int, M cast
   to __mmask8 as the mask operand and R as the last operand.
   R is parenthesized and cast to int like P, so every macro
   parameter is protected the same way.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) (M), \
     (int) (R)))
756c5857 10097
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with P cast to int, M cast
   to __mmask16 as the mask operand and R as the last operand.
   R is parenthesized and cast to int like P, so every macro
   parameter is protected the same way.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) (M), \
     (int) (R)))
756c5857 10102
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with P cast to int,
   (__mmask8) -1 as the mask operand and R as the last operand.
   R is parenthesized and cast to int like P, so every macro
   parameter is protected the same way.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     (int) (R)))
10107
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with P cast to int, M as the
   mask operand and R as the last operand.
   M is now cast to __mmask8, matching the inline-function form of
   this intrinsic (which passes (__mmask8) __M) and the other masked
   compare macros.  R is parenthesized and cast to int like P.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) (M), \
     (int) (R)))
10112
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with P cast to int,
   (__mmask8) -1 as the mask operand and R as the last operand.
   R is parenthesized and cast to int like P, so every macro
   parameter is protected the same way.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     (int) (R)))
10117
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with P cast to int, M as the
   mask operand and R as the last operand.
   M is now cast to __mmask8, matching the inline-function form of
   this intrinsic (which passes (__mmask8) __M) and the other masked
   compare macros.  R is parenthesized and cast to int like P.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) (M), \
     (int) (R)))
10122#endif
10123
10124#ifdef __OPTIMIZE__
10125extern __inline __m512
10126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10127_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 10128{
b5fd0b71
JJ
10129 __m512 __v1_old = _mm512_undefined_ps ();
10130 __mmask16 __mask = 0xFFFF;
756c5857 10131
b5fd0b71 10132 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
10133 __addr,
10134 (__v16si) __index,
b5fd0b71 10135 __mask, __scale);
756c5857
AI
10136}
10137
10138extern __inline __m512
10139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 10140_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
0e171d54 10141 __m512i __index, void const *__addr, int __scale)
756c5857 10142{
b5fd0b71 10143 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
10144 __addr,
10145 (__v16si) __index,
10146 __mask, __scale);
10147}
10148
10149extern __inline __m512d
10150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10151_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
756c5857 10152{
b5fd0b71
JJ
10153 __m512d __v1_old = _mm512_undefined_pd ();
10154 __mmask8 __mask = 0xFF;
756c5857 10155
b5fd0b71 10156 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 10157 __addr,
b5fd0b71 10158 (__v8si) __index, __mask,
756c5857
AI
10159 __scale);
10160}
10161
10162extern __inline __m512d
10163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10164_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10165 __m256i __index, void const *__addr, int __scale)
756c5857
AI
10166{
10167 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
10168 __addr,
10169 (__v8si) __index,
10170 __mask, __scale);
10171}
10172
10173extern __inline __m256
10174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10175_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 10176{
b5fd0b71
JJ
10177 __m256 __v1_old = _mm256_undefined_ps ();
10178 __mmask8 __mask = 0xFF;
756c5857 10179
b5fd0b71 10180 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 10181 __addr,
b5fd0b71 10182 (__v8di) __index, __mask,
756c5857
AI
10183 __scale);
10184}
10185
10186extern __inline __m256
10187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10188_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
0e171d54 10189 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10190{
10191 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
10192 __addr,
10193 (__v8di) __index,
10194 __mask, __scale);
10195}
10196
10197extern __inline __m512d
10198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10199_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
756c5857 10200{
b5fd0b71
JJ
10201 __m512d __v1_old = _mm512_undefined_pd ();
10202 __mmask8 __mask = 0xFF;
756c5857 10203
b5fd0b71 10204 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 10205 __addr,
b5fd0b71 10206 (__v8di) __index, __mask,
756c5857
AI
10207 __scale);
10208}
10209
10210extern __inline __m512d
10211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10212_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10213 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10214{
10215 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10216 __addr,
10217 (__v8di) __index,
10218 __mask, __scale);
10219}
10220
10221extern __inline __m512i
10222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10223_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10224{
b5fd0b71
JJ
10225 __m512i __v1_old = _mm512_undefined_epi32 ();
10226 __mmask16 __mask = 0xFFFF;
756c5857 10227
b5fd0b71 10228 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
10229 __addr,
10230 (__v16si) __index,
b5fd0b71 10231 __mask, __scale);
756c5857
AI
10232}
10233
10234extern __inline __m512i
10235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10236_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
0e171d54 10237 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10238{
10239 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10240 __addr,
10241 (__v16si) __index,
10242 __mask, __scale);
10243}
10244
10245extern __inline __m512i
10246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10247_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
756c5857 10248{
b5fd0b71
JJ
10249 __m512i __v1_old = _mm512_undefined_epi32 ();
10250 __mmask8 __mask = 0xFF;
756c5857 10251
b5fd0b71 10252 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 10253 __addr,
b5fd0b71 10254 (__v8si) __index, __mask,
756c5857
AI
10255 __scale);
10256}
10257
10258extern __inline __m512i
10259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10260_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10261 __m256i __index, void const *__addr,
756c5857
AI
10262 int __scale)
10263{
10264 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10265 __addr,
10266 (__v8si) __index,
10267 __mask, __scale);
10268}
10269
10270extern __inline __m256i
10271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10272_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10273{
b5fd0b71
JJ
10274 __m256i __v1_old = _mm256_undefined_si256 ();
10275 __mmask8 __mask = 0xFF;
756c5857 10276
b5fd0b71 10277 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
10278 __addr,
10279 (__v8di) __index,
b5fd0b71 10280 __mask, __scale);
756c5857
AI
10281}
10282
10283extern __inline __m256i
10284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10285_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
0e171d54 10286 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10287{
10288 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10289 __addr,
10290 (__v8di) __index,
10291 __mask, __scale);
10292}
10293
10294extern __inline __m512i
10295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10296_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
756c5857 10297{
b5fd0b71
JJ
10298 __m512i __v1_old = _mm512_undefined_epi32 ();
10299 __mmask8 __mask = 0xFF;
756c5857 10300
b5fd0b71 10301 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 10302 __addr,
b5fd0b71 10303 (__v8di) __index, __mask,
756c5857
AI
10304 __scale);
10305}
10306
10307extern __inline __m512i
10308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10309_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10310 __m512i __index, void const *__addr,
756c5857
AI
10311 int __scale)
10312{
10313 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10314 __addr,
10315 (__v8di) __index,
10316 __mask, __scale);
10317}
10318
10319extern __inline void
10320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10321_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
756c5857
AI
10322{
10323 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10324 (__v16si) __index, (__v16sf) __v1, __scale);
10325}
10326
10327extern __inline void
10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10329_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
756c5857
AI
10330 __m512i __index, __m512 __v1, int __scale)
10331{
10332 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10333 (__v16sf) __v1, __scale);
10334}
10335
10336extern __inline void
10337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10338_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
756c5857
AI
10339 int __scale)
10340{
10341 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10342 (__v8si) __index, (__v8df) __v1, __scale);
10343}
10344
10345extern __inline void
10346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10347_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10348 __m256i __index, __m512d __v1, int __scale)
10349{
10350 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10351 (__v8df) __v1, __scale);
10352}
10353
10354extern __inline void
10355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10356_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
756c5857
AI
10357{
10358 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10359 (__v8di) __index, (__v8sf) __v1, __scale);
10360}
10361
10362extern __inline void
10363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10364_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
756c5857
AI
10365 __m512i __index, __m256 __v1, int __scale)
10366{
10367 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10368 (__v8sf) __v1, __scale);
10369}
10370
10371extern __inline void
10372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10373_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
756c5857
AI
10374 int __scale)
10375{
10376 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10377 (__v8di) __index, (__v8df) __v1, __scale);
10378}
10379
10380extern __inline void
10381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10382_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10383 __m512i __index, __m512d __v1, int __scale)
10384{
10385 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10386 (__v8df) __v1, __scale);
10387}
10388
10389extern __inline void
10390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10391_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10392 __m512i __v1, int __scale)
10393{
10394 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10395 (__v16si) __index, (__v16si) __v1, __scale);
10396}
10397
10398extern __inline void
10399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10400_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
756c5857
AI
10401 __m512i __index, __m512i __v1, int __scale)
10402{
10403 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10404 (__v16si) __v1, __scale);
10405}
10406
10407extern __inline void
10408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10409_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
756c5857
AI
10410 __m512i __v1, int __scale)
10411{
10412 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10413 (__v8si) __index, (__v8di) __v1, __scale);
10414}
10415
10416extern __inline void
10417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10418_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10419 __m256i __index, __m512i __v1, int __scale)
10420{
10421 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10422 (__v8di) __v1, __scale);
10423}
10424
10425extern __inline void
10426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10427_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10428 __m256i __v1, int __scale)
10429{
10430 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10431 (__v8di) __index, (__v8si) __v1, __scale);
10432}
10433
10434extern __inline void
10435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10436_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
756c5857
AI
10437 __m512i __index, __m256i __v1, int __scale)
10438{
10439 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10440 (__v8si) __v1, __scale);
10441}
10442
10443extern __inline void
10444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10445_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
756c5857
AI
10446 __m512i __v1, int __scale)
10447{
10448 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10449 (__v8di) __index, (__v8di) __v1, __scale);
10450}
10451
10452extern __inline void
10453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10454_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10455 __m512i __index, __m512i __v1, int __scale)
10456{
10457 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10458 (__v8di) __v1, __scale);
10459}
10460#else
/* Macro forms of the gather intrinsics.  Each one expands to a single
   call of the matching __builtin_ia32_gather* builtin; the unmasked
   variants supply an undefined source vector and an all-ones mask
   constant.

   Every expansion is wrapped in an outer pair of parentheses.  Without
   them the expansion is a bare cast expression, so a postfix operator
   written after the macro invocation (for example a subscript) would
   bind to the builtin call before the cast is applied.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) 0xFFFF, \
                                          (int) (SCALE)))

#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) (MASK), \
                                          (int) (SCALE)))

#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))

#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))

#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))

#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si) _mm512_undefined_epi32 (), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) 0xFFFF, \
                                           (int) (SCALE)))

#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), \
                                           (int) (SCALE)))

#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))

#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))

#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si) _mm256_undefined_si256 (), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) 0xFF, \
                                           (int) (SCALE)))

#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))

#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di) _mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))

#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
756c5857
AI
10566
/* Macro forms of the scatter intrinsics.  Each one expands to a single
   call of the matching __builtin_ia32_scatter* builtin; the unmasked
   variants supply an all-ones mask constant.

   The mask cast matches the mask type taken by the inline-function
   form of the same intrinsic: __mmask16 for the two sixteen-lane
   scatters (siv16sf, siv16si) and __mmask8 for all the others.  That
   includes _mm512_mask_i64scatter_ps, whose function form takes an
   __mmask8.  Expansions are fully parenthesized, like the insert
   macros in this header.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) 0xFFFF, \
                                  (__v16si)(__m512i) (INDEX), \
                                  (__v16sf)(__m512) (V1), (int) (SCALE)))

#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
                                  (__v16si)(__m512i) (INDEX), \
                                  (__v16sf)(__m512) (V1), (int) (SCALE)))

#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8si)(__m256i) (INDEX), \
                                 (__v8df)(__m512d) (V1), (int) (SCALE)))

#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8si)(__m256i) (INDEX), \
                                 (__v8df)(__m512d) (V1), (int) (SCALE)))

#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v8di)(__m512i) (INDEX), \
                                  (__v8sf)(__m256) (V1), (int) (SCALE)))

#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v8di)(__m512i) (INDEX), \
                                  (__v8sf)(__m256) (V1), (int) (SCALE)))

#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8df)(__m512d) (V1), (int) (SCALE)))

#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8df)(__m512d) (V1), (int) (SCALE)))

#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) 0xFFFF, \
                                  (__v16si)(__m512i) (INDEX), \
                                  (__v16si)(__m512i) (V1), (int) (SCALE)))

#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
                                  (__v16si)(__m512i) (INDEX), \
                                  (__v16si)(__m512i) (V1), (int) (SCALE)))

#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8si)(__m256i) (INDEX), \
                                 (__v8di)(__m512i) (V1), (int) (SCALE)))

#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8si)(__m256i) (INDEX), \
                                 (__v8di)(__m512i) (V1), (int) (SCALE)))

#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v8di)(__m512i) (INDEX), \
                                  (__v8si)(__m256i) (V1), (int) (SCALE)))

#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v8di)(__m512i) (INDEX), \
                                  (__v8si)(__m256i) (V1), (int) (SCALE)))

#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8di)(__m512i) (V1), (int) (SCALE)))

#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  (__builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8di)(__m512i) (V1), (int) (SCALE)))
756c5857
AI
10646#endif
10647
10648extern __inline __m512d
10649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10650_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10651{
10652 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10653 (__v8df) __W,
10654 (__mmask8) __U);
10655}
10656
10657extern __inline __m512d
10658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10659_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10660{
10661 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10662 (__v8df)
10663 _mm512_setzero_pd (),
10664 (__mmask8) __U);
10665}
10666
10667extern __inline void
10668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10669_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10670{
10671 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10672 (__mmask8) __U);
10673}
10674
10675extern __inline __m512
10676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10677_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10678{
10679 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10680 (__v16sf) __W,
10681 (__mmask16) __U);
10682}
10683
10684extern __inline __m512
10685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10687{
10688 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10689 (__v16sf)
10690 _mm512_setzero_ps (),
10691 (__mmask16) __U);
10692}
10693
10694extern __inline void
10695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10696_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10697{
10698 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10699 (__mmask16) __U);
10700}
10701
10702extern __inline __m512i
10703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10704_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10705{
10706 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10707 (__v8di) __W,
10708 (__mmask8) __U);
10709}
10710
10711extern __inline __m512i
10712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10714{
10715 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10716 (__v8di)
10717 _mm512_setzero_si512 (),
10718 (__mmask8) __U);
10719}
10720
10721extern __inline void
10722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10723_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10724{
10725 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10726 (__mmask8) __U);
10727}
10728
10729extern __inline __m512i
10730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10732{
10733 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10734 (__v16si) __W,
10735 (__mmask16) __U);
10736}
10737
10738extern __inline __m512i
10739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10740_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10741{
10742 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10743 (__v16si)
10744 _mm512_setzero_si512 (),
10745 (__mmask16) __U);
10746}
10747
10748extern __inline void
10749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10750_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10751{
10752 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10753 (__mmask16) __U);
10754}
10755
10756extern __inline __m512d
10757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10758_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10759{
10760 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10761 (__v8df) __W,
10762 (__mmask8) __U);
10763}
10764
10765extern __inline __m512d
10766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10767_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10768{
10769 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10770 (__v8df)
10771 _mm512_setzero_pd (),
10772 (__mmask8) __U);
10773}
10774
10775extern __inline __m512d
10776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10778{
10779 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10780 (__v8df) __W,
10781 (__mmask8) __U);
10782}
10783
10784extern __inline __m512d
10785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10786_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10787{
10788 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10789 (__v8df)
10790 _mm512_setzero_pd (),
10791 (__mmask8) __U);
10792}
10793
10794extern __inline __m512
10795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10796_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10797{
10798 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10799 (__v16sf) __W,
10800 (__mmask16) __U);
10801}
10802
10803extern __inline __m512
10804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10805_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10806{
10807 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10808 (__v16sf)
10809 _mm512_setzero_ps (),
10810 (__mmask16) __U);
10811}
10812
10813extern __inline __m512
10814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10815_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10816{
10817 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10818 (__v16sf) __W,
10819 (__mmask16) __U);
10820}
10821
10822extern __inline __m512
10823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10824_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10825{
10826 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10827 (__v16sf)
10828 _mm512_setzero_ps (),
10829 (__mmask16) __U);
10830}
10831
10832extern __inline __m512i
10833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10834_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10835{
10836 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10837 (__v8di) __W,
10838 (__mmask8) __U);
10839}
10840
10841extern __inline __m512i
10842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10844{
10845 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10846 (__v8di)
10847 _mm512_setzero_si512 (),
10848 (__mmask8) __U);
10849}
10850
10851extern __inline __m512i
10852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10854{
10855 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10856 (__v8di) __W,
10857 (__mmask8) __U);
10858}
10859
10860extern __inline __m512i
10861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10862_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10863{
10864 return (__m512i)
10865 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10866 (__v8di)
10867 _mm512_setzero_si512 (),
10868 (__mmask8) __U);
10869}
10870
10871extern __inline __m512i
10872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10874{
10875 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10876 (__v16si) __W,
10877 (__mmask16) __U);
10878}
10879
10880extern __inline __m512i
10881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10882_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10883{
10884 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10885 (__v16si)
10886 _mm512_setzero_si512 (),
10887 (__mmask16) __U);
10888}
10889
10890extern __inline __m512i
10891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10892_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10893{
10894 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10895 (__v16si) __W,
10896 (__mmask16) __U);
10897}
10898
10899extern __inline __m512i
10900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10901_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10902{
10903 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10904 (__v16si)
10905 _mm512_setzero_si512
10906 (), (__mmask16) __U);
10907}
10908
10909/* Mask arithmetic operations */
6901ea62
AS
/* _k*_mask16 spellings are plain aliases of the _mm512_k* functions
   defined further down in this header.  */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
10916
dea06111
AS
10917extern __inline unsigned char
10918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10919_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10920{
10921 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10922 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10923}
10924
10925extern __inline unsigned char
10926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10927_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10928{
10929 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10930 (__mmask16) __B);
10931}
10932
10933extern __inline unsigned char
10934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10935_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10936{
10937 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10938 (__mmask16) __B);
10939}
10940
7cdb6e4c
AS
10941extern __inline unsigned int
10942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943_cvtmask16_u32 (__mmask16 __A)
10944{
10945 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10946}
10947
10948extern __inline __mmask16
10949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10950_cvtu32_mask16 (unsigned int __A)
10951{
10952 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10953}
10954
10955extern __inline __mmask16
10956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10957_load_mask16 (__mmask16 *__A)
10958{
10959 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10960}
10961
10962extern __inline void
10963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10964_store_mask16 (__mmask16 *__A, __mmask16 __B)
10965{
10966 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10967}
10968
756c5857
AI
10969extern __inline __mmask16
10970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10971_mm512_kand (__mmask16 __A, __mmask16 __B)
10972{
10973 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10974}
10975
10976extern __inline __mmask16
10977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10978_mm512_kandn (__mmask16 __A, __mmask16 __B)
10979{
6901ea62
AS
10980 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10981 (__mmask16) __B);
756c5857
AI
10982}
10983
10984extern __inline __mmask16
10985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10986_mm512_kor (__mmask16 __A, __mmask16 __B)
10987{
10988 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10989}
10990
10991extern __inline int
10992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10994{
10995 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10996 (__mmask16) __B);
10997}
10998
10999extern __inline int
11000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11001_mm512_kortestc (__mmask16 __A, __mmask16 __B)
11002{
11003 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
11004 (__mmask16) __B);
11005}
11006
11007extern __inline __mmask16
11008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11009_mm512_kxnor (__mmask16 __A, __mmask16 __B)
11010{
11011 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
11012}
11013
11014extern __inline __mmask16
11015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11016_mm512_kxor (__mmask16 __A, __mmask16 __B)
11017{
11018 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
11019}
11020
11021extern __inline __mmask16
11022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023_mm512_knot (__mmask16 __A)
11024{
11025 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
11026}
11027
11028extern __inline __mmask16
11029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11030_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
11031{
11032 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
11033}
11034
6901ea62
AS
11035extern __inline __mmask16
11036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11037_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
11038{
11039 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
11040}
11041
756c5857
AI
11042#ifdef __OPTIMIZE__
11043extern __inline __m512i
11044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11045_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
11046 const int __imm)
11047{
11048 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
11049 (__v4si) __D,
11050 __imm,
11051 (__v16si)
11052 _mm512_setzero_si512 (),
11053 __B);
11054}
11055
11056extern __inline __m512
11057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11058_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
11059 const int __imm)
11060{
11061 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
11062 (__v4sf) __D,
11063 __imm,
11064 (__v16sf)
11065 _mm512_setzero_ps (), __B);
11066}
11067
11068extern __inline __m512i
11069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11070_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
11071 __m128i __D, const int __imm)
11072{
11073 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
11074 (__v4si) __D,
11075 __imm,
11076 (__v16si) __A,
11077 __B);
11078}
11079
11080extern __inline __m512
11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
11083 __m128 __D, const int __imm)
11084{
11085 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
11086 (__v4sf) __D,
11087 __imm,
11088 (__v16sf) __A, __B);
11089}
11090#else
/* Macro form for builds without __OPTIMIZE__: expands straight to the
   insertf32x4 mask builtin with _mm512_setzero_ps () as the fourth operand
   and M as the mask.  */
#define _mm512_maskz_insertf32x4(M, V, Q, IMM) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (V), \
                                             (__v4sf)(__m128) (Q), \
                                             (int) (IMM), \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(M)))
756c5857
AI
11095
/* Macro form for builds without __OPTIMIZE__: expands straight to the
   inserti32x4 mask builtin with _mm512_setzero_si512 () as the fourth
   operand and M as the mask.  */
#define _mm512_maskz_inserti32x4(M, V, Q, IMM) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (V), \
                                              (__v4si)(__m128i) (Q), \
                                              (int) (IMM), \
                                              (__v16si)_mm512_setzero_si512 (), \
                                              (__mmask16)(M)))
756c5857
AI
11100
/* Macro form for builds without __OPTIMIZE__: expands straight to the
   insertf32x4 mask builtin with W as the fourth operand and M as the
   mask.  */
#define _mm512_mask_insertf32x4(W, M, V, Q, IMM) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (V), \
                                             (__v4sf)(__m128) (Q), \
                                             (int) (IMM), \
                                             (__v16sf)(__m512) (W), \
                                             (__mmask16)(M)))
756c5857
AI
11105
/* Macro form for builds without __OPTIMIZE__: expands straight to the
   inserti32x4 mask builtin with W as the fourth operand and M as the
   mask.  */
#define _mm512_mask_inserti32x4(W, M, V, Q, IMM) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (V), \
                                              (__v4si)(__m128i) (Q), \
                                              (int) (IMM), \
                                              (__v16si)(__m512i) (W), \
                                              (__mmask16)(M)))
756c5857
AI
11110#endif
11111
11112extern __inline __m512i
11113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11114_mm512_max_epi64 (__m512i __A, __m512i __B)
11115{
11116 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11117 (__v8di) __B,
11118 (__v8di)
4271e5cb 11119 _mm512_undefined_epi32 (),
756c5857
AI
11120 (__mmask8) -1);
11121}
11122
11123extern __inline __m512i
11124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11126{
11127 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11128 (__v8di) __B,
11129 (__v8di)
11130 _mm512_setzero_si512 (),
11131 __M);
11132}
11133
11134extern __inline __m512i
11135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11137{
11138 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11139 (__v8di) __B,
11140 (__v8di) __W, __M);
11141}
11142
11143extern __inline __m512i
11144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11145_mm512_min_epi64 (__m512i __A, __m512i __B)
11146{
11147 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11148 (__v8di) __B,
11149 (__v8di)
4271e5cb 11150 _mm512_undefined_epi32 (),
756c5857
AI
11151 (__mmask8) -1);
11152}
11153
11154extern __inline __m512i
11155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11156_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11157{
11158 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11159 (__v8di) __B,
11160 (__v8di) __W, __M);
11161}
11162
11163extern __inline __m512i
11164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11165_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11166{
11167 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11168 (__v8di) __B,
11169 (__v8di)
11170 _mm512_setzero_si512 (),
11171 __M);
11172}
11173
11174extern __inline __m512i
11175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11176_mm512_max_epu64 (__m512i __A, __m512i __B)
11177{
11178 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11179 (__v8di) __B,
11180 (__v8di)
4271e5cb 11181 _mm512_undefined_epi32 (),
756c5857
AI
11182 (__mmask8) -1);
11183}
11184
11185extern __inline __m512i
11186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11187_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11188{
11189 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11190 (__v8di) __B,
11191 (__v8di)
11192 _mm512_setzero_si512 (),
11193 __M);
11194}
11195
11196extern __inline __m512i
11197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11198_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11199{
11200 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11201 (__v8di) __B,
11202 (__v8di) __W, __M);
11203}
11204
11205extern __inline __m512i
11206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11207_mm512_min_epu64 (__m512i __A, __m512i __B)
11208{
11209 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11210 (__v8di) __B,
11211 (__v8di)
4271e5cb 11212 _mm512_undefined_epi32 (),
756c5857
AI
11213 (__mmask8) -1);
11214}
11215
11216extern __inline __m512i
11217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11218_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11219{
11220 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11221 (__v8di) __B,
11222 (__v8di) __W, __M);
11223}
11224
11225extern __inline __m512i
11226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11227_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11228{
11229 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11230 (__v8di) __B,
11231 (__v8di)
11232 _mm512_setzero_si512 (),
11233 __M);
11234}
11235
11236extern __inline __m512i
11237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11238_mm512_max_epi32 (__m512i __A, __m512i __B)
11239{
11240 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11241 (__v16si) __B,
11242 (__v16si)
4271e5cb 11243 _mm512_undefined_epi32 (),
756c5857
AI
11244 (__mmask16) -1);
11245}
11246
11247extern __inline __m512i
11248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11249_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11250{
11251 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11252 (__v16si) __B,
11253 (__v16si)
11254 _mm512_setzero_si512 (),
11255 __M);
11256}
11257
11258extern __inline __m512i
11259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11260_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11261{
11262 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11263 (__v16si) __B,
11264 (__v16si) __W, __M);
11265}
11266
11267extern __inline __m512i
11268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269_mm512_min_epi32 (__m512i __A, __m512i __B)
11270{
11271 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11272 (__v16si) __B,
11273 (__v16si)
4271e5cb 11274 _mm512_undefined_epi32 (),
756c5857
AI
11275 (__mmask16) -1);
11276}
11277
11278extern __inline __m512i
11279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11281{
11282 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11283 (__v16si) __B,
11284 (__v16si)
11285 _mm512_setzero_si512 (),
11286 __M);
11287}
11288
11289extern __inline __m512i
11290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11292{
11293 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11294 (__v16si) __B,
11295 (__v16si) __W, __M);
11296}
11297
11298extern __inline __m512i
11299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11300_mm512_max_epu32 (__m512i __A, __m512i __B)
11301{
11302 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11303 (__v16si) __B,
11304 (__v16si)
4271e5cb 11305 _mm512_undefined_epi32 (),
756c5857
AI
11306 (__mmask16) -1);
11307}
11308
11309extern __inline __m512i
11310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11311_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11312{
11313 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11314 (__v16si) __B,
11315 (__v16si)
11316 _mm512_setzero_si512 (),
11317 __M);
11318}
11319
11320extern __inline __m512i
11321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11322_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11323{
11324 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11325 (__v16si) __B,
11326 (__v16si) __W, __M);
11327}
11328
11329extern __inline __m512i
11330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11331_mm512_min_epu32 (__m512i __A, __m512i __B)
11332{
11333 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11334 (__v16si) __B,
11335 (__v16si)
4271e5cb 11336 _mm512_undefined_epi32 (),
756c5857
AI
11337 (__mmask16) -1);
11338}
11339
11340extern __inline __m512i
11341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11342_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11343{
11344 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11345 (__v16si) __B,
11346 (__v16si)
11347 _mm512_setzero_si512 (),
11348 __M);
11349}
11350
11351extern __inline __m512i
11352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11353_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11354{
11355 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11356 (__v16si) __B,
11357 (__v16si) __W, __M);
11358}
11359
11360extern __inline __m512
11361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11362_mm512_unpacklo_ps (__m512 __A, __m512 __B)
11363{
11364 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11365 (__v16sf) __B,
11366 (__v16sf)
0b192937 11367 _mm512_undefined_ps (),
756c5857
AI
11368 (__mmask16) -1);
11369}
11370
11371extern __inline __m512
11372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11373_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11374{
11375 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11376 (__v16sf) __B,
11377 (__v16sf) __W,
11378 (__mmask16) __U);
11379}
11380
11381extern __inline __m512
11382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11383_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11384{
11385 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11386 (__v16sf) __B,
11387 (__v16sf)
11388 _mm512_setzero_ps (),
11389 (__mmask16) __U);
11390}
11391
075691af
AI
11392#ifdef __OPTIMIZE__
11393extern __inline __m128d
11394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11395_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
11396{
11397 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
11398 (__v2df) __B,
11399 __R);
11400}
11401
f4ee3a9e
UB
11402extern __inline __m128d
11403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11404_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11405 __m128d __B, const int __R)
11406{
11407 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11408 (__v2df) __B,
11409 (__v2df) __W,
11410 (__mmask8) __U, __R);
11411}
11412
11413extern __inline __m128d
11414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11415_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11416 const int __R)
11417{
11418 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11419 (__v2df) __B,
11420 (__v2df)
11421 _mm_setzero_pd (),
11422 (__mmask8) __U, __R);
11423}
11424
075691af
AI
11425extern __inline __m128
11426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
11428{
11429 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
11430 (__v4sf) __B,
11431 __R);
11432}
11433
f4ee3a9e
UB
11434extern __inline __m128
11435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11436_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11437 __m128 __B, const int __R)
11438{
11439 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11440 (__v4sf) __B,
11441 (__v4sf) __W,
11442 (__mmask8) __U, __R);
11443}
11444
11445extern __inline __m128
11446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11447_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11448 const int __R)
11449{
11450 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11451 (__v4sf) __B,
11452 (__v4sf)
11453 _mm_setzero_ps (),
11454 (__mmask8) __U, __R);
11455}
11456
075691af
AI
11457extern __inline __m128d
11458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11459_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
11460{
11461 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
11462 (__v2df) __B,
11463 __R);
11464}
11465
f4ee3a9e
UB
11466extern __inline __m128d
11467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11468_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11469 __m128d __B, const int __R)
11470{
11471 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11472 (__v2df) __B,
11473 (__v2df) __W,
11474 (__mmask8) __U, __R);
11475}
11476
11477extern __inline __m128d
11478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11479_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11480 const int __R)
11481{
11482 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11483 (__v2df) __B,
11484 (__v2df)
11485 _mm_setzero_pd (),
11486 (__mmask8) __U, __R);
11487}
11488
075691af
AI
11489extern __inline __m128
11490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11491_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
11492{
11493 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
11494 (__v4sf) __B,
11495 __R);
11496}
11497
f4ee3a9e
UB
11498extern __inline __m128
11499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11500_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11501 __m128 __B, const int __R)
11502{
11503 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11504 (__v4sf) __B,
11505 (__v4sf) __W,
11506 (__mmask8) __U, __R);
11507}
11508
11509extern __inline __m128
11510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11512 const int __R)
11513{
11514 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11515 (__v4sf) __B,
11516 (__v4sf)
11517 _mm_setzero_ps (),
11518 (__mmask8) __U, __R);
11519}
11520
075691af
AI
11521#else
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round ((__v2df)(__m128d) (A), \
                                         (__v2df)(__m128d) (B), \
                                         (int) (C)))
11524
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_mask_max_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((__v2df)(__m128d) (A), \
                                              (__v2df)(__m128d) (B), \
                                              (__v2df)(__m128d) (W), \
                                              (__mmask8) (U), (int) (C)))
11527
/* Macro form for builds without __OPTIMIZE__; _mm_setzero_pd () supplies
   the third operand.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_maskz_max_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((__v2df)(__m128d) (A), \
                                              (__v2df)(__m128d) (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (__mmask8) (U), (int) (C)))
075691af
AI
11530
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round ((__v4sf)(__m128) (A), \
                                        (__v4sf)(__m128) (B), \
                                        (int) (C)))
11533
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_mask_max_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((__v4sf)(__m128) (A), \
                                             (__v4sf)(__m128) (B), \
                                             (__v4sf)(__m128) (W), \
                                             (__mmask8) (U), (int) (C)))
11536
/* Macro form for builds without __OPTIMIZE__; _mm_setzero_ps () supplies
   the third operand.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_maskz_max_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((__v4sf)(__m128) (A), \
                                             (__v4sf)(__m128) (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (__mmask8) (U), (int) (C)))
075691af
AI
11539
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round ((__v2df)(__m128d) (A), \
                                         (__v2df)(__m128d) (B), \
                                         (int) (C)))
11542
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_mask_min_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((__v2df)(__m128d) (A), \
                                              (__v2df)(__m128d) (B), \
                                              (__v2df)(__m128d) (W), \
                                              (__mmask8) (U), (int) (C)))
11545
/* Macro form for builds without __OPTIMIZE__; _mm_setzero_pd () supplies
   the third operand.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_maskz_min_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((__v2df)(__m128d) (A), \
                                              (__v2df)(__m128d) (B), \
                                              (__v2df) _mm_setzero_pd (), \
                                              (__mmask8) (U), (int) (C)))
075691af
AI
11548
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round ((__v4sf)(__m128) (A), \
                                        (__v4sf)(__m128) (B), \
                                        (int) (C)))
11551
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_mask_min_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((__v4sf)(__m128) (A), \
                                             (__v4sf)(__m128) (B), \
                                             (__v4sf)(__m128) (W), \
                                             (__mmask8) (U), (int) (C)))
11554
/* Macro form for builds without __OPTIMIZE__; _mm_setzero_ps () supplies
   the third operand.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_maskz_min_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((__v4sf)(__m128) (A), \
                                             (__v4sf)(__m128) (B), \
                                             (__v4sf) _mm_setzero_ps (), \
                                             (__mmask8) (U), (int) (C)))
11557
075691af
AI
11558#endif
11559
756c5857
AI
11560extern __inline __m512d
11561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11563{
11564 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11565 (__v8df) __W,
11566 (__mmask8) __U);
11567}
11568
11569extern __inline __m512
11570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11571_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11572{
11573 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11574 (__v16sf) __W,
11575 (__mmask16) __U);
11576}
11577
11578extern __inline __m512i
11579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11580_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11581{
11582 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11583 (__v8di) __W,
11584 (__mmask8) __U);
11585}
11586
11587extern __inline __m512i
11588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11590{
11591 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11592 (__v16si) __W,
11593 (__mmask16) __U);
11594}
11595
075691af
AI
11596#ifdef __OPTIMIZE__
11597extern __inline __m128d
11598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11599_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11600{
11601 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11602 (__v2df) __A,
11603 (__v2df) __B,
11604 __R);
11605}
11606
11607extern __inline __m128
11608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11610{
11611 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11612 (__v4sf) __A,
11613 (__v4sf) __B,
11614 __R);
11615}
11616
11617extern __inline __m128d
11618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11619_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11620{
11621 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11622 (__v2df) __A,
11623 -(__v2df) __B,
11624 __R);
11625}
11626
11627extern __inline __m128
11628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11629_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11630{
11631 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11632 (__v4sf) __A,
11633 -(__v4sf) __B,
11634 __R);
11635}
11636
11637extern __inline __m128d
11638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11639_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11640{
11641 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11642 -(__v2df) __A,
11643 (__v2df) __B,
11644 __R);
11645}
11646
11647extern __inline __m128
11648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11649_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11650{
11651 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11652 -(__v4sf) __A,
11653 (__v4sf) __B,
11654 __R);
11655}
11656
11657extern __inline __m128d
11658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11659_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11660{
11661 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11662 -(__v2df) __A,
11663 -(__v2df) __B,
11664 __R);
11665}
11666
11667extern __inline __m128
11668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11669_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11670{
11671 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11672 -(__v4sf) __A,
11673 -(__v4sf) __B,
11674 __R);
11675}
11676#else
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df)(__m128d) (A), \
                                             (__v2df)(__m128d) (B), \
                                             (__v2df)(__m128d) (C), \
                                             (int) (R)))
11679
/* Macro form for builds without __OPTIMIZE__.  The whole expansion is
   parenthesized so a following postfix operator applies to the converted
   result rather than to the builtin call, and each argument is parenthesized
   and converted the same way the inline-function form converts it.  */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf)(__m128) (A), \
                                            (__v4sf)(__m128) (B), \
                                            (__v4sf)(__m128) (C), \
                                            (int) (R)))
11682
/* Macro form for builds without __OPTIMIZE__; the third operand is negated
   before the call.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df)(__m128d) (A), \
                                             (__v2df)(__m128d) (B), \
                                             -(__v2df)(__m128d) (C), \
                                             (int) (R)))
11685
/* Macro form for builds without __OPTIMIZE__; the third operand is negated
   before the call.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf)(__m128) (A), \
                                            (__v4sf)(__m128) (B), \
                                            -(__v4sf)(__m128) (C), \
                                            (int) (R)))
11688
/* Macro form for builds without __OPTIMIZE__; the second operand is negated
   before the call.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df)(__m128d) (A), \
                                             -(__v2df)(__m128d) (B), \
                                             (__v2df)(__m128d) (C), \
                                             (int) (R)))
11691
/* Macro form for builds without __OPTIMIZE__; the second operand is negated
   before the call.  The whole expansion is parenthesized so a following
   postfix operator applies to the converted result rather than to the
   builtin call, and each argument is parenthesized and converted the same
   way the inline-function form converts it.  */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf)(__m128) (A), \
                                            -(__v4sf)(__m128) (B), \
                                            (__v4sf)(__m128) (C), \
                                            (int) (R)))
11694
/* Macro form for builds without __OPTIMIZE__; the second and third operands
   are negated before the call.  The whole expansion is parenthesized so a
   following postfix operator applies to the converted result rather than to
   the builtin call, and each argument is parenthesized and converted the
   same way the inline-function form converts it.  */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df)(__m128d) (A), \
                                             -(__v2df)(__m128d) (B), \
                                             -(__v2df)(__m128d) (C), \
                                             (int) (R)))
11697
/* Macro form for builds without __OPTIMIZE__; the second and third operands
   are negated before the call.  The whole expansion is parenthesized so a
   following postfix operator applies to the converted result rather than to
   the builtin call, and each argument is parenthesized and converted the
   same way the inline-function form converts it.  */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf)(__m128) (A), \
                                            -(__v4sf)(__m128) (B), \
                                            -(__v4sf)(__m128) (C), \
                                            (int) (R)))
11700#endif
11701
5c4ade6d
JJ
11702extern __inline __m128d
11703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11704_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11705{
11706 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11707 (__v2df) __A,
11708 (__v2df) __B,
11709 (__mmask8) __U,
11710 _MM_FROUND_CUR_DIRECTION);
11711}
11712
11713extern __inline __m128
11714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11715_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11716{
11717 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11718 (__v4sf) __A,
11719 (__v4sf) __B,
11720 (__mmask8) __U,
11721 _MM_FROUND_CUR_DIRECTION);
11722}
11723
11724extern __inline __m128d
11725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11726_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11727{
11728 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11729 (__v2df) __A,
11730 (__v2df) __B,
11731 (__mmask8) __U,
11732 _MM_FROUND_CUR_DIRECTION);
11733}
11734
11735extern __inline __m128
11736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11738{
11739 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11740 (__v4sf) __A,
11741 (__v4sf) __B,
11742 (__mmask8) __U,
11743 _MM_FROUND_CUR_DIRECTION);
11744}
11745
11746extern __inline __m128d
11747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11748_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11749{
11750 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11751 (__v2df) __A,
11752 (__v2df) __B,
11753 (__mmask8) __U,
11754 _MM_FROUND_CUR_DIRECTION);
11755}
11756
11757extern __inline __m128
11758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11759_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11760{
11761 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11762 (__v4sf) __A,
11763 (__v4sf) __B,
11764 (__mmask8) __U,
11765 _MM_FROUND_CUR_DIRECTION);
11766}
11767
11768extern __inline __m128d
11769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11770_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11771{
11772 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11773 (__v2df) __A,
11774 -(__v2df) __B,
11775 (__mmask8) __U,
11776 _MM_FROUND_CUR_DIRECTION);
11777}
11778
11779extern __inline __m128
11780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11781_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11782{
11783 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11784 (__v4sf) __A,
11785 -(__v4sf) __B,
11786 (__mmask8) __U,
11787 _MM_FROUND_CUR_DIRECTION);
11788}
11789
11790extern __inline __m128d
11791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11792_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11793{
11794 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
11795 (__v2df) __A,
11796 (__v2df) __B,
11797 (__mmask8) __U,
11798 _MM_FROUND_CUR_DIRECTION);
11799}
11800
11801extern __inline __m128
11802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11804{
11805 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
11806 (__v4sf) __A,
11807 (__v4sf) __B,
11808 (__mmask8) __U,
11809 _MM_FROUND_CUR_DIRECTION);
11810}
11811
11812extern __inline __m128d
11813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11815{
11816 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11817 (__v2df) __A,
11818 -(__v2df) __B,
11819 (__mmask8) __U,
11820 _MM_FROUND_CUR_DIRECTION);
11821}
11822
11823extern __inline __m128
11824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11825_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11826{
11827 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11828 (__v4sf) __A,
11829 -(__v4sf) __B,
11830 (__mmask8) __U,
11831 _MM_FROUND_CUR_DIRECTION);
11832}
11833
11834extern __inline __m128d
11835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11836_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11837{
11838 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11839 -(__v2df) __A,
11840 (__v2df) __B,
11841 (__mmask8) __U,
11842 _MM_FROUND_CUR_DIRECTION);
11843}
11844
11845extern __inline __m128
11846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11847_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11848{
11849 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11850 -(__v4sf) __A,
11851 (__v4sf) __B,
11852 (__mmask8) __U,
11853 _MM_FROUND_CUR_DIRECTION);
11854}
11855
11856extern __inline __m128d
11857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11859{
11860 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11861 -(__v2df) __A,
11862 (__v2df) __B,
11863 (__mmask8) __U,
11864 _MM_FROUND_CUR_DIRECTION);
11865}
11866
11867extern __inline __m128
11868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11869_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11870{
11871 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11872 -(__v4sf) __A,
11873 (__v4sf) __B,
11874 (__mmask8) __U,
11875 _MM_FROUND_CUR_DIRECTION);
11876}
11877
11878extern __inline __m128d
11879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11880_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11881{
11882 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11883 -(__v2df) __A,
11884 (__v2df) __B,
11885 (__mmask8) __U,
11886 _MM_FROUND_CUR_DIRECTION);
11887}
11888
11889extern __inline __m128
11890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11891_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11892{
11893 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11894 -(__v4sf) __A,
11895 (__v4sf) __B,
11896 (__mmask8) __U,
11897 _MM_FROUND_CUR_DIRECTION);
11898}
11899
11900extern __inline __m128d
11901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11902_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11903{
11904 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11905 -(__v2df) __A,
11906 -(__v2df) __B,
11907 (__mmask8) __U,
11908 _MM_FROUND_CUR_DIRECTION);
11909}
11910
11911extern __inline __m128
11912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11914{
11915 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11916 -(__v4sf) __A,
11917 -(__v4sf) __B,
11918 (__mmask8) __U,
11919 _MM_FROUND_CUR_DIRECTION);
11920}
11921
11922extern __inline __m128d
11923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11924_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11925{
11926 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
11927 -(__v2df) __A,
11928 (__v2df) __B,
11929 (__mmask8) __U,
11930 _MM_FROUND_CUR_DIRECTION);
11931}
11932
11933extern __inline __m128
11934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11935_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11936{
11937 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
11938 -(__v4sf) __A,
11939 (__v4sf) __B,
11940 (__mmask8) __U,
11941 _MM_FROUND_CUR_DIRECTION);
11942}
11943
11944extern __inline __m128d
11945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11946_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11947{
11948 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11949 -(__v2df) __A,
11950 -(__v2df) __B,
11951 (__mmask8) __U,
11952 _MM_FROUND_CUR_DIRECTION);
11953}
11954
11955extern __inline __m128
11956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11957_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11958{
11959 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11960 -(__v4sf) __A,
11961 -(__v4sf) __B,
11962 (__mmask8) __U,
11963 _MM_FROUND_CUR_DIRECTION);
11964}
11965
11966#ifdef __OPTIMIZE__
11967extern __inline __m128d
11968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11969_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
11970 const int __R)
11971{
11972 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11973 (__v2df) __A,
11974 (__v2df) __B,
11975 (__mmask8) __U, __R);
11976}
11977
11978extern __inline __m128
11979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11980_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
11981 const int __R)
11982{
11983 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11984 (__v4sf) __A,
11985 (__v4sf) __B,
11986 (__mmask8) __U, __R);
11987}
11988
11989extern __inline __m128d
11990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11991_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
11992 const int __R)
11993{
11994 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11995 (__v2df) __A,
11996 (__v2df) __B,
11997 (__mmask8) __U, __R);
11998}
11999
12000extern __inline __m128
12001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12002_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12003 const int __R)
12004{
12005 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12006 (__v4sf) __A,
12007 (__v4sf) __B,
12008 (__mmask8) __U, __R);
12009}
12010
12011extern __inline __m128d
12012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12013_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12014 const int __R)
12015{
12016 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12017 (__v2df) __A,
12018 (__v2df) __B,
12019 (__mmask8) __U, __R);
12020}
12021
12022extern __inline __m128
12023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12024_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12025 const int __R)
12026{
12027 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12028 (__v4sf) __A,
12029 (__v4sf) __B,
12030 (__mmask8) __U, __R);
12031}
12032
12033extern __inline __m128d
12034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12035_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12036 const int __R)
12037{
12038 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12039 (__v2df) __A,
12040 -(__v2df) __B,
12041 (__mmask8) __U, __R);
12042}
12043
12044extern __inline __m128
12045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12047 const int __R)
12048{
12049 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12050 (__v4sf) __A,
12051 -(__v4sf) __B,
12052 (__mmask8) __U, __R);
12053}
12054
12055extern __inline __m128d
12056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12057_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12058 const int __R)
12059{
12060 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12061 (__v2df) __A,
12062 (__v2df) __B,
12063 (__mmask8) __U, __R);
12064}
12065
12066extern __inline __m128
12067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12068_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12069 const int __R)
12070{
12071 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12072 (__v4sf) __A,
12073 (__v4sf) __B,
12074 (__mmask8) __U, __R);
12075}
12076
12077extern __inline __m128d
12078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12079_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12080 const int __R)
12081{
12082 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12083 (__v2df) __A,
12084 -(__v2df) __B,
12085 (__mmask8) __U, __R);
12086}
12087
12088extern __inline __m128
12089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12090_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12091 const int __R)
12092{
12093 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12094 (__v4sf) __A,
12095 -(__v4sf) __B,
12096 (__mmask8) __U, __R);
12097}
12098
12099extern __inline __m128d
12100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12101_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12102 const int __R)
12103{
12104 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12105 -(__v2df) __A,
12106 (__v2df) __B,
12107 (__mmask8) __U, __R);
12108}
12109
12110extern __inline __m128
12111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12112_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12113 const int __R)
12114{
12115 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12116 -(__v4sf) __A,
12117 (__v4sf) __B,
12118 (__mmask8) __U, __R);
12119}
12120
12121extern __inline __m128d
12122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12123_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12124 const int __R)
12125{
12126 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
12127 -(__v2df) __A,
12128 (__v2df) __B,
12129 (__mmask8) __U, __R);
12130}
12131
12132extern __inline __m128
12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12135 const int __R)
12136{
12137 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
12138 -(__v4sf) __A,
12139 (__v4sf) __B,
12140 (__mmask8) __U, __R);
12141}
12142
12143extern __inline __m128d
12144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12146 const int __R)
12147{
12148 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12149 -(__v2df) __A,
12150 (__v2df) __B,
12151 (__mmask8) __U, __R);
12152}
12153
12154extern __inline __m128
12155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12157 const int __R)
12158{
12159 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12160 -(__v4sf) __A,
12161 (__v4sf) __B,
12162 (__mmask8) __U, __R);
12163}
12164
12165extern __inline __m128d
12166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12167_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
12168 const int __R)
12169{
12170 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
12171 -(__v2df) __A,
12172 -(__v2df) __B,
12173 (__mmask8) __U, __R);
12174}
12175
12176extern __inline __m128
12177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12178_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
12179 const int __R)
12180{
12181 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
12182 -(__v4sf) __A,
12183 -(__v4sf) __B,
12184 (__mmask8) __U, __R);
12185}
12186
12187extern __inline __m128d
12188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12189_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
12190 const int __R)
12191{
12192 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
12193 -(__v2df) __A,
12194 (__v2df) __B,
12195 (__mmask8) __U, __R);
12196}
12197
12198extern __inline __m128
12199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12200_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
12201 const int __R)
12202{
12203 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
12204 -(__v4sf) __A,
12205 (__v4sf) __B,
12206 (__mmask8) __U, __R);
12207}
12208
12209extern __inline __m128d
12210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12211_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
12212 const int __R)
12213{
12214 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
12215 -(__v2df) __A,
12216 -(__v2df) __B,
12217 (__mmask8) __U, __R);
12218}
12219
12220extern __inline __m128
12221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12222_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
12223 const int __R)
12224{
12225 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
12226 -(__v4sf) __A,
12227 -(__v4sf) __B,
12228 (__mmask8) __U, __R);
12229}
12230#else
/* Macro forms of the masked scalar FMA rounding intrinsics (the #else
   counterpart of the inline definitions above).  Each macro reorders its
   arguments into the (A, B, C, U, R) order the builtin takes; the fmsub,
   fnmadd and fnmsub forms negate B and/or C, except the mask3 fmsub/fnmsub
   forms, which use the dedicated vfmsub*3_mask3 builtins.

   Every expansion is wrapped in parentheses so the cast and the builtin
   call form one primary expression.  Without them a postfix operator
   applied to the macro result (for example a vector subscript) would bind
   to the builtin call before the cast.  */
#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R))

#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R))

#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R))

#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R))

#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R))

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R))

#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R))

#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R))

#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R))

#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R))

#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R))

#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R))

#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R))

#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R))

#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R))

#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R))

#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R))

#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R))

#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R))

#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R))

#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R))

#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R))

#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R))

#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R))
12302#endif
12303
756c5857
AI
12304#ifdef __OPTIMIZE__
12305extern __inline int
12306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12307_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
12308{
12309 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
12310}
12311
12312extern __inline int
12313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12314_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
12315{
12316 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
12317}
12318#else
/* Macro forms used when __OPTIMIZE__ is not defined: the four arguments
   are forwarded unchanged to the vcomiss / vcomisd builtins.  */
#define _mm_comi_round_ss(A, B, C, D) \
  __builtin_ia32_vcomiss (A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D) \
  __builtin_ia32_vcomisd (A, B, C, D)
12323#endif
12324
12325extern __inline __m512d
12326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327_mm512_sqrt_pd (__m512d __A)
12328{
12329 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12330 (__v8df)
0b192937 12331 _mm512_undefined_pd (),
756c5857
AI
12332 (__mmask8) -1,
12333 _MM_FROUND_CUR_DIRECTION);
12334}
12335
12336extern __inline __m512d
12337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12338_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
12339{
12340 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12341 (__v8df) __W,
12342 (__mmask8) __U,
12343 _MM_FROUND_CUR_DIRECTION);
12344}
12345
12346extern __inline __m512d
12347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12348_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
12349{
12350 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
12351 (__v8df)
12352 _mm512_setzero_pd (),
12353 (__mmask8) __U,
12354 _MM_FROUND_CUR_DIRECTION);
12355}
12356
12357extern __inline __m512
12358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12359_mm512_sqrt_ps (__m512 __A)
12360{
12361 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12362 (__v16sf)
0b192937 12363 _mm512_undefined_ps (),
756c5857
AI
12364 (__mmask16) -1,
12365 _MM_FROUND_CUR_DIRECTION);
12366}
12367
12368extern __inline __m512
12369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12370_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
12371{
12372 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12373 (__v16sf) __W,
12374 (__mmask16) __U,
12375 _MM_FROUND_CUR_DIRECTION);
12376}
12377
12378extern __inline __m512
12379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12380_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
12381{
12382 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
12383 (__v16sf)
12384 _mm512_setzero_ps (),
12385 (__mmask16) __U,
12386 _MM_FROUND_CUR_DIRECTION);
12387}
12388
12389extern __inline __m512d
12390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12391_mm512_add_pd (__m512d __A, __m512d __B)
12392{
2069d6fc 12393 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
12394}
12395
12396extern __inline __m512d
12397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12398_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12399{
12400 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
12401 (__v8df) __B,
12402 (__v8df) __W,
12403 (__mmask8) __U,
12404 _MM_FROUND_CUR_DIRECTION);
12405}
12406
12407extern __inline __m512d
12408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
12410{
12411 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
12412 (__v8df) __B,
12413 (__v8df)
12414 _mm512_setzero_pd (),
12415 (__mmask8) __U,
12416 _MM_FROUND_CUR_DIRECTION);
12417}
12418
12419extern __inline __m512
12420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12421_mm512_add_ps (__m512 __A, __m512 __B)
12422{
2069d6fc 12423 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
12424}
12425
12426extern __inline __m512
12427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12428_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12429{
12430 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
12431 (__v16sf) __B,
12432 (__v16sf) __W,
12433 (__mmask16) __U,
12434 _MM_FROUND_CUR_DIRECTION);
12435}
12436
12437extern __inline __m512
12438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12439_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
12440{
12441 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
12442 (__v16sf) __B,
12443 (__v16sf)
12444 _mm512_setzero_ps (),
12445 (__mmask16) __U,
12446 _MM_FROUND_CUR_DIRECTION);
12447}
12448
1853f5c7
SP
12449extern __inline __m128d
12450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12451_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12452{
12453 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
12454 (__v2df) __B,
12455 (__v2df) __W,
12456 (__mmask8) __U,
12457 _MM_FROUND_CUR_DIRECTION);
12458}
12459
12460extern __inline __m128d
12461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12462_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
12463{
12464 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
12465 (__v2df) __B,
12466 (__v2df)
12467 _mm_setzero_pd (),
12468 (__mmask8) __U,
12469 _MM_FROUND_CUR_DIRECTION);
12470}
12471
12472extern __inline __m128
12473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12474_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12475{
12476 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
12477 (__v4sf) __B,
12478 (__v4sf) __W,
12479 (__mmask8) __U,
12480 _MM_FROUND_CUR_DIRECTION);
12481}
12482
12483extern __inline __m128
12484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12485_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
12486{
12487 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
12488 (__v4sf) __B,
12489 (__v4sf)
12490 _mm_setzero_ps (),
12491 (__mmask8) __U,
12492 _MM_FROUND_CUR_DIRECTION);
12493}
12494
756c5857
AI
12495extern __inline __m512d
12496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12497_mm512_sub_pd (__m512d __A, __m512d __B)
12498{
2069d6fc 12499 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
12500}
12501
12502extern __inline __m512d
12503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12504_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12505{
12506 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
12507 (__v8df) __B,
12508 (__v8df) __W,
12509 (__mmask8) __U,
12510 _MM_FROUND_CUR_DIRECTION);
12511}
12512
12513extern __inline __m512d
12514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12515_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
12516{
12517 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
12518 (__v8df) __B,
12519 (__v8df)
12520 _mm512_setzero_pd (),
12521 (__mmask8) __U,
12522 _MM_FROUND_CUR_DIRECTION);
12523}
12524
12525extern __inline __m512
12526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12527_mm512_sub_ps (__m512 __A, __m512 __B)
12528{
2069d6fc 12529 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
12530}
12531
12532extern __inline __m512
12533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12534_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12535{
12536 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
12537 (__v16sf) __B,
12538 (__v16sf) __W,
12539 (__mmask16) __U,
12540 _MM_FROUND_CUR_DIRECTION);
12541}
12542
12543extern __inline __m512
12544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12545_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
12546{
12547 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
12548 (__v16sf) __B,
12549 (__v16sf)
12550 _mm512_setzero_ps (),
12551 (__mmask16) __U,
12552 _MM_FROUND_CUR_DIRECTION);
12553}
12554
1853f5c7
SP
12555extern __inline __m128d
12556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12557_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12558{
12559 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
12560 (__v2df) __B,
12561 (__v2df) __W,
12562 (__mmask8) __U,
12563 _MM_FROUND_CUR_DIRECTION);
12564}
12565
12566extern __inline __m128d
12567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12568_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
12569{
12570 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
12571 (__v2df) __B,
12572 (__v2df)
12573 _mm_setzero_pd (),
12574 (__mmask8) __U,
12575 _MM_FROUND_CUR_DIRECTION);
12576}
12577
12578extern __inline __m128
12579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12580_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12581{
12582 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
12583 (__v4sf) __B,
12584 (__v4sf) __W,
12585 (__mmask8) __U,
12586 _MM_FROUND_CUR_DIRECTION);
12587}
12588
12589extern __inline __m128
12590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12591_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
12592{
12593 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
12594 (__v4sf) __B,
12595 (__v4sf)
12596 _mm_setzero_ps (),
12597 (__mmask8) __U,
12598 _MM_FROUND_CUR_DIRECTION);
12599}
12600
756c5857
AI
12601extern __inline __m512d
12602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12603_mm512_mul_pd (__m512d __A, __m512d __B)
12604{
2069d6fc 12605 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
12606}
12607
12608extern __inline __m512d
12609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12610_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12611{
12612 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
12613 (__v8df) __B,
12614 (__v8df) __W,
12615 (__mmask8) __U,
12616 _MM_FROUND_CUR_DIRECTION);
12617}
12618
12619extern __inline __m512d
12620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12621_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
12622{
12623 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
12624 (__v8df) __B,
12625 (__v8df)
12626 _mm512_setzero_pd (),
12627 (__mmask8) __U,
12628 _MM_FROUND_CUR_DIRECTION);
12629}
12630
12631extern __inline __m512
12632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12633_mm512_mul_ps (__m512 __A, __m512 __B)
12634{
2069d6fc 12635 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
12636}
12637
12638extern __inline __m512
12639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12640_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12641{
12642 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
12643 (__v16sf) __B,
12644 (__v16sf) __W,
12645 (__mmask16) __U,
12646 _MM_FROUND_CUR_DIRECTION);
12647}
12648
12649extern __inline __m512
12650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12651_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
12652{
12653 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
12654 (__v16sf) __B,
12655 (__v16sf)
12656 _mm512_setzero_ps (),
12657 (__mmask16) __U,
12658 _MM_FROUND_CUR_DIRECTION);
12659}
12660
f4ee3a9e
UB
12661extern __inline __m128d
12662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12663_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
12664 __m128d __B)
12665{
12666 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
12667 (__v2df) __B,
12668 (__v2df) __W,
12669 (__mmask8) __U,
12670 _MM_FROUND_CUR_DIRECTION);
12671}
12672
12673extern __inline __m128d
12674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12675_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
12676{
12677 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
12678 (__v2df) __B,
12679 (__v2df)
12680 _mm_setzero_pd (),
12681 (__mmask8) __U,
12682 _MM_FROUND_CUR_DIRECTION);
12683}
12684
12685extern __inline __m128
12686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12687_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
12688 __m128 __B)
12689{
12690 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
12691 (__v4sf) __B,
12692 (__v4sf) __W,
12693 (__mmask8) __U,
12694 _MM_FROUND_CUR_DIRECTION);
12695}
12696
12697extern __inline __m128
12698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12699_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
12700{
12701 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
12702 (__v4sf) __B,
12703 (__v4sf)
12704 _mm_setzero_ps (),
12705 (__mmask8) __U,
12706 _MM_FROUND_CUR_DIRECTION);
12707}
12708
756c5857
AI
12709extern __inline __m512d
12710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12711_mm512_div_pd (__m512d __M, __m512d __V)
12712{
2069d6fc 12713 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
12714}
12715
12716extern __inline __m512d
12717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12718_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
12719{
12720 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
12721 (__v8df) __V,
12722 (__v8df) __W,
12723 (__mmask8) __U,
12724 _MM_FROUND_CUR_DIRECTION);
12725}
12726
12727extern __inline __m512d
12728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12729_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
12730{
12731 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
12732 (__v8df) __V,
12733 (__v8df)
12734 _mm512_setzero_pd (),
12735 (__mmask8) __U,
12736 _MM_FROUND_CUR_DIRECTION);
12737}
12738
12739extern __inline __m512
12740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12741_mm512_div_ps (__m512 __A, __m512 __B)
12742{
2069d6fc 12743 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
12744}
12745
12746extern __inline __m512
12747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12748_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12749{
12750 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
12751 (__v16sf) __B,
12752 (__v16sf) __W,
12753 (__mmask16) __U,
12754 _MM_FROUND_CUR_DIRECTION);
12755}
12756
12757extern __inline __m512
12758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12759_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
12760{
12761 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
12762 (__v16sf) __B,
12763 (__v16sf)
12764 _mm512_setzero_ps (),
12765 (__mmask16) __U,
12766 _MM_FROUND_CUR_DIRECTION);
12767}
12768
f4ee3a9e
UB
12769extern __inline __m128d
12770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12771_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
12772 __m128d __B)
12773{
12774 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
12775 (__v2df) __B,
12776 (__v2df) __W,
12777 (__mmask8) __U,
12778 _MM_FROUND_CUR_DIRECTION);
12779}
12780
12781extern __inline __m128d
12782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12783_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
12784{
12785 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
12786 (__v2df) __B,
12787 (__v2df)
12788 _mm_setzero_pd (),
12789 (__mmask8) __U,
12790 _MM_FROUND_CUR_DIRECTION);
12791}
12792
12793extern __inline __m128
12794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12795_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
12796 __m128 __B)
12797{
12798 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
12799 (__v4sf) __B,
12800 (__v4sf) __W,
12801 (__mmask8) __U,
12802 _MM_FROUND_CUR_DIRECTION);
12803}
12804
12805extern __inline __m128
12806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12807_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
12808{
12809 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
12810 (__v4sf) __B,
12811 (__v4sf)
12812 _mm_setzero_ps (),
12813 (__mmask8) __U,
12814 _MM_FROUND_CUR_DIRECTION);
12815}
12816
756c5857
AI
12817extern __inline __m512d
12818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12819_mm512_max_pd (__m512d __A, __m512d __B)
12820{
12821 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12822 (__v8df) __B,
12823 (__v8df)
0b192937 12824 _mm512_undefined_pd (),
756c5857
AI
12825 (__mmask8) -1,
12826 _MM_FROUND_CUR_DIRECTION);
12827}
12828
12829extern __inline __m512d
12830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12831_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12832{
12833 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12834 (__v8df) __B,
12835 (__v8df) __W,
12836 (__mmask8) __U,
12837 _MM_FROUND_CUR_DIRECTION);
12838}
12839
12840extern __inline __m512d
12841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12842_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
12843{
12844 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12845 (__v8df) __B,
12846 (__v8df)
12847 _mm512_setzero_pd (),
12848 (__mmask8) __U,
12849 _MM_FROUND_CUR_DIRECTION);
12850}
12851
12852extern __inline __m512
12853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12854_mm512_max_ps (__m512 __A, __m512 __B)
12855{
12856 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12857 (__v16sf) __B,
12858 (__v16sf)
0b192937 12859 _mm512_undefined_ps (),
756c5857
AI
12860 (__mmask16) -1,
12861 _MM_FROUND_CUR_DIRECTION);
12862}
12863
12864extern __inline __m512
12865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12866_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12867{
12868 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12869 (__v16sf) __B,
12870 (__v16sf) __W,
12871 (__mmask16) __U,
12872 _MM_FROUND_CUR_DIRECTION);
12873}
12874
12875extern __inline __m512
12876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12877_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
12878{
12879 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12880 (__v16sf) __B,
12881 (__v16sf)
12882 _mm512_setzero_ps (),
12883 (__mmask16) __U,
12884 _MM_FROUND_CUR_DIRECTION);
12885}
12886
dc7401c0
SP
12887extern __inline __m128d
12888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12889_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12890{
12891 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12892 (__v2df) __B,
12893 (__v2df) __W,
12894 (__mmask8) __U,
12895 _MM_FROUND_CUR_DIRECTION);
12896}
12897
12898extern __inline __m128d
12899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12900_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
12901{
12902 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12903 (__v2df) __B,
12904 (__v2df)
12905 _mm_setzero_pd (),
12906 (__mmask8) __U,
12907 _MM_FROUND_CUR_DIRECTION);
12908}
12909
12910extern __inline __m128
12911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12912_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12913{
12914 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12915 (__v4sf) __B,
12916 (__v4sf) __W,
12917 (__mmask8) __U,
12918 _MM_FROUND_CUR_DIRECTION);
12919}
12920
12921extern __inline __m128
12922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12923_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
12924{
12925 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12926 (__v4sf) __B,
12927 (__v4sf)
12928 _mm_setzero_ps (),
12929 (__mmask8) __U,
12930 _MM_FROUND_CUR_DIRECTION);
12931}
12932
756c5857
AI
12933extern __inline __m512d
12934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12935_mm512_min_pd (__m512d __A, __m512d __B)
12936{
12937 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12938 (__v8df) __B,
12939 (__v8df)
0b192937 12940 _mm512_undefined_pd (),
756c5857
AI
12941 (__mmask8) -1,
12942 _MM_FROUND_CUR_DIRECTION);
12943}
12944
12945extern __inline __m512d
12946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12947_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12948{
12949 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12950 (__v8df) __B,
12951 (__v8df) __W,
12952 (__mmask8) __U,
12953 _MM_FROUND_CUR_DIRECTION);
12954}
12955
12956extern __inline __m512d
12957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12958_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
12959{
12960 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12961 (__v8df) __B,
12962 (__v8df)
12963 _mm512_setzero_pd (),
12964 (__mmask8) __U,
12965 _MM_FROUND_CUR_DIRECTION);
12966}
12967
12968extern __inline __m512
12969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12970_mm512_min_ps (__m512 __A, __m512 __B)
12971{
12972 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12973 (__v16sf) __B,
12974 (__v16sf)
0b192937 12975 _mm512_undefined_ps (),
756c5857
AI
12976 (__mmask16) -1,
12977 _MM_FROUND_CUR_DIRECTION);
12978}
12979
12980extern __inline __m512
12981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12982_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12983{
12984 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12985 (__v16sf) __B,
12986 (__v16sf) __W,
12987 (__mmask16) __U,
12988 _MM_FROUND_CUR_DIRECTION);
12989}
12990
12991extern __inline __m512
12992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12993_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
12994{
12995 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12996 (__v16sf) __B,
12997 (__v16sf)
12998 _mm512_setzero_ps (),
12999 (__mmask16) __U,
13000 _MM_FROUND_CUR_DIRECTION);
13001}
13002
dc7401c0
SP
/* mask_ form of the 128-bit "sd" min wrapper: forwards __A and __B to
   __builtin_ia32_minsd_mask_round with __W as the third operand, __U as
   the mask and _MM_FROUND_CUR_DIRECTION as the last operand.
   NOTE(review): which element(s) the mask governs is not visible here --
   confirm against the Intel intrinsics guide.  */
13003extern __inline __m128d
13004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13005_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13006{
13007 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
13008 (__v2df) __B,
13009 (__v2df) __W,
13010 (__mmask8) __U,
13011 _MM_FROUND_CUR_DIRECTION);
13012}
13013
/* maskz_ form: forwards __A and __B to __builtin_ia32_minsd_mask_round
   with _mm_setzero_pd () as the third operand and __U as the mask.
   NOTE(review): presumably a masked-off result element is zero --
   confirm.  */
13014extern __inline __m128d
13015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13016_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
13017{
13018 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
13019 (__v2df) __B,
13020 (__v2df)
13021 _mm_setzero_pd (),
13022 (__mmask8) __U,
13023 _MM_FROUND_CUR_DIRECTION);
13024}
13025
/* mask_ form of the 128-bit "ss" min wrapper: forwards __A and __B to
   __builtin_ia32_minss_mask_round with __W as the third operand, __U as
   the mask and _MM_FROUND_CUR_DIRECTION as the last operand.
   NOTE(review): which element(s) the mask governs is not visible here --
   confirm.  */
13026extern __inline __m128
13027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13028_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13029{
13030 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
13031 (__v4sf) __B,
13032 (__v4sf) __W,
13033 (__mmask8) __U,
13034 _MM_FROUND_CUR_DIRECTION);
13035}
13036
/* maskz_ form: forwards __A and __B to __builtin_ia32_minss_mask_round
   with _mm_setzero_ps () as the third operand and __U as the mask.
   NOTE(review): presumably a masked-off result element is zero --
   confirm.  */
13037extern __inline __m128
13038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13039_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
13040{
13041 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
13042 (__v4sf) __B,
13043 (__v4sf)
13044 _mm_setzero_ps (),
13045 (__mmask8) __U,
13046 _MM_FROUND_CUR_DIRECTION);
13047}
13048
756c5857
AI
/* Unmasked 512-bit double-precision scalef wrapper: forwards __A and __B
   to __builtin_ia32_scalefpd512_mask with the constant mask
   (__mmask8) -1.  The third operand is _mm512_undefined_pd ();
   presumably it is never selected with this mask -- confirm.  */
13049extern __inline __m512d
13050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13051_mm512_scalef_pd (__m512d __A, __m512d __B)
13052{
13053 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13054 (__v8df) __B,
13055 (__v8df)
0b192937 13056 _mm512_undefined_pd (),
756c5857
AI
13057 (__mmask8) -1,
13058 _MM_FROUND_CUR_DIRECTION);
13059}
13060
/* mask_ form: forwards __A and __B to __builtin_ia32_scalefpd512_mask
   with the caller's __W as the third operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13061extern __inline __m512d
13062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13063_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
13064{
13065 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13066 (__v8df) __B,
13067 (__v8df) __W,
13068 (__mmask8) __U,
13069 _MM_FROUND_CUR_DIRECTION);
13070}
13071
/* maskz_ form: forwards __A and __B to __builtin_ia32_scalefpd512_mask
   with _mm512_setzero_pd () as the third operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13072extern __inline __m512d
13073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13074_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
13075{
13076 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
13077 (__v8df) __B,
13078 (__v8df)
13079 _mm512_setzero_pd (),
13080 (__mmask8) __U,
13081 _MM_FROUND_CUR_DIRECTION);
13082}
13083
/* Unmasked 512-bit single-precision scalef wrapper: forwards __A and __B
   to __builtin_ia32_scalefps512_mask with the constant mask
   (__mmask16) -1.  The third operand is _mm512_undefined_ps ();
   presumably it is never selected with this mask -- confirm.  */
13084extern __inline __m512
13085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13086_mm512_scalef_ps (__m512 __A, __m512 __B)
13087{
13088 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13089 (__v16sf) __B,
13090 (__v16sf)
0b192937 13091 _mm512_undefined_ps (),
756c5857
AI
13092 (__mmask16) -1,
13093 _MM_FROUND_CUR_DIRECTION);
13094}
13095
/* mask_ form: forwards __A and __B to __builtin_ia32_scalefps512_mask
   with the caller's __W as the third operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13096extern __inline __m512
13097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13098_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
13099{
13100 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13101 (__v16sf) __B,
13102 (__v16sf) __W,
13103 (__mmask16) __U,
13104 _MM_FROUND_CUR_DIRECTION);
13105}
13106
/* maskz_ form: forwards __A and __B to __builtin_ia32_scalefps512_mask
   with _mm512_setzero_ps () as the third operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13107extern __inline __m512
13108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13109_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
13110{
13111 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
13112 (__v16sf) __B,
13113 (__v16sf)
13114 _mm512_setzero_ps (),
13115 (__mmask16) __U,
13116 _MM_FROUND_CUR_DIRECTION);
13117}
13118
075691af
AI
/* Unmasked 128-bit "sd" scalef wrapper: forwards __A and __B to
   __builtin_ia32_scalefsd_mask_round with the constant mask
   (__mmask8) -1.  Note that the third operand here is
   _mm_setzero_pd (), not an undefined vector as in the 512-bit
   unmasked wrappers.  */
13119extern __inline __m128d
13120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13121_mm_scalef_sd (__m128d __A, __m128d __B)
13122{
158061a6
OM
13123 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
13124 (__v2df) __B,
13125 (__v2df)
13126 _mm_setzero_pd (),
13127 (__mmask8) -1,
13128 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13129}
13130
/* Unmasked 128-bit "ss" scalef wrapper: forwards __A and __B to
   __builtin_ia32_scalefss_mask_round with the constant mask
   (__mmask8) -1.  Note that the third operand here is
   _mm_setzero_ps (), not an undefined vector as in the 512-bit
   unmasked wrappers.  */
13131extern __inline __m128
13132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13133_mm_scalef_ss (__m128 __A, __m128 __B)
13134{
158061a6
OM
13135 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
13136 (__v4sf) __B,
13137 (__v4sf)
13138 _mm_setzero_ps (),
13139 (__mmask8) -1,
13140 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13141}
13142
756c5857
AI
/* Unmasked 512-bit double-precision fmadd wrapper: forwards __A, __B and
   __C, in that order, to __builtin_ia32_vfmaddpd512_mask with the
   constant mask (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION.  */
13143extern __inline __m512d
13144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13145_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13146{
13147 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13148 (__v8df) __B,
13149 (__v8df) __C,
13150 (__mmask8) -1,
13151 _MM_FROUND_CUR_DIRECTION);
13152}
13153
/* mask_ form: same builtin as _mm512_fmadd_pd
   (__builtin_ia32_vfmaddpd512_mask), with the caller's __U as the mask.
   No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm against the Intel intrinsics guide.  */
13154extern __inline __m512d
13155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13156_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13157{
13158 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13159 (__v8df) __B,
13160 (__v8df) __C,
13161 (__mmask8) __U,
13162 _MM_FROUND_CUR_DIRECTION);
13163}
13164
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfmaddpd512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm against the Intel intrinsics guide.  */
13165extern __inline __m512d
13166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13167_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13168{
13169 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
13170 (__v8df) __B,
13171 (__v8df) __C,
13172 (__mmask8) __U,
13173 _MM_FROUND_CUR_DIRECTION);
13174}
13175
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfmaddpd512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13176extern __inline __m512d
13177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13178_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13179{
13180 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
13181 (__v8df) __B,
13182 (__v8df) __C,
13183 (__mmask8) __U,
13184 _MM_FROUND_CUR_DIRECTION);
13185}
13186
/* Unmasked 512-bit single-precision fmadd wrapper: forwards __A, __B and
   __C, in that order, to __builtin_ia32_vfmaddps512_mask with the
   constant mask (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION.  */
13187extern __inline __m512
13188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13189_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13190{
13191 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13192 (__v16sf) __B,
13193 (__v16sf) __C,
13194 (__mmask16) -1,
13195 _MM_FROUND_CUR_DIRECTION);
13196}
13197
/* mask_ form: same builtin as _mm512_fmadd_ps
   (__builtin_ia32_vfmaddps512_mask), with the caller's __U as the mask.
   No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13198extern __inline __m512
13199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13200_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13201{
13202 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13203 (__v16sf) __B,
13204 (__v16sf) __C,
13205 (__mmask16) __U,
13206 _MM_FROUND_CUR_DIRECTION);
13207}
13208
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfmaddps512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13209extern __inline __m512
13210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13211_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13212{
13213 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
13214 (__v16sf) __B,
13215 (__v16sf) __C,
13216 (__mmask16) __U,
13217 _MM_FROUND_CUR_DIRECTION);
13218}
13219
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfmaddps512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13220extern __inline __m512
13221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13222_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13223{
13224 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
13225 (__v16sf) __B,
13226 (__v16sf) __C,
13227 (__mmask16) __U,
13228 _MM_FROUND_CUR_DIRECTION);
13229}
13230
/* Unmasked 512-bit double-precision fmsub wrapper: forwards __A, __B and
   __C unchanged to the dedicated builtin
   __builtin_ia32_vfmsubpd512_mask with the constant mask (__mmask8) -1
   and _MM_FROUND_CUR_DIRECTION.  */
13231extern __inline __m512d
13232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13233_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13234{
fe7f972d 13235 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 13236 (__v8df) __B,
fe7f972d 13237 (__v8df) __C,
756c5857
AI
13238 (__mmask8) -1,
13239 _MM_FROUND_CUR_DIRECTION);
13240}
13241
/* mask_ form: same builtin as _mm512_fmsub_pd
   (__builtin_ia32_vfmsubpd512_mask), with the caller's __U as the mask.
   No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13242extern __inline __m512d
13243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13244_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13245{
fe7f972d 13246 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
756c5857 13247 (__v8df) __B,
fe7f972d 13248 (__v8df) __C,
756c5857
AI
13249 (__mmask8) __U,
13250 _MM_FROUND_CUR_DIRECTION);
13251}
13252
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfmsubpd512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13253extern __inline __m512d
13254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13255_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13256{
13257 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
13258 (__v8df) __B,
13259 (__v8df) __C,
13260 (__mmask8) __U,
13261 _MM_FROUND_CUR_DIRECTION);
13262}
13263
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfmsubpd512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13264extern __inline __m512d
13265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13266_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13267{
fe7f972d 13268 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
756c5857 13269 (__v8df) __B,
fe7f972d 13270 (__v8df) __C,
756c5857
AI
13271 (__mmask8) __U,
13272 _MM_FROUND_CUR_DIRECTION);
13273}
13274
/* Unmasked 512-bit single-precision fmsub wrapper: forwards __A, __B and
   __C unchanged to the dedicated builtin
   __builtin_ia32_vfmsubps512_mask with the constant mask (__mmask16) -1
   and _MM_FROUND_CUR_DIRECTION.  */
13275extern __inline __m512
13276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13277_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13278{
fe7f972d 13279 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 13280 (__v16sf) __B,
fe7f972d 13281 (__v16sf) __C,
756c5857
AI
13282 (__mmask16) -1,
13283 _MM_FROUND_CUR_DIRECTION);
13284}
13285
/* mask_ form: same builtin as _mm512_fmsub_ps
   (__builtin_ia32_vfmsubps512_mask), with the caller's __U as the mask.
   No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13286extern __inline __m512
13287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13288_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13289{
fe7f972d 13290 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
756c5857 13291 (__v16sf) __B,
fe7f972d 13292 (__v16sf) __C,
756c5857
AI
13293 (__mmask16) __U,
13294 _MM_FROUND_CUR_DIRECTION);
13295}
13296
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfmsubps512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13297extern __inline __m512
13298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13299_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13300{
13301 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
13302 (__v16sf) __B,
13303 (__v16sf) __C,
13304 (__mmask16) __U,
13305 _MM_FROUND_CUR_DIRECTION);
13306}
13307
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfmsubps512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13308extern __inline __m512
13309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13310_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13311{
fe7f972d 13312 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
756c5857 13313 (__v16sf) __B,
fe7f972d 13314 (__v16sf) __C,
756c5857
AI
13315 (__mmask16) __U,
13316 _MM_FROUND_CUR_DIRECTION);
13317}
13318
/* Unmasked 512-bit double-precision fmaddsub wrapper: forwards __A, __B
   and __C unchanged to __builtin_ia32_vfmaddsubpd512_mask with the
   constant mask (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION.  */
13319extern __inline __m512d
13320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13321_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
13322{
13323 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13324 (__v8df) __B,
13325 (__v8df) __C,
13326 (__mmask8) -1,
13327 _MM_FROUND_CUR_DIRECTION);
13328}
13329
/* mask_ form: same builtin as _mm512_fmaddsub_pd
   (__builtin_ia32_vfmaddsubpd512_mask), with the caller's __U as the
   mask.  No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13330extern __inline __m512d
13331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13332_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13333{
13334 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13335 (__v8df) __B,
13336 (__v8df) __C,
13337 (__mmask8) __U,
13338 _MM_FROUND_CUR_DIRECTION);
13339}
13340
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfmaddsubpd512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13341extern __inline __m512d
13342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13343_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13344{
13345 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
13346 (__v8df) __B,
13347 (__v8df) __C,
13348 (__mmask8) __U,
13349 _MM_FROUND_CUR_DIRECTION);
13350}
13351
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfmaddsubpd512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13352extern __inline __m512d
13353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13354_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13355{
13356 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13357 (__v8df) __B,
13358 (__v8df) __C,
13359 (__mmask8) __U,
13360 _MM_FROUND_CUR_DIRECTION);
13361}
13362
/* Unmasked 512-bit single-precision fmaddsub wrapper: forwards __A, __B
   and __C unchanged to __builtin_ia32_vfmaddsubps512_mask with the
   constant mask (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION.  */
13363extern __inline __m512
13364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13365_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
13366{
13367 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13368 (__v16sf) __B,
13369 (__v16sf) __C,
13370 (__mmask16) -1,
13371 _MM_FROUND_CUR_DIRECTION);
13372}
13373
/* mask_ form: same builtin as _mm512_fmaddsub_ps
   (__builtin_ia32_vfmaddsubps512_mask), with the caller's __U as the
   mask.  No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13374extern __inline __m512
13375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13376_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13377{
13378 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13379 (__v16sf) __B,
13380 (__v16sf) __C,
13381 (__mmask16) __U,
13382 _MM_FROUND_CUR_DIRECTION);
13383}
13384
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfmaddsubps512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13385extern __inline __m512
13386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13387_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13388{
13389 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
13390 (__v16sf) __B,
13391 (__v16sf) __C,
13392 (__mmask16) __U,
13393 _MM_FROUND_CUR_DIRECTION);
13394}
13395
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfmaddsubps512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13396extern __inline __m512
13397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13398_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13399{
13400 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13401 (__v16sf) __B,
13402 (__v16sf) __C,
13403 (__mmask16) __U,
13404 _MM_FROUND_CUR_DIRECTION);
13405}
13406
/* Unmasked 512-bit double-precision fmsubadd wrapper.  There is no
   dedicated fmsubadd builtin call here: the function reuses
   __builtin_ia32_vfmaddsubpd512_mask and passes __C negated
   (-(__v8df) __C), with the constant mask (__mmask8) -1.  */
13407extern __inline __m512d
13408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13409_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
13410{
13411 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13412 (__v8df) __B,
13413 -(__v8df) __C,
13414 (__mmask8) -1,
13415 _MM_FROUND_CUR_DIRECTION);
13416}
13417
/* mask_ form: reuses __builtin_ia32_vfmaddsubpd512_mask with __C negated
   (-(__v8df) __C) and the caller's __U as the mask.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13418extern __inline __m512d
13419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13420_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13421{
13422 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13423 (__v8df) __B,
13424 -(__v8df) __C,
13425 (__mmask8) __U,
13426 _MM_FROUND_CUR_DIRECTION);
13427}
13428
/* mask3_ form: unlike the other fmsubadd_pd wrappers, this one calls a
   dedicated builtin, __builtin_ia32_vfmsubaddpd512_mask3, and passes
   __C un-negated.
   NOTE(review): presumably this is because masked-off lanes must keep
   the original __C, so it cannot be negated beforehand -- confirm.  */
13429extern __inline __m512d
13430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13431_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13432{
13433 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
13434 (__v8df) __B,
13435 (__v8df) __C,
13436 (__mmask8) __U,
13437 _MM_FROUND_CUR_DIRECTION);
13438}
13439
/* maskz_ form: reuses __builtin_ia32_vfmaddsubpd512_maskz with __C
   negated (-(__v8df) __C) and the caller's __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13440extern __inline __m512d
13441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13442_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13443{
13444 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13445 (__v8df) __B,
13446 -(__v8df) __C,
13447 (__mmask8) __U,
13448 _MM_FROUND_CUR_DIRECTION);
13449}
13450
/* Unmasked 512-bit single-precision fmsubadd wrapper.  There is no
   dedicated fmsubadd builtin call here: the function reuses
   __builtin_ia32_vfmaddsubps512_mask and passes __C negated
   (-(__v16sf) __C), with the constant mask (__mmask16) -1.  */
13451extern __inline __m512
13452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13453_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
13454{
13455 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13456 (__v16sf) __B,
13457 -(__v16sf) __C,
13458 (__mmask16) -1,
13459 _MM_FROUND_CUR_DIRECTION);
13460}
13461
/* mask_ form: reuses __builtin_ia32_vfmaddsubps512_mask with __C negated
   (-(__v16sf) __C) and the caller's __U as the mask.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13462extern __inline __m512
13463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13464_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13465{
13466 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13467 (__v16sf) __B,
13468 -(__v16sf) __C,
13469 (__mmask16) __U,
13470 _MM_FROUND_CUR_DIRECTION);
13471}
13472
/* mask3_ form: unlike the other fmsubadd_ps wrappers, this one calls a
   dedicated builtin, __builtin_ia32_vfmsubaddps512_mask3, and passes
   __C un-negated.
   NOTE(review): presumably this is because masked-off lanes must keep
   the original __C, so it cannot be negated beforehand -- confirm.  */
13473extern __inline __m512
13474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13475_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13476{
13477 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
13478 (__v16sf) __B,
13479 (__v16sf) __C,
13480 (__mmask16) __U,
13481 _MM_FROUND_CUR_DIRECTION);
13482}
13483
/* maskz_ form: reuses __builtin_ia32_vfmaddsubps512_maskz with __C
   negated (-(__v16sf) __C) and the caller's __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13484extern __inline __m512
13485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13486_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13487{
13488 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13489 (__v16sf) __B,
13490 -(__v16sf) __C,
13491 (__mmask16) __U,
13492 _MM_FROUND_CUR_DIRECTION);
13493}
13494
/* Unmasked 512-bit double-precision fnmadd wrapper: forwards __A, __B
   and __C unchanged (no operand is negated here) to the dedicated
   builtin __builtin_ia32_vfnmaddpd512_mask with the constant mask
   (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION.  */
13495extern __inline __m512d
13496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13497_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13498{
5ca94977
L
13499 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13500 (__v8df) __B,
13501 (__v8df) __C,
13502 (__mmask8) -1,
13503 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13504}
13505
/* mask_ form: same builtin as _mm512_fnmadd_pd
   (__builtin_ia32_vfnmaddpd512_mask), with the caller's __U as the
   mask.  No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13506extern __inline __m512d
13507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13508_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13509{
13510 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13511 (__v8df) __B,
13512 (__v8df) __C,
13513 (__mmask8) __U,
13514 _MM_FROUND_CUR_DIRECTION);
13515}
13516
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfnmaddpd512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13517extern __inline __m512d
13518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13519_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13520{
5ca94977
L
13521 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
13522 (__v8df) __B,
13523 (__v8df) __C,
13524 (__mmask8) __U,
13525 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13526}
13527
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfnmaddpd512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13528extern __inline __m512d
13529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13530_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13531{
5ca94977
L
13532 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
13533 (__v8df) __B,
13534 (__v8df) __C,
13535 (__mmask8) __U,
13536 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13537}
13538
/* Unmasked 512-bit single-precision fnmadd wrapper: forwards __A, __B
   and __C unchanged (no operand is negated here) to the dedicated
   builtin __builtin_ia32_vfnmaddps512_mask with the constant mask
   (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION.  */
13539extern __inline __m512
13540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13541_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13542{
5ca94977
L
13543 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13544 (__v16sf) __B,
13545 (__v16sf) __C,
13546 (__mmask16) -1,
13547 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13548}
13549
/* mask_ form: same builtin as _mm512_fnmadd_ps
   (__builtin_ia32_vfnmaddps512_mask), with the caller's __U as the
   mask.  No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13550extern __inline __m512
13551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13552_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13553{
13554 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13555 (__v16sf) __B,
13556 (__v16sf) __C,
13557 (__mmask16) __U,
13558 _MM_FROUND_CUR_DIRECTION);
13559}
13560
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfnmaddps512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13561extern __inline __m512
13562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13563_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13564{
5ca94977
L
13565 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
13566 (__v16sf) __B,
13567 (__v16sf) __C,
13568 (__mmask16) __U,
13569 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13570}
13571
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfnmaddps512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13572extern __inline __m512
13573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13574_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13575{
5ca94977
L
13576 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
13577 (__v16sf) __B,
13578 (__v16sf) __C,
13579 (__mmask16) __U,
13580 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13581}
13582
/* Unmasked 512-bit double-precision fnmsub wrapper: forwards __A, __B
   and __C unchanged (no operand is negated here) to the dedicated
   builtin __builtin_ia32_vfnmsubpd512_mask with the constant mask
   (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION.  */
13583extern __inline __m512d
13584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13585_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13586{
38ef6fb1
L
13587 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13588 (__v8df) __B,
13589 (__v8df) __C,
13590 (__mmask8) -1,
13591 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13592}
13593
/* mask_ form: same builtin as _mm512_fnmsub_pd
   (__builtin_ia32_vfnmsubpd512_mask), with the caller's __U as the
   mask.  No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13594extern __inline __m512d
13595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13596_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13597{
13598 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13599 (__v8df) __B,
13600 (__v8df) __C,
13601 (__mmask8) __U,
13602 _MM_FROUND_CUR_DIRECTION);
13603}
13604
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfnmsubpd512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13605extern __inline __m512d
13606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13607_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13608{
13609 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
13610 (__v8df) __B,
13611 (__v8df) __C,
13612 (__mmask8) __U,
13613 _MM_FROUND_CUR_DIRECTION);
13614}
13615
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfnmsubpd512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13616extern __inline __m512d
13617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13618_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13619{
38ef6fb1
L
13620 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
13621 (__v8df) __B,
13622 (__v8df) __C,
13623 (__mmask8) __U,
13624 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13625}
13626
/* Unmasked 512-bit single-precision fnmsub wrapper: forwards __A, __B
   and __C unchanged (no operand is negated here) to the dedicated
   builtin __builtin_ia32_vfnmsubps512_mask with the constant mask
   (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION.  */
13627extern __inline __m512
13628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13629_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13630{
38ef6fb1
L
13631 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13632 (__v16sf) __B,
13633 (__v16sf) __C,
13634 (__mmask16) -1,
13635 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13636}
13637
/* mask_ form: same builtin as _mm512_fnmsub_ps
   (__builtin_ia32_vfnmsubps512_mask), with the caller's __U as the
   mask.  No separate pass-through vector is passed.
   NOTE(review): presumably masked-off lanes keep the value of __A --
   confirm.  */
13638extern __inline __m512
13639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13640_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13641{
13642 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13643 (__v16sf) __B,
13644 (__v16sf) __C,
13645 (__mmask16) __U,
13646 _MM_FROUND_CUR_DIRECTION);
13647}
13648
/* mask3_ form: forwards __A, __B, __C and the mask __U (last parameter)
   to the distinct builtin __builtin_ia32_vfnmsubps512_mask3.
   NOTE(review): presumably masked-off lanes keep the value of __C --
   confirm.  */
13649extern __inline __m512
13650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13651_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13652{
13653 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
13654 (__v16sf) __B,
13655 (__v16sf) __C,
13656 (__mmask16) __U,
13657 _MM_FROUND_CUR_DIRECTION);
13658}
13659
/* maskz_ form: forwards __A, __B, __C and the mask __U (first parameter)
   to the distinct builtin __builtin_ia32_vfnmsubps512_maskz.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13660extern __inline __m512
13661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13662_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13663{
38ef6fb1
L
13664 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
13665 (__v16sf) __B,
13666 (__v16sf) __C,
13667 (__mmask16) __U,
13668 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13669}
13670
/* Unmasked wrapper taking a __m512d and returning a __m256i: forwards
   __A to __builtin_ia32_cvttpd2dq512_mask with the constant mask
   (__mmask8) -1.  The second operand is _mm256_undefined_si256 ();
   presumably it is never selected with this mask -- confirm.  */
13671extern __inline __m256i
13672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13673_mm512_cvttpd_epi32 (__m512d __A)
13674{
13675 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13676 (__v8si)
0b192937 13677 _mm256_undefined_si256 (),
756c5857
AI
13678 (__mmask8) -1,
13679 _MM_FROUND_CUR_DIRECTION);
13680}
13681
/* mask_ form: forwards __A to __builtin_ia32_cvttpd2dq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13682extern __inline __m256i
13683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13684_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13685{
13686 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13687 (__v8si) __W,
13688 (__mmask8) __U,
13689 _MM_FROUND_CUR_DIRECTION);
13690}
13691
/* maskz_ form: forwards __A to __builtin_ia32_cvttpd2dq512_mask with
   _mm256_setzero_si256 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13692extern __inline __m256i
13693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13694_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
13695{
13696 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13697 (__v8si)
13698 _mm256_setzero_si256 (),
13699 (__mmask8) __U,
13700 _MM_FROUND_CUR_DIRECTION);
13701}
13702
/* Unmasked wrapper taking a __m512d and returning a __m256i: forwards
   __A to __builtin_ia32_cvttpd2udq512_mask with the constant mask
   (__mmask8) -1.  The second operand is _mm256_undefined_si256 (),
   cast to the signed vector type __v8si even for this "epu32" form.  */
13703extern __inline __m256i
13704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13705_mm512_cvttpd_epu32 (__m512d __A)
13706{
13707 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13708 (__v8si)
0b192937 13709 _mm256_undefined_si256 (),
756c5857
AI
13710 (__mmask8) -1,
13711 _MM_FROUND_CUR_DIRECTION);
13712}
13713
/* mask_ form: forwards __A to __builtin_ia32_cvttpd2udq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13714extern __inline __m256i
13715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13716_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13717{
13718 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13719 (__v8si) __W,
13720 (__mmask8) __U,
13721 _MM_FROUND_CUR_DIRECTION);
13722}
13723
/* maskz_ form: forwards __A to __builtin_ia32_cvttpd2udq512_mask with
   _mm256_setzero_si256 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13724extern __inline __m256i
13725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13726_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
13727{
13728 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13729 (__v8si)
13730 _mm256_setzero_si256 (),
13731 (__mmask8) __U,
13732 _MM_FROUND_CUR_DIRECTION);
13733}
13734
/* Unmasked wrapper taking a __m512d and returning a __m256i: forwards
   __A to __builtin_ia32_cvtpd2dq512_mask with the constant mask
   (__mmask8) -1.  The second operand is _mm256_undefined_si256 ();
   presumably it is never selected with this mask -- confirm.  */
13735extern __inline __m256i
13736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13737_mm512_cvtpd_epi32 (__m512d __A)
13738{
13739 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13740 (__v8si)
0b192937 13741 _mm256_undefined_si256 (),
756c5857
AI
13742 (__mmask8) -1,
13743 _MM_FROUND_CUR_DIRECTION);
13744}
13745
/* mask_ form: forwards __A to __builtin_ia32_cvtpd2dq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13746extern __inline __m256i
13747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13748_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13749{
13750 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13751 (__v8si) __W,
13752 (__mmask8) __U,
13753 _MM_FROUND_CUR_DIRECTION);
13754}
13755
/* maskz_ form: forwards __A to __builtin_ia32_cvtpd2dq512_mask with
   _mm256_setzero_si256 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13756extern __inline __m256i
13757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13758_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
13759{
13760 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13761 (__v8si)
13762 _mm256_setzero_si256 (),
13763 (__mmask8) __U,
13764 _MM_FROUND_CUR_DIRECTION);
13765}
13766
/* Unmasked wrapper taking a __m512d and returning a __m256i: forwards
   __A to __builtin_ia32_cvtpd2udq512_mask with the constant mask
   (__mmask8) -1.  The second operand is _mm256_undefined_si256 (),
   cast to the signed vector type __v8si even for this "epu32" form.  */
13767extern __inline __m256i
13768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13769_mm512_cvtpd_epu32 (__m512d __A)
13770{
13771 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13772 (__v8si)
0b192937 13773 _mm256_undefined_si256 (),
756c5857
AI
13774 (__mmask8) -1,
13775 _MM_FROUND_CUR_DIRECTION);
13776}
13777
/* mask_ form: forwards __A to __builtin_ia32_cvtpd2udq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13778extern __inline __m256i
13779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13780_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13781{
13782 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13783 (__v8si) __W,
13784 (__mmask8) __U,
13785 _MM_FROUND_CUR_DIRECTION);
13786}
13787
/* maskz_ form: forwards __A to __builtin_ia32_cvtpd2udq512_mask with
   _mm256_setzero_si256 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13788extern __inline __m256i
13789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13790_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
13791{
13792 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13793 (__v8si)
13794 _mm256_setzero_si256 (),
13795 (__mmask8) __U,
13796 _MM_FROUND_CUR_DIRECTION);
13797}
13798
/* Unmasked wrapper taking a __m512 and returning a __m512i: forwards __A
   to __builtin_ia32_cvttps2dq512_mask with the constant mask
   (__mmask16) -1.  The second operand is _mm512_undefined_epi32 ();
   presumably it is never selected with this mask -- confirm.  */
13799extern __inline __m512i
13800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13801_mm512_cvttps_epi32 (__m512 __A)
13802{
13803 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13804 (__v16si)
4271e5cb 13805 _mm512_undefined_epi32 (),
756c5857
AI
13806 (__mmask16) -1,
13807 _MM_FROUND_CUR_DIRECTION);
13808}
13809
/* mask_ form: forwards __A to __builtin_ia32_cvttps2dq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13810extern __inline __m512i
13811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13812_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13813{
13814 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13815 (__v16si) __W,
13816 (__mmask16) __U,
13817 _MM_FROUND_CUR_DIRECTION);
13818}
13819
/* maskz_ form: forwards __A to __builtin_ia32_cvttps2dq512_mask with
   _mm512_setzero_si512 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13820extern __inline __m512i
13821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13822_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
13823{
13824 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13825 (__v16si)
13826 _mm512_setzero_si512 (),
13827 (__mmask16) __U,
13828 _MM_FROUND_CUR_DIRECTION);
13829}
13830
/* Unmasked wrapper taking a __m512 and returning a __m512i: forwards __A
   to __builtin_ia32_cvttps2udq512_mask with the constant mask
   (__mmask16) -1.  The second operand is _mm512_undefined_epi32 (),
   cast to the signed vector type __v16si even for this "epu32" form.  */
13831extern __inline __m512i
13832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13833_mm512_cvttps_epu32 (__m512 __A)
13834{
13835 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13836 (__v16si)
4271e5cb 13837 _mm512_undefined_epi32 (),
756c5857
AI
13838 (__mmask16) -1,
13839 _MM_FROUND_CUR_DIRECTION);
13840}
13841
/* mask_ form: forwards __A to __builtin_ia32_cvttps2udq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13842extern __inline __m512i
13843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13844_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13845{
13846 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13847 (__v16si) __W,
13848 (__mmask16) __U,
13849 _MM_FROUND_CUR_DIRECTION);
13850}
13851
/* maskz_ form: forwards __A to __builtin_ia32_cvttps2udq512_mask with
   _mm512_setzero_si512 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13852extern __inline __m512i
13853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13854_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
13855{
13856 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13857 (__v16si)
13858 _mm512_setzero_si512 (),
13859 (__mmask16) __U,
13860 _MM_FROUND_CUR_DIRECTION);
13861}
13862
/* Unmasked wrapper taking a __m512 and returning a __m512i: forwards __A
   to __builtin_ia32_cvtps2dq512_mask with the constant mask
   (__mmask16) -1.  The second operand is _mm512_undefined_epi32 ();
   presumably it is never selected with this mask -- confirm.  */
13863extern __inline __m512i
13864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13865_mm512_cvtps_epi32 (__m512 __A)
13866{
13867 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13868 (__v16si)
4271e5cb 13869 _mm512_undefined_epi32 (),
756c5857
AI
13870 (__mmask16) -1,
13871 _MM_FROUND_CUR_DIRECTION);
13872}
13873
/* mask_ form: forwards __A to __builtin_ia32_cvtps2dq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13874extern __inline __m512i
13875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13876_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13877{
13878 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13879 (__v16si) __W,
13880 (__mmask16) __U,
13881 _MM_FROUND_CUR_DIRECTION);
13882}
13883
/* maskz_ form: forwards __A to __builtin_ia32_cvtps2dq512_mask with
   _mm512_setzero_si512 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13884extern __inline __m512i
13885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13886_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
13887{
13888 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13889 (__v16si)
13890 _mm512_setzero_si512 (),
13891 (__mmask16) __U,
13892 _MM_FROUND_CUR_DIRECTION);
13893}
13894
/* Unmasked wrapper taking a __m512 and returning a __m512i: forwards __A
   to __builtin_ia32_cvtps2udq512_mask with the constant mask
   (__mmask16) -1.  The second operand is _mm512_undefined_epi32 (),
   cast to the signed vector type __v16si even for this "epu32" form.  */
13895extern __inline __m512i
13896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13897_mm512_cvtps_epu32 (__m512 __A)
13898{
13899 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13900 (__v16si)
4271e5cb 13901 _mm512_undefined_epi32 (),
756c5857
AI
13902 (__mmask16) -1,
13903 _MM_FROUND_CUR_DIRECTION);
13904}
13905
/* mask_ form: forwards __A to __builtin_ia32_cvtps2udq512_mask with the
   caller's __W as the second operand and __U as the mask.
   NOTE(review): presumably lanes whose bit in __U is clear take their
   value from __W -- confirm.  */
13906extern __inline __m512i
13907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13908_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13909{
13910 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13911 (__v16si) __W,
13912 (__mmask16) __U,
13913 _MM_FROUND_CUR_DIRECTION);
13914}
13915
/* maskz_ form: forwards __A to __builtin_ia32_cvtps2udq512_mask with
   _mm512_setzero_si512 () as the second operand and __U as the mask.
   NOTE(review): presumably masked-off lanes read as zero -- confirm.  */
13916extern __inline __m512i
13917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13918_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
13919{
13920 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13921 (__v16si)
13922 _mm512_setzero_si512 (),
13923 (__mmask16) __U,
13924 _MM_FROUND_CUR_DIRECTION);
13925}
13926
dcb2c527
JJ
/* Returns element 0 of __A as a double, using GNU vector subscripting;
   no builtin is called.  */
13927extern __inline double
13928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13929_mm512_cvtsd_f64 (__m512d __A)
13930{
13931 return __A[0];
13932}
13933
/* Returns element 0 of __A as a float, using GNU vector subscripting;
   no builtin is called.  */
13934extern __inline float
13935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13936_mm512_cvtss_f32 (__m512 __A)
13937{
13938 return __A[0];
13939}
13940
756c5857
AI
13941#ifdef __x86_64__
/* Forwards the vector __A and the unsigned long long __B to
   __builtin_ia32_cvtusi2ss64 with _MM_FROUND_CUR_DIRECTION as the last
   operand.  Only defined when __x86_64__ is defined (enclosing #ifdef).
   NOTE(review): which elements of the result come from __A is not
   visible here -- confirm against the Intel intrinsics guide.  */
13942extern __inline __m128
13943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13944_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
13945{
13946 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
13947 _MM_FROUND_CUR_DIRECTION);
13948}
13949
/* Forwards the vector __A and the unsigned long long __B to
   __builtin_ia32_cvtusi2sd64 with _MM_FROUND_CUR_DIRECTION as the last
   operand.  Only defined when __x86_64__ is defined (enclosing #ifdef).
   NOTE(review): which elements of the result come from __A is not
   visible here -- confirm against the Intel intrinsics guide.  */
13950extern __inline __m128d
13951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13952_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
13953{
13954 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
13955 _MM_FROUND_CUR_DIRECTION);
13956}
13957#endif
13958
/* Forwards the vector __A and the unsigned __B to
   __builtin_ia32_cvtusi2ss32 with _MM_FROUND_CUR_DIRECTION as the last
   operand.  Unlike the 64-bit variants this is not guarded by
   __x86_64__.  */
13959extern __inline __m128
13960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13961_mm_cvtu32_ss (__m128 __A, unsigned __B)
13962{
13963 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
13964 _MM_FROUND_CUR_DIRECTION);
13965}
13966
/* Unmasked wrapper taking a __m512i and returning a __m512: forwards __A
   to __builtin_ia32_cvtdq2ps512_mask with the constant mask
   (__mmask16) -1.  The second operand is _mm512_undefined_ps ();
   presumably it is never selected with this mask -- confirm.  */
13967extern __inline __m512
13968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13969_mm512_cvtepi32_ps (__m512i __A)
13970{
13971 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13972 (__v16sf)
0b192937 13973 _mm512_undefined_ps (),
756c5857
AI
13974 (__mmask16) -1,
13975 _MM_FROUND_CUR_DIRECTION);
13976}
13977
13978extern __inline __m512
13979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13980_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13981{
13982 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13983 (__v16sf) __W,
13984 (__mmask16) __U,
13985 _MM_FROUND_CUR_DIRECTION);
13986}
13987
13988extern __inline __m512
13989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13990_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
13991{
13992 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13993 (__v16sf)
13994 _mm512_setzero_ps (),
13995 (__mmask16) __U,
13996 _MM_FROUND_CUR_DIRECTION);
13997}
13998
13999extern __inline __m512
14000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14001_mm512_cvtepu32_ps (__m512i __A)
14002{
14003 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14004 (__v16sf)
0b192937 14005 _mm512_undefined_ps (),
756c5857
AI
14006 (__mmask16) -1,
14007 _MM_FROUND_CUR_DIRECTION);
14008}
14009
14010extern __inline __m512
14011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14012_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14013{
14014 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14015 (__v16sf) __W,
14016 (__mmask16) __U,
14017 _MM_FROUND_CUR_DIRECTION);
14018}
14019
14020extern __inline __m512
14021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14022_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
14023{
14024 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14025 (__v16sf)
14026 _mm512_setzero_ps (),
14027 (__mmask16) __U,
14028 _MM_FROUND_CUR_DIRECTION);
14029}
14030
14031#ifdef __OPTIMIZE__
14032extern __inline __m512d
14033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14034_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
756c5857 14035{
040d2bba
WX
14036 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
14037 (__v8df) __B,
14038 (__v8di) __C,
756c5857 14039 __imm,
040d2bba 14040 (__mmask8) -1,
756c5857
AI
14041 _MM_FROUND_CUR_DIRECTION);
14042}
14043
14044extern __inline __m512d
14045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14046_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
14047 __m512i __C, const int __imm)
756c5857
AI
14048{
14049 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
040d2bba
WX
14050 (__v8df) __B,
14051 (__v8di) __C,
756c5857
AI
14052 __imm,
14053 (__mmask8) __U,
14054 _MM_FROUND_CUR_DIRECTION);
14055}
14056
14057extern __inline __m512d
14058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14059_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
14060 __m512i __C, const int __imm)
756c5857
AI
14061{
14062 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
040d2bba
WX
14063 (__v8df) __B,
14064 (__v8di) __C,
756c5857
AI
14065 __imm,
14066 (__mmask8) __U,
14067 _MM_FROUND_CUR_DIRECTION);
14068}
14069
14070extern __inline __m512
14071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14072_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
756c5857 14073{
040d2bba
WX
14074 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
14075 (__v16sf) __B,
14076 (__v16si) __C,
756c5857 14077 __imm,
040d2bba 14078 (__mmask16) -1,
756c5857
AI
14079 _MM_FROUND_CUR_DIRECTION);
14080}
14081
14082extern __inline __m512
14083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14084_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
14085 __m512i __C, const int __imm)
756c5857
AI
14086{
14087 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
040d2bba
WX
14088 (__v16sf) __B,
14089 (__v16si) __C,
756c5857
AI
14090 __imm,
14091 (__mmask16) __U,
14092 _MM_FROUND_CUR_DIRECTION);
14093}
14094
14095extern __inline __m512
14096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14097_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
14098 __m512i __C, const int __imm)
756c5857
AI
14099{
14100 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
040d2bba
WX
14101 (__v16sf) __B,
14102 (__v16si) __C,
756c5857
AI
14103 __imm,
14104 (__mmask16) __U,
14105 _MM_FROUND_CUR_DIRECTION);
14106}
14107
14108extern __inline __m128d
14109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14110_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
756c5857 14111{
040d2bba
WX
14112 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
14113 (__v2df) __B,
14114 (__v2di) __C, __imm,
14115 (__mmask8) -1,
756c5857
AI
14116 _MM_FROUND_CUR_DIRECTION);
14117}
14118
14119extern __inline __m128d
14120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14121_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
14122 __m128i __C, const int __imm)
756c5857
AI
14123{
14124 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
040d2bba
WX
14125 (__v2df) __B,
14126 (__v2di) __C, __imm,
756c5857
AI
14127 (__mmask8) __U,
14128 _MM_FROUND_CUR_DIRECTION);
14129}
14130
14131extern __inline __m128d
14132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14133_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
14134 __m128i __C, const int __imm)
756c5857
AI
14135{
14136 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
040d2bba
WX
14137 (__v2df) __B,
14138 (__v2di) __C,
756c5857
AI
14139 __imm,
14140 (__mmask8) __U,
14141 _MM_FROUND_CUR_DIRECTION);
14142}
14143
14144extern __inline __m128
14145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 14146_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
756c5857 14147{
040d2bba
WX
14148 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
14149 (__v4sf) __B,
14150 (__v4si) __C, __imm,
14151 (__mmask8) -1,
756c5857
AI
14152 _MM_FROUND_CUR_DIRECTION);
14153}
14154
14155extern __inline __m128
14156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14157_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
14158 __m128i __C, const int __imm)
756c5857
AI
14159{
14160 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
040d2bba
WX
14161 (__v4sf) __B,
14162 (__v4si) __C, __imm,
756c5857
AI
14163 (__mmask8) __U,
14164 _MM_FROUND_CUR_DIRECTION);
14165}
14166
14167extern __inline __m128
14168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
14169_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
14170 __m128i __C, const int __imm)
756c5857
AI
14171{
14172 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
040d2bba
WX
14173 (__v4sf) __B,
14174 (__v4si) __C, __imm,
756c5857
AI
14175 (__mmask8) __U,
14176 _MM_FROUND_CUR_DIRECTION);
14177}
14178#else
040d2bba
WX
/* Macro forms of the fixupimm intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to the same builtin call as the inline function
   of the same name, with every argument parenthesized and cast.  */

/* 512-bit double precision.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), \
     (int)(C), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), \
     (int)(C), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), \
     (int)(C), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

/* 512-bit single precision.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), \
     (int)(C), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), \
     (int)(C), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), \
     (int)(C), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

/* Scalar double precision.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), \
     (int)(C), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), \
     (int)(C), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), \
     (int)(C), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

/* Scalar single precision.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), \
     (int)(C), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), \
     (int)(C), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), \
     (int)(C), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14238#endif
14239
14240#ifdef __x86_64__
14241extern __inline unsigned long long
14242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14243_mm_cvtss_u64 (__m128 __A)
14244{
14245 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
14246 __A,
14247 _MM_FROUND_CUR_DIRECTION);
14248}
14249
14250extern __inline unsigned long long
14251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14252_mm_cvttss_u64 (__m128 __A)
14253{
14254 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
14255 __A,
14256 _MM_FROUND_CUR_DIRECTION);
14257}
14258
14259extern __inline long long
14260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14261_mm_cvttss_i64 (__m128 __A)
14262{
14263 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
14264 _MM_FROUND_CUR_DIRECTION);
14265}
14266#endif /* __x86_64__ */
14267
14268extern __inline unsigned
14269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14270_mm_cvtss_u32 (__m128 __A)
14271{
14272 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
14273 _MM_FROUND_CUR_DIRECTION);
14274}
14275
14276extern __inline unsigned
14277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14278_mm_cvttss_u32 (__m128 __A)
14279{
14280 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
14281 _MM_FROUND_CUR_DIRECTION);
14282}
14283
14284extern __inline int
14285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14286_mm_cvttss_i32 (__m128 __A)
14287{
14288 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
14289 _MM_FROUND_CUR_DIRECTION);
14290}
14291
14292#ifdef __x86_64__
14293extern __inline unsigned long long
14294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14295_mm_cvtsd_u64 (__m128d __A)
14296{
14297 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
14298 __A,
14299 _MM_FROUND_CUR_DIRECTION);
14300}
14301
14302extern __inline unsigned long long
14303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14304_mm_cvttsd_u64 (__m128d __A)
14305{
14306 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
14307 __A,
14308 _MM_FROUND_CUR_DIRECTION);
14309}
14310
14311extern __inline long long
14312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14313_mm_cvttsd_i64 (__m128d __A)
14314{
14315 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
14316 _MM_FROUND_CUR_DIRECTION);
14317}
14318#endif /* __x86_64__ */
14319
14320extern __inline unsigned
14321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14322_mm_cvtsd_u32 (__m128d __A)
14323{
14324 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
14325 _MM_FROUND_CUR_DIRECTION);
14326}
14327
14328extern __inline unsigned
14329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14330_mm_cvttsd_u32 (__m128d __A)
14331{
14332 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
14333 _MM_FROUND_CUR_DIRECTION);
14334}
14335
14336extern __inline int
14337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14338_mm_cvttsd_i32 (__m128d __A)
14339{
14340 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
14341 _MM_FROUND_CUR_DIRECTION);
14342}
14343
14344extern __inline __m512d
14345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14346_mm512_cvtps_pd (__m256 __A)
14347{
14348 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14349 (__v8df)
0b192937 14350 _mm512_undefined_pd (),
756c5857
AI
14351 (__mmask8) -1,
14352 _MM_FROUND_CUR_DIRECTION);
14353}
14354
14355extern __inline __m512d
14356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14357_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
14358{
14359 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14360 (__v8df) __W,
14361 (__mmask8) __U,
14362 _MM_FROUND_CUR_DIRECTION);
14363}
14364
14365extern __inline __m512d
14366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14367_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
14368{
14369 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14370 (__v8df)
14371 _mm512_setzero_pd (),
14372 (__mmask8) __U,
14373 _MM_FROUND_CUR_DIRECTION);
14374}
14375
14376extern __inline __m512
14377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14378_mm512_cvtph_ps (__m256i __A)
14379{
14380 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14381 (__v16sf)
0b192937 14382 _mm512_undefined_ps (),
756c5857
AI
14383 (__mmask16) -1,
14384 _MM_FROUND_CUR_DIRECTION);
14385}
14386
14387extern __inline __m512
14388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14389_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
14390{
14391 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14392 (__v16sf) __W,
14393 (__mmask16) __U,
14394 _MM_FROUND_CUR_DIRECTION);
14395}
14396
14397extern __inline __m512
14398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14399_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
14400{
14401 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14402 (__v16sf)
14403 _mm512_setzero_ps (),
14404 (__mmask16) __U,
14405 _MM_FROUND_CUR_DIRECTION);
14406}
14407
14408extern __inline __m256
14409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14410_mm512_cvtpd_ps (__m512d __A)
14411{
14412 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14413 (__v8sf)
0b192937 14414 _mm256_undefined_ps (),
756c5857
AI
14415 (__mmask8) -1,
14416 _MM_FROUND_CUR_DIRECTION);
14417}
14418
14419extern __inline __m256
14420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14421_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
14422{
14423 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14424 (__v8sf) __W,
14425 (__mmask8) __U,
14426 _MM_FROUND_CUR_DIRECTION);
14427}
14428
14429extern __inline __m256
14430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14431_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
14432{
14433 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14434 (__v8sf)
14435 _mm256_setzero_ps (),
14436 (__mmask8) __U,
14437 _MM_FROUND_CUR_DIRECTION);
14438}
14439
14440#ifdef __OPTIMIZE__
14441extern __inline __m512
14442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14443_mm512_getexp_ps (__m512 __A)
14444{
14445 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14446 (__v16sf)
0b192937 14447 _mm512_undefined_ps (),
756c5857
AI
14448 (__mmask16) -1,
14449 _MM_FROUND_CUR_DIRECTION);
14450}
14451
14452extern __inline __m512
14453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14454_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
14455{
14456 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14457 (__v16sf) __W,
14458 (__mmask16) __U,
14459 _MM_FROUND_CUR_DIRECTION);
14460}
14461
14462extern __inline __m512
14463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14464_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
14465{
14466 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14467 (__v16sf)
14468 _mm512_setzero_ps (),
14469 (__mmask16) __U,
14470 _MM_FROUND_CUR_DIRECTION);
14471}
14472
14473extern __inline __m512d
14474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14475_mm512_getexp_pd (__m512d __A)
14476{
14477 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14478 (__v8df)
0b192937 14479 _mm512_undefined_pd (),
756c5857
AI
14480 (__mmask8) -1,
14481 _MM_FROUND_CUR_DIRECTION);
14482}
14483
14484extern __inline __m512d
14485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14486_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
14487{
14488 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14489 (__v8df) __W,
14490 (__mmask8) __U,
14491 _MM_FROUND_CUR_DIRECTION);
14492}
14493
14494extern __inline __m512d
14495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14496_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
14497{
14498 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14499 (__v8df)
14500 _mm512_setzero_pd (),
14501 (__mmask8) __U,
14502 _MM_FROUND_CUR_DIRECTION);
14503}
14504
075691af
AI
14505extern __inline __m128
14506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14507_mm_getexp_ss (__m128 __A, __m128 __B)
14508{
14509 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
14510 (__v4sf) __B,
14511 _MM_FROUND_CUR_DIRECTION);
14512}
14513
68d872d7
SP
14514extern __inline __m128
14515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14516_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
14517{
14518 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14519 (__v4sf) __B,
14520 (__v4sf) __W,
14521 (__mmask8) __U,
14522 _MM_FROUND_CUR_DIRECTION);
14523}
14524
14525extern __inline __m128
14526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14527_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
14528{
14529 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14530 (__v4sf) __B,
14531 (__v4sf)
14532 _mm_setzero_ps (),
14533 (__mmask8) __U,
14534 _MM_FROUND_CUR_DIRECTION);
14535}
14536
075691af
AI
14537extern __inline __m128d
14538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14539_mm_getexp_sd (__m128d __A, __m128d __B)
14540{
14541 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
14542 (__v2df) __B,
14543 _MM_FROUND_CUR_DIRECTION);
14544}
14545
68d872d7
SP
14546extern __inline __m128d
14547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14548_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
14549{
14550 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14551 (__v2df) __B,
14552 (__v2df) __W,
14553 (__mmask8) __U,
14554 _MM_FROUND_CUR_DIRECTION);
14555}
14556
14557extern __inline __m128d
14558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14559_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
14560{
14561 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14562 (__v2df) __B,
14563 (__v2df)
14564 _mm_setzero_pd (),
14565 (__mmask8) __U,
14566 _MM_FROUND_CUR_DIRECTION);
14567}
14568
756c5857
AI
14569extern __inline __m512d
14570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14571_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
14572 _MM_MANTISSA_SIGN_ENUM __C)
14573{
14574 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14575 (__C << 2) | __B,
0b192937 14576 _mm512_undefined_pd (),
756c5857
AI
14577 (__mmask8) -1,
14578 _MM_FROUND_CUR_DIRECTION);
14579}
14580
14581extern __inline __m512d
14582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14583_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
14584 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14585{
14586 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14587 (__C << 2) | __B,
14588 (__v8df) __W, __U,
14589 _MM_FROUND_CUR_DIRECTION);
14590}
14591
14592extern __inline __m512d
14593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14594_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
14595 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14596{
14597 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14598 (__C << 2) | __B,
14599 (__v8df)
14600 _mm512_setzero_pd (),
14601 __U,
14602 _MM_FROUND_CUR_DIRECTION);
14603}
14604
14605extern __inline __m512
14606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14607_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
14608 _MM_MANTISSA_SIGN_ENUM __C)
14609{
14610 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14611 (__C << 2) | __B,
0b192937 14612 _mm512_undefined_ps (),
756c5857
AI
14613 (__mmask16) -1,
14614 _MM_FROUND_CUR_DIRECTION);
14615}
14616
14617extern __inline __m512
14618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14619_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
14620 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14621{
14622 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14623 (__C << 2) | __B,
14624 (__v16sf) __W, __U,
14625 _MM_FROUND_CUR_DIRECTION);
14626}
14627
14628extern __inline __m512
14629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14630_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
14631 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14632{
14633 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14634 (__C << 2) | __B,
14635 (__v16sf)
14636 _mm512_setzero_ps (),
14637 __U,
14638 _MM_FROUND_CUR_DIRECTION);
14639}
14640
075691af
AI
14641extern __inline __m128d
14642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14643_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
14644 _MM_MANTISSA_SIGN_ENUM __D)
14645{
14646 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
14647 (__v2df) __B,
14648 (__D << 2) | __C,
14649 _MM_FROUND_CUR_DIRECTION);
14650}
14651
68d872d7
SP
14652extern __inline __m128d
14653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14654_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
14655 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14656{
14657 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14658 (__v2df) __B,
14659 (__D << 2) | __C,
14660 (__v2df) __W,
14661 __U,
14662 _MM_FROUND_CUR_DIRECTION);
14663}
14664
14665extern __inline __m128d
14666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14667_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
14668 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14669{
14670 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14671 (__v2df) __B,
14672 (__D << 2) | __C,
14673 (__v2df)
14674 _mm_setzero_pd(),
14675 __U,
14676 _MM_FROUND_CUR_DIRECTION);
14677}
14678
075691af
AI
14679extern __inline __m128
14680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14681_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
14682 _MM_MANTISSA_SIGN_ENUM __D)
14683{
14684 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
14685 (__v4sf) __B,
14686 (__D << 2) | __C,
14687 _MM_FROUND_CUR_DIRECTION);
14688}
14689
68d872d7
SP
14690extern __inline __m128
14691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14692_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
14693 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14694{
14695 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14696 (__v4sf) __B,
14697 (__D << 2) | __C,
14698 (__v4sf) __W,
14699 __U,
14700 _MM_FROUND_CUR_DIRECTION);
14701}
14702
14703extern __inline __m128
14704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14705_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
14706 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14707{
14708 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14709 (__v4sf) __B,
14710 (__D << 2) | __C,
14711 (__v4sf)
14712 _mm_setzero_ps(),
14713 __U,
14714 _MM_FROUND_CUR_DIRECTION);
14715}
14716
756c5857
AI
14717#else
/* Macro forms of the 512-bit getmant intrinsics, used when __OPTIMIZE__
   is not defined.  The immediate is (int)(((C)<<2) | (B)).  */

/* Double precision.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
     (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
     (__v8df)(__m512d)(W), (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
     (__v8df)_mm512_setzero_pd(), (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))

/* Single precision.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
     (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
     (__v16sf)(__m512)(W), (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
     (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar getmant intrinsics, used when __OPTIMIZE__
   is not defined.  The immediate is (int)(((D)<<2) | (C)).  */

/* Double precision.  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (int)(((D)<<2) | (C)), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (int)(((D)<<2) | (C)), (__v2df)(__m128d)(W), \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (int)(((D)<<2) | (C)), (__v2df)_mm_setzero_pd(), \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

/* Single precision.  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (int)(((D)<<2) | (C)), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (int)(((D)<<2) | (C)), (__v4sf)(__m128)(W), \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (int)(((D)<<2) | (C)), (__v4sf)_mm_setzero_ps(), \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14801
/* Macro forms of the scalar getexp intrinsics, used when __OPTIMIZE__ is
   not defined.  Every expansion is wrapped in an outer pair of
   parentheses so that a postfix operator applied to the macro (such as a
   subscript) acts on the cast result rather than on the builtin call, and
   every argument is parenthesized and cast to the operand type that the
   corresponding inline function uses.  */

/* Single precision.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getexp_ss(W, U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ( \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
     (__v4sf)(__m128)(W), (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getexp_ss(U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ( \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
     (__v4sf)_mm_setzero_ps(), (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))

/* Double precision.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getexp_sd(W, U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ( \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
     (__v2df)(__m128d)(W), (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getexp_sd(U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ( \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
     (__v2df)_mm_setzero_pd(), (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
14825
756c5857
AI
/* Macro forms of the 512-bit getexp intrinsics, used when __OPTIMIZE__
   is not defined.  */

/* Single precision.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), (__v16sf)_mm512_undefined_ps(), \
     (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), (__v16sf)(__m512)(W), \
     (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

/* Double precision.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), (__v8df)_mm512_undefined_pd(), \
     (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), (__v8df)(__m512d)(W), \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14849#endif
14850
14851#ifdef __OPTIMIZE__
14852extern __inline __m512
14853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14854_mm512_roundscale_ps (__m512 __A, const int __imm)
14855{
14856 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
14857 (__v16sf)
14858 _mm512_undefined_ps (),
14859 -1,
756c5857
AI
14860 _MM_FROUND_CUR_DIRECTION);
14861}
14862
14863extern __inline __m512
14864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14865_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
14866 const int __imm)
14867{
14868 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
14869 (__v16sf) __A,
14870 (__mmask16) __B,
14871 _MM_FROUND_CUR_DIRECTION);
14872}
14873
14874extern __inline __m512
14875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14876_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
14877{
14878 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
14879 __imm,
14880 (__v16sf)
14881 _mm512_setzero_ps (),
14882 (__mmask16) __A,
14883 _MM_FROUND_CUR_DIRECTION);
14884}
14885
14886extern __inline __m512d
14887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14888_mm512_roundscale_pd (__m512d __A, const int __imm)
14889{
14890 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
14891 (__v8df)
14892 _mm512_undefined_pd (),
14893 -1,
756c5857
AI
14894 _MM_FROUND_CUR_DIRECTION);
14895}
14896
14897extern __inline __m512d
14898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14899_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
14900 const int __imm)
14901{
14902 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
14903 (__v8df) __A,
14904 (__mmask8) __B,
14905 _MM_FROUND_CUR_DIRECTION);
14906}
14907
14908extern __inline __m512d
14909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14910_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
14911{
14912 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
14913 __imm,
14914 (__v8df)
14915 _mm512_setzero_pd (),
14916 (__mmask8) __A,
14917 _MM_FROUND_CUR_DIRECTION);
14918}
14919
075691af
AI
14920extern __inline __m128
14921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14922_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
14923{
a7c4d6d1
HL
14924 return (__m128)
14925 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
14926 (__v4sf) __B, __imm,
14927 (__v4sf)
14928 _mm_setzero_ps (),
14929 (__mmask8) -1,
14930 _MM_FROUND_CUR_DIRECTION);
14931}
14932
14933
14934extern __inline __m128
14935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14936_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
14937 const int __imm)
14938{
14939 return (__m128)
14940 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
14941 (__v4sf) __D, __imm,
14942 (__v4sf) __A,
14943 (__mmask8) __B,
14944 _MM_FROUND_CUR_DIRECTION);
14945}
14946
14947extern __inline __m128
14948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14949_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
14950 const int __imm)
14951{
14952 return (__m128)
14953 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
14954 (__v4sf) __C, __imm,
14955 (__v4sf)
14956 _mm_setzero_ps (),
14957 (__mmask8) __A,
14958 _MM_FROUND_CUR_DIRECTION);
075691af
AI
14959}
14960
14961extern __inline __m128d
14962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14963_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
14964{
a7c4d6d1
HL
14965 return (__m128d)
14966 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
14967 (__v2df) __B, __imm,
14968 (__v2df)
14969 _mm_setzero_pd (),
14970 (__mmask8) -1,
14971 _MM_FROUND_CUR_DIRECTION);
14972}
14973
14974extern __inline __m128d
14975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14976_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
14977 const int __imm)
14978{
14979 return (__m128d)
14980 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
14981 (__v2df) __D, __imm,
14982 (__v2df) __A,
14983 (__mmask8) __B,
14984 _MM_FROUND_CUR_DIRECTION);
14985}
14986
14987extern __inline __m128d
14988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14989_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
14990 const int __imm)
14991{
14992 return (__m128d)
14993 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
14994 (__v2df) __C, __imm,
14995 (__v2df)
14996 _mm_setzero_pd (),
14997 (__mmask8) __A,
14998 _MM_FROUND_CUR_DIRECTION);
075691af
AI
14999}
15000
756c5857
AI
15001#else
/* Macro form used when __OPTIMIZE__ is not defined: A is the input vector,
   B the immediate; third operand undefined, mask all ones.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (A), \
                                            (int) (B), \
                                            (__v16sf) _mm512_undefined_ps (), \
                                            (__mmask16) (-1), \
                                            _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined: C is the input vector,
   D the immediate, A the third operand and B the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (C), (int) (D), \
                                            (__v16sf) (__m512) (A), \
                                            (__mmask16) (B), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: B is the input vector,
   C the immediate, A the mask; third operand is a zero vector.  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (B), (int) (C), \
                                            (__v16sf) _mm512_setzero_ps (), \
                                            (__mmask16) (A), \
                                            _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: A is the input vector,
   B the immediate; third operand undefined, mask all ones.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (A), \
                                             (int) (B), \
                                             (__v8df) _mm512_undefined_pd (), \
                                             (__mmask8) (-1), \
                                             _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro form used when __OPTIMIZE__ is not defined: C is the input vector,
   D the immediate, A the third operand and B the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (C), (int) (D), \
                                             (__v8df) (__m512d) (A), \
                                             (__mmask8) (B), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: B is the input vector,
   C the immediate, A the mask; third operand is a zero vector.  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (B), (int) (C), \
                                             (__v8df) _mm512_setzero_pd (), \
                                             (__mmask8) (A), \
                                             _MM_FROUND_CUR_DIRECTION))
a7c4d6d1
HL
/* Macro form used when __OPTIMIZE__ is not defined: operands A and B,
   immediate I, zero vector as fourth operand, all-ones mask.  */
#define _mm_roundscale_ss(A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: operands B and C,
   immediate I, A as fourth operand, U as mask.  */
#define _mm_mask_roundscale_ss(A, U, B, C, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (B), (__v4sf) (__m128) (C), (int) (I), \
     (__v4sf) (__m128) (A), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: operands A and B,
   immediate I, zero vector as fourth operand, U as mask.  */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_setzero_ps (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: operands A and B,
   immediate I, zero vector as fourth operand, all-ones mask.  */
#define _mm_roundscale_sd(A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: operands B and C,
   immediate I, A as fourth operand, U as mask.  */
#define _mm_mask_roundscale_sd(A, U, B, C, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (B), (__v2df) (__m128d) (C), (int) (I), \
     (__v2df) (__m128d) (A), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: operands A and B,
   immediate I, zero vector as fourth operand, U as mask.  */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_setzero_pd (), (__mmask8) (U), \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
15076#endif
15077
15078#ifdef __OPTIMIZE__
15079extern __inline __mmask8
15080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15081_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
15082{
15083 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15084 (__v8df) __Y, __P,
15085 (__mmask8) -1,
15086 _MM_FROUND_CUR_DIRECTION);
15087}
15088
15089extern __inline __mmask16
15090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15091_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
15092{
15093 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15094 (__v16sf) __Y, __P,
15095 (__mmask16) -1,
15096 _MM_FROUND_CUR_DIRECTION);
15097}
15098
15099extern __inline __mmask16
15100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15101_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
15102{
15103 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15104 (__v16sf) __Y, __P,
15105 (__mmask16) __U,
15106 _MM_FROUND_CUR_DIRECTION);
15107}
15108
15109extern __inline __mmask8
15110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15111_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
15112{
15113 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15114 (__v8df) __Y, __P,
15115 (__mmask8) __U,
15116 _MM_FROUND_CUR_DIRECTION);
15117}
15118
12d69dbf
JJ
15119extern __inline __mmask8
15120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15121_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
15122{
15123 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15124 (__v2df) __Y, __P,
15125 (__mmask8) -1,
15126 _MM_FROUND_CUR_DIRECTION);
15127}
15128
15129extern __inline __mmask8
15130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15131_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
15132{
15133 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15134 (__v2df) __Y, __P,
15135 (__mmask8) __M,
15136 _MM_FROUND_CUR_DIRECTION);
15137}
15138
15139extern __inline __mmask8
15140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15141_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
15142{
15143 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15144 (__v4sf) __Y, __P,
15145 (__mmask8) -1,
15146 _MM_FROUND_CUR_DIRECTION);
15147}
15148
15149extern __inline __mmask8
15150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15151_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
15152{
15153 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15154 (__v4sf) __Y, __P,
15155 (__mmask8) __M,
15156 _MM_FROUND_CUR_DIRECTION);
15157}
15158
15159#else
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with an all-ones mask operand.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1, \
                                            _MM_FROUND_CUR_DIRECTION))
15164
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with an all-ones mask operand.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) -1, \
                                             _MM_FROUND_CUR_DIRECTION))
15169
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with mask operand M.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M), \
                                            _MM_FROUND_CUR_DIRECTION))
15174
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with mask operand M.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), \
                                             (__mmask16) (M), \
                                             _MM_FROUND_CUR_DIRECTION))
15179
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with an all-ones mask operand.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
15184
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with mask operand M.  M is parenthesized and cast to __mmask8
   so this macro treats its mask argument the same way as the inline
   function and the other comparison macros in this header.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
15189
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with an all-ones mask operand.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
15194
/* Macro form used when __OPTIMIZE__ is not defined: compares X and Y under
   predicate P with mask operand M.  M is parenthesized and cast to __mmask8
   so this macro treats its mask argument the same way as the inline
   function and the other comparison macros in this header.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
15199#endif
15200
7e23f4a6
OM
15201extern __inline __mmask8
15202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15203_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
15204{
15205 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15206 (__v8df) __Y, _CMP_EQ_OQ,
15207 (__mmask8) -1,
15208 _MM_FROUND_CUR_DIRECTION);
15209}
15210
15211extern __inline __mmask8
15212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15213_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15214{
15215 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15216 (__v8df) __Y, _CMP_EQ_OQ,
15217 (__mmask8) __U,
15218 _MM_FROUND_CUR_DIRECTION);
15219}
15220
15221extern __inline __mmask8
15222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15223_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
15224{
15225 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15226 (__v8df) __Y, _CMP_LT_OS,
15227 (__mmask8) -1,
15228 _MM_FROUND_CUR_DIRECTION);
15229}
15230
15231extern __inline __mmask8
15232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15233_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15234{
15235 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15236 (__v8df) __Y, _CMP_LT_OS,
15237 (__mmask8) __U,
15238 _MM_FROUND_CUR_DIRECTION);
15239}
15240
15241extern __inline __mmask8
15242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15243_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
15244{
15245 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15246 (__v8df) __Y, _CMP_LE_OS,
15247 (__mmask8) -1,
15248 _MM_FROUND_CUR_DIRECTION);
15249}
15250
15251extern __inline __mmask8
15252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15253_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15254{
15255 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15256 (__v8df) __Y, _CMP_LE_OS,
15257 (__mmask8) __U,
15258 _MM_FROUND_CUR_DIRECTION);
15259}
15260
15261extern __inline __mmask8
15262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15263_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
15264{
15265 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15266 (__v8df) __Y, _CMP_UNORD_Q,
15267 (__mmask8) -1,
15268 _MM_FROUND_CUR_DIRECTION);
15269}
15270
15271extern __inline __mmask8
15272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15273_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15274{
15275 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15276 (__v8df) __Y, _CMP_UNORD_Q,
15277 (__mmask8) __U,
15278 _MM_FROUND_CUR_DIRECTION);
15279}
15280
15281extern __inline __mmask8
15282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15283_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
15284{
15285 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15286 (__v8df) __Y, _CMP_NEQ_UQ,
15287 (__mmask8) -1,
15288 _MM_FROUND_CUR_DIRECTION);
15289}
15290
15291extern __inline __mmask8
15292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15293_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15294{
15295 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15296 (__v8df) __Y, _CMP_NEQ_UQ,
15297 (__mmask8) __U,
15298 _MM_FROUND_CUR_DIRECTION);
15299}
15300
15301extern __inline __mmask8
15302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15303_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
15304{
15305 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15306 (__v8df) __Y, _CMP_NLT_US,
15307 (__mmask8) -1,
15308 _MM_FROUND_CUR_DIRECTION);
15309}
15310
15311extern __inline __mmask8
15312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15313_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15314{
15315 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15316 (__v8df) __Y, _CMP_NLT_US,
15317 (__mmask8) __U,
15318 _MM_FROUND_CUR_DIRECTION);
15319}
15320
15321extern __inline __mmask8
15322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15323_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
15324{
15325 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15326 (__v8df) __Y, _CMP_NLE_US,
15327 (__mmask8) -1,
15328 _MM_FROUND_CUR_DIRECTION);
15329}
15330
15331extern __inline __mmask8
15332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15333_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15334{
15335 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15336 (__v8df) __Y, _CMP_NLE_US,
15337 (__mmask8) __U,
15338 _MM_FROUND_CUR_DIRECTION);
15339}
15340
15341extern __inline __mmask8
15342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15343_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
15344{
15345 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15346 (__v8df) __Y, _CMP_ORD_Q,
15347 (__mmask8) -1,
15348 _MM_FROUND_CUR_DIRECTION);
15349}
15350
15351extern __inline __mmask8
15352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15353_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15354{
15355 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15356 (__v8df) __Y, _CMP_ORD_Q,
15357 (__mmask8) __U,
15358 _MM_FROUND_CUR_DIRECTION);
15359}
15360
15361extern __inline __mmask16
15362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15363_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
15364{
15365 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15366 (__v16sf) __Y, _CMP_EQ_OQ,
15367 (__mmask16) -1,
15368 _MM_FROUND_CUR_DIRECTION);
15369}
15370
15371extern __inline __mmask16
15372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15373_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15374{
15375 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15376 (__v16sf) __Y, _CMP_EQ_OQ,
15377 (__mmask16) __U,
15378 _MM_FROUND_CUR_DIRECTION);
15379}
15380
15381extern __inline __mmask16
15382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15383_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
15384{
15385 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15386 (__v16sf) __Y, _CMP_LT_OS,
15387 (__mmask16) -1,
15388 _MM_FROUND_CUR_DIRECTION);
15389}
15390
15391extern __inline __mmask16
15392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15393_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15394{
15395 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15396 (__v16sf) __Y, _CMP_LT_OS,
15397 (__mmask16) __U,
15398 _MM_FROUND_CUR_DIRECTION);
15399}
15400
15401extern __inline __mmask16
15402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15403_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
15404{
15405 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15406 (__v16sf) __Y, _CMP_LE_OS,
15407 (__mmask16) -1,
15408 _MM_FROUND_CUR_DIRECTION);
15409}
15410
15411extern __inline __mmask16
15412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15413_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15414{
15415 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15416 (__v16sf) __Y, _CMP_LE_OS,
15417 (__mmask16) __U,
15418 _MM_FROUND_CUR_DIRECTION);
15419}
15420
15421extern __inline __mmask16
15422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15423_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
15424{
15425 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15426 (__v16sf) __Y, _CMP_UNORD_Q,
15427 (__mmask16) -1,
15428 _MM_FROUND_CUR_DIRECTION);
15429}
15430
15431extern __inline __mmask16
15432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15433_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15434{
15435 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15436 (__v16sf) __Y, _CMP_UNORD_Q,
15437 (__mmask16) __U,
15438 _MM_FROUND_CUR_DIRECTION);
15439}
15440
15441extern __inline __mmask16
15442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15443_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
15444{
15445 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15446 (__v16sf) __Y, _CMP_NEQ_UQ,
15447 (__mmask16) -1,
15448 _MM_FROUND_CUR_DIRECTION);
15449}
15450
15451extern __inline __mmask16
15452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15453_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15454{
15455 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15456 (__v16sf) __Y, _CMP_NEQ_UQ,
15457 (__mmask16) __U,
15458 _MM_FROUND_CUR_DIRECTION);
15459}
15460
15461extern __inline __mmask16
15462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15463_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
15464{
15465 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15466 (__v16sf) __Y, _CMP_NLT_US,
15467 (__mmask16) -1,
15468 _MM_FROUND_CUR_DIRECTION);
15469}
15470
15471extern __inline __mmask16
15472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15473_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15474{
15475 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15476 (__v16sf) __Y, _CMP_NLT_US,
15477 (__mmask16) __U,
15478 _MM_FROUND_CUR_DIRECTION);
15479}
15480
15481extern __inline __mmask16
15482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15483_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
15484{
15485 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15486 (__v16sf) __Y, _CMP_NLE_US,
15487 (__mmask16) -1,
15488 _MM_FROUND_CUR_DIRECTION);
15489}
15490
15491extern __inline __mmask16
15492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15493_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15494{
15495 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15496 (__v16sf) __Y, _CMP_NLE_US,
15497 (__mmask16) __U,
15498 _MM_FROUND_CUR_DIRECTION);
15499}
15500
15501extern __inline __mmask16
15502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15503_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
15504{
15505 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15506 (__v16sf) __Y, _CMP_ORD_Q,
15507 (__mmask16) -1,
15508 _MM_FROUND_CUR_DIRECTION);
15509}
15510
15511extern __inline __mmask16
15512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15513_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15514{
15515 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15516 (__v16sf) __Y, _CMP_ORD_Q,
15517 (__mmask16) __U,
15518 _MM_FROUND_CUR_DIRECTION);
15519}
15520
2196a885
KY
15521extern __inline __mmask16
15522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15523_mm512_kmov (__mmask16 __A)
15524{
7cdb6e4c 15525 return __builtin_ia32_kmovw (__A);
2196a885
KY
15526}
15527
275be1da
IT
15528extern __inline __m512
15529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15530_mm512_castpd_ps (__m512d __A)
15531{
15532 return (__m512) (__A);
15533}
15534
15535extern __inline __m512i
15536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15537_mm512_castpd_si512 (__m512d __A)
15538{
15539 return (__m512i) (__A);
15540}
15541
15542extern __inline __m512d
15543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15544_mm512_castps_pd (__m512 __A)
15545{
15546 return (__m512d) (__A);
15547}
15548
15549extern __inline __m512i
15550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15551_mm512_castps_si512 (__m512 __A)
15552{
15553 return (__m512i) (__A);
15554}
15555
15556extern __inline __m512
15557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15558_mm512_castsi512_ps (__m512i __A)
15559{
15560 return (__m512) (__A);
15561}
15562
15563extern __inline __m512d
15564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15565_mm512_castsi512_pd (__m512i __A)
15566{
15567 return (__m512d) (__A);
15568}
15569
15570extern __inline __m128d
15571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15572_mm512_castpd512_pd128 (__m512d __A)
15573{
15574 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
15575}
15576
15577extern __inline __m128
15578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15579_mm512_castps512_ps128 (__m512 __A)
15580{
15581 return _mm512_extractf32x4_ps(__A, 0);
15582}
15583
15584extern __inline __m128i
15585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15586_mm512_castsi512_si128 (__m512i __A)
15587{
15588 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
15589}
15590
15591extern __inline __m256d
15592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15593_mm512_castpd512_pd256 (__m512d __A)
15594{
15595 return _mm512_extractf64x4_pd(__A, 0);
15596}
15597
15598extern __inline __m256
15599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15600_mm512_castps512_ps256 (__m512 __A)
15601{
15602 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
15603}
15604
15605extern __inline __m256i
15606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15607_mm512_castsi512_si256 (__m512i __A)
15608{
15609 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
15610}
15611
15612extern __inline __m512d
15613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15614_mm512_castpd128_pd512 (__m128d __A)
15615{
15616 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
15617}
15618
15619extern __inline __m512
15620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15621_mm512_castps128_ps512 (__m128 __A)
15622{
15623 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
15624}
15625
15626extern __inline __m512i
15627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15628_mm512_castsi128_si512 (__m128i __A)
15629{
15630 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
15631}
15632
15633extern __inline __m512d
15634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15635_mm512_castpd256_pd512 (__m256d __A)
15636{
15637 return __builtin_ia32_pd512_256pd (__A);
15638}
15639
15640extern __inline __m512
15641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15642_mm512_castps256_ps512 (__m256 __A)
15643{
15644 return __builtin_ia32_ps512_256ps (__A);
15645}
15646
15647extern __inline __m512i
15648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15649_mm512_castsi256_si512 (__m256i __A)
15650{
15651 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
15652}
15653
e6b2dc24
JJ
15654extern __inline __m512d
15655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15656_mm512_zextpd128_pd512 (__m128d __A)
15657{
15658 return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
15659}
15660
15661extern __inline __m512
15662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15663_mm512_zextps128_ps512 (__m128 __A)
15664{
15665 return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
15666}
15667
15668extern __inline __m512i
15669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15670_mm512_zextsi128_si512 (__m128i __A)
15671{
15672 return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
15673}
15674
15675extern __inline __m512d
15676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15677_mm512_zextpd256_pd512 (__m256d __A)
15678{
15679 return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
15680}
15681
15682extern __inline __m512
15683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15684_mm512_zextps256_ps512 (__m256 __A)
15685{
15686 return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
15687}
15688
15689extern __inline __m512i
15690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15691_mm512_zextsi256_si512 (__m256i __A)
15692{
15693 return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
15694}
15695
275be1da
IT
15696extern __inline __mmask16
15697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15698_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
15699{
15700 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15701 (__v16si) __B, 0,
15702 (__mmask16) -1);
15703}
15704
15705extern __inline __mmask16
15706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15707_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15708{
15709 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15710 (__v16si) __B, 0, __U);
15711}
15712
15713extern __inline __mmask8
15714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15715_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15716{
15717 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15718 (__v8di) __B, 0, __U);
15719}
15720
15721extern __inline __mmask8
15722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15723_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
15724{
15725 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15726 (__v8di) __B, 0,
15727 (__mmask8) -1);
15728}
15729
15730extern __inline __mmask16
15731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15732_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
15733{
15734 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15735 (__v16si) __B, 6,
15736 (__mmask16) -1);
15737}
15738
15739extern __inline __mmask16
15740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15741_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15742{
15743 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15744 (__v16si) __B, 6, __U);
15745}
15746
15747extern __inline __mmask8
15748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15749_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15750{
15751 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15752 (__v8di) __B, 6, __U);
15753}
15754
15755extern __inline __mmask8
15756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15757_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
15758{
15759 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15760 (__v8di) __B, 6,
15761 (__mmask8) -1);
15762}
15763
167a5b77
JJ
/* Function-body helper: reduces the sixteen 32-bit elements of the
   enclosing function's __m512i parameter __A with binary operator OP and
   returns the scalar.  Each step combines two halves: 512 -> 256 -> 128
   bits, then a { 2, 3, 0, 1 } shuffle pairs element 0/1 with 2/3, and the
   final two elements are combined as scalars.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __hi8 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8si __lo8 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __r8 = (__m256i) (__hi8 op __lo8); \
  __v4si __hi4 = (__v4si) _mm256_extracti128_si256 (__r8, 1); \
  __v4si __lo4 = (__v4si) _mm256_extracti128_si256 (__r8, 0); \
  __v4si __r4 = __hi4 op __lo4; \
  __v4si __swapped = __builtin_shuffle (__r4, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __r2 = __r4 op __swapped; \
  return __r2[0] op __r2[1]
15775
15776extern __inline int
15777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15778_mm512_reduce_add_epi32 (__m512i __A)
15779{
15780 __MM512_REDUCE_OP (+);
15781}
15782
15783extern __inline int
15784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15785_mm512_reduce_mul_epi32 (__m512i __A)
15786{
15787 __MM512_REDUCE_OP (*);
15788}
15789
15790extern __inline int
15791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15792_mm512_reduce_and_epi32 (__m512i __A)
15793{
15794 __MM512_REDUCE_OP (&);
15795}
15796
/* Return the bitwise OR of the sixteen 32-bit integer lanes of __A,
   computed with the `|' operator through __MM512_REDUCE_OP.  */
15797extern __inline int
15798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15799_mm512_reduce_or_epi32 (__m512i __A)
15800{
15801 __MM512_REDUCE_OP (|);
15802}
15803
/* Masked sum of the 32-bit integer lanes of __A.  __A is first passed
   through the zero-masking move with __U, which is expected to clear the
   lanes whose bit in __U is 0, so those lanes contribute the additive
   identity 0 to the reduction.  */
15804extern __inline int
15805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15806_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
15807{
15808 __A = _mm512_maskz_mov_epi32 (__U, __A);
15809 __MM512_REDUCE_OP (+);
15810}
15811
/* Masked product of the 32-bit integer lanes of __A.  The merge-masking
   move blends __A with a vector of all 1s under __U, so lanes not selected
   by __U are expected to hold the multiplicative identity 1.  */
15812extern __inline int
15813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15814_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
15815{
15816 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
15817 __MM512_REDUCE_OP (*);
15818}
15819
/* Masked bitwise AND of the 32-bit integer lanes of __A.  The
   merge-masking move blends __A with a vector of ~0 (all bits set) under
   __U, so lanes not selected by __U are expected to hold the AND identity
   and leave the result unchanged.  */
15820extern __inline int
15821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15822_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
15823{
15824 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
15825 __MM512_REDUCE_OP (&);
15826}
15827
/* Masked bitwise OR of the 32-bit integer lanes of __A.  The zero-masking
   move with __U is expected to clear the unselected lanes, so they
   contribute the OR identity 0.  */
15828extern __inline int
15829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15830_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
15831{
15832 __A = _mm512_maskz_mov_epi32 (__U, __A);
15833 __MM512_REDUCE_OP (|);
15834}
15835
/* Helper for the 32-bit integer min/max reductions.  `op' is an intrinsic
   name suffix (for example min_epi32) that is pasted onto _mm256_ and
   _mm_.  Expects a variable __A of type __m512i in scope and ends in a
   `return', so it must be the final statement of the enclosing function.

   Steps: combine the upper and lower 256-bit halves of __A, then the two
   128-bit halves of that result; shuffle { 2, 3, 0, 1 } swaps the element
   pairs and shuffle { 1, 0, 1, 0 } swaps elements 0 and 1, so after the
   last combine element 0 holds the reduction over all sixteen lanes.
   __T10 is a __v4si, so the returned element has type int and is converted
   to the enclosing function's return type.  */
15836#undef __MM512_REDUCE_OP
15837#define __MM512_REDUCE_OP(op) \
15838 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
15839 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
15840 __m256i __T3 = _mm256_##op (__T1, __T2); \
15841 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
15842 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
15843 __m128i __T6 = _mm_##op (__T4, __T5); \
15844 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
15845 (__v4si) { 2, 3, 0, 1 }); \
15846 __m128i __T8 = _mm_##op (__T6, __T7); \
15847 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
15848 (__v4si) { 1, 0, 1, 0 }); \
15849 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
15850 return __T10[0]
15851
/* Return the minimum of the sixteen 32-bit lanes of __A, reduced with the
   min_epi32 (signed) intrinsics through __MM512_REDUCE_OP.  */
15852extern __inline int
15853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15854_mm512_reduce_min_epi32 (__m512i __A)
15855{
15856 __MM512_REDUCE_OP (min_epi32);
15857}
15858
/* Return the maximum of the sixteen 32-bit lanes of __A, reduced with the
   max_epi32 (signed) intrinsics through __MM512_REDUCE_OP.  */
15859extern __inline int
15860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15861_mm512_reduce_max_epi32 (__m512i __A)
15862{
15863 __MM512_REDUCE_OP (max_epi32);
15864}
15865
/* Return the minimum of the sixteen 32-bit lanes of __A, reduced with the
   min_epu32 (unsigned) intrinsics through __MM512_REDUCE_OP.  The result
   is returned as unsigned int.  */
15866extern __inline unsigned int
15867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15868_mm512_reduce_min_epu32 (__m512i __A)
15869{
15870 __MM512_REDUCE_OP (min_epu32);
15871}
15872
/* Return the maximum of the sixteen 32-bit lanes of __A, reduced with the
   max_epu32 (unsigned) intrinsics through __MM512_REDUCE_OP.  The result
   is returned as unsigned int.  */
15873extern __inline unsigned int
15874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15875_mm512_reduce_max_epu32 (__m512i __A)
15876{
15877 __MM512_REDUCE_OP (max_epu32);
15878}
15879
/* Masked signed minimum of the 32-bit lanes of __A.  __A is blended with a
   vector of __INT_MAX__ under __U, so lanes not selected by __U are
   expected to hold the largest int and cannot lower the minimum.  */
15880extern __inline int
15881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15882_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
15883{
15884 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
15885 __MM512_REDUCE_OP (min_epi32);
15886}
15887
/* Masked signed maximum of the 32-bit lanes of __A.  __A is blended with a
   vector of -__INT_MAX__ - 1 (the smallest int) under __U, so lanes not
   selected by __U are expected not to raise the maximum.  */
15888extern __inline int
15889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15890_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
15891{
15892 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
15893 __MM512_REDUCE_OP (max_epi32);
15894}
15895
/* Masked unsigned minimum of the 32-bit lanes of __A.  __A is blended with
   a vector of ~0 (all bits set, i.e. the largest unsigned 32-bit value)
   under __U, so lanes not selected by __U are expected not to lower the
   minimum.  */
15896extern __inline unsigned int
15897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15898_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
15899{
15900 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
15901 __MM512_REDUCE_OP (min_epu32);
15902}
15903
/* Masked unsigned maximum of the 32-bit lanes of __A.  The zero-masking
   move with __U is expected to clear the unselected lanes; 0 is the
   smallest unsigned value, so those lanes cannot raise the maximum.  */
15904extern __inline unsigned int
15905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15906_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
15907{
15908 __A = _mm512_maskz_mov_epi32 (__U, __A);
15909 __MM512_REDUCE_OP (max_epu32);
15910}
15911
/* Helper for the single-precision reductions that use a C binary operator
   (+, *).  Expects a variable __A of type __m512 in scope and ends in a
   `return', so it must be the final statement of the enclosing function.

   __A is cast to __m512d only so that _mm512_extractf64x4_pd can pull out
   the upper (index 1) and lower (index 0) 256-bit halves; the halves are
   cast straight back to __m256.  The halves are combined, then the 128-bit
   halves of that result, then shuffle { 2, 3, 0, 1 } swaps the element
   pairs for one more combine, and the final scalar result is
   __T8[0] op __T8[1].  */
15912#undef __MM512_REDUCE_OP
15913#define __MM512_REDUCE_OP(op) \
15914 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
15915 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
15916 __m256 __T3 = __T1 op __T2; \
15917 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
15918 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
15919 __m128 __T6 = __T4 op __T5; \
15920 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15921 __m128 __T8 = __T6 op __T7; \
15922 return __T8[0] op __T8[1]
15923
/* Return the sum of the sixteen float lanes of __A.  The additions are
   performed pairwise in the order laid out by __MM512_REDUCE_OP, not
   sequentially from lane 0 to lane 15.  */
15924extern __inline float
15925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15926_mm512_reduce_add_ps (__m512 __A)
15927{
15928 __MM512_REDUCE_OP (+);
15929}
15930
/* Return the product of the sixteen float lanes of __A.  The
   multiplications are performed pairwise in the order laid out by
   __MM512_REDUCE_OP.  */
15931extern __inline float
15932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15933_mm512_reduce_mul_ps (__m512 __A)
15934{
15935 __MM512_REDUCE_OP (*);
15936}
15937
/* Masked sum of the float lanes of __A.  The zero-masking move with __U is
   expected to clear the unselected lanes, so they contribute 0 to the
   sum.  */
15938extern __inline float
15939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15940_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
15941{
15942 __A = _mm512_maskz_mov_ps (__U, __A);
15943 __MM512_REDUCE_OP (+);
15944}
15945
/* Masked product of the float lanes of __A.  __A is blended with a vector
   of 1.0f under __U, so lanes not selected by __U are expected to hold the
   multiplicative identity.  */
15946extern __inline float
15947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15948_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
15949{
15950 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
15951 __MM512_REDUCE_OP (*);
15952}
15953
/* Helper for the single-precision min/max reductions.  `op' is an
   intrinsic name suffix (min_ps or max_ps) pasted onto _mm256_ and _mm_.
   Expects a variable __A of type __m512 in scope and ends in a `return',
   so it must be the final statement of the enclosing function.

   The 256-bit halves of __A are extracted via the __m512d extract
   intrinsic (with casts in and out), combined, split into 128-bit halves
   and combined again.  Shuffle { 2, 3, 0, 1 } then swaps the element
   pairs and shuffle { 1, 0, 1, 0 } swaps elements 0 and 1, so that after
   the last combine element 0 holds the reduction over all sixteen
   lanes.  */
15954#undef __MM512_REDUCE_OP
15955#define __MM512_REDUCE_OP(op) \
15956 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
15957 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
15958 __m256 __T3 = _mm256_##op (__T1, __T2); \
15959 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
15960 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
15961 __m128 __T6 = _mm_##op (__T4, __T5); \
15962 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15963 __m128 __T8 = _mm_##op (__T6, __T7); \
15964 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
15965 __m128 __T10 = _mm_##op (__T8, __T9); \
15966 return __T10[0]
15967
/* Return the minimum of the sixteen float lanes of __A, reduced with the
   min_ps intrinsics through __MM512_REDUCE_OP.  */
15968extern __inline float
15969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15970_mm512_reduce_min_ps (__m512 __A)
15971{
15972 __MM512_REDUCE_OP (min_ps);
15973}
15974
/* Return the maximum of the sixteen float lanes of __A, reduced with the
   max_ps intrinsics through __MM512_REDUCE_OP.  */
15975extern __inline float
15976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15977_mm512_reduce_max_ps (__m512 __A)
15978{
15979 __MM512_REDUCE_OP (max_ps);
15980}
15981
/* Masked minimum of the float lanes of __A.  __A is blended with a vector
   of +infinity (__builtin_inff ()) under __U, so lanes not selected by __U
   are expected not to lower the minimum.  */
15982extern __inline float
15983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15984_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
15985{
15986 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
15987 __MM512_REDUCE_OP (min_ps);
15988}
15989
/* Masked maximum of the float lanes of __A.  __A is blended with a vector
   of -infinity (-__builtin_inff ()) under __U, so lanes not selected by
   __U are expected not to raise the maximum.  */
15990extern __inline float
15991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15992_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
15993{
15994 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
15995 __MM512_REDUCE_OP (max_ps);
15996}
15997
/* Helper for the 64-bit integer reductions that use a C binary operator
   (+, *, &, |).  Expects a variable __A of type __m512i in scope and ends
   in a `return', so it must be the final statement of the enclosing
   function.

   Steps: combine the upper (index 1) and lower (index 0) 256-bit halves of
   __A as four-element 64-bit vectors, then the two 128-bit halves of that
   result as two-element vectors, and finally combine the two remaining
   scalars.  */
15998#undef __MM512_REDUCE_OP
15999#define __MM512_REDUCE_OP(op) \
16000 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
16001 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
16002 __m256i __T3 = (__m256i) (__T1 op __T2); \
16003 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
16004 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
16005 __v2di __T6 = __T4 op __T5; \
16006 return __T6[0] op __T6[1]
16007
/* Return the sum of the eight 64-bit integer lanes of __A, computed with
   the `+' operator through __MM512_REDUCE_OP.  */
16008extern __inline long long
16009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16010_mm512_reduce_add_epi64 (__m512i __A)
16011{
16012 __MM512_REDUCE_OP (+);
16013}
16014
/* Return the product of the eight 64-bit integer lanes of __A, computed
   with the `*' operator through __MM512_REDUCE_OP.  */
16015extern __inline long long
16016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16017_mm512_reduce_mul_epi64 (__m512i __A)
16018{
16019 __MM512_REDUCE_OP (*);
16020}
16021
/* Return the bitwise AND of the eight 64-bit integer lanes of __A,
   computed with the `&' operator through __MM512_REDUCE_OP.  */
16022extern __inline long long
16023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16024_mm512_reduce_and_epi64 (__m512i __A)
16025{
16026 __MM512_REDUCE_OP (&);
16027}
16028
/* Return the bitwise OR of the eight 64-bit integer lanes of __A,
   computed with the `|' operator through __MM512_REDUCE_OP.  */
16029extern __inline long long
16030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16031_mm512_reduce_or_epi64 (__m512i __A)
16032{
16033 __MM512_REDUCE_OP (|);
16034}
16035
/* Masked sum of the 64-bit integer lanes of __A.  The zero-masking move
   with __U is expected to clear the unselected lanes, so they contribute
   the additive identity 0.  */
16036extern __inline long long
16037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16038_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
16039{
16040 __A = _mm512_maskz_mov_epi64 (__U, __A);
16041 __MM512_REDUCE_OP (+);
16042}
16043
/* Masked product of the 64-bit integer lanes of __A.  __A is blended with
   a vector of 1LL under __U, so lanes not selected by __U are expected to
   hold the multiplicative identity.  */
16044extern __inline long long
16045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16046_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
16047{
16048 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
16049 __MM512_REDUCE_OP (*);
16050}
16051
/* Masked bitwise AND of the 64-bit integer lanes of __A.  __A is blended
   with a vector of ~0LL (all bits set) under __U, so lanes not selected by
   __U are expected to hold the AND identity.  */
16052extern __inline long long
16053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16054_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
16055{
16056 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16057 __MM512_REDUCE_OP (&);
16058}
16059
/* Masked bitwise OR of the 64-bit integer lanes of __A.  The zero-masking
   move with __U is expected to clear the unselected lanes, so they
   contribute the OR identity 0.  */
16060extern __inline long long
16061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16062_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
16063{
16064 __A = _mm512_maskz_mov_epi64 (__U, __A);
16065 __MM512_REDUCE_OP (|);
16066}
16067
/* Helper for the 64-bit integer min/max reductions.  `op' is an intrinsic
   name suffix (for example min_epi64) pasted onto _mm512_.  Expects a
   variable __A of type __m512i in scope and ends in a `return', so it must
   be the final statement of the enclosing function.

   Unlike the other helpers, every step stays at 512-bit width:
     - _mm512_shuffle_i64x2 with immediate 0x4e (128-bit lane selectors
       2, 3, 0, 1) swaps the upper and lower 256-bit halves;
     - shuffle { 2, 3, 0, 1, 6, 7, 4, 5 } swaps the 128-bit pairs inside
       each 256-bit half;
     - shuffle { 1, 0, 3, 2, 5, 4, 7, 6 } swaps adjacent 64-bit elements.
   After the three combines element 0 holds the reduction over all eight
   lanes.
   NOTE(review): presumably 512-bit operations are used because narrower
   64-bit min/max intrinsics are not available under plain AVX512F --
   confirm.  */
16068#undef __MM512_REDUCE_OP
16069#define __MM512_REDUCE_OP(op) \
16070 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
16071 __m512i __T2 = _mm512_##op (__A, __T1); \
16072 __m512i __T3 \
16073 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
16074 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
16075 __m512i __T4 = _mm512_##op (__T2, __T3); \
16076 __m512i __T5 \
16077 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
16078 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
16079 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
16080 return __T6[0]
16081
/* Return the minimum of the eight 64-bit lanes of __A, reduced with
   _mm512_min_epi64 (signed) through __MM512_REDUCE_OP.  */
16082extern __inline long long
16083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16084_mm512_reduce_min_epi64 (__m512i __A)
16085{
16086 __MM512_REDUCE_OP (min_epi64);
16087}
16088
/* Return the maximum of the eight 64-bit lanes of __A, reduced with
   _mm512_max_epi64 (signed) through __MM512_REDUCE_OP.  */
16089extern __inline long long
16090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16091_mm512_reduce_max_epi64 (__m512i __A)
16092{
16093 __MM512_REDUCE_OP (max_epi64);
16094}
16095
/* Masked signed minimum of the 64-bit lanes of __A.  __A is blended with a
   vector of __LONG_LONG_MAX__ under __U, so lanes not selected by __U are
   expected not to lower the minimum.  */
16096extern __inline long long
16097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16098_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
16099{
16100 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
16101 __U, __A);
16102 __MM512_REDUCE_OP (min_epi64);
16103}
16104
/* Masked signed maximum of the 64-bit lanes of __A.  __A is blended with a
   vector of -__LONG_LONG_MAX__ - 1 (the smallest long long) under __U, so
   lanes not selected by __U are expected not to raise the maximum.  */
16105extern __inline long long
16106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16107_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
16108{
16109 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
16110 __U, __A);
16111 __MM512_REDUCE_OP (max_epi64);
16112}
16113
/* Return the minimum of the eight 64-bit lanes of __A, reduced with
   _mm512_min_epu64 (unsigned) through __MM512_REDUCE_OP.  The result is
   returned as unsigned long long.  */
16114extern __inline unsigned long long
16115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16116_mm512_reduce_min_epu64 (__m512i __A)
16117{
16118 __MM512_REDUCE_OP (min_epu64);
16119}
16120
/* Return the maximum of the eight 64-bit lanes of __A, reduced with
   _mm512_max_epu64 (unsigned) through __MM512_REDUCE_OP.  The result is
   returned as unsigned long long.  */
16121extern __inline unsigned long long
16122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16123_mm512_reduce_max_epu64 (__m512i __A)
16124{
16125 __MM512_REDUCE_OP (max_epu64);
16126}
16127
/* Masked unsigned minimum of the 64-bit lanes of __A.  __A is blended with
   a vector of ~0LL (all bits set, i.e. the largest unsigned 64-bit value)
   under __U, so lanes not selected by __U are expected not to lower the
   minimum.  */
16128extern __inline unsigned long long
16129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16130_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
16131{
16132 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16133 __MM512_REDUCE_OP (min_epu64);
16134}
16135
/* Masked unsigned maximum of the 64-bit lanes of __A.  The zero-masking
   move with __U is expected to clear the unselected lanes; 0 is the
   smallest unsigned value, so those lanes cannot raise the maximum.  */
16136extern __inline unsigned long long
16137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16138_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
16139{
16140 __A = _mm512_maskz_mov_epi64 (__U, __A);
16141 __MM512_REDUCE_OP (max_epu64);
16142}
16143
/* Helper for the double-precision reductions that use a C binary operator
   (+, *).  Expects a variable __A of type __m512d in scope and ends in a
   `return', so it must be the final statement of the enclosing function.

   Steps: combine the upper (index 1) and lower (index 0) 256-bit halves of
   __A, then the two 128-bit halves of that result, and finally combine the
   two remaining doubles.  */
16144#undef __MM512_REDUCE_OP
16145#define __MM512_REDUCE_OP(op) \
16146 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
16147 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
16148 __m256d __T3 = __T1 op __T2; \
16149 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
16150 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
16151 __m128d __T6 = __T4 op __T5; \
16152 return __T6[0] op __T6[1]
16153
/* Return the sum of the eight double lanes of __A.  The additions are
   performed pairwise in the order laid out by __MM512_REDUCE_OP, not
   sequentially from lane 0 to lane 7.  */
16154extern __inline double
16155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16156_mm512_reduce_add_pd (__m512d __A)
16157{
16158 __MM512_REDUCE_OP (+);
16159}
16160
/* Return the product of the eight double lanes of __A.  The
   multiplications are performed pairwise in the order laid out by
   __MM512_REDUCE_OP.  */
16161extern __inline double
16162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16163_mm512_reduce_mul_pd (__m512d __A)
16164{
16165 __MM512_REDUCE_OP (*);
16166}
16167
/* Masked sum of the double lanes of __A.  The zero-masking move with __U
   is expected to clear the unselected lanes, so they contribute 0 to the
   sum.  */
16168extern __inline double
16169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16170_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
16171{
16172 __A = _mm512_maskz_mov_pd (__U, __A);
16173 __MM512_REDUCE_OP (+);
16174}
16175
/* Masked product of the double lanes of __A.  __A is blended with a vector
   of 1.0 under __U, so lanes not selected by __U are expected to hold the
   multiplicative identity.  */
16176extern __inline double
16177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16178_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
16179{
16180 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
16181 __MM512_REDUCE_OP (*);
16182}
16183
/* Helper for the double-precision min/max reductions.  `op' is an
   intrinsic name suffix (min_pd or max_pd) pasted onto _mm256_ and _mm_.
   Expects a variable __A of type __m512d in scope and ends in a `return',
   so it must be the final statement of the enclosing function.

   Steps: combine the upper and lower 256-bit halves of __A, then the two
   128-bit halves of that result; shuffle { 1, 0 } swaps the two doubles so
   that after the last combine element 0 holds the reduction over all
   eight lanes.  */
16184#undef __MM512_REDUCE_OP
16185#define __MM512_REDUCE_OP(op) \
16186 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
16187 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
16188 __m256d __T3 = _mm256_##op (__T1, __T2); \
16189 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
16190 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
16191 __m128d __T6 = _mm_##op (__T4, __T5); \
16192 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
16193 __m128d __T8 = _mm_##op (__T6, __T7); \
16194 return __T8[0]
16195
/* Return the minimum of the eight double lanes of __A, reduced with the
   min_pd intrinsics through __MM512_REDUCE_OP.  */
16196extern __inline double
16197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16198_mm512_reduce_min_pd (__m512d __A)
16199{
16200 __MM512_REDUCE_OP (min_pd);
16201}
16202
/* Return the maximum of the eight double lanes of __A, reduced with the
   max_pd intrinsics through __MM512_REDUCE_OP.  */
16203extern __inline double
16204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16205_mm512_reduce_max_pd (__m512d __A)
16206{
16207 __MM512_REDUCE_OP (max_pd);
16208}
16209
/* Masked minimum of the double lanes of __A.  __A is blended with a vector
   of +infinity (__builtin_inf ()) under __U, so lanes not selected by __U
   are expected not to lower the minimum.  */
16210extern __inline double
16211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16212_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
16213{
16214 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
16215 __MM512_REDUCE_OP (min_pd);
16216}
16217
/* Masked maximum of the double lanes of __A.  __A is blended with a vector
   of -infinity (-__builtin_inf ()) under __U, so lanes not selected by __U
   are expected not to raise the maximum.  */
16218extern __inline double
16219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16220_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
16221{
16222 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
16223 __MM512_REDUCE_OP (max_pd);
16224}
16225
/* The reduction helper is internal to this header; remove it so it is not
   visible to code that includes the header.  */
16226#undef __MM512_REDUCE_OP
16227
756c5857
AI
/* If this header had to enable the avx512f target itself (it pushed the
   options and defined __DISABLE_AVX512F__ at the top because __AVX512F__
   was not defined), restore the caller's target options here.  */
16228#ifdef __DISABLE_AVX512F__
16229#undef __DISABLE_AVX512F__
16230#pragma GCC pop_options
16231#endif /* __DISABLE_AVX512F__ */
16232
16233#endif /* _AVX512FINTRIN_H_INCLUDED */
This page took 4.074953 seconds and 5 git commands to generate.