]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/avx512fintrin.h
x86-tune-costs.h (znver1_cost): Make AVX256 vector loads cost the same as AVX128...
[gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
85ec4feb 1/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
31#ifndef __AVX512F__
32#pragma GCC push_options
33#pragma GCC target("avx512f")
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
/* Internal 64-byte vector types used to implement the intrinsics.
   Names follow __v<lane count><element kind>; a trailing 'u' marks the
   variant with unsigned elements.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));
756c5857
AI
48
/* Public 512-bit vector types.  They carry __may_alias__ because the
   Intel API lets them alias other vector types as well as their scalar
   components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
54
c6b0037d
MG
/* Counterparts of __m512, __m512d and __m512i declared with
   __aligned__ (1), i.e. without the natural alignment requirement.  */
typedef float __m512_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u
  __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
59
756c5857
AI
/* Mask types: __mmask16 is an unsigned short and __mmask8 an unsigned
   char.  */
typedef unsigned short __mmask16;
typedef unsigned char __mmask8;
62
dcb2c527
JJ
63extern __inline __mmask16
64__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65_mm512_int2mask (int __M)
66{
67 return (__mmask16) __M;
68}
69
70extern __inline int
71__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72_mm512_mask2int (__mmask16 __M)
73{
74 return (int) __M;
75}
76
756c5857
AI
77extern __inline __m512i
78__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79_mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
82{
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
85}
86
87/* Create the vector [A B C D E F G H I J K L M N O P]. */
88extern __inline __m512i
89__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90_mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
94{
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
98}
99
4e6a811f
JJ
100extern __inline __m512i
101__attribute__((__gnu_inline__, __always_inline__, __artificial__))
102_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
103 short __q27, short __q26, short __q25, short __q24,
104 short __q23, short __q22, short __q21, short __q20,
105 short __q19, short __q18, short __q17, short __q16,
106 short __q15, short __q14, short __q13, short __q12,
107 short __q11, short __q10, short __q09, short __q08,
108 short __q07, short __q06, short __q05, short __q04,
109 short __q03, short __q02, short __q01, short __q00)
110{
111 return __extension__ (__m512i)(__v32hi){
112 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
113 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
114 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
115 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
116 };
117}
118
119extern __inline __m512i
120__attribute__((__gnu_inline__, __always_inline__, __artificial__))
121_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
122 char __q59, char __q58, char __q57, char __q56,
123 char __q55, char __q54, char __q53, char __q52,
124 char __q51, char __q50, char __q49, char __q48,
125 char __q47, char __q46, char __q45, char __q44,
126 char __q43, char __q42, char __q41, char __q40,
127 char __q39, char __q38, char __q37, char __q36,
128 char __q35, char __q34, char __q33, char __q32,
129 char __q31, char __q30, char __q29, char __q28,
130 char __q27, char __q26, char __q25, char __q24,
131 char __q23, char __q22, char __q21, char __q20,
132 char __q19, char __q18, char __q17, char __q16,
133 char __q15, char __q14, char __q13, char __q12,
134 char __q11, char __q10, char __q09, char __q08,
135 char __q07, char __q06, char __q05, char __q04,
136 char __q03, char __q02, char __q01, char __q00)
137{
138 return __extension__ (__m512i)(__v64qi){
139 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
140 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
141 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
142 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
143 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
144 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
145 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
146 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
147 };
148}
149
756c5857
AI
150extern __inline __m512d
151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
152_mm512_set_pd (double __A, double __B, double __C, double __D,
153 double __E, double __F, double __G, double __H)
154{
155 return __extension__ (__m512d)
156 { __H, __G, __F, __E, __D, __C, __B, __A };
157}
158
159extern __inline __m512
160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161_mm512_set_ps (float __A, float __B, float __C, float __D,
162 float __E, float __F, float __G, float __H,
163 float __I, float __J, float __K, float __L,
164 float __M, float __N, float __O, float __P)
165{
166 return __extension__ (__m512)
167 { __P, __O, __N, __M, __L, __K, __J, __I,
168 __H, __G, __F, __E, __D, __C, __B, __A };
169}
170
/* "Reversed" constructors: each one forwards to the matching
   _mm512_set_* with its argument list in the opposite order.  */
#define _mm512_setr_epi64(__e0, __e1, __e2, __e3,			      \
			  __e4, __e5, __e6, __e7)			      \
  _mm512_set_epi64 (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_epi32(__e0, __e1, __e2, __e3,			      \
			  __e4, __e5, __e6, __e7,			      \
			  __e8, __e9, __e10, __e11,			      \
			  __e12, __e13, __e14, __e15)			      \
  _mm512_set_epi32 (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8,    \
		    __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_pd(__e0, __e1, __e2, __e3,				      \
		       __e4, __e5, __e6, __e7)				      \
  _mm512_set_pd (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_ps(__e0, __e1, __e2, __e3,				      \
		       __e4, __e5, __e6, __e7,				      \
		       __e8, __e9, __e10, __e11,			      \
		       __e12, __e13, __e14, __e15)			      \
  _mm512_set_ps (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8,	      \
		 __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
183
0b192937
UD
184extern __inline __m512
185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186_mm512_undefined_ps (void)
187{
188 __m512 __Y = __Y;
189 return __Y;
190}
191
dcb2c527
JJ
/* _mm512_undefined is an alias for _mm512_undefined_ps.  */
#define _mm512_undefined	_mm512_undefined_ps
193
0b192937
UD
194extern __inline __m512d
195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
196_mm512_undefined_pd (void)
197{
198 __m512d __Y = __Y;
199 return __Y;
200}
201
202extern __inline __m512i
203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271e5cb 204_mm512_undefined_epi32 (void)
0b192937
UD
205{
206 __m512i __Y = __Y;
207 return __Y;
208}
209
4271e5cb
UB
/* _mm512_undefined_si512 is an alias for _mm512_undefined_epi32.  */
#define _mm512_undefined_si512	_mm512_undefined_epi32
211
7d9088c2
UD
212extern __inline __m512i
213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214_mm512_set1_epi8 (char __A)
215{
216 return __extension__ (__m512i)(__v64qi)
217 { __A, __A, __A, __A, __A, __A, __A, __A,
218 __A, __A, __A, __A, __A, __A, __A, __A,
219 __A, __A, __A, __A, __A, __A, __A, __A,
220 __A, __A, __A, __A, __A, __A, __A, __A,
221 __A, __A, __A, __A, __A, __A, __A, __A,
222 __A, __A, __A, __A, __A, __A, __A, __A,
223 __A, __A, __A, __A, __A, __A, __A, __A,
224 __A, __A, __A, __A, __A, __A, __A, __A };
225}
226
227extern __inline __m512i
228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229_mm512_set1_epi16 (short __A)
230{
231 return __extension__ (__m512i)(__v32hi)
232 { __A, __A, __A, __A, __A, __A, __A, __A,
233 __A, __A, __A, __A, __A, __A, __A, __A,
234 __A, __A, __A, __A, __A, __A, __A, __A,
235 __A, __A, __A, __A, __A, __A, __A, __A };
236}
237
2b2384e8
UD
238extern __inline __m512d
239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240_mm512_set1_pd (double __A)
241{
242 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
243 (__v2df) { __A, },
244 (__v8df)
245 _mm512_undefined_pd (),
246 (__mmask8) -1);
247}
248
249extern __inline __m512
250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251_mm512_set1_ps (float __A)
252{
253 return (__m512) __builtin_ia32_broadcastss512 (__extension__
254 (__v4sf) { __A, },
255 (__v16sf)
256 _mm512_undefined_ps (),
257 (__mmask16) -1);
258}
259
7d9088c2
UD
260/* Create the vector [A B C D A B C D A B C D A B C D]. */
261extern __inline __m512i
262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
263_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
264{
265 return __extension__ (__m512i)(__v16si)
266 { __D, __C, __B, __A, __D, __C, __B, __A,
267 __D, __C, __B, __A, __D, __C, __B, __A };
268}
269
270extern __inline __m512i
271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
272_mm512_set4_epi64 (long long __A, long long __B, long long __C,
273 long long __D)
274{
275 return __extension__ (__m512i) (__v8di)
276 { __D, __C, __B, __A, __D, __C, __B, __A };
277}
278
279extern __inline __m512d
280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281_mm512_set4_pd (double __A, double __B, double __C, double __D)
282{
283 return __extension__ (__m512d)
284 { __D, __C, __B, __A, __D, __C, __B, __A };
285}
286
287extern __inline __m512
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm512_set4_ps (float __A, float __B, float __C, float __D)
290{
291 return __extension__ (__m512)
292 { __D, __C, __B, __A, __D, __C, __B, __A,
293 __D, __C, __B, __A, __D, __C, __B, __A };
294}
295
/* "Reversed" four-value constructors: each forwards to the matching
   _mm512_set4_* with its four arguments in the opposite order.  */
#define _mm512_setr4_epi64(__e0, __e1, __e2, __e3)			      \
  _mm512_set4_epi64 (__e3, __e2, __e1, __e0)

#define _mm512_setr4_epi32(__e0, __e1, __e2, __e3)			      \
  _mm512_set4_epi32 (__e3, __e2, __e1, __e0)

#define _mm512_setr4_pd(__e0, __e1, __e2, __e3)				      \
  _mm512_set4_pd (__e3, __e2, __e1, __e0)

#define _mm512_setr4_ps(__e0, __e1, __e2, __e3)				      \
  _mm512_set4_ps (__e3, __e2, __e1, __e0)
307
756c5857
AI
308extern __inline __m512
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm512_setzero_ps (void)
311{
312 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
313 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
314}
315
4e6a811f
JJ
316extern __inline __m512
317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318_mm512_setzero (void)
319{
320 return _mm512_setzero_ps ();
321}
322
756c5857
AI
323extern __inline __m512d
324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
325_mm512_setzero_pd (void)
326{
327 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
328}
329
7d9088c2
UD
330extern __inline __m512i
331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
332_mm512_setzero_epi32 (void)
333{
334 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
335}
336
756c5857
AI
337extern __inline __m512i
338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339_mm512_setzero_si512 (void)
340{
341 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
342}
343
344extern __inline __m512d
345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
346_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
347{
348 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
349 (__v8df) __W,
350 (__mmask8) __U);
351}
352
353extern __inline __m512d
354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
356{
357 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
358 (__v8df)
359 _mm512_setzero_pd (),
360 (__mmask8) __U);
361}
362
363extern __inline __m512
364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
365_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
366{
367 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
368 (__v16sf) __W,
369 (__mmask16) __U);
370}
371
372extern __inline __m512
373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
375{
376 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
377 (__v16sf)
378 _mm512_setzero_ps (),
379 (__mmask16) __U);
380}
381
382extern __inline __m512d
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm512_load_pd (void const *__P)
385{
386 return *(__m512d *) __P;
387}
388
389extern __inline __m512d
390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
392{
393 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
394 (__v8df) __W,
395 (__mmask8) __U);
396}
397
398extern __inline __m512d
399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
400_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
401{
402 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
403 (__v8df)
404 _mm512_setzero_pd (),
405 (__mmask8) __U);
406}
407
408extern __inline void
409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
410_mm512_store_pd (void *__P, __m512d __A)
411{
412 *(__m512d *) __P = __A;
413}
414
415extern __inline void
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
418{
419 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
420 (__mmask8) __U);
421}
422
423extern __inline __m512
424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425_mm512_load_ps (void const *__P)
426{
427 return *(__m512 *) __P;
428}
429
430extern __inline __m512
431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
432_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
433{
434 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
435 (__v16sf) __W,
436 (__mmask16) __U);
437}
438
439extern __inline __m512
440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
441_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
442{
443 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
444 (__v16sf)
445 _mm512_setzero_ps (),
446 (__mmask16) __U);
447}
448
449extern __inline void
450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451_mm512_store_ps (void *__P, __m512 __A)
452{
453 *(__m512 *) __P = __A;
454}
455
456extern __inline void
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
459{
460 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
461 (__mmask16) __U);
462}
463
464extern __inline __m512i
465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
466_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
467{
468 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
469 (__v8di) __W,
470 (__mmask8) __U);
471}
472
473extern __inline __m512i
474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
476{
477 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
478 (__v8di)
479 _mm512_setzero_si512 (),
480 (__mmask8) __U);
481}
482
483extern __inline __m512i
484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
485_mm512_load_epi64 (void const *__P)
486{
487 return *(__m512i *) __P;
488}
489
490extern __inline __m512i
491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
492_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
493{
494 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
495 (__v8di) __W,
496 (__mmask8) __U);
497}
498
499extern __inline __m512i
500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
502{
503 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
504 (__v8di)
505 _mm512_setzero_si512 (),
506 (__mmask8) __U);
507}
508
509extern __inline void
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm512_store_epi64 (void *__P, __m512i __A)
512{
513 *(__m512i *) __P = __A;
514}
515
516extern __inline void
517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
519{
520 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
521 (__mmask8) __U);
522}
523
524extern __inline __m512i
525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
526_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
527{
528 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
529 (__v16si) __W,
530 (__mmask16) __U);
531}
532
533extern __inline __m512i
534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
536{
537 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
538 (__v16si)
539 _mm512_setzero_si512 (),
540 (__mmask16) __U);
541}
542
543extern __inline __m512i
544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
545_mm512_load_si512 (void const *__P)
546{
547 return *(__m512i *) __P;
548}
549
550extern __inline __m512i
551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
552_mm512_load_epi32 (void const *__P)
553{
554 return *(__m512i *) __P;
555}
556
557extern __inline __m512i
558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
559_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
560{
561 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
562 (__v16si) __W,
563 (__mmask16) __U);
564}
565
566extern __inline __m512i
567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
569{
570 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
571 (__v16si)
572 _mm512_setzero_si512 (),
573 (__mmask16) __U);
574}
575
576extern __inline void
577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
578_mm512_store_si512 (void *__P, __m512i __A)
579{
580 *(__m512i *) __P = __A;
581}
582
583extern __inline void
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585_mm512_store_epi32 (void *__P, __m512i __A)
586{
587 *(__m512i *) __P = __A;
588}
589
590extern __inline void
591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
593{
594 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
595 (__mmask16) __U);
596}
597
598extern __inline __m512i
599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600_mm512_mullo_epi32 (__m512i __A, __m512i __B)
601{
2069d6fc 602 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
603}
604
605extern __inline __m512i
606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
608{
609 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
610 (__v16si) __B,
611 (__v16si)
612 _mm512_setzero_si512 (),
613 __M);
614}
615
616extern __inline __m512i
617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
619{
620 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
621 (__v16si) __B,
622 (__v16si) __W, __M);
623}
624
503ac4e0
JJ
625extern __inline __m512i
626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627_mm512_mullox_epi64 (__m512i __A, __m512i __B)
628{
629 return (__m512i) ((__v8du) __A * (__v8du) __B);
630}
631
632extern __inline __m512i
633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
635{
636 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
637}
638
756c5857
AI
639extern __inline __m512i
640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
641_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
642{
643 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
644 (__v16si) __Y,
645 (__v16si)
4271e5cb 646 _mm512_undefined_epi32 (),
756c5857
AI
647 (__mmask16) -1);
648}
649
650extern __inline __m512i
651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
652_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
653{
654 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
655 (__v16si) __Y,
656 (__v16si) __W,
657 (__mmask16) __U);
658}
659
660extern __inline __m512i
661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
663{
664 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
665 (__v16si) __Y,
666 (__v16si)
667 _mm512_setzero_si512 (),
668 (__mmask16) __U);
669}
670
671extern __inline __m512i
672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673_mm512_srav_epi32 (__m512i __X, __m512i __Y)
674{
675 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
676 (__v16si) __Y,
677 (__v16si)
4271e5cb 678 _mm512_undefined_epi32 (),
756c5857
AI
679 (__mmask16) -1);
680}
681
682extern __inline __m512i
683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
684_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
685{
686 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
687 (__v16si) __Y,
688 (__v16si) __W,
689 (__mmask16) __U);
690}
691
692extern __inline __m512i
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
695{
696 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
697 (__v16si) __Y,
698 (__v16si)
699 _mm512_setzero_si512 (),
700 (__mmask16) __U);
701}
702
703extern __inline __m512i
704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
705_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
706{
707 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
708 (__v16si) __Y,
709 (__v16si)
4271e5cb 710 _mm512_undefined_epi32 (),
756c5857
AI
711 (__mmask16) -1);
712}
713
714extern __inline __m512i
715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
716_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
717{
718 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
719 (__v16si) __Y,
720 (__v16si) __W,
721 (__mmask16) __U);
722}
723
724extern __inline __m512i
725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
726_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
727{
728 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
729 (__v16si) __Y,
730 (__v16si)
731 _mm512_setzero_si512 (),
732 (__mmask16) __U);
733}
734
735extern __inline __m512i
736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737_mm512_add_epi64 (__m512i __A, __m512i __B)
738{
2069d6fc 739 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
740}
741
742extern __inline __m512i
743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
745{
746 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
747 (__v8di) __B,
748 (__v8di) __W,
749 (__mmask8) __U);
750}
751
752extern __inline __m512i
753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
755{
756 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
757 (__v8di) __B,
758 (__v8di)
759 _mm512_setzero_si512 (),
760 (__mmask8) __U);
761}
762
763extern __inline __m512i
764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765_mm512_sub_epi64 (__m512i __A, __m512i __B)
766{
2069d6fc 767 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
768}
769
770extern __inline __m512i
771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
772_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
773{
774 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
775 (__v8di) __B,
776 (__v8di) __W,
777 (__mmask8) __U);
778}
779
780extern __inline __m512i
781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
783{
784 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
785 (__v8di) __B,
786 (__v8di)
787 _mm512_setzero_si512 (),
788 (__mmask8) __U);
789}
790
791extern __inline __m512i
792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
793_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
794{
795 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
796 (__v8di) __Y,
797 (__v8di)
0b192937 798 _mm512_undefined_pd (),
756c5857
AI
799 (__mmask8) -1);
800}
801
802extern __inline __m512i
803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
805{
806 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
807 (__v8di) __Y,
808 (__v8di) __W,
809 (__mmask8) __U);
810}
811
812extern __inline __m512i
813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
815{
816 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
817 (__v8di) __Y,
818 (__v8di)
819 _mm512_setzero_si512 (),
820 (__mmask8) __U);
821}
822
823extern __inline __m512i
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm512_srav_epi64 (__m512i __X, __m512i __Y)
826{
827 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
828 (__v8di) __Y,
829 (__v8di)
4271e5cb 830 _mm512_undefined_epi32 (),
756c5857
AI
831 (__mmask8) -1);
832}
833
834extern __inline __m512i
835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
836_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
837{
838 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
839 (__v8di) __Y,
840 (__v8di) __W,
841 (__mmask8) __U);
842}
843
844extern __inline __m512i
845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
847{
848 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
849 (__v8di) __Y,
850 (__v8di)
851 _mm512_setzero_si512 (),
852 (__mmask8) __U);
853}
854
855extern __inline __m512i
856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
858{
859 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
860 (__v8di) __Y,
861 (__v8di)
4271e5cb 862 _mm512_undefined_epi32 (),
756c5857
AI
863 (__mmask8) -1);
864}
865
866extern __inline __m512i
867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
869{
870 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
871 (__v8di) __Y,
872 (__v8di) __W,
873 (__mmask8) __U);
874}
875
876extern __inline __m512i
877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
879{
880 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
881 (__v8di) __Y,
882 (__v8di)
883 _mm512_setzero_si512 (),
884 (__mmask8) __U);
885}
886
887extern __inline __m512i
888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889_mm512_add_epi32 (__m512i __A, __m512i __B)
890{
2069d6fc 891 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
892}
893
894extern __inline __m512i
895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
896_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
897{
898 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
899 (__v16si) __B,
900 (__v16si) __W,
901 (__mmask16) __U);
902}
903
904extern __inline __m512i
905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
906_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
907{
908 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
909 (__v16si) __B,
910 (__v16si)
911 _mm512_setzero_si512 (),
912 (__mmask16) __U);
913}
914
915extern __inline __m512i
916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
917_mm512_mul_epi32 (__m512i __X, __m512i __Y)
918{
919 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
920 (__v16si) __Y,
921 (__v8di)
4271e5cb 922 _mm512_undefined_epi32 (),
756c5857
AI
923 (__mmask8) -1);
924}
925
926extern __inline __m512i
927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
929{
930 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
931 (__v16si) __Y,
932 (__v8di) __W, __M);
933}
934
935extern __inline __m512i
936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
938{
939 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
940 (__v16si) __Y,
941 (__v8di)
942 _mm512_setzero_si512 (),
943 __M);
944}
945
946extern __inline __m512i
947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
948_mm512_sub_epi32 (__m512i __A, __m512i __B)
949{
2069d6fc 950 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
951}
952
953extern __inline __m512i
954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
956{
957 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
958 (__v16si) __B,
959 (__v16si) __W,
960 (__mmask16) __U);
961}
962
963extern __inline __m512i
964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
965_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
966{
967 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
968 (__v16si) __B,
969 (__v16si)
970 _mm512_setzero_si512 (),
971 (__mmask16) __U);
972}
973
974extern __inline __m512i
975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976_mm512_mul_epu32 (__m512i __X, __m512i __Y)
977{
978 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
979 (__v16si) __Y,
980 (__v8di)
4271e5cb 981 _mm512_undefined_epi32 (),
756c5857
AI
982 (__mmask8) -1);
983}
984
985extern __inline __m512i
986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
988{
989 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
990 (__v16si) __Y,
991 (__v8di) __W, __M);
992}
993
994extern __inline __m512i
995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
996_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
997{
998 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
999 (__v16si) __Y,
1000 (__v8di)
1001 _mm512_setzero_si512 (),
1002 __M);
1003}
1004
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined.  Each hands the count
   __B directly to __builtin_ia32_psllqi512_mask.  */

/* Undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* __W as third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}

/* All-zero third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, __zero,
                                                  (__mmask8) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtin and
   operand order as the inline forms; the count is cast to int here.
   NOTE(review): presumably macros are needed so the count reaches the
   builtin as a constant expression without inlining -- confirm.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
#endif
1051
1052extern __inline __m512i
1053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054_mm512_sll_epi64 (__m512i __A, __m128i __B)
1055{
1056 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1057 (__v2di) __B,
1058 (__v8di)
4271e5cb 1059 _mm512_undefined_epi32 (),
756c5857
AI
1060 (__mmask8) -1);
1061}
1062
1063extern __inline __m512i
1064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1066{
1067 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1068 (__v2di) __B,
1069 (__v8di) __W,
1070 (__mmask8) __U);
1071}
1072
1073extern __inline __m512i
1074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1076{
1077 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1078 (__v2di) __B,
1079 (__v8di)
1080 _mm512_setzero_si512 (),
1081 (__mmask8) __U);
1082}
1083
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined.  Each hands the count
   __B directly to __builtin_ia32_psrlqi512_mask.  */

/* Undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* __W as third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}

/* All-zero third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __zero,
                                                  (__mmask8) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtin and
   operand order as the inline forms; the count is cast to int here.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
#endif
1130
1131extern __inline __m512i
1132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133_mm512_srl_epi64 (__m512i __A, __m128i __B)
1134{
1135 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1136 (__v2di) __B,
1137 (__v8di)
4271e5cb 1138 _mm512_undefined_epi32 (),
756c5857
AI
1139 (__mmask8) -1);
1140}
1141
1142extern __inline __m512i
1143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1145{
1146 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1147 (__v2di) __B,
1148 (__v8di) __W,
1149 (__mmask8) __U);
1150}
1151
1152extern __inline __m512i
1153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1155{
1156 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1157 (__v2di) __B,
1158 (__v8di)
1159 _mm512_setzero_si512 (),
1160 (__mmask8) __U);
1161}
1162
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined.  Each hands the count
   __B directly to __builtin_ia32_psraqi512_mask.  */

/* Undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  const __v8di __src = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) -1);
}

/* __W as third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  const __v8di __src = (__v8di) __W;

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                                  (__mmask8) __U);
}

/* All-zero third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  const __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __zero,
                                                  (__mmask8) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtin and
   operand order as the inline forms; the count is cast to int here.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
#endif
1209
1210extern __inline __m512i
1211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1212_mm512_sra_epi64 (__m512i __A, __m128i __B)
1213{
1214 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1215 (__v2di) __B,
1216 (__v8di)
4271e5cb 1217 _mm512_undefined_epi32 (),
756c5857
AI
1218 (__mmask8) -1);
1219}
1220
1221extern __inline __m512i
1222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1223_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1224{
1225 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1226 (__v2di) __B,
1227 (__v8di) __W,
1228 (__mmask8) __U);
1229}
1230
1231extern __inline __m512i
1232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1233_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1234{
1235 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1236 (__v2di) __B,
1237 (__v8di)
1238 _mm512_setzero_si512 (),
1239 (__mmask8) __U);
1240}
1241
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined.  Each hands the count
   __B directly to __builtin_ia32_pslldi512_mask.  */

/* Undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* __W as third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}

/* All-zero third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __zero,
                                                  (__mmask16) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtin and
   operand order as the inline forms; the count is cast to int here.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
#endif
1288
1289extern __inline __m512i
1290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291_mm512_sll_epi32 (__m512i __A, __m128i __B)
1292{
1293 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1294 (__v4si) __B,
1295 (__v16si)
4271e5cb 1296 _mm512_undefined_epi32 (),
756c5857
AI
1297 (__mmask16) -1);
1298}
1299
1300extern __inline __m512i
1301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1303{
1304 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1305 (__v4si) __B,
1306 (__v16si) __W,
1307 (__mmask16) __U);
1308}
1309
1310extern __inline __m512i
1311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1313{
1314 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1315 (__v4si) __B,
1316 (__v16si)
1317 _mm512_setzero_si512 (),
1318 (__mmask16) __U);
1319}
1320
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined.  Each hands the count
   __B directly to __builtin_ia32_psrldi512_mask.  */

/* Undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* __W as third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}

/* All-zero third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __zero,
                                                  (__mmask16) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtin and
   operand order as the inline forms; the count is cast to int here.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
#endif
1367
1368extern __inline __m512i
1369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370_mm512_srl_epi32 (__m512i __A, __m128i __B)
1371{
1372 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1373 (__v4si) __B,
1374 (__v16si)
4271e5cb 1375 _mm512_undefined_epi32 (),
756c5857
AI
1376 (__mmask16) -1);
1377}
1378
1379extern __inline __m512i
1380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1382{
1383 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1384 (__v4si) __B,
1385 (__v16si) __W,
1386 (__mmask16) __U);
1387}
1388
1389extern __inline __m512i
1390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1392{
1393 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1394 (__v4si) __B,
1395 (__v16si)
1396 _mm512_setzero_si512 (),
1397 (__mmask16) __U);
1398}
1399
#ifdef __OPTIMIZE__
/* Inline forms, used when __OPTIMIZE__ is defined.  Each hands the count
   __B directly to __builtin_ia32_psradi512_mask.  */

/* Undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) -1);
}

/* __W as third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                                  (__mmask16) __U);
}

/* All-zero third operand, __U as mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __zero,
                                                  (__mmask16) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtin and
   operand order as the inline forms; the count is cast to int here.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
#endif
1446
1447extern __inline __m512i
1448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449_mm512_sra_epi32 (__m512i __A, __m128i __B)
1450{
1451 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1452 (__v4si) __B,
1453 (__v16si)
4271e5cb 1454 _mm512_undefined_epi32 (),
756c5857
AI
1455 (__mmask16) -1);
1456}
1457
1458extern __inline __m512i
1459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1461{
1462 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1463 (__v4si) __B,
1464 (__v16si) __W,
1465 (__mmask16) __U);
1466}
1467
1468extern __inline __m512i
1469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1471{
1472 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1473 (__v4si) __B,
1474 (__v16si)
1475 _mm512_setzero_si512 (),
1476 (__mmask16) __U);
1477}
1478
075691af
AI
#ifdef __OPTIMIZE__
/* Inline forms of the scalar add/sub intrinsics that take an explicit
   rounding argument __R, used when __OPTIMIZE__ is defined.  __R is handed
   unchanged to the builtin as its last operand.  The unmasked forms call
   __builtin_ia32_<op>_round; the mask forms call
   __builtin_ia32_<op>_mask_round with __W as the third operand, and the
   maskz forms call it with an all-zero vector instead.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  const __v2df __lhs = (__v2df) __A;
  const __v2df __rhs = (__v2df) __B;

  return (__m128d) __builtin_ia32_addsd_round (__lhs, __rhs, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  const __v2df __src = (__v2df) __W;

  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B, __src,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B, __zero,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  const __v4sf __lhs = (__v4sf) __A;
  const __v4sf __rhs = (__v4sf) __B;

  return (__m128) __builtin_ia32_addss_round (__lhs, __rhs, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  const __v4sf __src = (__v4sf) __W;

  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B, __src,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B, __zero,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  const __v2df __lhs = (__v2df) __A;
  const __v2df __rhs = (__v2df) __B;

  return (__m128d) __builtin_ia32_subsd_round (__lhs, __rhs, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  const __v2df __src = (__v2df) __W;

  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B, __src,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  const __v2df __zero = (__v2df) _mm_setzero_pd ();

  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B, __zero,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  const __v4sf __lhs = (__v4sf) __A;
  const __v4sf __rhs = (__v4sf) __B;

  return (__m128) __builtin_ia32_subss_round (__lhs, __rhs, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  const __v4sf __src = (__v4sf) __W;

  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B, __src,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  const __v4sf __zero = (__v4sf) _mm_setzero_ps ();

  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B, __zero,
                                                   (__mmask8) __U, __R);
}

#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Each expansion is
   wrapped in parentheses so that it behaves as a single primary
   expression, like a call to the inline form: without them a postfix
   operator written after the macro would bind to the builtin call before
   the cast is applied.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round (A, B, C))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round (A, B, W, U, C))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round (A, B, \
                                              (__v2df) _mm_setzero_pd (), \
                                              U, C))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round (A, B, C))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round (A, B, W, U, C))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round (A, B, \
                                             (__v4sf) _mm_setzero_ps (), \
                                             U, C))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round (A, B, C))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round (A, B, W, U, C))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round (A, B, \
                                              (__v2df) _mm_setzero_pd (), \
                                              U, C))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round (A, B, C))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round (A, B, W, U, C))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round (A, B, \
                                             (__v4sf) _mm_setzero_ps (), \
                                             U, C))

#endif
1646
756c5857
AI
#ifdef __OPTIMIZE__
/* Inline forms of the ternary-logic intrinsics, used when __OPTIMIZE__ is
   defined.  __imm is handed unchanged to the builtin.  The unmasked and
   mask forms call __builtin_ia32_pternlog{q,d}512_mask (with an all-ones
   mask and __U respectively); the maskz forms call the _maskz builtin.  */

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_mask (__a, __b, __c, __imm,
                                                     (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_mask (__a, __b, __c, __imm,
                                                     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  const __v8di __a = (__v8di) __A;
  const __v8di __b = (__v8di) __B;
  const __v8di __c = (__v8di) __C;

  return (__m512i) __builtin_ia32_pternlogq512_maskz (__a, __b, __c, __imm,
                                                      (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_mask (__a, __b, __c, __imm,
                                                     (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_mask (__a, __b, __c, __imm,
                                                     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  const __v16si __a = (__v16si) __A;
  const __v16si __b = (__v16si) __B;
  const __v16si __c = (__v16si) __C;

  return (__m512i) __builtin_ia32_pternlogd512_maskz (__a, __b, __c, __imm,
                                                      (__mmask16) __U);
}
#else
/* Macro forms, used when __OPTIMIZE__ is not defined.  Same builtins and
   operand order as the inline forms; the immediate is cast to int.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di) (__m512i) (A), \
     (__v8di) (__m512i) (B), \
     (__v8di) (__m512i) (C), \
     (int) (I), (__mmask8) -1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
     (__v8di) (__m512i) (A), \
     (__v8di) (__m512i) (B), \
     (__v8di) (__m512i) (C), \
     (int) (I), (__mmask8) (U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
     (__v8di) (__m512i) (A), \
     (__v8di) (__m512i) (B), \
     (__v8di) (__m512i) (C), \
     (int) (I), (__mmask8) (U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si) (__m512i) (A), \
     (__v16si) (__m512i) (B), \
     (__v16si) (__m512i) (C), \
     (int) (I), (__mmask16) -1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
     (__v16si) (__m512i) (A), \
     (__v16si) (__m512i) (B), \
     (__v16si) (__m512i) (C), \
     (int) (I), (__mmask16) (U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
     (__v16si) (__m512i) (A), \
     (__v16si) (__m512i) (B), \
     (__v16si) (__m512i) (C), \
     (int) (I), (__mmask16) (U)))
#endif
1736
1737extern __inline __m512d
1738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739_mm512_rcp14_pd (__m512d __A)
1740{
1741 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1742 (__v8df)
0b192937 1743 _mm512_undefined_pd (),
756c5857
AI
1744 (__mmask8) -1);
1745}
1746
1747extern __inline __m512d
1748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1750{
1751 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1752 (__v8df) __W,
1753 (__mmask8) __U);
1754}
1755
1756extern __inline __m512d
1757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1758_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1759{
1760 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1761 (__v8df)
1762 _mm512_setzero_pd (),
1763 (__mmask8) __U);
1764}
1765
1766extern __inline __m512
1767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1768_mm512_rcp14_ps (__m512 __A)
1769{
1770 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1771 (__v16sf)
0b192937 1772 _mm512_undefined_ps (),
756c5857
AI
1773 (__mmask16) -1);
1774}
1775
1776extern __inline __m512
1777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1779{
1780 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1781 (__v16sf) __W,
1782 (__mmask16) __U);
1783}
1784
1785extern __inline __m512
1786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1788{
1789 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1790 (__v16sf)
1791 _mm512_setzero_ps (),
1792 (__mmask16) __U);
1793}
1794
075691af
AI
1795extern __inline __m128d
1796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1797_mm_rcp14_sd (__m128d __A, __m128d __B)
1798{
df62b4af
IT
1799 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1800 (__v2df) __A);
075691af
AI
1801}
1802
f4ee3a9e
UB
1803extern __inline __m128d
1804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1806{
1807 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1808 (__v2df) __A,
1809 (__v2df) __W,
1810 (__mmask8) __U);
1811}
1812
1813extern __inline __m128d
1814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1816{
1817 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1818 (__v2df) __A,
1819 (__v2df) _mm_setzero_ps (),
1820 (__mmask8) __U);
1821}
1822
075691af
AI
1823extern __inline __m128
1824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825_mm_rcp14_ss (__m128 __A, __m128 __B)
1826{
df62b4af
IT
1827 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1828 (__v4sf) __A);
075691af
AI
1829}
1830
f4ee3a9e
UB
1831extern __inline __m128
1832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1833_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1834{
1835 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1836 (__v4sf) __A,
1837 (__v4sf) __W,
1838 (__mmask8) __U);
1839}
1840
1841extern __inline __m128
1842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1843_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1844{
1845 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1846 (__v4sf) __A,
1847 (__v4sf) _mm_setzero_ps (),
1848 (__mmask8) __U);
1849}
1850
756c5857
AI
1851extern __inline __m512d
1852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853_mm512_rsqrt14_pd (__m512d __A)
1854{
1855 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1856 (__v8df)
0b192937 1857 _mm512_undefined_pd (),
756c5857
AI
1858 (__mmask8) -1);
1859}
1860
1861extern __inline __m512d
1862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1864{
1865 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1866 (__v8df) __W,
1867 (__mmask8) __U);
1868}
1869
1870extern __inline __m512d
1871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1872_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1873{
1874 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1875 (__v8df)
1876 _mm512_setzero_pd (),
1877 (__mmask8) __U);
1878}
1879
1880extern __inline __m512
1881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1882_mm512_rsqrt14_ps (__m512 __A)
1883{
1884 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1885 (__v16sf)
0b192937 1886 _mm512_undefined_ps (),
756c5857
AI
1887 (__mmask16) -1);
1888}
1889
1890extern __inline __m512
1891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1893{
1894 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1895 (__v16sf) __W,
1896 (__mmask16) __U);
1897}
1898
1899extern __inline __m512
1900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1901_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1902{
1903 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1904 (__v16sf)
1905 _mm512_setzero_ps (),
1906 (__mmask16) __U);
1907}
1908
075691af
AI
1909extern __inline __m128d
1910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1911_mm_rsqrt14_sd (__m128d __A, __m128d __B)
1912{
df62b4af
IT
1913 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1914 (__v2df) __A);
075691af
AI
1915}
1916
d7a33a4c
JK
1917extern __inline __m128d
1918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1920{
1921 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1922 (__v2df) __A,
1923 (__v2df) __W,
1924 (__mmask8) __U);
1925}
1926
1927extern __inline __m128d
1928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1929_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1930{
1931 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1932 (__v2df) __A,
1933 (__v2df) _mm_setzero_pd (),
1934 (__mmask8) __U);
1935}
1936
075691af
AI
1937extern __inline __m128
1938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1939_mm_rsqrt14_ss (__m128 __A, __m128 __B)
1940{
df62b4af
IT
1941 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1942 (__v4sf) __A);
075691af
AI
1943}
1944
d7a33a4c
JK
1945extern __inline __m128
1946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1948{
1949 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1950 (__v4sf) __A,
1951 (__v4sf) __W,
1952 (__mmask8) __U);
1953}
1954
1955extern __inline __m128
1956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1957_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1958{
1959 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1960 (__v4sf) __A,
1961 (__v4sf) _mm_setzero_ps (),
1962 (__mmask8) __U);
1963}
1964
756c5857
AI
1965#ifdef __OPTIMIZE__
1966extern __inline __m512d
1967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968_mm512_sqrt_round_pd (__m512d __A, const int __R)
1969{
1970 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1971 (__v8df)
0b192937 1972 _mm512_undefined_pd (),
756c5857
AI
1973 (__mmask8) -1, __R);
1974}
1975
1976extern __inline __m512d
1977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1979 const int __R)
1980{
1981 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1982 (__v8df) __W,
1983 (__mmask8) __U, __R);
1984}
1985
1986extern __inline __m512d
1987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1988_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1989{
1990 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1991 (__v8df)
1992 _mm512_setzero_pd (),
1993 (__mmask8) __U, __R);
1994}
1995
1996extern __inline __m512
1997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1998_mm512_sqrt_round_ps (__m512 __A, const int __R)
1999{
2000 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2001 (__v16sf)
0b192937 2002 _mm512_undefined_ps (),
756c5857
AI
2003 (__mmask16) -1, __R);
2004}
2005
2006extern __inline __m512
2007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
2009{
2010 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2011 (__v16sf) __W,
2012 (__mmask16) __U, __R);
2013}
2014
2015extern __inline __m512
2016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2017_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
2018{
2019 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2020 (__v16sf)
2021 _mm512_setzero_ps (),
2022 (__mmask16) __U, __R);
2023}
2024
075691af
AI
2025extern __inline __m128d
2026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2027_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
2028{
b10bc0d6
OM
2029 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2030 (__v2df) __A,
2031 (__v2df)
2032 _mm_setzero_pd (),
2033 (__mmask8) -1, __R);
2034}
2035
2036extern __inline __m128d
2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2039 const int __R)
2040{
2041 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2042 (__v2df) __A,
2043 (__v2df) __W,
2044 (__mmask8) __U, __R);
2045}
2046
2047extern __inline __m128d
2048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2049_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
2050{
2051 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2052 (__v2df) __A,
2053 (__v2df)
2054 _mm_setzero_pd (),
2055 (__mmask8) __U, __R);
075691af
AI
2056}
2057
2058extern __inline __m128
2059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2060_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
2061{
b10bc0d6
OM
2062 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2063 (__v4sf) __A,
2064 (__v4sf)
2065 _mm_setzero_ps (),
2066 (__mmask8) -1, __R);
2067}
2068
2069extern __inline __m128
2070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2071_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2072 const int __R)
2073{
2074 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2075 (__v4sf) __A,
2076 (__v4sf) __W,
2077 (__mmask8) __U, __R);
2078}
2079
2080extern __inline __m128
2081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2083{
2084 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2085 (__v4sf) __A,
2086 (__v4sf)
2087 _mm_setzero_ps (),
2088 (__mmask8) __U, __R);
075691af 2089}
756c5857
AI
2090#else
/* Macro forms of the *_sqrt_round_* intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expansion is wrapped in parentheses so that it
   behaves as a single primary expression (a postfix operator written
   after the macro applies to the cast result, as it would with the
   inline-function forms), and each operand is parenthesized and cast the
   same way the inline-function forms cast it.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (W), (__mmask8) (U), (C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (W), (__mmask16) (U), (C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (C)))

/* The scalar builtins take B before A, matching the inline forms.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) (__m128d) (B), \
      (__v2df) (__m128d) (A), (__v2df) _mm_setzero_pd (), \
      (__mmask8) -1, (C)))

#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) (__m128d) (B), \
      (__v2df) (__m128d) (A), (__v2df) (__m128d) (W), \
      (__mmask8) (U), (C)))

#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) (__m128d) (B), \
      (__v2df) (__m128d) (A), (__v2df) _mm_setzero_pd (), \
      (__mmask8) (U), (C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) (__m128) (B), \
      (__v4sf) (__m128) (A), (__v4sf) _mm_setzero_ps (), \
      (__mmask8) -1, (C)))

#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) (__m128) (B), \
      (__v4sf) (__m128) (A), (__v4sf) (__m128) (W), \
      (__mmask8) (U), (C)))

#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) (__m128) (B), \
      (__v4sf) (__m128) (A), (__v4sf) _mm_setzero_ps (), \
      (__mmask8) (U), (C)))
756c5857
AI
2130#endif
2131
2132extern __inline __m512i
2133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134_mm512_cvtepi8_epi32 (__m128i __A)
2135{
2136 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2137 (__v16si)
4271e5cb 2138 _mm512_undefined_epi32 (),
756c5857
AI
2139 (__mmask16) -1);
2140}
2141
2142extern __inline __m512i
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2145{
2146 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2147 (__v16si) __W,
2148 (__mmask16) __U);
2149}
2150
2151extern __inline __m512i
2152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2153_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2154{
2155 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2156 (__v16si)
2157 _mm512_setzero_si512 (),
2158 (__mmask16) __U);
2159}
2160
2161extern __inline __m512i
2162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163_mm512_cvtepi8_epi64 (__m128i __A)
2164{
2165 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2166 (__v8di)
4271e5cb 2167 _mm512_undefined_epi32 (),
756c5857
AI
2168 (__mmask8) -1);
2169}
2170
2171extern __inline __m512i
2172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2174{
2175 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2176 (__v8di) __W,
2177 (__mmask8) __U);
2178}
2179
2180extern __inline __m512i
2181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2183{
2184 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2185 (__v8di)
2186 _mm512_setzero_si512 (),
2187 (__mmask8) __U);
2188}
2189
2190extern __inline __m512i
2191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192_mm512_cvtepi16_epi32 (__m256i __A)
2193{
2194 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2195 (__v16si)
4271e5cb 2196 _mm512_undefined_epi32 (),
756c5857
AI
2197 (__mmask16) -1);
2198}
2199
2200extern __inline __m512i
2201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2203{
2204 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2205 (__v16si) __W,
2206 (__mmask16) __U);
2207}
2208
2209extern __inline __m512i
2210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2211_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2212{
2213 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2214 (__v16si)
2215 _mm512_setzero_si512 (),
2216 (__mmask16) __U);
2217}
2218
2219extern __inline __m512i
2220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221_mm512_cvtepi16_epi64 (__m128i __A)
2222{
2223 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2224 (__v8di)
4271e5cb 2225 _mm512_undefined_epi32 (),
756c5857
AI
2226 (__mmask8) -1);
2227}
2228
2229extern __inline __m512i
2230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2232{
2233 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2234 (__v8di) __W,
2235 (__mmask8) __U);
2236}
2237
2238extern __inline __m512i
2239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2241{
2242 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2243 (__v8di)
2244 _mm512_setzero_si512 (),
2245 (__mmask8) __U);
2246}
2247
2248extern __inline __m512i
2249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250_mm512_cvtepi32_epi64 (__m256i __X)
2251{
2252 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2253 (__v8di)
4271e5cb 2254 _mm512_undefined_epi32 (),
756c5857
AI
2255 (__mmask8) -1);
2256}
2257
2258extern __inline __m512i
2259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2261{
2262 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2263 (__v8di) __W,
2264 (__mmask8) __U);
2265}
2266
2267extern __inline __m512i
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2270{
2271 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2272 (__v8di)
2273 _mm512_setzero_si512 (),
2274 (__mmask8) __U);
2275}
2276
2277extern __inline __m512i
2278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279_mm512_cvtepu8_epi32 (__m128i __A)
2280{
2281 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2282 (__v16si)
4271e5cb 2283 _mm512_undefined_epi32 (),
756c5857
AI
2284 (__mmask16) -1);
2285}
2286
2287extern __inline __m512i
2288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2289_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2290{
2291 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2292 (__v16si) __W,
2293 (__mmask16) __U);
2294}
2295
2296extern __inline __m512i
2297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2298_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2299{
2300 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2301 (__v16si)
2302 _mm512_setzero_si512 (),
2303 (__mmask16) __U);
2304}
2305
2306extern __inline __m512i
2307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2308_mm512_cvtepu8_epi64 (__m128i __A)
2309{
2310 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2311 (__v8di)
4271e5cb 2312 _mm512_undefined_epi32 (),
756c5857
AI
2313 (__mmask8) -1);
2314}
2315
2316extern __inline __m512i
2317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2319{
2320 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2321 (__v8di) __W,
2322 (__mmask8) __U);
2323}
2324
2325extern __inline __m512i
2326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2327_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2328{
2329 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2330 (__v8di)
2331 _mm512_setzero_si512 (),
2332 (__mmask8) __U);
2333}
2334
2335extern __inline __m512i
2336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337_mm512_cvtepu16_epi32 (__m256i __A)
2338{
2339 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2340 (__v16si)
4271e5cb 2341 _mm512_undefined_epi32 (),
756c5857
AI
2342 (__mmask16) -1);
2343}
2344
2345extern __inline __m512i
2346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2348{
2349 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2350 (__v16si) __W,
2351 (__mmask16) __U);
2352}
2353
2354extern __inline __m512i
2355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2357{
2358 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2359 (__v16si)
2360 _mm512_setzero_si512 (),
2361 (__mmask16) __U);
2362}
2363
2364extern __inline __m512i
2365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366_mm512_cvtepu16_epi64 (__m128i __A)
2367{
2368 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2369 (__v8di)
4271e5cb 2370 _mm512_undefined_epi32 (),
756c5857
AI
2371 (__mmask8) -1);
2372}
2373
2374extern __inline __m512i
2375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2377{
2378 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2379 (__v8di) __W,
2380 (__mmask8) __U);
2381}
2382
2383extern __inline __m512i
2384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2386{
2387 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2388 (__v8di)
2389 _mm512_setzero_si512 (),
2390 (__mmask8) __U);
2391}
2392
2393extern __inline __m512i
2394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2395_mm512_cvtepu32_epi64 (__m256i __X)
2396{
2397 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2398 (__v8di)
4271e5cb 2399 _mm512_undefined_epi32 (),
756c5857
AI
2400 (__mmask8) -1);
2401}
2402
2403extern __inline __m512i
2404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2406{
2407 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2408 (__v8di) __W,
2409 (__mmask8) __U);
2410}
2411
2412extern __inline __m512i
2413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2415{
2416 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2417 (__v8di)
2418 _mm512_setzero_si512 (),
2419 (__mmask8) __U);
2420}
2421
2422#ifdef __OPTIMIZE__
2423extern __inline __m512d
2424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2426{
2427 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2428 (__v8df) __B,
2429 (__v8df)
0b192937 2430 _mm512_undefined_pd (),
756c5857
AI
2431 (__mmask8) -1, __R);
2432}
2433
2434extern __inline __m512d
2435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2436_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2437 __m512d __B, const int __R)
2438{
2439 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2440 (__v8df) __B,
2441 (__v8df) __W,
2442 (__mmask8) __U, __R);
2443}
2444
2445extern __inline __m512d
2446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2447_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2448 const int __R)
2449{
2450 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2451 (__v8df) __B,
2452 (__v8df)
2453 _mm512_setzero_pd (),
2454 (__mmask8) __U, __R);
2455}
2456
2457extern __inline __m512
2458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2459_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2460{
2461 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2462 (__v16sf) __B,
2463 (__v16sf)
0b192937 2464 _mm512_undefined_ps (),
756c5857
AI
2465 (__mmask16) -1, __R);
2466}
2467
2468extern __inline __m512
2469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2471 __m512 __B, const int __R)
2472{
2473 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2474 (__v16sf) __B,
2475 (__v16sf) __W,
2476 (__mmask16) __U, __R);
2477}
2478
2479extern __inline __m512
2480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2482{
2483 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2484 (__v16sf) __B,
2485 (__v16sf)
2486 _mm512_setzero_ps (),
2487 (__mmask16) __U, __R);
2488}
2489
2490extern __inline __m512d
2491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2493{
2494 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2495 (__v8df) __B,
2496 (__v8df)
0b192937 2497 _mm512_undefined_pd (),
756c5857
AI
2498 (__mmask8) -1, __R);
2499}
2500
2501extern __inline __m512d
2502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2503_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2504 __m512d __B, const int __R)
2505{
2506 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2507 (__v8df) __B,
2508 (__v8df) __W,
2509 (__mmask8) __U, __R);
2510}
2511
2512extern __inline __m512d
2513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2515 const int __R)
2516{
2517 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2518 (__v8df) __B,
2519 (__v8df)
2520 _mm512_setzero_pd (),
2521 (__mmask8) __U, __R);
2522}
2523
2524extern __inline __m512
2525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2527{
2528 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2529 (__v16sf) __B,
2530 (__v16sf)
0b192937 2531 _mm512_undefined_ps (),
756c5857
AI
2532 (__mmask16) -1, __R);
2533}
2534
2535extern __inline __m512
2536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2538 __m512 __B, const int __R)
2539{
2540 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2541 (__v16sf) __B,
2542 (__v16sf) __W,
2543 (__mmask16) __U, __R);
2544}
2545
2546extern __inline __m512
2547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2549{
2550 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2551 (__v16sf) __B,
2552 (__v16sf)
2553 _mm512_setzero_ps (),
2554 (__mmask16) __U, __R);
2555}
2556#else
/* Macro forms of the 512-bit add/sub *_round_* intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in parentheses
   so that it behaves as a single primary expression (a postfix operator
   written after the macro applies to the cast result, as it would with
   the inline-function forms), and each operand is parenthesized and cast
   the same way the inline-function forms cast it.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) (__m512d) (W), \
      (__mmask8) (U), (C)))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), (C)))

#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) (__m512) (W), \
      (__mmask16) (U), (C)))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), (C)))

#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) (__m512d) (W), \
      (__mmask8) (U), (C)))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), (C)))

#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) (__m512) (W), \
      (__mmask16) (U), (C)))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), (C)))
2592#endif
2593
2594#ifdef __OPTIMIZE__
2595extern __inline __m512d
2596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2598{
2599 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2600 (__v8df) __B,
2601 (__v8df)
0b192937 2602 _mm512_undefined_pd (),
756c5857
AI
2603 (__mmask8) -1, __R);
2604}
2605
2606extern __inline __m512d
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2609 __m512d __B, const int __R)
2610{
2611 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2612 (__v8df) __B,
2613 (__v8df) __W,
2614 (__mmask8) __U, __R);
2615}
2616
2617extern __inline __m512d
2618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2620 const int __R)
2621{
2622 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2623 (__v8df) __B,
2624 (__v8df)
2625 _mm512_setzero_pd (),
2626 (__mmask8) __U, __R);
2627}
2628
2629extern __inline __m512
2630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2631_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2632{
2633 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2634 (__v16sf) __B,
2635 (__v16sf)
0b192937 2636 _mm512_undefined_ps (),
756c5857
AI
2637 (__mmask16) -1, __R);
2638}
2639
2640extern __inline __m512
2641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2643 __m512 __B, const int __R)
2644{
2645 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2646 (__v16sf) __B,
2647 (__v16sf) __W,
2648 (__mmask16) __U, __R);
2649}
2650
2651extern __inline __m512
2652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2654{
2655 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2656 (__v16sf) __B,
2657 (__v16sf)
2658 _mm512_setzero_ps (),
2659 (__mmask16) __U, __R);
2660}
2661
2662extern __inline __m512d
2663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2665{
2666 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2667 (__v8df) __V,
2668 (__v8df)
0b192937 2669 _mm512_undefined_pd (),
756c5857
AI
2670 (__mmask8) -1, __R);
2671}
2672
2673extern __inline __m512d
2674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2675_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2676 __m512d __V, const int __R)
2677{
2678 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2679 (__v8df) __V,
2680 (__v8df) __W,
2681 (__mmask8) __U, __R);
2682}
2683
2684extern __inline __m512d
2685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2686_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2687 const int __R)
2688{
2689 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2690 (__v8df) __V,
2691 (__v8df)
2692 _mm512_setzero_pd (),
2693 (__mmask8) __U, __R);
2694}
2695
2696extern __inline __m512
2697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2698_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2699{
2700 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2701 (__v16sf) __B,
2702 (__v16sf)
0b192937 2703 _mm512_undefined_ps (),
756c5857
AI
2704 (__mmask16) -1, __R);
2705}
2706
2707extern __inline __m512
2708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2709_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2710 __m512 __B, const int __R)
2711{
2712 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2713 (__v16sf) __B,
2714 (__v16sf) __W,
2715 (__mmask16) __U, __R);
2716}
2717
2718extern __inline __m512
2719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2720_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2721{
2722 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2723 (__v16sf) __B,
2724 (__v16sf)
2725 _mm512_setzero_ps (),
2726 (__mmask16) __U, __R);
2727}
2728
075691af
AI
2729extern __inline __m128d
2730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2731_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2732{
2733 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2734 (__v2df) __B,
2735 __R);
2736}
2737
f4ee3a9e
UB
2738extern __inline __m128d
2739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2740_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2741 __m128d __B, const int __R)
2742{
2743 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2744 (__v2df) __B,
2745 (__v2df) __W,
2746 (__mmask8) __U, __R);
2747}
2748
2749extern __inline __m128d
2750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2751_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2752 const int __R)
2753{
2754 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2755 (__v2df) __B,
2756 (__v2df)
2757 _mm_setzero_pd (),
2758 (__mmask8) __U, __R);
2759}
2760
075691af
AI
2761extern __inline __m128
2762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2763_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2764{
2765 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2766 (__v4sf) __B,
2767 __R);
2768}
2769
f4ee3a9e
UB
2770extern __inline __m128
2771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2772_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2773 __m128 __B, const int __R)
2774{
2775 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2776 (__v4sf) __B,
2777 (__v4sf) __W,
2778 (__mmask8) __U, __R);
2779}
2780
2781extern __inline __m128
2782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2783_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2784 const int __R)
2785{
2786 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2787 (__v4sf) __B,
2788 (__v4sf)
2789 _mm_setzero_ps (),
2790 (__mmask8) __U, __R);
2791}
2792
075691af
AI
2793extern __inline __m128d
2794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2795_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2796{
2797 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2798 (__v2df) __B,
2799 __R);
2800}
2801
f4ee3a9e
UB
2802extern __inline __m128d
2803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2804_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2805 __m128d __B, const int __R)
2806{
2807 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2808 (__v2df) __B,
2809 (__v2df) __W,
2810 (__mmask8) __U, __R);
2811}
2812
2813extern __inline __m128d
2814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2815_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2816 const int __R)
2817{
2818 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2819 (__v2df) __B,
2820 (__v2df)
2821 _mm_setzero_pd (),
2822 (__mmask8) __U, __R);
2823}
2824
075691af
AI
2825extern __inline __m128
2826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2827_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2828{
2829 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2830 (__v4sf) __B,
2831 __R);
2832}
2833
f4ee3a9e
UB
2834extern __inline __m128
2835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2836_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2837 __m128 __B, const int __R)
2838{
2839 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2840 (__v4sf) __B,
2841 (__v4sf) __W,
2842 (__mmask8) __U, __R);
2843}
2844
2845extern __inline __m128
2846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2847_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2848 const int __R)
2849{
2850 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2851 (__v4sf) __B,
2852 (__v4sf)
2853 _mm_setzero_ps (),
2854 (__mmask8) __U, __R);
2855}
2856
756c5857
AI
2857#else
/* Macro fallbacks for the 512-bit mul/div "round" intrinsics, used when
   __OPTIMIZE__ is not defined.  Each one expands straight to the builtin
   so the last argument reaches it exactly as written by the caller.
   The expansion and every argument are parenthesized so the macros stay
   a single primary expression inside larger expressions (e.g. when
   followed by a subscript).  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))

#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))
075691af
AI
2893
/* Macro fallbacks for the scalar mul/div "round" intrinsics, used when
   __OPTIMIZE__ is not defined.  The unmasked forms call the *_round
   builtins; the mask/maskz forms call the *_mask_round builtins with
   either W or a zero vector as the third operand.  Expansions and
   arguments are parenthesized so the macros compose safely.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), (U), (C)))

#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_mulss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_mulss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))

#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))

#define _mm_mask_div_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_divsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_divsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), (U), (C)))

#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))

#define _mm_mask_div_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_divss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_divss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))
2929
756c5857
AI
2930#endif
2931
2932#ifdef __OPTIMIZE__
2933extern __inline __m512d
2934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2935_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2936{
2937 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2938 (__v8df) __B,
2939 (__v8df)
0b192937 2940 _mm512_undefined_pd (),
756c5857
AI
2941 (__mmask8) -1, __R);
2942}
2943
2944extern __inline __m512d
2945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2946_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2947 __m512d __B, const int __R)
2948{
2949 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2950 (__v8df) __B,
2951 (__v8df) __W,
2952 (__mmask8) __U, __R);
2953}
2954
2955extern __inline __m512d
2956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2958 const int __R)
2959{
2960 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2961 (__v8df) __B,
2962 (__v8df)
2963 _mm512_setzero_pd (),
2964 (__mmask8) __U, __R);
2965}
2966
2967extern __inline __m512
2968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2969_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2970{
2971 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2972 (__v16sf) __B,
2973 (__v16sf)
0b192937 2974 _mm512_undefined_ps (),
756c5857
AI
2975 (__mmask16) -1, __R);
2976}
2977
2978extern __inline __m512
2979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2980_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2981 __m512 __B, const int __R)
2982{
2983 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2984 (__v16sf) __B,
2985 (__v16sf) __W,
2986 (__mmask16) __U, __R);
2987}
2988
2989extern __inline __m512
2990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2991_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2992{
2993 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2994 (__v16sf) __B,
2995 (__v16sf)
2996 _mm512_setzero_ps (),
2997 (__mmask16) __U, __R);
2998}
2999
3000extern __inline __m512d
3001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3002_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
3003{
3004 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3005 (__v8df) __B,
3006 (__v8df)
0b192937 3007 _mm512_undefined_pd (),
756c5857
AI
3008 (__mmask8) -1, __R);
3009}
3010
3011extern __inline __m512d
3012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3014 __m512d __B, const int __R)
3015{
3016 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3017 (__v8df) __B,
3018 (__v8df) __W,
3019 (__mmask8) __U, __R);
3020}
3021
3022extern __inline __m512d
3023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3025 const int __R)
3026{
3027 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3028 (__v8df) __B,
3029 (__v8df)
3030 _mm512_setzero_pd (),
3031 (__mmask8) __U, __R);
3032}
3033
3034extern __inline __m512
3035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3036_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
3037{
3038 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3039 (__v16sf) __B,
3040 (__v16sf)
0b192937 3041 _mm512_undefined_ps (),
756c5857
AI
3042 (__mmask16) -1, __R);
3043}
3044
3045extern __inline __m512
3046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3048 __m512 __B, const int __R)
3049{
3050 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3051 (__v16sf) __B,
3052 (__v16sf) __W,
3053 (__mmask16) __U, __R);
3054}
3055
3056extern __inline __m512
3057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3059{
3060 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3061 (__v16sf) __B,
3062 (__v16sf)
3063 _mm512_setzero_ps (),
3064 (__mmask16) __U, __R);
3065}
3066#else
/* Macro fallbacks for the double-precision max "round" intrinsics, used
   when __OPTIMIZE__ is not defined.  Expansions and arguments are
   parenthesized so the macros stay a single primary expression.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (R)))
3075
/* Unmasked single-precision max fallback.  The third operand comes from
   _mm512_undefined_ps (), matching the inline definition of this
   intrinsic and the other *_ps macros (it previously borrowed the
   double-precision helper and reinterpreted it).  */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (R)))
756c5857
AI
3078
/* Remaining macro fallbacks for the max/min "round" intrinsics, used
   when __OPTIMIZE__ is not defined.  Expansions and arguments are
   parenthesized so the macros stay a single primary expression.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (R)))

#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (R)))

#define _mm512_min_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (R)))
3102#endif
3103
3104#ifdef __OPTIMIZE__
3105extern __inline __m512d
3106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3107_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3108{
3109 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3110 (__v8df) __B,
3111 (__v8df)
0b192937 3112 _mm512_undefined_pd (),
756c5857
AI
3113 (__mmask8) -1, __R);
3114}
3115
3116extern __inline __m512d
3117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3118_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3119 __m512d __B, const int __R)
3120{
3121 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3122 (__v8df) __B,
3123 (__v8df) __W,
3124 (__mmask8) __U, __R);
3125}
3126
3127extern __inline __m512d
3128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3129_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3130 const int __R)
3131{
3132 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3133 (__v8df) __B,
3134 (__v8df)
3135 _mm512_setzero_pd (),
3136 (__mmask8) __U, __R);
3137}
3138
3139extern __inline __m512
3140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3142{
3143 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3144 (__v16sf) __B,
3145 (__v16sf)
0b192937 3146 _mm512_undefined_ps (),
756c5857
AI
3147 (__mmask16) -1, __R);
3148}
3149
3150extern __inline __m512
3151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3152_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3153 __m512 __B, const int __R)
3154{
3155 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3156 (__v16sf) __B,
3157 (__v16sf) __W,
3158 (__mmask16) __U, __R);
3159}
3160
3161extern __inline __m512
3162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3163_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3164 const int __R)
3165{
3166 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3167 (__v16sf) __B,
3168 (__v16sf)
3169 _mm512_setzero_ps (),
3170 (__mmask16) __U, __R);
3171}
3172
075691af
AI
3173extern __inline __m128d
3174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3176{
158061a6
OM
3177 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3178 (__v2df) __B,
3179 (__v2df)
3180 _mm_setzero_pd (),
3181 (__mmask8) -1, __R);
3182}
3183
3184extern __inline __m128d
3185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3187 const int __R)
3188{
3189 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3190 (__v2df) __B,
3191 (__v2df) __W,
3192 (__mmask8) __U, __R);
3193}
3194
3195extern __inline __m128d
3196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3198 const int __R)
3199{
3200 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3201 (__v2df) __B,
3202 (__v2df)
3203 _mm_setzero_pd (),
3204 (__mmask8) __U, __R);
075691af
AI
3205}
3206
3207extern __inline __m128
3208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3210{
158061a6
OM
3211 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3212 (__v4sf) __B,
3213 (__v4sf)
3214 _mm_setzero_ps (),
3215 (__mmask8) -1, __R);
3216}
3217
3218extern __inline __m128
3219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3221 const int __R)
3222{
3223 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3224 (__v4sf) __B,
3225 (__v4sf) __W,
3226 (__mmask8) __U, __R);
3227}
3228
3229extern __inline __m128
3230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3232{
3233 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3234 (__v4sf) __B,
3235 (__v4sf)
3236 _mm_setzero_ps (),
3237 (__mmask8) __U, __R);
075691af 3238}
756c5857
AI
3239#else
/* Macro fallbacks for the 512-bit scalef "round" intrinsics, used when
   __OPTIMIZE__ is not defined.  Expansions and arguments are
   parenthesized so the macros stay a single primary expression.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
      (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
      (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
      (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
      (__v16sf) _mm512_setzero_ps (), (U), (C)))
075691af
AI
3257
/* Macro fallbacks for the scalar scalef "round" intrinsics, used when
   __OPTIMIZE__ is not defined.  The mask/maskz forms are provided here
   as well so that every intrinsic defined as an inline function in the
   __OPTIMIZE__ branch is also available without optimization; they use
   the same builtin and operand order as those inline definitions.
   Expansions and arguments are parenthesized.  */
#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), -1, (C)))

#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), \
      (__v2df) _mm_setzero_pd (), (U), (C)))

#define _mm_scalef_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), -1, (C)))

#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), \
      (__v4sf) _mm_setzero_ps (), (U), (C)))
756c5857
AI
3265#endif
3266
3267#ifdef __OPTIMIZE__
3268extern __inline __m512d
3269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3270_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3271{
3272 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3273 (__v8df) __B,
3274 (__v8df) __C,
3275 (__mmask8) -1, __R);
3276}
3277
3278extern __inline __m512d
3279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3280_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3281 __m512d __C, const int __R)
3282{
3283 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3284 (__v8df) __B,
3285 (__v8df) __C,
3286 (__mmask8) __U, __R);
3287}
3288
3289extern __inline __m512d
3290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3291_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3292 __mmask8 __U, const int __R)
3293{
3294 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3295 (__v8df) __B,
3296 (__v8df) __C,
3297 (__mmask8) __U, __R);
3298}
3299
3300extern __inline __m512d
3301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3303 __m512d __C, const int __R)
3304{
3305 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3306 (__v8df) __B,
3307 (__v8df) __C,
3308 (__mmask8) __U, __R);
3309}
3310
3311extern __inline __m512
3312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3314{
3315 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3316 (__v16sf) __B,
3317 (__v16sf) __C,
3318 (__mmask16) -1, __R);
3319}
3320
3321extern __inline __m512
3322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3324 __m512 __C, const int __R)
3325{
3326 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3327 (__v16sf) __B,
3328 (__v16sf) __C,
3329 (__mmask16) __U, __R);
3330}
3331
3332extern __inline __m512
3333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3334_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3335 __mmask16 __U, const int __R)
3336{
3337 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3338 (__v16sf) __B,
3339 (__v16sf) __C,
3340 (__mmask16) __U, __R);
3341}
3342
3343extern __inline __m512
3344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3345_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3346 __m512 __C, const int __R)
3347{
3348 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3349 (__v16sf) __B,
3350 (__v16sf) __C,
3351 (__mmask16) __U, __R);
3352}
3353
3354extern __inline __m512d
3355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3356_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3357{
3358 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3359 (__v8df) __B,
3360 -(__v8df) __C,
3361 (__mmask8) -1, __R);
3362}
3363
3364extern __inline __m512d
3365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3367 __m512d __C, const int __R)
3368{
3369 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3370 (__v8df) __B,
3371 -(__v8df) __C,
3372 (__mmask8) __U, __R);
3373}
3374
3375extern __inline __m512d
3376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3377_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3378 __mmask8 __U, const int __R)
3379{
3380 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3381 (__v8df) __B,
3382 (__v8df) __C,
3383 (__mmask8) __U, __R);
3384}
3385
3386extern __inline __m512d
3387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3388_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3389 __m512d __C, const int __R)
3390{
3391 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3392 (__v8df) __B,
3393 -(__v8df) __C,
3394 (__mmask8) __U, __R);
3395}
3396
3397extern __inline __m512
3398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3400{
3401 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3402 (__v16sf) __B,
3403 -(__v16sf) __C,
3404 (__mmask16) -1, __R);
3405}
3406
3407extern __inline __m512
3408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3410 __m512 __C, const int __R)
3411{
3412 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3413 (__v16sf) __B,
3414 -(__v16sf) __C,
3415 (__mmask16) __U, __R);
3416}
3417
3418extern __inline __m512
3419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3420_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3421 __mmask16 __U, const int __R)
3422{
3423 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3424 (__v16sf) __B,
3425 (__v16sf) __C,
3426 (__mmask16) __U, __R);
3427}
3428
3429extern __inline __m512
3430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3431_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3432 __m512 __C, const int __R)
3433{
3434 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3435 (__v16sf) __B,
3436 -(__v16sf) __C,
3437 (__mmask16) __U, __R);
3438}
3439
3440extern __inline __m512d
3441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3442_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3443{
3444 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3445 (__v8df) __B,
3446 (__v8df) __C,
3447 (__mmask8) -1, __R);
3448}
3449
3450extern __inline __m512d
3451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3452_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3453 __m512d __C, const int __R)
3454{
3455 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3456 (__v8df) __B,
3457 (__v8df) __C,
3458 (__mmask8) __U, __R);
3459}
3460
3461extern __inline __m512d
3462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3463_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3464 __mmask8 __U, const int __R)
3465{
3466 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3467 (__v8df) __B,
3468 (__v8df) __C,
3469 (__mmask8) __U, __R);
3470}
3471
3472extern __inline __m512d
3473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3475 __m512d __C, const int __R)
3476{
3477 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3478 (__v8df) __B,
3479 (__v8df) __C,
3480 (__mmask8) __U, __R);
3481}
3482
3483extern __inline __m512
3484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3485_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3486{
3487 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3488 (__v16sf) __B,
3489 (__v16sf) __C,
3490 (__mmask16) -1, __R);
3491}
3492
3493extern __inline __m512
3494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3495_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3496 __m512 __C, const int __R)
3497{
3498 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3499 (__v16sf) __B,
3500 (__v16sf) __C,
3501 (__mmask16) __U, __R);
3502}
3503
3504extern __inline __m512
3505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3506_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3507 __mmask16 __U, const int __R)
3508{
3509 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3510 (__v16sf) __B,
3511 (__v16sf) __C,
3512 (__mmask16) __U, __R);
3513}
3514
3515extern __inline __m512
3516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3518 __m512 __C, const int __R)
3519{
3520 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3521 (__v16sf) __B,
3522 (__v16sf) __C,
3523 (__mmask16) __U, __R);
3524}
3525
3526extern __inline __m512d
3527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3528_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3529{
3530 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3531 (__v8df) __B,
3532 -(__v8df) __C,
3533 (__mmask8) -1, __R);
3534}
3535
3536extern __inline __m512d
3537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3539 __m512d __C, const int __R)
3540{
3541 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3542 (__v8df) __B,
3543 -(__v8df) __C,
3544 (__mmask8) __U, __R);
3545}
3546
3547extern __inline __m512d
3548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3549_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3550 __mmask8 __U, const int __R)
3551{
3552 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3553 (__v8df) __B,
3554 (__v8df) __C,
3555 (__mmask8) __U, __R);
3556}
3557
3558extern __inline __m512d
3559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3561 __m512d __C, const int __R)
3562{
3563 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3564 (__v8df) __B,
3565 -(__v8df) __C,
3566 (__mmask8) __U, __R);
3567}
3568
3569extern __inline __m512
3570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3572{
3573 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3574 (__v16sf) __B,
3575 -(__v16sf) __C,
3576 (__mmask16) -1, __R);
3577}
3578
3579extern __inline __m512
3580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3581_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3582 __m512 __C, const int __R)
3583{
3584 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3585 (__v16sf) __B,
3586 -(__v16sf) __C,
3587 (__mmask16) __U, __R);
3588}
3589
3590extern __inline __m512
3591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3592_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3593 __mmask16 __U, const int __R)
3594{
3595 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3596 (__v16sf) __B,
3597 (__v16sf) __C,
3598 (__mmask16) __U, __R);
3599}
3600
3601extern __inline __m512
3602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3604 __m512 __C, const int __R)
3605{
3606 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3607 (__v16sf) __B,
3608 -(__v16sf) __C,
3609 (__mmask16) __U, __R);
3610}
3611
3612extern __inline __m512d
3613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3615{
3616 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3617 (__v8df) __B,
3618 (__v8df) __C,
3619 (__mmask8) -1, __R);
3620}
3621
3622extern __inline __m512d
3623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3625 __m512d __C, const int __R)
3626{
3627 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3628 (__v8df) __B,
3629 (__v8df) __C,
3630 (__mmask8) __U, __R);
3631}
3632
3633extern __inline __m512d
3634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3635_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3636 __mmask8 __U, const int __R)
3637{
3638 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3639 (__v8df) __B,
3640 (__v8df) __C,
3641 (__mmask8) __U, __R);
3642}
3643
3644extern __inline __m512d
3645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3646_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3647 __m512d __C, const int __R)
3648{
3649 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3650 (__v8df) __B,
3651 (__v8df) __C,
3652 (__mmask8) __U, __R);
3653}
3654
3655extern __inline __m512
3656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3658{
3659 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3660 (__v16sf) __B,
3661 (__v16sf) __C,
3662 (__mmask16) -1, __R);
3663}
3664
3665extern __inline __m512
3666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3668 __m512 __C, const int __R)
3669{
3670 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3671 (__v16sf) __B,
3672 (__v16sf) __C,
3673 (__mmask16) __U, __R);
3674}
3675
3676extern __inline __m512
3677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3678_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3679 __mmask16 __U, const int __R)
3680{
3681 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3682 (__v16sf) __B,
3683 (__v16sf) __C,
3684 (__mmask16) __U, __R);
3685}
3686
3687extern __inline __m512
3688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3690 __m512 __C, const int __R)
3691{
3692 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3693 (__v16sf) __B,
3694 (__v16sf) __C,
3695 (__mmask16) __U, __R);
3696}
3697
3698extern __inline __m512d
3699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3701{
3702 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3703 (__v8df) __B,
3704 -(__v8df) __C,
3705 (__mmask8) -1, __R);
3706}
3707
3708extern __inline __m512d
3709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3711 __m512d __C, const int __R)
3712{
3713 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3714 (__v8df) __B,
3715 (__v8df) __C,
3716 (__mmask8) __U, __R);
3717}
3718
3719extern __inline __m512d
3720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3721_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3722 __mmask8 __U, const int __R)
3723{
3724 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3725 (__v8df) __B,
3726 (__v8df) __C,
3727 (__mmask8) __U, __R);
3728}
3729
3730extern __inline __m512d
3731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3732_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3733 __m512d __C, const int __R)
3734{
3735 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3736 (__v8df) __B,
3737 -(__v8df) __C,
3738 (__mmask8) __U, __R);
3739}
3740
3741extern __inline __m512
3742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3743_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3744{
3745 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3746 (__v16sf) __B,
3747 -(__v16sf) __C,
3748 (__mmask16) -1, __R);
3749}
3750
3751extern __inline __m512
3752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3753_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3754 __m512 __C, const int __R)
3755{
3756 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3757 (__v16sf) __B,
3758 (__v16sf) __C,
3759 (__mmask16) __U, __R);
3760}
3761
3762extern __inline __m512
3763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3765 __mmask16 __U, const int __R)
3766{
3767 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3768 (__v16sf) __B,
3769 (__v16sf) __C,
3770 (__mmask16) __U, __R);
3771}
3772
3773extern __inline __m512
3774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3776 __m512 __C, const int __R)
3777{
3778 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3779 (__v16sf) __B,
3780 -(__v16sf) __C,
3781 (__mmask16) __U, __R);
3782}
3783#else
3784#define _mm512_fmadd_round_pd(A, B, C, R) \
3785 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3786
3787#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3788 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3789
3790#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3791 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3792
3793#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3794 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3795
3796#define _mm512_fmadd_round_ps(A, B, C, R) \
3797 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3798
3799#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3800 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3801
3802#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3803 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3804
3805#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3806 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3807
/* Macro forms of the _mm512_*fmsub_round_pd intrinsics.  The plain, mask
   and maskz variants negate C and reuse the vfmaddpd512 builtins; the
   mask3 variant calls __builtin_ia32_vfmsubpd512_mask3 with C unchanged.
   Each expansion is wrapped in parentheses so it remains a single operand
   whatever surrounds the use.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R))
3819
/* Macro forms of the _mm512_*fmsub_round_ps intrinsics.  The plain, mask
   and maskz variants negate C and reuse the vfmaddps512 builtins; the
   mask3 variant calls __builtin_ia32_vfmsubps512_mask3 with C unchanged.
   Each expansion is wrapped in parentheses so it remains a single operand
   whatever surrounds the use.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R))
3831
/* Macro forms of the _mm512_*fmaddsub_round_pd intrinsics.  All four
   variants forward to a __builtin_ia32_vfmaddsubpd512_* builtin; in
   particular the mask variant must use the fmaddsub builtin (as its _ps
   counterpart does), not the plain vfmaddpd512_mask one.  Each expansion
   is wrapped in parentheses so it remains a single operand whatever
   surrounds the use.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))
3843
/* Macro forms of the _mm512_*fmaddsub_round_ps intrinsics.  All four
   variants forward to a __builtin_ia32_vfmaddsubps512_* builtin; the
   unmasked one supplies -1 as the mask.  Each expansion is wrapped in
   parentheses so it remains a single operand whatever surrounds the use.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
3855
/* Macro forms of the _mm512_*fmsubadd_round_pd intrinsics.  The plain,
   mask and maskz variants negate C and reuse the vfmaddsubpd512 builtins;
   the mask3 variant calls __builtin_ia32_vfmsubaddpd512_mask3 with C
   unchanged.  Each expansion is wrapped in parentheses so it remains a
   single operand whatever surrounds the use.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))
3867
/* Macro forms of the _mm512_*fmsubadd_round_ps intrinsics.  The plain,
   mask and maskz variants negate C and reuse the vfmaddsubps512 builtins;
   the mask3 variant calls __builtin_ia32_vfmsubaddps512_mask3 with C
   unchanged.  Each expansion is wrapped in parentheses so it remains a
   single operand whatever surrounds the use.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
3879
/* Macro forms of the _mm512_*fnmadd_round_pd intrinsics.  The plain,
   mask3 and maskz variants negate A and reuse the vfmaddpd512 builtins.
   The mask variant calls __builtin_ia32_vfnmaddpd512_mask, whose name
   already carries the negation (the vfnmsub mask forms likewise take A
   as-is), so A must be passed un-negated there: negating it again would
   cancel the operation's own negation and change the first operand handed
   to the masked builtin.  Each expansion is wrapped in parentheses so it
   remains a single operand whatever surrounds the use.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R))

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R))
3891
/* Macro forms of the _mm512_*fnmadd_round_ps intrinsics.  The plain,
   mask3 and maskz variants negate A and reuse the vfmaddps512 builtins.
   The mask variant calls __builtin_ia32_vfnmaddps512_mask, whose name
   already carries the negation (the vfnmsub mask forms likewise take A
   as-is), so A must be passed un-negated there: negating it again would
   cancel the operation's own negation and change the first operand handed
   to the masked builtin.  Each expansion is wrapped in parentheses so it
   remains a single operand whatever surrounds the use.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R))
3903
/* Macro forms of the _mm512_*fnmsub_round_pd intrinsics.  The plain and
   maskz variants negate both A and C and reuse the vfmaddpd512 builtins;
   the mask and mask3 variants call the dedicated vfnmsubpd512 builtins
   with the operands unchanged.  Each expansion is wrapped in parentheses
   so it remains a single operand whatever surrounds the use.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R))
3915
/* Macro forms of the _mm512_*fnmsub_round_ps intrinsics.  The plain and
   maskz variants negate both A and C and reuse the vfmaddps512 builtins;
   the mask and mask3 variants call the dedicated vfnmsubps512 builtins
   with the operands unchanged.  Each expansion is wrapped in parentheses
   so it remains a single operand whatever surrounds the use.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R))
3927#endif
3928
3929extern __inline __m512i
3930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931_mm512_abs_epi64 (__m512i __A)
3932{
3933 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3934 (__v8di)
4271e5cb 3935 _mm512_undefined_epi32 (),
756c5857
AI
3936 (__mmask8) -1);
3937}
3938
3939extern __inline __m512i
3940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3941_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3942{
3943 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3944 (__v8di) __W,
3945 (__mmask8) __U);
3946}
3947
3948extern __inline __m512i
3949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3950_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3951{
3952 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3953 (__v8di)
3954 _mm512_setzero_si512 (),
3955 (__mmask8) __U);
3956}
3957
3958extern __inline __m512i
3959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3960_mm512_abs_epi32 (__m512i __A)
3961{
3962 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3963 (__v16si)
4271e5cb 3964 _mm512_undefined_epi32 (),
756c5857
AI
3965 (__mmask16) -1);
3966}
3967
3968extern __inline __m512i
3969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3970_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3971{
3972 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3973 (__v16si) __W,
3974 (__mmask16) __U);
3975}
3976
3977extern __inline __m512i
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3980{
3981 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3982 (__v16si)
3983 _mm512_setzero_si512 (),
3984 (__mmask16) __U);
3985}
3986
3987extern __inline __m512
3988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3989_mm512_broadcastss_ps (__m128 __A)
3990{
0b192937
UD
3991 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3992 (__v16sf)
3993 _mm512_undefined_ps (),
756c5857
AI
3994 (__mmask16) -1);
3995}
3996
3997extern __inline __m512
3998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
4000{
4001 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4002 (__v16sf) __O, __M);
4003}
4004
4005extern __inline __m512
4006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4007_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
4008{
4009 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4010 (__v16sf)
4011 _mm512_setzero_ps (),
4012 __M);
4013}
4014
4015extern __inline __m512d
4016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4017_mm512_broadcastsd_pd (__m128d __A)
4018{
0b192937
UD
4019 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4020 (__v8df)
4021 _mm512_undefined_pd (),
756c5857
AI
4022 (__mmask8) -1);
4023}
4024
4025extern __inline __m512d
4026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4027_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
4028{
4029 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4030 (__v8df) __O, __M);
4031}
4032
4033extern __inline __m512d
4034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4035_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
4036{
4037 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4038 (__v8df)
4039 _mm512_setzero_pd (),
4040 __M);
4041}
4042
4043extern __inline __m512i
4044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4045_mm512_broadcastd_epi32 (__m128i __A)
4046{
0b192937
UD
4047 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4048 (__v16si)
4271e5cb 4049 _mm512_undefined_epi32 (),
756c5857
AI
4050 (__mmask16) -1);
4051}
4052
4053extern __inline __m512i
4054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
4056{
4057 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4058 (__v16si) __O, __M);
4059}
4060
4061extern __inline __m512i
4062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4063_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
4064{
4065 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4066 (__v16si)
4067 _mm512_setzero_si512 (),
4068 __M);
4069}
4070
4071extern __inline __m512i
4072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4073_mm512_set1_epi32 (int __A)
4074{
0b192937
UD
4075 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4076 (__v16si)
4271e5cb 4077 _mm512_undefined_epi32 (),
756c5857
AI
4078 (__mmask16)(-1));
4079}
4080
4081extern __inline __m512i
4082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4083_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4084{
4085 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4086 __M);
4087}
4088
4089extern __inline __m512i
4090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4091_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4092{
4093 return (__m512i)
4094 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4095 (__v16si) _mm512_setzero_si512 (),
4096 __M);
4097}
4098
4099extern __inline __m512i
4100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4101_mm512_broadcastq_epi64 (__m128i __A)
4102{
0b192937
UD
4103 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4104 (__v8di)
4271e5cb 4105 _mm512_undefined_epi32 (),
756c5857
AI
4106 (__mmask8) -1);
4107}
4108
4109extern __inline __m512i
4110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4112{
4113 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4114 (__v8di) __O, __M);
4115}
4116
4117extern __inline __m512i
4118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4119_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4120{
4121 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4122 (__v8di)
4123 _mm512_setzero_si512 (),
4124 __M);
4125}
4126
4127extern __inline __m512i
4128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4129_mm512_set1_epi64 (long long __A)
4130{
0b192937
UD
4131 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4132 (__v8di)
4271e5cb 4133 _mm512_undefined_epi32 (),
756c5857 4134 (__mmask8)(-1));
756c5857
AI
4135}
4136
4137extern __inline __m512i
4138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4140{
756c5857
AI
4141 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4142 __M);
756c5857
AI
4143}
4144
4145extern __inline __m512i
4146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4147_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4148{
756c5857
AI
4149 return (__m512i)
4150 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4151 (__v8di) _mm512_setzero_si512 (),
4152 __M);
756c5857
AI
4153}
4154
4155extern __inline __m512
4156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4157_mm512_broadcast_f32x4 (__m128 __A)
4158{
0b192937
UD
4159 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4160 (__v16sf)
4161 _mm512_undefined_ps (),
756c5857
AI
4162 (__mmask16) -1);
4163}
4164
4165extern __inline __m512
4166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4167_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4168{
4169 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4170 (__v16sf) __O,
4171 __M);
4172}
4173
4174extern __inline __m512
4175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4176_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4177{
4178 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4179 (__v16sf)
4180 _mm512_setzero_ps (),
4181 __M);
4182}
4183
4184extern __inline __m512i
4185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4186_mm512_broadcast_i32x4 (__m128i __A)
4187{
756c5857 4188 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
0b192937 4189 (__v16si)
4271e5cb 4190 _mm512_undefined_epi32 (),
756c5857
AI
4191 (__mmask16) -1);
4192}
4193
4194extern __inline __m512i
4195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4197{
4198 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4199 (__v16si) __O,
4200 __M);
4201}
4202
4203extern __inline __m512i
4204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4205_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4206{
4207 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4208 (__v16si)
4209 _mm512_setzero_si512 (),
4210 __M);
4211}
4212
4213extern __inline __m512d
4214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4215_mm512_broadcast_f64x4 (__m256d __A)
4216{
756c5857 4217 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
0b192937
UD
4218 (__v8df)
4219 _mm512_undefined_pd (),
756c5857
AI
4220 (__mmask8) -1);
4221}
4222
4223extern __inline __m512d
4224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4225_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4226{
4227 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4228 (__v8df) __O,
4229 __M);
4230}
4231
4232extern __inline __m512d
4233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4235{
4236 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4237 (__v8df)
4238 _mm512_setzero_pd (),
4239 __M);
4240}
4241
4242extern __inline __m512i
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm512_broadcast_i64x4 (__m256i __A)
4245{
756c5857 4246 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
0b192937 4247 (__v8di)
4271e5cb 4248 _mm512_undefined_epi32 (),
756c5857
AI
4249 (__mmask8) -1);
4250}
4251
4252extern __inline __m512i
4253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4255{
4256 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4257 (__v8di) __O,
4258 __M);
4259}
4260
4261extern __inline __m512i
4262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4263_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4264{
4265 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4266 (__v8di)
4267 _mm512_setzero_si512 (),
4268 __M);
4269}
4270
/* Permutation selector constants.  Each of the four letters in a name
   stands for a two-bit field (A = 0, B = 1, C = 2, D = 3); the first
   letter occupies bits 7:6 and the last letter bits 1:0, so the 256
   enumerators cover 0x00 through 0xFF in order.  One row below holds the
   four values that share their first three letters.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,

  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,

  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,

  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,

  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,

  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,

  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,

  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,

  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,

  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,

  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,

  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,

  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,

  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,

  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,

  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
4360
4361#ifdef __OPTIMIZE__
4362extern __inline __m512i
4363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4365{
4366 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4367 __mask,
4368 (__v16si)
4271e5cb 4369 _mm512_undefined_epi32 (),
756c5857
AI
4370 (__mmask16) -1);
4371}
4372
4373extern __inline __m512i
4374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4375_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4376 _MM_PERM_ENUM __mask)
4377{
4378 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4379 __mask,
4380 (__v16si) __W,
4381 (__mmask16) __U);
4382}
4383
4384extern __inline __m512i
4385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4386_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4387{
4388 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4389 __mask,
4390 (__v16si)
4391 _mm512_setzero_si512 (),
4392 (__mmask16) __U);
4393}
4394
4395extern __inline __m512i
4396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4397_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4398{
4399 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4400 (__v8di) __B, __imm,
4401 (__v8di)
4271e5cb 4402 _mm512_undefined_epi32 (),
756c5857
AI
4403 (__mmask8) -1);
4404}
4405
4406extern __inline __m512i
4407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4409 __m512i __B, const int __imm)
4410{
4411 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4412 (__v8di) __B, __imm,
4413 (__v8di) __W,
4414 (__mmask8) __U);
4415}
4416
4417extern __inline __m512i
4418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4420 const int __imm)
4421{
4422 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4423 (__v8di) __B, __imm,
4424 (__v8di)
4425 _mm512_setzero_si512 (),
4426 (__mmask8) __U);
4427}
4428
4429extern __inline __m512i
4430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4431_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4432{
4433 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4434 (__v16si) __B,
4435 __imm,
4436 (__v16si)
4271e5cb 4437 _mm512_undefined_epi32 (),
756c5857
AI
4438 (__mmask16) -1);
4439}
4440
4441extern __inline __m512i
4442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4443_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4444 __m512i __B, const int __imm)
4445{
4446 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4447 (__v16si) __B,
4448 __imm,
4449 (__v16si) __W,
4450 (__mmask16) __U);
4451}
4452
4453extern __inline __m512i
4454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4455_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4456 const int __imm)
4457{
4458 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4459 (__v16si) __B,
4460 __imm,
4461 (__v16si)
4462 _mm512_setzero_si512 (),
4463 (__mmask16) __U);
4464}
4465
4466extern __inline __m512d
4467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4468_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4469{
4470 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4471 (__v8df) __B, __imm,
4472 (__v8df)
0b192937 4473 _mm512_undefined_pd (),
756c5857
AI
4474 (__mmask8) -1);
4475}
4476
4477extern __inline __m512d
4478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4480 __m512d __B, const int __imm)
4481{
4482 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4483 (__v8df) __B, __imm,
4484 (__v8df) __W,
4485 (__mmask8) __U);
4486}
4487
4488extern __inline __m512d
4489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4490_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4491 const int __imm)
4492{
4493 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4494 (__v8df) __B, __imm,
4495 (__v8df)
4496 _mm512_setzero_pd (),
4497 (__mmask8) __U);
4498}
4499
4500extern __inline __m512
4501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4502_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4503{
4504 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4505 (__v16sf) __B, __imm,
4506 (__v16sf)
0b192937 4507 _mm512_undefined_ps (),
756c5857
AI
4508 (__mmask16) -1);
4509}
4510
4511extern __inline __m512
4512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4513_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4514 __m512 __B, const int __imm)
4515{
4516 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4517 (__v16sf) __B, __imm,
4518 (__v16sf) __W,
4519 (__mmask16) __U);
4520}
4521
4522extern __inline __m512
4523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4525 const int __imm)
4526{
4527 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4528 (__v16sf) __B, __imm,
4529 (__v16sf)
4530 _mm512_setzero_ps (),
4531 (__mmask16) __U);
4532}
4533
4534#else
/* Macro forms of the _mm512_*shuffle_epi32 intrinsics.  X is cast through
   __m512i to __v16si and C to int before both are passed to
   __builtin_ia32_pshufd512_mask; the third operand is an undefined vector,
   W, or an all-zero vector respectively.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4549
/* Macro forms of the _mm512_*shuffle_i64x2 intrinsics.  X and Y are cast
   through __m512i to __v8di and C to int before they are passed to
   __builtin_ia32_shuf_i64x2_mask; the fourth operand is an undefined
   vector, W, or an all-zero vector respectively.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
4567
/* Macro forms of the _mm512_*shuffle_i32x4 intrinsics.  X and Y are cast
   through __m512i to __v16si and C to int before they are passed to
   __builtin_ia32_shuf_i32x4_mask; the fourth operand is an undefined
   vector, W, or an all-zero vector respectively.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_epi32 (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4585
/* Macro forms of the _mm512_*shuffle_f64x2 intrinsics.  X and Y are cast
   through __m512d to __v8df and C to int before they are passed to
   __builtin_ia32_shuf_f64x2_mask; the fourth operand is an undefined
   vector, W, or an all-zero vector respectively.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
4603
/* Macro forms of the _mm512_*shuffle_f32x4 intrinsics.  X and Y are cast
   through __m512 to __v16sf and C to int before they are passed to
   __builtin_ia32_shuf_f32x4_mask; the fourth operand is an undefined
   vector, W, or an all-zero vector respectively.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
4621#endif
4622
4623extern __inline __m512i
4624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4625_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4626{
4627 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4628 (__v16si) __B,
4629 (__v16si)
4271e5cb 4630 _mm512_undefined_epi32 (),
756c5857
AI
4631 (__mmask16) -1);
4632}
4633
4634extern __inline __m512i
4635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4636_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4637{
4638 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4639 (__v16si) __B,
4640 (__v16si) __W,
4641 (__mmask16) __U);
4642}
4643
4644extern __inline __m512i
4645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4646_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4647{
4648 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4649 (__v16si) __B,
4650 (__v16si)
4651 _mm512_setzero_si512 (),
4652 (__mmask16) __U);
4653}
4654
4655extern __inline __m512i
4656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4658{
4659 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4660 (__v16si) __B,
4661 (__v16si)
4271e5cb 4662 _mm512_undefined_epi32 (),
756c5857
AI
4663 (__mmask16) -1);
4664}
4665
4666extern __inline __m512i
4667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4669{
4670 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4671 (__v16si) __B,
4672 (__v16si) __W,
4673 (__mmask16) __U);
4674}
4675
4676extern __inline __m512i
4677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4679{
4680 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4681 (__v16si) __B,
4682 (__v16si)
4683 _mm512_setzero_si512 (),
4684 (__mmask16) __U);
4685}
4686
4687extern __inline __m512i
4688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4689_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4690{
4691 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4692 (__v8di) __B,
4693 (__v8di)
4271e5cb 4694 _mm512_undefined_epi32 (),
756c5857
AI
4695 (__mmask8) -1);
4696}
4697
4698extern __inline __m512i
4699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4700_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4701{
4702 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4703 (__v8di) __B,
4704 (__v8di) __W,
4705 (__mmask8) __U);
4706}
4707
4708extern __inline __m512i
4709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4710_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4711{
4712 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4713 (__v8di) __B,
4714 (__v8di)
4715 _mm512_setzero_si512 (),
4716 (__mmask8) __U);
4717}
4718
4719extern __inline __m512i
4720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4721_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4722{
4723 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4724 (__v8di) __B,
4725 (__v8di)
4271e5cb 4726 _mm512_undefined_epi32 (),
756c5857
AI
4727 (__mmask8) -1);
4728}
4729
4730extern __inline __m512i
4731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4732_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4733{
4734 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4735 (__v8di) __B,
4736 (__v8di) __W,
4737 (__mmask8) __U);
4738}
4739
4740extern __inline __m512i
4741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4743{
4744 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4745 (__v8di) __B,
4746 (__v8di)
4747 _mm512_setzero_si512 (),
4748 (__mmask8) __U);
4749}
4750
4751#ifdef __OPTIMIZE__
4752extern __inline __m256i
4753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4754_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4755{
4756 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4757 (__v8si)
0b192937 4758 _mm256_undefined_si256 (),
756c5857
AI
4759 (__mmask8) -1, __R);
4760}
4761
4762extern __inline __m256i
4763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4764_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4765 const int __R)
4766{
4767 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4768 (__v8si) __W,
4769 (__mmask8) __U, __R);
4770}
4771
4772extern __inline __m256i
4773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4774_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4775{
4776 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4777 (__v8si)
4778 _mm256_setzero_si256 (),
4779 (__mmask8) __U, __R);
4780}
4781
4782extern __inline __m256i
4783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4784_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4785{
4786 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4787 (__v8si)
0b192937 4788 _mm256_undefined_si256 (),
756c5857
AI
4789 (__mmask8) -1, __R);
4790}
4791
4792extern __inline __m256i
4793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4795 const int __R)
4796{
4797 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4798 (__v8si) __W,
4799 (__mmask8) __U, __R);
4800}
4801
4802extern __inline __m256i
4803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4804_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4805{
4806 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4807 (__v8si)
4808 _mm256_setzero_si256 (),
4809 (__mmask8) __U, __R);
4810}
4811#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvttpd2dq512 builtin: A is the source, B the
   final argument, and W / U (where present) the second operand and mask.  */
#define _mm512_cvtt_roundpd_epi32(A, B)                                 \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask                          \
     ((A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B)                      \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask                          \
     ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B)                        \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask                          \
     ((A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4820
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvttpd2udq512 builtin: A is the source, B
   the final argument, and W / U (where present) the second operand and
   mask.  */
#define _mm512_cvtt_roundpd_epu32(A, B)                                 \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask                         \
     ((A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B)                      \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask                         \
     ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B)                        \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask                         \
     ((A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4829#endif
4830
4831#ifdef __OPTIMIZE__
4832extern __inline __m256i
4833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4834_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4835{
4836 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4837 (__v8si)
0b192937 4838 _mm256_undefined_si256 (),
756c5857
AI
4839 (__mmask8) -1, __R);
4840}
4841
4842extern __inline __m256i
4843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4844_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4845 const int __R)
4846{
4847 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4848 (__v8si) __W,
4849 (__mmask8) __U, __R);
4850}
4851
4852extern __inline __m256i
4853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4854_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4855{
4856 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4857 (__v8si)
4858 _mm256_setzero_si256 (),
4859 (__mmask8) __U, __R);
4860}
4861
4862extern __inline __m256i
4863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4864_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4865{
4866 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4867 (__v8si)
0b192937 4868 _mm256_undefined_si256 (),
756c5857
AI
4869 (__mmask8) -1, __R);
4870}
4871
4872extern __inline __m256i
4873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4874_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4875 const int __R)
4876{
4877 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4878 (__v8si) __W,
4879 (__mmask8) __U, __R);
4880}
4881
4882extern __inline __m256i
4883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4884_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4885{
4886 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4887 (__v8si)
4888 _mm256_setzero_si256 (),
4889 (__mmask8) __U, __R);
4890}
4891#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtpd2dq512 builtin: A is the source, B the
   final argument, and W / U (where present) the second operand and mask.  */
#define _mm512_cvt_roundpd_epi32(A, B)                                  \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask                           \
     ((A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B)                       \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask                           \
     ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B)                         \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask                           \
     ((A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4900
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtpd2udq512 builtin: A is the source, B the
   final argument, and W / U (where present) the second operand and mask.  */
#define _mm512_cvt_roundpd_epu32(A, B)                                  \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask                          \
     ((A), (__v8si) _mm256_undefined_si256 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B)                       \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask                          \
     ((A), (__v8si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B)                         \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask                          \
     ((A), (__v8si) _mm256_setzero_si256 (), (U), (B)))
4909#endif
4910
4911#ifdef __OPTIMIZE__
4912extern __inline __m512i
4913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4915{
4916 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4917 (__v16si)
4271e5cb 4918 _mm512_undefined_epi32 (),
756c5857
AI
4919 (__mmask16) -1, __R);
4920}
4921
4922extern __inline __m512i
4923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4924_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4925 const int __R)
4926{
4927 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4928 (__v16si) __W,
4929 (__mmask16) __U, __R);
4930}
4931
4932extern __inline __m512i
4933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4934_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4935{
4936 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4937 (__v16si)
4938 _mm512_setzero_si512 (),
4939 (__mmask16) __U, __R);
4940}
4941
4942extern __inline __m512i
4943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4945{
4946 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4947 (__v16si)
4271e5cb 4948 _mm512_undefined_epi32 (),
756c5857
AI
4949 (__mmask16) -1, __R);
4950}
4951
4952extern __inline __m512i
4953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4954_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4955 const int __R)
4956{
4957 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4958 (__v16si) __W,
4959 (__mmask16) __U, __R);
4960}
4961
4962extern __inline __m512i
4963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4964_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4965{
4966 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4967 (__v16si)
4968 _mm512_setzero_si512 (),
4969 (__mmask16) __U, __R);
4970}
4971#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvttps2dq512 builtin: A is the source, B the
   final argument, and W / U (where present) the second operand and mask.  */
#define _mm512_cvtt_roundps_epi32(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask                          \
     ((A), (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask                          \
     ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask                          \
     ((A), (__v16si) _mm512_setzero_si512 (), (U), (B)))
4980
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvttps2udq512 builtin: A is the source, B
   the final argument, and W / U (where present) the second operand and
   mask.  */
#define _mm512_cvtt_roundps_epu32(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask                         \
     ((A), (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask                         \
     ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask                         \
     ((A), (__v16si) _mm512_setzero_si512 (), (U), (B)))
4989#endif
4990
4991#ifdef __OPTIMIZE__
4992extern __inline __m512i
4993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4994_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4995{
4996 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4997 (__v16si)
4271e5cb 4998 _mm512_undefined_epi32 (),
756c5857
AI
4999 (__mmask16) -1, __R);
5000}
5001
5002extern __inline __m512i
5003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5004_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
5005 const int __R)
5006{
5007 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5008 (__v16si) __W,
5009 (__mmask16) __U, __R);
5010}
5011
5012extern __inline __m512i
5013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5014_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
5015{
5016 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
5017 (__v16si)
5018 _mm512_setzero_si512 (),
5019 (__mmask16) __U, __R);
5020}
5021
5022extern __inline __m512i
5023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5024_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
5025{
5026 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5027 (__v16si)
4271e5cb 5028 _mm512_undefined_epi32 (),
756c5857
AI
5029 (__mmask16) -1, __R);
5030}
5031
5032extern __inline __m512i
5033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
5035 const int __R)
5036{
5037 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5038 (__v16si) __W,
5039 (__mmask16) __U, __R);
5040}
5041
5042extern __inline __m512i
5043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
5045{
5046 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
5047 (__v16si)
5048 _mm512_setzero_si512 (),
5049 (__mmask16) __U, __R);
5050}
5051#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtps2dq512 builtin: A is the source, B the
   final argument, and W / U (where present) the second operand and mask.  */
#define _mm512_cvt_roundps_epi32(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask                           \
     ((A), (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask                           \
     ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask                           \
     ((A), (__v16si) _mm512_setzero_si512 (), (U), (B)))
5060
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtps2udq512 builtin: A is the source, B the
   final argument, and W / U (where present) the second operand and mask.  */
#define _mm512_cvt_roundps_epu32(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask                          \
     ((A), (__v16si) _mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask                          \
     ((A), (__v16si) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask                          \
     ((A), (__v16si) _mm512_setzero_si512 (), (U), (B)))
5069#endif
5070
5071extern __inline __m128d
5072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073_mm_cvtu32_sd (__m128d __A, unsigned __B)
5074{
5075 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
5076}
5077
5078#ifdef __x86_64__
5079#ifdef __OPTIMIZE__
5080extern __inline __m128d
5081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5082_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
5083{
5084 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
5085}
5086
5087extern __inline __m128d
5088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5089_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
5090{
5091 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5092}
5093
5094extern __inline __m128d
5095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5096_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
5097{
5098 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5099}
5100#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtusi2sd64 / cvtsi2sd64 builtin with A, B
   and C forwarded in order.

   The whole replacement list is parenthesized: a cast binds more loosely
   than postfix operators, so without the outer parentheses something
   like `_mm_cvt_roundu64_sd (a, b, r)[0]` would subscript the builtin's
   result before the cast is applied, unlike the inline-function forms
   used when __OPTIMIZE__ is defined.  */
#define _mm_cvt_roundu64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundi64_sd(A, B, C)                                    \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_sd(A, B, C)                                   \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))
5109#endif
5110
5111#endif
5112
5113#ifdef __OPTIMIZE__
5114extern __inline __m128
5115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5116_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
5117{
5118 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
5119}
5120
5121extern __inline __m128
5122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5123_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
5124{
5125 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5126}
5127
5128extern __inline __m128
5129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
5131{
5132 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5133}
5134#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtusi2ss32 / cvtsi2ss32 builtin with A, B
   and C forwarded in order.

   The whole replacement list is parenthesized: a cast binds more loosely
   than postfix operators, so without the outer parentheses something
   like `_mm_cvt_roundu32_ss (a, b, r)[0]` would subscript the builtin's
   result before the cast is applied, unlike the inline-function forms
   used when __OPTIMIZE__ is defined.  */
#define _mm_cvt_roundu32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundi32_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundsi32_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))
5143#endif
5144
5145#ifdef __x86_64__
5146#ifdef __OPTIMIZE__
5147extern __inline __m128
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
5150{
5151 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
5152}
5153
5154extern __inline __m128
5155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5156_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
5157{
5158 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5159}
5160
5161extern __inline __m128
5162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5163_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
5164{
5165 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5166}
5167#else
/* Macro forms used on the #else side of the __OPTIMIZE__ test.  Each
   expands to one call of the cvtusi2ss64 / cvtsi2ss64 builtin with A, B
   and C forwarded in order.

   The whole replacement list is parenthesized: a cast binds more loosely
   than postfix operators, so without the outer parentheses something
   like `_mm_cvt_roundu64_ss (a, b, r)[0]` would subscript the builtin's
   result before the cast is applied, unlike the inline-function forms
   used when __OPTIMIZE__ is defined.  */
#define _mm_cvt_roundu64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtusi2ss64 ((A), (B), (C)))

#define _mm_cvt_roundi64_ss(A, B, C)                                    \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_ss(A, B, C)                                   \
  ((__m128) __builtin_ia32_cvtsi2ss64 ((A), (B), (C)))
5176#endif
5177
5178#endif
5179
5180extern __inline __m128i
5181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182_mm512_cvtepi32_epi8 (__m512i __A)
5183{
0b192937
UD
5184 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5185 (__v16qi)
5186 _mm_undefined_si128 (),
756c5857
AI
5187 (__mmask16) -1);
5188}
5189
d256b866
IT
5190extern __inline void
5191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5192_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5193{
5194 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5195}
5196
756c5857
AI
5197extern __inline __m128i
5198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5199_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5200{
5201 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5202 (__v16qi) __O, __M);
5203}
5204
5205extern __inline __m128i
5206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5207_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5208{
5209 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5210 (__v16qi)
5211 _mm_setzero_si128 (),
5212 __M);
5213}
5214
5215extern __inline __m128i
5216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217_mm512_cvtsepi32_epi8 (__m512i __A)
5218{
0b192937
UD
5219 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5220 (__v16qi)
5221 _mm_undefined_si128 (),
756c5857
AI
5222 (__mmask16) -1);
5223}
5224
d256b866
IT
5225extern __inline void
5226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5227_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5228{
5229 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5230}
5231
756c5857
AI
5232extern __inline __m128i
5233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5235{
5236 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5237 (__v16qi) __O, __M);
5238}
5239
5240extern __inline __m128i
5241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5243{
5244 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5245 (__v16qi)
5246 _mm_setzero_si128 (),
5247 __M);
5248}
5249
5250extern __inline __m128i
5251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252_mm512_cvtusepi32_epi8 (__m512i __A)
5253{
0b192937
UD
5254 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5255 (__v16qi)
5256 _mm_undefined_si128 (),
756c5857
AI
5257 (__mmask16) -1);
5258}
5259
d256b866
IT
5260extern __inline void
5261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5262_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5263{
5264 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5265}
5266
756c5857
AI
5267extern __inline __m128i
5268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5270{
5271 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5272 (__v16qi) __O,
5273 __M);
5274}
5275
5276extern __inline __m128i
5277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5278_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5279{
5280 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5281 (__v16qi)
5282 _mm_setzero_si128 (),
5283 __M);
5284}
5285
5286extern __inline __m256i
5287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5288_mm512_cvtepi32_epi16 (__m512i __A)
5289{
0b192937
UD
5290 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5291 (__v16hi)
5292 _mm256_undefined_si256 (),
756c5857
AI
5293 (__mmask16) -1);
5294}
5295
d256b866
IT
5296extern __inline void
5297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5298_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5299{
5300 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5301}
5302
756c5857
AI
5303extern __inline __m256i
5304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5306{
5307 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5308 (__v16hi) __O, __M);
5309}
5310
5311extern __inline __m256i
5312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5314{
5315 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5316 (__v16hi)
5317 _mm256_setzero_si256 (),
5318 __M);
5319}
5320
5321extern __inline __m256i
5322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323_mm512_cvtsepi32_epi16 (__m512i __A)
5324{
0b192937
UD
5325 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5326 (__v16hi)
5327 _mm256_undefined_si256 (),
756c5857
AI
5328 (__mmask16) -1);
5329}
5330
d256b866
IT
5331extern __inline void
5332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5333_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5334{
5335 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5336}
5337
756c5857
AI
5338extern __inline __m256i
5339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5341{
5342 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5343 (__v16hi) __O, __M);
5344}
5345
5346extern __inline __m256i
5347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5349{
5350 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5351 (__v16hi)
5352 _mm256_setzero_si256 (),
5353 __M);
5354}
5355
5356extern __inline __m256i
5357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358_mm512_cvtusepi32_epi16 (__m512i __A)
5359{
0b192937
UD
5360 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5361 (__v16hi)
5362 _mm256_undefined_si256 (),
756c5857
AI
5363 (__mmask16) -1);
5364}
5365
d256b866
IT
5366extern __inline void
5367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5369{
5370 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5371}
5372
756c5857
AI
5373extern __inline __m256i
5374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5376{
5377 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5378 (__v16hi) __O,
5379 __M);
5380}
5381
5382extern __inline __m256i
5383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5384_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5385{
5386 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5387 (__v16hi)
5388 _mm256_setzero_si256 (),
5389 __M);
5390}
5391
5392extern __inline __m256i
5393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5394_mm512_cvtepi64_epi32 (__m512i __A)
5395{
0b192937
UD
5396 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5397 (__v8si)
5398 _mm256_undefined_si256 (),
756c5857
AI
5399 (__mmask8) -1);
5400}
5401
d256b866
IT
5402extern __inline void
5403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5404_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5405{
5406 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5407}
5408
756c5857
AI
5409extern __inline __m256i
5410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5412{
5413 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5414 (__v8si) __O, __M);
5415}
5416
5417extern __inline __m256i
5418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5419_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5420{
5421 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5422 (__v8si)
5423 _mm256_setzero_si256 (),
5424 __M);
5425}
5426
5427extern __inline __m256i
5428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5429_mm512_cvtsepi64_epi32 (__m512i __A)
5430{
0b192937
UD
5431 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5432 (__v8si)
5433 _mm256_undefined_si256 (),
756c5857
AI
5434 (__mmask8) -1);
5435}
5436
d256b866
IT
5437extern __inline void
5438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5439_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5440{
5441 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5442}
5443
756c5857
AI
5444extern __inline __m256i
5445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5446_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5447{
5448 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5449 (__v8si) __O, __M);
5450}
5451
5452extern __inline __m256i
5453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5454_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5455{
5456 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5457 (__v8si)
5458 _mm256_setzero_si256 (),
5459 __M);
5460}
5461
5462extern __inline __m256i
5463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5464_mm512_cvtusepi64_epi32 (__m512i __A)
5465{
0b192937
UD
5466 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5467 (__v8si)
5468 _mm256_undefined_si256 (),
756c5857
AI
5469 (__mmask8) -1);
5470}
5471
6fb82517 5472extern __inline void
d256b866
IT
5473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5475{
5476 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5477}
5478
756c5857
AI
5479extern __inline __m256i
5480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5481_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5482{
5483 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5484 (__v8si) __O, __M);
5485}
5486
5487extern __inline __m256i
5488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5489_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5490{
5491 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5492 (__v8si)
5493 _mm256_setzero_si256 (),
5494 __M);
5495}
5496
5497extern __inline __m128i
5498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5499_mm512_cvtepi64_epi16 (__m512i __A)
5500{
0b192937
UD
5501 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5502 (__v8hi)
5503 _mm_undefined_si128 (),
756c5857
AI
5504 (__mmask8) -1);
5505}
5506
d256b866
IT
5507extern __inline void
5508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5510{
5511 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5512}
5513
756c5857
AI
5514extern __inline __m128i
5515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5516_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5517{
5518 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5519 (__v8hi) __O, __M);
5520}
5521
5522extern __inline __m128i
5523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5525{
5526 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5527 (__v8hi)
5528 _mm_setzero_si128 (),
5529 __M);
5530}
5531
5532extern __inline __m128i
5533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534_mm512_cvtsepi64_epi16 (__m512i __A)
5535{
0b192937
UD
5536 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5537 (__v8hi)
5538 _mm_undefined_si128 (),
756c5857
AI
5539 (__mmask8) -1);
5540}
5541
d256b866
IT
5542extern __inline void
5543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5544_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5545{
5546 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5547}
5548
756c5857
AI
5549extern __inline __m128i
5550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5552{
5553 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5554 (__v8hi) __O, __M);
5555}
5556
5557extern __inline __m128i
5558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5559_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5560{
5561 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5562 (__v8hi)
5563 _mm_setzero_si128 (),
5564 __M);
5565}
5566
5567extern __inline __m128i
5568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569_mm512_cvtusepi64_epi16 (__m512i __A)
5570{
0b192937
UD
5571 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5572 (__v8hi)
5573 _mm_undefined_si128 (),
756c5857
AI
5574 (__mmask8) -1);
5575}
5576
d256b866
IT
5577extern __inline void
5578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5579_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5580{
5581 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5582}
5583
756c5857
AI
5584extern __inline __m128i
5585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5587{
5588 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5589 (__v8hi) __O, __M);
5590}
5591
5592extern __inline __m128i
5593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5595{
5596 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5597 (__v8hi)
5598 _mm_setzero_si128 (),
5599 __M);
5600}
5601
5602extern __inline __m128i
5603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5604_mm512_cvtepi64_epi8 (__m512i __A)
5605{
0b192937
UD
5606 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5607 (__v16qi)
5608 _mm_undefined_si128 (),
756c5857
AI
5609 (__mmask8) -1);
5610}
5611
d256b866
IT
5612extern __inline void
5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5615{
5616 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5617}
5618
756c5857
AI
5619extern __inline __m128i
5620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5621_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5622{
5623 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5624 (__v16qi) __O, __M);
5625}
5626
5627extern __inline __m128i
5628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5630{
5631 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5632 (__v16qi)
5633 _mm_setzero_si128 (),
5634 __M);
5635}
5636
5637extern __inline __m128i
5638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639_mm512_cvtsepi64_epi8 (__m512i __A)
5640{
0b192937
UD
5641 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5642 (__v16qi)
5643 _mm_undefined_si128 (),
756c5857
AI
5644 (__mmask8) -1);
5645}
5646
d256b866
IT
5647extern __inline void
5648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5649_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5650{
5651 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5652}
5653
756c5857
AI
5654extern __inline __m128i
5655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5657{
5658 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5659 (__v16qi) __O, __M);
5660}
5661
5662extern __inline __m128i
5663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5665{
5666 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5667 (__v16qi)
5668 _mm_setzero_si128 (),
5669 __M);
5670}
5671
5672extern __inline __m128i
5673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5674_mm512_cvtusepi64_epi8 (__m512i __A)
5675{
0b192937
UD
5676 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5677 (__v16qi)
5678 _mm_undefined_si128 (),
756c5857
AI
5679 (__mmask8) -1);
5680}
5681
d256b866
IT
5682extern __inline void
5683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5684_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5685{
5686 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5687}
5688
756c5857
AI
5689extern __inline __m128i
5690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5692{
5693 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5694 (__v16qi) __O,
5695 __M);
5696}
5697
5698extern __inline __m128i
5699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5700_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5701{
5702 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5703 (__v16qi)
5704 _mm_setzero_si128 (),
5705 __M);
5706}
5707
5708extern __inline __m512d
5709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710_mm512_cvtepi32_pd (__m256i __A)
5711{
5712 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5713 (__v8df)
0b192937 5714 _mm512_undefined_pd (),
756c5857
AI
5715 (__mmask8) -1);
5716}
5717
5718extern __inline __m512d
5719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5720_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5721{
5722 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5723 (__v8df) __W,
5724 (__mmask8) __U);
5725}
5726
5727extern __inline __m512d
5728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5730{
5731 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5732 (__v8df)
5733 _mm512_setzero_pd (),
5734 (__mmask8) __U);
5735}
5736
5737extern __inline __m512d
5738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739_mm512_cvtepu32_pd (__m256i __A)
5740{
5741 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5742 (__v8df)
0b192937 5743 _mm512_undefined_pd (),
756c5857
AI
5744 (__mmask8) -1);
5745}
5746
5747extern __inline __m512d
5748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5750{
5751 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5752 (__v8df) __W,
5753 (__mmask8) __U);
5754}
5755
5756extern __inline __m512d
5757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5759{
5760 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5761 (__v8df)
5762 _mm512_setzero_pd (),
5763 (__mmask8) __U);
5764}
5765
5766#ifdef __OPTIMIZE__
5767extern __inline __m512
5768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5769_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5770{
5771 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5772 (__v16sf)
0b192937 5773 _mm512_undefined_ps (),
756c5857
AI
5774 (__mmask16) -1, __R);
5775}
5776
5777extern __inline __m512
5778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5779_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5780 const int __R)
5781{
5782 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5783 (__v16sf) __W,
5784 (__mmask16) __U, __R);
5785}
5786
5787extern __inline __m512
5788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5790{
5791 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5792 (__v16sf)
5793 _mm512_setzero_ps (),
5794 (__mmask16) __U, __R);
5795}
5796
5797extern __inline __m512
5798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5800{
5801 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5802 (__v16sf)
0b192937 5803 _mm512_undefined_ps (),
756c5857
AI
5804 (__mmask16) -1, __R);
5805}
5806
5807extern __inline __m512
5808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5810 const int __R)
5811{
5812 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5813 (__v16sf) __W,
5814 (__mmask16) __U, __R);
5815}
5816
5817extern __inline __m512
5818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5819_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5820{
5821 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5822 (__v16sf)
5823 _mm512_setzero_ps (),
5824 (__mmask16) __U, __R);
5825}
5826
5827#else
/* Macro forms of the cvt_roundepi32_ps / cvt_roundepu32_ps family, used
   when __OPTIMIZE__ is not defined.  Each expansion is enclosed in an
   outer pair of parentheses so that the leading (__m512) cast applies
   to the whole call even when the macro is followed by a postfix
   operator, and W / U receive the same (__v16sf) / (__mmask16) casts
   that the inline-function forms apply.  */
#define _mm512_cvt_roundepi32_ps(A, B)					\
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),		\
					     (__v16sf)			\
					     _mm512_undefined_ps (),	\
					     (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)			\
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),		\
					     (__v16sf) (W),		\
					     (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)				\
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (A),		\
					     (__v16sf)			\
					     _mm512_setzero_ps (),	\
					     (__mmask16) (U), (B)))

#define _mm512_cvt_roundepu32_ps(A, B)					\
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),		\
					      (__v16sf)			\
					      _mm512_undefined_ps (),	\
					      (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)			\
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),		\
					      (__v16sf) (W),		\
					      (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)				\
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (A),		\
					      (__v16sf)			\
					      _mm512_setzero_ps (),	\
					      (__mmask16) (U), (B)))
5845#endif
5846
5847#ifdef __OPTIMIZE__
5848extern __inline __m256d
5849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5850_mm512_extractf64x4_pd (__m512d __A, const int __imm)
5851{
5852 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5853 __imm,
5854 (__v4df)
0b192937 5855 _mm256_undefined_pd (),
756c5857
AI
5856 (__mmask8) -1);
5857}
5858
5859extern __inline __m256d
5860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5861_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5862 const int __imm)
5863{
5864 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5865 __imm,
5866 (__v4df) __W,
5867 (__mmask8) __U);
5868}
5869
5870extern __inline __m256d
5871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5872_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5873{
5874 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5875 __imm,
5876 (__v4df)
5877 _mm256_setzero_pd (),
5878 (__mmask8) __U);
5879}
5880
5881extern __inline __m128
5882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883_mm512_extractf32x4_ps (__m512 __A, const int __imm)
5884{
5885 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5886 __imm,
5887 (__v4sf)
0b192937 5888 _mm_undefined_ps (),
756c5857
AI
5889 (__mmask8) -1);
5890}
5891
5892extern __inline __m128
5893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5894_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5895 const int __imm)
5896{
5897 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5898 __imm,
5899 (__v4sf) __W,
5900 (__mmask8) __U);
5901}
5902
5903extern __inline __m128
5904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5906{
5907 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5908 __imm,
5909 (__v4sf)
5910 _mm_setzero_ps (),
5911 (__mmask8) __U);
5912}
5913
5914extern __inline __m256i
5915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5916_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5917{
5918 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5919 __imm,
5920 (__v4di)
0b192937 5921 _mm256_undefined_si256 (),
756c5857
AI
5922 (__mmask8) -1);
5923}
5924
5925extern __inline __m256i
5926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5927_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5928 const int __imm)
5929{
5930 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5931 __imm,
5932 (__v4di) __W,
5933 (__mmask8) __U);
5934}
5935
5936extern __inline __m256i
5937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5938_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5939{
5940 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5941 __imm,
5942 (__v4di)
5943 _mm256_setzero_si256 (),
5944 (__mmask8) __U);
5945}
5946
5947extern __inline __m128i
5948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5950{
5951 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5952 __imm,
5953 (__v4si)
0b192937 5954 _mm_undefined_si128 (),
756c5857
AI
5955 (__mmask8) -1);
5956}
5957
5958extern __inline __m128i
5959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5960_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5961 const int __imm)
5962{
5963 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5964 __imm,
5965 (__v4si) __W,
5966 (__mmask8) __U);
5967}
5968
5969extern __inline __m128i
5970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5971_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5972{
5973 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5974 __imm,
5975 (__v4si)
5976 _mm_setzero_si128 (),
5977 (__mmask8) __U);
5978}
5979#else
5980
/* Macro forms of the 512-bit extract family, used when __OPTIMIZE__ is
   not defined.  Every macro casts its operands, casts C to int, and
   wraps the builtin call in an outer pair of parentheses.  */
#define _mm512_extractf64x4_pd(X, C)					\
  ((__m256d) __builtin_ia32_extractf64x4_mask (			\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d)_mm256_undefined_pd(), (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C)				\
  ((__m256d) __builtin_ia32_extractf64x4_mask (			\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C)				\
  ((__m256d) __builtin_ia32_extractf64x4_mask (			\
     (__v8df)(__m512d) (X), (int) (C),					\
     (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm512_extractf32x4_ps(X, C)					\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128)_mm_undefined_ps(), (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)				\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)				\
  ((__m128) __builtin_ia32_extractf32x4_mask (				\
     (__v16sf)(__m512) (X), (int) (C),					\
     (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(X, C)					\
  ((__m256i) __builtin_ia32_extracti64x4_mask (			\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i)_mm256_undefined_si256 (), (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)			\
  ((__m256i) __builtin_ia32_extracti64x4_mask (			\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)			\
  ((__m256i) __builtin_ia32_extracti64x4_mask (			\
     (__v8di)(__m512i) (X), (int) (C),					\
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm512_extracti32x4_epi32(X, C)					\
  ((__m128i) __builtin_ia32_extracti32x4_mask (			\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i)_mm_undefined_si128 (), (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C)			\
  ((__m128i) __builtin_ia32_extracti32x4_mask (			\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C)			\
  ((__m128i) __builtin_ia32_extracti32x4_mask (			\
     (__v16si)(__m512i) (X), (int) (C),					\
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
6052#endif
6053
6054#ifdef __OPTIMIZE__
6055extern __inline __m512i
6056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
6058{
6059 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
6060 (__v4si) __B,
6061 __imm,
6062 (__v16si) __A, -1);
6063}
6064
6065extern __inline __m512
6066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
6068{
6069 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
6070 (__v4sf) __B,
6071 __imm,
6072 (__v16sf) __A, -1);
6073}
6074
6075extern __inline __m512i
6076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6078{
6079 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6080 (__v4di) __B,
6081 __imm,
6082 (__v8di)
4271e5cb 6083 _mm512_undefined_epi32 (),
756c5857
AI
6084 (__mmask8) -1);
6085}
6086
6087extern __inline __m512i
6088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6090 __m256i __B, const int __imm)
6091{
6092 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6093 (__v4di) __B,
6094 __imm,
6095 (__v8di) __W,
6096 (__mmask8) __U);
6097}
6098
6099extern __inline __m512i
6100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6102 const int __imm)
6103{
6104 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6105 (__v4di) __B,
6106 __imm,
6107 (__v8di)
6108 _mm512_setzero_si512 (),
6109 (__mmask8) __U);
6110}
6111
6112extern __inline __m512d
6113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6114_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6115{
6116 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6117 (__v4df) __B,
6118 __imm,
6119 (__v8df)
0b192937 6120 _mm512_undefined_pd (),
756c5857
AI
6121 (__mmask8) -1);
6122}
6123
6124extern __inline __m512d
6125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6127 __m256d __B, const int __imm)
6128{
6129 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6130 (__v4df) __B,
6131 __imm,
6132 (__v8df) __W,
6133 (__mmask8) __U);
6134}
6135
6136extern __inline __m512d
6137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6138_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6139 const int __imm)
6140{
6141 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6142 (__v4df) __B,
6143 __imm,
6144 (__v8df)
6145 _mm512_setzero_pd (),
6146 (__mmask8) __U);
6147}
6148#else
/* Macro forms of the 512-bit insert family, used when __OPTIMIZE__ is
   not defined.  The 32x4 forms pass X again as the fourth operand; the
   64x4 forms pass an undefined, caller-supplied, or all-zero vector
   there.  */
#define _mm512_insertf32x4(X, Y, C)					\
  ((__m512) __builtin_ia32_insertf32x4_mask (				\
     (__v16sf)(__m512) (X), (__v4sf)(__m128) (Y), (int) (C),		\
     (__v16sf)(__m512) (X), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C)					\
  ((__m512i) __builtin_ia32_inserti32x4_mask (				\
     (__v16si)(__m512i) (X), (__v4si)(__m128i) (Y), (int) (C),		\
     (__v16si)(__m512i) (X), (__mmask16)(-1)))

#define _mm512_insertf64x4(X, Y, C)					\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d)_mm512_undefined_pd(), (__mmask8)-1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C)				\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C)				\
  ((__m512d) __builtin_ia32_insertf64x4_mask (				\
     (__v8df)(__m512d) (X), (__v4df)(__m256d) (Y), (int) (C),		\
     (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)(U)))

#define _mm512_inserti64x4(X, Y, C)					\
  ((__m512i) __builtin_ia32_inserti64x4_mask (				\
     (__v8di)(__m512i) (X), (__v4di)(__m256i) (Y), (int) (C),		\
     (__v8di)(__m512i)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_inserti64x4(W, U, X, Y, C)				\
  ((__m512i) __builtin_ia32_inserti64x4_mask (				\
     (__v8di)(__m512i) (X), (__v4di)(__m256i) (Y), (int) (C),		\
     (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_inserti64x4(U, X, Y, C)				\
  ((__m512i) __builtin_ia32_inserti64x4_mask (				\
     (__v8di)(__m512i) (X), (__v4di)(__m256i) (Y), (int) (C),		\
     (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
6192#endif
6193
6194extern __inline __m512d
6195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196_mm512_loadu_pd (void const *__P)
6197{
c6b0037d 6198 return *(__m512d_u *)__P;
756c5857
AI
6199}
6200
6201extern __inline __m512d
6202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6204{
fc9cf6da 6205 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6206 (__v8df) __W,
6207 (__mmask8) __U);
6208}
6209
6210extern __inline __m512d
6211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6212_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6213{
fc9cf6da 6214 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
756c5857
AI
6215 (__v8df)
6216 _mm512_setzero_pd (),
6217 (__mmask8) __U);
6218}
6219
6220extern __inline void
6221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6222_mm512_storeu_pd (void *__P, __m512d __A)
6223{
c6b0037d 6224 *(__m512d_u *)__P = __A;
756c5857
AI
6225}
6226
6227extern __inline void
6228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6229_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6230{
fc9cf6da 6231 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
756c5857
AI
6232 (__mmask8) __U);
6233}
6234
6235extern __inline __m512
6236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237_mm512_loadu_ps (void const *__P)
6238{
c6b0037d 6239 return *(__m512_u *)__P;
756c5857
AI
6240}
6241
6242extern __inline __m512
6243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6245{
fc9cf6da 6246 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6247 (__v16sf) __W,
6248 (__mmask16) __U);
6249}
6250
6251extern __inline __m512
6252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6254{
fc9cf6da 6255 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
756c5857
AI
6256 (__v16sf)
6257 _mm512_setzero_ps (),
6258 (__mmask16) __U);
6259}
6260
6261extern __inline void
6262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263_mm512_storeu_ps (void *__P, __m512 __A)
6264{
c6b0037d 6265 *(__m512_u *)__P = __A;
756c5857
AI
6266}
6267
6268extern __inline void
6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6271{
fc9cf6da 6272 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
756c5857
AI
6273 (__mmask16) __U);
6274}
6275
6276extern __inline __m512i
6277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6278_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6279{
fc9cf6da 6280 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6281 (__v8di) __W,
6282 (__mmask8) __U);
6283}
6284
6285extern __inline __m512i
6286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6287_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6288{
fc9cf6da 6289 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
756c5857
AI
6290 (__v8di)
6291 _mm512_setzero_si512 (),
6292 (__mmask8) __U);
6293}
6294
6295extern __inline void
6296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6297_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6298{
fc9cf6da 6299 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
756c5857
AI
6300 (__mmask8) __U);
6301}
6302
6303extern __inline __m512i
6304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6305_mm512_loadu_si512 (void const *__P)
756c5857 6306{
c6b0037d 6307 return *(__m512i_u *)__P;
756c5857
AI
6308}
6309
6310extern __inline __m512i
6311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6313{
fc9cf6da 6314 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6315 (__v16si) __W,
6316 (__mmask16) __U);
6317}
6318
6319extern __inline __m512i
6320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6321_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6322{
fc9cf6da 6323 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
756c5857
AI
6324 (__v16si)
6325 _mm512_setzero_si512 (),
6326 (__mmask16) __U);
6327}
6328
6329extern __inline void
6330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
cf73ee60 6331_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 6332{
c6b0037d 6333 *(__m512i_u *)__P = __A;
756c5857
AI
6334}
6335
6336extern __inline void
6337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6338_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6339{
fc9cf6da 6340 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
756c5857
AI
6341 (__mmask16) __U);
6342}
6343
6344extern __inline __m512d
6345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346_mm512_permutevar_pd (__m512d __A, __m512i __C)
6347{
6348 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6349 (__v8di) __C,
6350 (__v8df)
0b192937 6351 _mm512_undefined_pd (),
756c5857
AI
6352 (__mmask8) -1);
6353}
6354
6355extern __inline __m512d
6356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6358{
6359 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6360 (__v8di) __C,
6361 (__v8df) __W,
6362 (__mmask8) __U);
6363}
6364
6365extern __inline __m512d
6366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6367_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6368{
6369 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6370 (__v8di) __C,
6371 (__v8df)
6372 _mm512_setzero_pd (),
6373 (__mmask8) __U);
6374}
6375
6376extern __inline __m512
6377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6378_mm512_permutevar_ps (__m512 __A, __m512i __C)
6379{
6380 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6381 (__v16si) __C,
6382 (__v16sf)
0b192937 6383 _mm512_undefined_ps (),
756c5857
AI
6384 (__mmask16) -1);
6385}
6386
6387extern __inline __m512
6388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6390{
6391 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6392 (__v16si) __C,
6393 (__v16sf) __W,
6394 (__mmask16) __U);
6395}
6396
6397extern __inline __m512
6398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6400{
6401 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6402 (__v16si) __C,
6403 (__v16sf)
6404 _mm512_setzero_ps (),
6405 (__mmask16) __U);
6406}
6407
6408extern __inline __m512i
6409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6411{
6412 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6413 /* idx */ ,
6414 (__v8di) __A,
6415 (__v8di) __B,
6416 (__mmask8) -1);
6417}
6418
6419extern __inline __m512i
6420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6422 __m512i __B)
6423{
6424 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6425 /* idx */ ,
6426 (__v8di) __A,
6427 (__v8di) __B,
6428 (__mmask8) __U);
6429}
6430
6431extern __inline __m512i
6432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6433_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6434 __mmask8 __U, __m512i __B)
6435{
6436 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6437 (__v8di) __I
6438 /* idx */ ,
6439 (__v8di) __B,
6440 (__mmask8) __U);
6441}
6442
6443extern __inline __m512i
6444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6446 __m512i __I, __m512i __B)
6447{
6448 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6449 /* idx */ ,
6450 (__v8di) __A,
6451 (__v8di) __B,
6452 (__mmask8) __U);
6453}
6454
6455extern __inline __m512i
6456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6458{
6459 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6460 /* idx */ ,
6461 (__v16si) __A,
6462 (__v16si) __B,
6463 (__mmask16) -1);
6464}
6465
6466extern __inline __m512i
6467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6469 __m512i __I, __m512i __B)
6470{
6471 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6472 /* idx */ ,
6473 (__v16si) __A,
6474 (__v16si) __B,
6475 (__mmask16) __U);
6476}
6477
6478extern __inline __m512i
6479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6481 __mmask16 __U, __m512i __B)
6482{
6483 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6484 (__v16si) __I
6485 /* idx */ ,
6486 (__v16si) __B,
6487 (__mmask16) __U);
6488}
6489
6490extern __inline __m512i
6491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6493 __m512i __I, __m512i __B)
6494{
6495 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6496 /* idx */ ,
6497 (__v16si) __A,
6498 (__v16si) __B,
6499 (__mmask16) __U);
6500}
6501
6502extern __inline __m512d
6503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6505{
6506 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6507 /* idx */ ,
6508 (__v8df) __A,
6509 (__v8df) __B,
6510 (__mmask8) -1);
6511}
6512
6513extern __inline __m512d
6514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6515_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6516 __m512d __B)
6517{
6518 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6519 /* idx */ ,
6520 (__v8df) __A,
6521 (__v8df) __B,
6522 (__mmask8) __U);
6523}
6524
6525extern __inline __m512d
6526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6527_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6528 __m512d __B)
6529{
6530 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6531 (__v8di) __I
6532 /* idx */ ,
6533 (__v8df) __B,
6534 (__mmask8) __U);
6535}
6536
6537extern __inline __m512d
6538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6539_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6540 __m512d __B)
6541{
6542 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6543 /* idx */ ,
6544 (__v8df) __A,
6545 (__v8df) __B,
6546 (__mmask8) __U);
6547}
6548
6549extern __inline __m512
6550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6552{
6553 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6554 /* idx */ ,
6555 (__v16sf) __A,
6556 (__v16sf) __B,
6557 (__mmask16) -1);
6558}
6559
6560extern __inline __m512
6561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6563{
6564 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6565 /* idx */ ,
6566 (__v16sf) __A,
6567 (__v16sf) __B,
6568 (__mmask16) __U);
6569}
6570
6571extern __inline __m512
6572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6573_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6574 __m512 __B)
6575{
6576 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6577 (__v16si) __I
6578 /* idx */ ,
6579 (__v16sf) __B,
6580 (__mmask16) __U);
6581}
6582
6583extern __inline __m512
6584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6585_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6586 __m512 __B)
6587{
6588 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6589 /* idx */ ,
6590 (__v16sf) __A,
6591 (__v16sf) __B,
6592 (__mmask16) __U);
6593}
6594
6595#ifdef __OPTIMIZE__
6596extern __inline __m512d
6597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6598_mm512_permute_pd (__m512d __X, const int __C)
6599{
6600 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6601 (__v8df)
0b192937 6602 _mm512_undefined_pd (),
756c5857
AI
6603 (__mmask8) -1);
6604}
6605
6606extern __inline __m512d
6607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6608_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6609{
6610 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6611 (__v8df) __W,
6612 (__mmask8) __U);
6613}
6614
6615extern __inline __m512d
6616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6617_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6618{
6619 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6620 (__v8df)
6621 _mm512_setzero_pd (),
6622 (__mmask8) __U);
6623}
6624
6625extern __inline __m512
6626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6627_mm512_permute_ps (__m512 __X, const int __C)
6628{
6629 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6630 (__v16sf)
0b192937 6631 _mm512_undefined_ps (),
756c5857
AI
6632 (__mmask16) -1);
6633}
6634
6635extern __inline __m512
6636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6637_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6638{
6639 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6640 (__v16sf) __W,
6641 (__mmask16) __U);
6642}
6643
6644extern __inline __m512
6645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6646_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6647{
6648 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6649 (__v16sf)
6650 _mm512_setzero_ps (),
6651 (__mmask16) __U);
6652}
6653#else
/* Macro forms of the _mm512_*permute_pd / _mm512_*permute_ps intrinsics,
   selected when __OPTIMIZE__ is not defined.  Each one expands to a
   single vpermilpd512/vpermilps512 builtin call; X is the data vector,
   C the integer control, W the merge operand and U the mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
                                               (__v8df)(__m512d)_mm512_undefined_pd(), \
                                               (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
                                               (__v8df)(__m512d)(W), \
                                               (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
                                               (__v8df)(__m512d)_mm512_setzero_pd(), \
                                               (__mmask8)(U)))

#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
                                              (__v16sf)(__m512)_mm512_undefined_ps(), \
                                              (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
                                              (__v16sf)(__m512)(W), \
                                              (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
                                              (__v16sf)(__m512)_mm512_setzero_ps(), \
                                              (__mmask16)(U)))
6683#endif
6684
6685#ifdef __OPTIMIZE__
6686extern __inline __m512i
6687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6688_mm512_permutex_epi64 (__m512i __X, const int __I)
6689{
6690 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6691 (__v8di)
4271e5cb 6692 _mm512_undefined_epi32 (),
756c5857
AI
6693 (__mmask8) (-1));
6694}
6695
6696extern __inline __m512i
6697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6698_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6699 __m512i __X, const int __I)
6700{
6701 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6702 (__v8di) __W,
6703 (__mmask8) __M);
6704}
6705
6706extern __inline __m512i
6707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6708_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6709{
6710 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6711 (__v8di)
6712 _mm512_setzero_si512 (),
6713 (__mmask8) __M);
6714}
6715
6716extern __inline __m512d
6717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6718_mm512_permutex_pd (__m512d __X, const int __M)
6719{
6720 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6721 (__v8df)
0b192937 6722 _mm512_undefined_pd (),
756c5857
AI
6723 (__mmask8) -1);
6724}
6725
6726extern __inline __m512d
6727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6728_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6729{
6730 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6731 (__v8df) __W,
6732 (__mmask8) __U);
6733}
6734
6735extern __inline __m512d
6736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6737_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6738{
6739 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6740 (__v8df)
6741 _mm512_setzero_pd (),
6742 (__mmask8) __U);
6743}
6744#else
/* Macro forms of the _mm512_*permutex_pd / _mm512_*permutex_epi64
   intrinsics, selected when __OPTIMIZE__ is not defined.  Each expands
   to one permdf512/permdi512 builtin call; X is the data vector, the
   second macro argument the integer control, W the merge operand.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
                                            (__v8df)(__m512d)_mm512_undefined_pd(), \
                                            (__mmask8)(-1)))

#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
                                            (__v8df)(__m512d)_mm512_setzero_pd(), \
                                            (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(I), \
                                            (__v8di)(__m512i) \
                                            (_mm512_undefined_epi32 ()), \
                                            (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(I), \
                                            (__v8di)(__m512i) \
                                            (_mm512_setzero_si512 ()), \
                                            (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
                                            (int)(I), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(M)))
6778#endif
6779
6780extern __inline __m512i
6781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6783{
583a9919
KY
6784 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6785 (__v8di) __X,
756c5857
AI
6786 (__v8di)
6787 _mm512_setzero_si512 (),
6788 __M);
6789}
6790
6791extern __inline __m512i
6792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6793_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6794{
583a9919
KY
6795 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6796 (__v8di) __X,
756c5857 6797 (__v8di)
4271e5cb 6798 _mm512_undefined_epi32 (),
756c5857
AI
6799 (__mmask8) -1);
6800}
6801
6802extern __inline __m512i
6803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6804_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6805 __m512i __Y)
6806{
583a9919
KY
6807 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6808 (__v8di) __X,
756c5857
AI
6809 (__v8di) __W,
6810 __M);
6811}
6812
6813extern __inline __m512i
6814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6816{
583a9919
KY
6817 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6818 (__v16si) __X,
756c5857
AI
6819 (__v16si)
6820 _mm512_setzero_si512 (),
6821 __M);
6822}
6823
6824extern __inline __m512i
6825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6827{
583a9919
KY
6828 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6829 (__v16si) __X,
756c5857 6830 (__v16si)
4271e5cb 6831 _mm512_undefined_epi32 (),
756c5857
AI
6832 (__mmask16) -1);
6833}
6834
6835extern __inline __m512i
6836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6837_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6838 __m512i __Y)
6839{
583a9919
KY
6840 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6841 (__v16si) __X,
756c5857
AI
6842 (__v16si) __W,
6843 __M);
6844}
6845
6846extern __inline __m512d
6847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6848_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6849{
6850 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6851 (__v8di) __X,
6852 (__v8df)
0b192937 6853 _mm512_undefined_pd (),
756c5857
AI
6854 (__mmask8) -1);
6855}
6856
6857extern __inline __m512d
6858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6859_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6860{
6861 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6862 (__v8di) __X,
6863 (__v8df) __W,
6864 (__mmask8) __U);
6865}
6866
6867extern __inline __m512d
6868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6869_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6870{
6871 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6872 (__v8di) __X,
6873 (__v8df)
6874 _mm512_setzero_pd (),
6875 (__mmask8) __U);
6876}
6877
6878extern __inline __m512
6879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6880_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6881{
6882 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6883 (__v16si) __X,
6884 (__v16sf)
0b192937 6885 _mm512_undefined_ps (),
756c5857
AI
6886 (__mmask16) -1);
6887}
6888
6889extern __inline __m512
6890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6891_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6892{
6893 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6894 (__v16si) __X,
6895 (__v16sf) __W,
6896 (__mmask16) __U);
6897}
6898
6899extern __inline __m512
6900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6901_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6902{
6903 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6904 (__v16si) __X,
6905 (__v16sf)
6906 _mm512_setzero_ps (),
6907 (__mmask16) __U);
6908}
6909
6910#ifdef __OPTIMIZE__
6911extern __inline __m512
6912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6913_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6914{
6915 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6916 (__v16sf) __V, __imm,
6917 (__v16sf)
0b192937 6918 _mm512_undefined_ps (),
756c5857
AI
6919 (__mmask16) -1);
6920}
6921
6922extern __inline __m512
6923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6924_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6925 __m512 __V, const int __imm)
6926{
6927 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6928 (__v16sf) __V, __imm,
6929 (__v16sf) __W,
6930 (__mmask16) __U);
6931}
6932
6933extern __inline __m512
6934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6935_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6936{
6937 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6938 (__v16sf) __V, __imm,
6939 (__v16sf)
6940 _mm512_setzero_ps (),
6941 (__mmask16) __U);
6942}
6943
6944extern __inline __m512d
6945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6946_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6947{
6948 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6949 (__v8df) __V, __imm,
6950 (__v8df)
0b192937 6951 _mm512_undefined_pd (),
756c5857
AI
6952 (__mmask8) -1);
6953}
6954
6955extern __inline __m512d
6956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6958 __m512d __V, const int __imm)
6959{
6960 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6961 (__v8df) __V, __imm,
6962 (__v8df) __W,
6963 (__mmask8) __U);
6964}
6965
6966extern __inline __m512d
6967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6968_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6969 const int __imm)
6970{
6971 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6972 (__v8df) __V, __imm,
6973 (__v8df)
6974 _mm512_setzero_pd (),
6975 (__mmask8) __U);
6976}
6977
6978extern __inline __m512d
6979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6980_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6981 const int __imm, const int __R)
6982{
6983 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6984 (__v8df) __B,
6985 (__v8di) __C,
6986 __imm,
6987 (__mmask8) -1, __R);
6988}
6989
6990extern __inline __m512d
6991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6993 __m512i __C, const int __imm, const int __R)
6994{
6995 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6996 (__v8df) __B,
6997 (__v8di) __C,
6998 __imm,
6999 (__mmask8) __U, __R);
7000}
7001
7002extern __inline __m512d
7003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7004_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
7005 __m512i __C, const int __imm, const int __R)
7006{
7007 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
7008 (__v8df) __B,
7009 (__v8di) __C,
7010 __imm,
7011 (__mmask8) __U, __R);
7012}
7013
7014extern __inline __m512
7015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7016_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
7017 const int __imm, const int __R)
7018{
7019 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7020 (__v16sf) __B,
7021 (__v16si) __C,
7022 __imm,
7023 (__mmask16) -1, __R);
7024}
7025
7026extern __inline __m512
7027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7028_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7029 __m512i __C, const int __imm, const int __R)
7030{
7031 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7032 (__v16sf) __B,
7033 (__v16si) __C,
7034 __imm,
7035 (__mmask16) __U, __R);
7036}
7037
7038extern __inline __m512
7039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7040_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7041 __m512i __C, const int __imm, const int __R)
7042{
7043 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
7044 (__v16sf) __B,
7045 (__v16si) __C,
7046 __imm,
7047 (__mmask16) __U, __R);
7048}
7049
7050extern __inline __m128d
7051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7052_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
7053 const int __imm, const int __R)
7054{
7055 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7056 (__v2df) __B,
7057 (__v2di) __C, __imm,
7058 (__mmask8) -1, __R);
7059}
7060
7061extern __inline __m128d
7062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7063_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
7064 __m128i __C, const int __imm, const int __R)
7065{
7066 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7067 (__v2df) __B,
7068 (__v2di) __C, __imm,
7069 (__mmask8) __U, __R);
7070}
7071
7072extern __inline __m128d
7073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7074_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7075 __m128i __C, const int __imm, const int __R)
7076{
7077 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
7078 (__v2df) __B,
7079 (__v2di) __C,
7080 __imm,
7081 (__mmask8) __U, __R);
7082}
7083
7084extern __inline __m128
7085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7086_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
7087 const int __imm, const int __R)
7088{
7089 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7090 (__v4sf) __B,
7091 (__v4si) __C, __imm,
7092 (__mmask8) -1, __R);
7093}
7094
7095extern __inline __m128
7096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7097_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7098 __m128i __C, const int __imm, const int __R)
7099{
7100 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7101 (__v4sf) __B,
7102 (__v4si) __C, __imm,
7103 (__mmask8) __U, __R);
7104}
7105
7106extern __inline __m128
7107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7108_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7109 __m128i __C, const int __imm, const int __R)
7110{
7111 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
7112 (__v4sf) __B,
7113 (__v4si) __C, __imm,
7114 (__mmask8) __U, __R);
7115}
7116
7117#else
/* Macro forms of the shuffle and fixupimm intrinsics, selected when
   __OPTIMIZE__ is not defined.  Each expands to one builtin call.  For
   the shuffles X and Y are the data vectors, C the integer selector, W
   the merge operand and U the mask.  For fixupimm X and Y are the
   floating-point operands, Z the integer table vector, C the integer
   control, U the mask and R the rounding argument.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (int)(C), \
      (__v8df)(__m512d)_mm512_undefined_pd(), \
      (__mmask8)-1))

#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U)))

#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (int)(C), \
      (__v8df)(__m512d)_mm512_setzero_pd(), \
      (__mmask8)(U)))

#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (int)(C), \
      (__v16sf)(__m512)_mm512_undefined_ps(), \
      (__mmask16)-1))

#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U)))

#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (int)(C), \
      (__v16sf)(__m512)_mm512_setzero_ps(), \
      (__mmask16)(U)))

#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
      (__mmask8)(-1), (R)))

#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
      (__mmask8)(U), (R)))

#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
      (__mmask8)(U), (R)))

#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
      (__mmask16)(-1), (R)))

#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
      (__mmask16)(U), (R)))

#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
      (__mmask16)(U), (R)))

#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
      (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), (R)))

#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
      (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), (R)))
7213#endif
7214
7215extern __inline __m512
7216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7217_mm512_movehdup_ps (__m512 __A)
7218{
7219 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7220 (__v16sf)
0b192937 7221 _mm512_undefined_ps (),
756c5857
AI
7222 (__mmask16) -1);
7223}
7224
7225extern __inline __m512
7226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7227_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7228{
7229 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7230 (__v16sf) __W,
7231 (__mmask16) __U);
7232}
7233
7234extern __inline __m512
7235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7236_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7237{
7238 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7239 (__v16sf)
7240 _mm512_setzero_ps (),
7241 (__mmask16) __U);
7242}
7243
7244extern __inline __m512
7245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7246_mm512_moveldup_ps (__m512 __A)
7247{
7248 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7249 (__v16sf)
0b192937 7250 _mm512_undefined_ps (),
756c5857
AI
7251 (__mmask16) -1);
7252}
7253
7254extern __inline __m512
7255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7257{
7258 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7259 (__v16sf) __W,
7260 (__mmask16) __U);
7261}
7262
7263extern __inline __m512
7264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7266{
7267 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7268 (__v16sf)
7269 _mm512_setzero_ps (),
7270 (__mmask16) __U);
7271}
7272
7273extern __inline __m512i
7274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7275_mm512_or_si512 (__m512i __A, __m512i __B)
7276{
2069d6fc 7277 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7278}
7279
7280extern __inline __m512i
7281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282_mm512_or_epi32 (__m512i __A, __m512i __B)
7283{
2069d6fc 7284 return (__m512i) ((__v16su) __A | (__v16su) __B);
756c5857
AI
7285}
7286
7287extern __inline __m512i
7288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7289_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7290{
7291 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7292 (__v16si) __B,
7293 (__v16si) __W,
7294 (__mmask16) __U);
7295}
7296
7297extern __inline __m512i
7298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7299_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7300{
7301 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7302 (__v16si) __B,
7303 (__v16si)
7304 _mm512_setzero_si512 (),
7305 (__mmask16) __U);
7306}
7307
7308extern __inline __m512i
7309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7310_mm512_or_epi64 (__m512i __A, __m512i __B)
7311{
2069d6fc 7312 return (__m512i) ((__v8du) __A | (__v8du) __B);
756c5857
AI
7313}
7314
7315extern __inline __m512i
7316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7317_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7318{
7319 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7320 (__v8di) __B,
7321 (__v8di) __W,
7322 (__mmask8) __U);
7323}
7324
7325extern __inline __m512i
7326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7328{
7329 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7330 (__v8di) __B,
7331 (__v8di)
7332 _mm512_setzero_si512 (),
7333 (__mmask8) __U);
7334}
7335
7336extern __inline __m512i
7337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7338_mm512_xor_si512 (__m512i __A, __m512i __B)
7339{
2069d6fc 7340 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7341}
7342
7343extern __inline __m512i
7344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345_mm512_xor_epi32 (__m512i __A, __m512i __B)
7346{
2069d6fc 7347 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
756c5857
AI
7348}
7349
7350extern __inline __m512i
7351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7352_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7353{
7354 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7355 (__v16si) __B,
7356 (__v16si) __W,
7357 (__mmask16) __U);
7358}
7359
7360extern __inline __m512i
7361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7362_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7363{
7364 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7365 (__v16si) __B,
7366 (__v16si)
7367 _mm512_setzero_si512 (),
7368 (__mmask16) __U);
7369}
7370
7371extern __inline __m512i
7372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7373_mm512_xor_epi64 (__m512i __A, __m512i __B)
7374{
2069d6fc 7375 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
756c5857
AI
7376}
7377
7378extern __inline __m512i
7379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7380_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7381{
7382 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7383 (__v8di) __B,
7384 (__v8di) __W,
7385 (__mmask8) __U);
7386}
7387
7388extern __inline __m512i
7389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 7390_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857
AI
7391{
7392 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7393 (__v8di) __B,
7394 (__v8di)
7395 _mm512_setzero_si512 (),
7396 (__mmask8) __U);
7397}
7398
7399#ifdef __OPTIMIZE__
7400extern __inline __m512i
7401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402_mm512_rol_epi32 (__m512i __A, const int __B)
7403{
7404 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7405 (__v16si)
4271e5cb 7406 _mm512_undefined_epi32 (),
756c5857
AI
7407 (__mmask16) -1);
7408}
7409
7410extern __inline __m512i
7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7413{
7414 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7415 (__v16si) __W,
7416 (__mmask16) __U);
7417}
7418
7419extern __inline __m512i
7420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7421_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7422{
7423 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7424 (__v16si)
7425 _mm512_setzero_si512 (),
7426 (__mmask16) __U);
7427}
7428
7429extern __inline __m512i
7430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431_mm512_ror_epi32 (__m512i __A, int __B)
7432{
7433 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7434 (__v16si)
4271e5cb 7435 _mm512_undefined_epi32 (),
756c5857
AI
7436 (__mmask16) -1);
7437}
7438
7439extern __inline __m512i
7440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7442{
7443 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7444 (__v16si) __W,
7445 (__mmask16) __U);
7446}
7447
7448extern __inline __m512i
7449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7451{
7452 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7453 (__v16si)
7454 _mm512_setzero_si512 (),
7455 (__mmask16) __U);
7456}
7457
7458extern __inline __m512i
7459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7460_mm512_rol_epi64 (__m512i __A, const int __B)
7461{
7462 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7463 (__v8di)
4271e5cb 7464 _mm512_undefined_epi32 (),
756c5857
AI
7465 (__mmask8) -1);
7466}
7467
7468extern __inline __m512i
7469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7470_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7471{
7472 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7473 (__v8di) __W,
7474 (__mmask8) __U);
7475}
7476
7477extern __inline __m512i
7478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7479_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7480{
7481 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7482 (__v8di)
7483 _mm512_setzero_si512 (),
7484 (__mmask8) __U);
7485}
7486
7487extern __inline __m512i
7488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489_mm512_ror_epi64 (__m512i __A, int __B)
7490{
7491 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7492 (__v8di)
4271e5cb 7493 _mm512_undefined_epi32 (),
756c5857
AI
7494 (__mmask8) -1);
7495}
7496
7497extern __inline __m512i
7498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7499_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7500{
7501 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7502 (__v8di) __W,
7503 (__mmask8) __U);
7504}
7505
7506extern __inline __m512i
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7509{
7510 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7511 (__v8di)
7512 _mm512_setzero_si512 (),
7513 (__mmask8) __U);
7514}
7515
7516#else
/* Macro forms of _mm512_rol_epi32 and its mask/maskz variants.  Each one
   expands to a single __builtin_ia32_prold512_mask call with B cast to
   int as the second operand.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A), \
                                           (int) (B), \
                                           (__v16si) _mm512_undefined_epi32 (), \
                                           (__mmask16) (-1)))

#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A), \
                                           (int) (B), \
                                           (__v16si) (__m512i) (W), \
                                           (__mmask16) (U)))

#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si) (__m512i) (A), \
                                           (int) (B), \
                                           (__v16si) _mm512_setzero_si512 (), \
                                           (__mmask16) (U)))
/* Macro forms of _mm512_ror_epi32 and its mask/maskz variants.  Each one
   expands to a single __builtin_ia32_prord512_mask call with B cast to
   int as the second operand.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A), \
                                           (int) (B), \
                                           (__v16si) _mm512_undefined_epi32 (), \
                                           (__mmask16) (-1)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A), \
                                           (int) (B), \
                                           (__v16si) (__m512i) (W), \
                                           (__mmask16) (U)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si) (__m512i) (A), \
                                           (int) (B), \
                                           (__v16si) _mm512_setzero_si512 (), \
                                           (__mmask16) (U)))
/* Macro forms of _mm512_rol_epi64 and its mask/maskz variants.  Each one
   expands to a single __builtin_ia32_prolq512_mask call with B cast to
   int as the second operand.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A), \
                                           (int) (B), \
                                           (__v8di) _mm512_undefined_epi32 (), \
                                           (__mmask8) (-1)))

#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A), \
                                           (int) (B), \
                                           (__v8di) (__m512i) (W), \
                                           (__mmask8) (U)))

#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di) (__m512i) (A), \
                                           (int) (B), \
                                           (__v8di) _mm512_setzero_si512 (), \
                                           (__mmask8) (U)))
7562
/* Macro forms of _mm512_ror_epi64 and its mask/maskz variants.  Each one
   expands to a single __builtin_ia32_prorq512_mask call with B cast to
   int as the second operand.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A), \
                                           (int) (B), \
                                           (__v8di) _mm512_undefined_epi32 (), \
                                           (__mmask8) (-1)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A), \
                                           (int) (B), \
                                           (__v8di) (__m512i) (W), \
                                           (__mmask8) (U)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di) (__m512i) (A), \
                                           (int) (B), \
                                           (__v8di) _mm512_setzero_si512 (), \
                                           (__mmask8) (U)))
7578#endif
7579
7580extern __inline __m512i
7581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582_mm512_and_si512 (__m512i __A, __m512i __B)
7583{
2069d6fc 7584 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7585}
7586
7587extern __inline __m512i
7588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7589_mm512_and_epi32 (__m512i __A, __m512i __B)
7590{
2069d6fc 7591 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
7592}
7593
7594extern __inline __m512i
7595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7596_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7597{
7598 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7599 (__v16si) __B,
7600 (__v16si) __W,
7601 (__mmask16) __U);
7602}
7603
7604extern __inline __m512i
7605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7606_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7607{
7608 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7609 (__v16si) __B,
7610 (__v16si)
7611 _mm512_setzero_si512 (),
7612 (__mmask16) __U);
7613}
7614
7615extern __inline __m512i
7616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7617_mm512_and_epi64 (__m512i __A, __m512i __B)
7618{
2069d6fc 7619 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
7620}
7621
7622extern __inline __m512i
7623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7625{
7626 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7627 (__v8di) __B,
7628 (__v8di) __W, __U);
7629}
7630
7631extern __inline __m512i
7632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7633_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7634{
7635 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7636 (__v8di) __B,
7637 (__v8di)
7638 _mm512_setzero_pd (),
7639 __U);
7640}
7641
7642extern __inline __m512i
7643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7644_mm512_andnot_si512 (__m512i __A, __m512i __B)
7645{
7646 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7647 (__v16si) __B,
7648 (__v16si)
4271e5cb 7649 _mm512_undefined_epi32 (),
756c5857
AI
7650 (__mmask16) -1);
7651}
7652
7653extern __inline __m512i
7654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7655_mm512_andnot_epi32 (__m512i __A, __m512i __B)
7656{
7657 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7658 (__v16si) __B,
7659 (__v16si)
4271e5cb 7660 _mm512_undefined_epi32 (),
756c5857
AI
7661 (__mmask16) -1);
7662}
7663
7664extern __inline __m512i
7665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7667{
7668 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7669 (__v16si) __B,
7670 (__v16si) __W,
7671 (__mmask16) __U);
7672}
7673
7674extern __inline __m512i
7675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7677{
7678 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7679 (__v16si) __B,
7680 (__v16si)
7681 _mm512_setzero_si512 (),
7682 (__mmask16) __U);
7683}
7684
7685extern __inline __m512i
7686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7687_mm512_andnot_epi64 (__m512i __A, __m512i __B)
7688{
7689 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7690 (__v8di) __B,
7691 (__v8di)
4271e5cb 7692 _mm512_undefined_epi32 (),
756c5857
AI
7693 (__mmask8) -1);
7694}
7695
7696extern __inline __m512i
7697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7699{
7700 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7701 (__v8di) __B,
7702 (__v8di) __W, __U);
7703}
7704
7705extern __inline __m512i
7706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7707_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7708{
7709 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7710 (__v8di) __B,
7711 (__v8di)
7712 _mm512_setzero_pd (),
7713 __U);
7714}
7715
7716extern __inline __mmask16
7717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7718_mm512_test_epi32_mask (__m512i __A, __m512i __B)
7719{
7720 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7721 (__v16si) __B,
7722 (__mmask16) -1);
7723}
7724
7725extern __inline __mmask16
7726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7727_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7728{
7729 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7730 (__v16si) __B, __U);
7731}
7732
7733extern __inline __mmask8
7734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7735_mm512_test_epi64_mask (__m512i __A, __m512i __B)
7736{
7737 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7738 (__v8di) __B,
7739 (__mmask8) -1);
7740}
7741
7742extern __inline __mmask8
7743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7745{
7746 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7747}
7748
260d3642
IT
7749extern __inline __mmask16
7750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7752{
7753 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7754 (__v16si) __B,
7755 (__mmask16) -1);
7756}
7757
7758extern __inline __mmask16
7759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7760_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7761{
7762 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7763 (__v16si) __B, __U);
7764}
7765
7766extern __inline __mmask8
7767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7768_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7769{
7770 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7771 (__v8di) __B,
7772 (__mmask8) -1);
7773}
7774
7775extern __inline __mmask8
7776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7777_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7778{
7779 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7780 (__v8di) __B, __U);
7781}
7782
dcb2c527
JJ
7783extern __inline __m512
7784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7785_mm512_abs_ps (__m512 __A)
7786{
7787 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7788 _mm512_set1_epi32 (0x7fffffff));
7789}
7790
7791extern __inline __m512
7792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7794{
7795 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7796 _mm512_set1_epi32 (0x7fffffff));
7797}
7798
7799extern __inline __m512d
7800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 7801_mm512_abs_pd (__m512d __A)
dcb2c527
JJ
7802{
7803 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7804 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7805}
7806
7807extern __inline __m512d
7808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
23bce99c 7809_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
dcb2c527
JJ
7810{
7811 return (__m512d)
7812 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7813 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7814}
7815
756c5857
AI
7816extern __inline __m512i
7817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7818_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7819{
7820 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7821 (__v16si) __B,
7822 (__v16si)
4271e5cb 7823 _mm512_undefined_epi32 (),
756c5857
AI
7824 (__mmask16) -1);
7825}
7826
7827extern __inline __m512i
7828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7829_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7830 __m512i __B)
7831{
7832 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7833 (__v16si) __B,
7834 (__v16si) __W,
7835 (__mmask16) __U);
7836}
7837
7838extern __inline __m512i
7839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7841{
7842 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7843 (__v16si) __B,
7844 (__v16si)
7845 _mm512_setzero_si512 (),
7846 (__mmask16) __U);
7847}
7848
7849extern __inline __m512i
7850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7852{
7853 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7854 (__v8di) __B,
7855 (__v8di)
4271e5cb 7856 _mm512_undefined_epi32 (),
756c5857
AI
7857 (__mmask8) -1);
7858}
7859
7860extern __inline __m512i
7861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7863{
7864 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7865 (__v8di) __B,
7866 (__v8di) __W,
7867 (__mmask8) __U);
7868}
7869
7870extern __inline __m512i
7871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7872_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7873{
7874 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7875 (__v8di) __B,
7876 (__v8di)
7877 _mm512_setzero_si512 (),
7878 (__mmask8) __U);
7879}
7880
7881extern __inline __m512i
7882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7883_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7884{
7885 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7886 (__v16si) __B,
7887 (__v16si)
4271e5cb 7888 _mm512_undefined_epi32 (),
756c5857
AI
7889 (__mmask16) -1);
7890}
7891
7892extern __inline __m512i
7893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7895 __m512i __B)
7896{
7897 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7898 (__v16si) __B,
7899 (__v16si) __W,
7900 (__mmask16) __U);
7901}
7902
7903extern __inline __m512i
7904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7906{
7907 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7908 (__v16si) __B,
7909 (__v16si)
7910 _mm512_setzero_si512 (),
7911 (__mmask16) __U);
7912}
7913
7914extern __inline __m512i
7915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7916_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7917{
7918 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7919 (__v8di) __B,
7920 (__v8di)
4271e5cb 7921 _mm512_undefined_epi32 (),
756c5857
AI
7922 (__mmask8) -1);
7923}
7924
7925extern __inline __m512i
7926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7927_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7928{
7929 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7930 (__v8di) __B,
7931 (__v8di) __W,
7932 (__mmask8) __U);
7933}
7934
7935extern __inline __m512i
7936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7938{
7939 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7940 (__v8di) __B,
7941 (__v8di)
7942 _mm512_setzero_si512 (),
7943 (__mmask8) __U);
7944}
7945
7946#ifdef __x86_64__
7947#ifdef __OPTIMIZE__
7948extern __inline unsigned long long
7949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7950_mm_cvt_roundss_u64 (__m128 __A, const int __R)
7951{
7952 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7953}
7954
7955extern __inline long long
7956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7957_mm_cvt_roundss_si64 (__m128 __A, const int __R)
7958{
7959 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7960}
7961
7962extern __inline long long
7963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964_mm_cvt_roundss_i64 (__m128 __A, const int __R)
7965{
7966 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7967}
7968
7969extern __inline unsigned long long
7970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7972{
7973 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7974}
7975
7976extern __inline long long
7977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7978_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7979{
7980 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7981}
7982
7983extern __inline long long
7984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7986{
7987 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7988}
7989#else
/* Macro forms of the 64-bit _mm_cvt[t]_roundss_* conversions.  A and B
   are forwarded to the builtin as written; only the result is cast.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (A, B))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (A, B))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))
8007#endif
8008#endif
8009
8010#ifdef __OPTIMIZE__
8011extern __inline unsigned
8012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8013_mm_cvt_roundss_u32 (__m128 __A, const int __R)
8014{
8015 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
8016}
8017
8018extern __inline int
8019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8020_mm_cvt_roundss_si32 (__m128 __A, const int __R)
8021{
8022 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
8023}
8024
8025extern __inline int
8026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8027_mm_cvt_roundss_i32 (__m128 __A, const int __R)
8028{
8029 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
8030}
8031
8032extern __inline unsigned
8033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8034_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
8035{
8036 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
8037}
8038
8039extern __inline int
8040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8041_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
8042{
8043 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
8044}
8045
8046extern __inline int
8047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8048_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
8049{
8050 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
8051}
8052#else
/* Macro forms of the 32-bit _mm_cvt[t]_roundss_* conversions.  A and B
   are forwarded to the builtin as written; only the result is cast.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 (A, B))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 (A, B))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))
8070#endif
8071
8072#ifdef __x86_64__
8073#ifdef __OPTIMIZE__
8074extern __inline unsigned long long
8075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8076_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
8077{
8078 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
8079}
8080
8081extern __inline long long
8082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8083_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
8084{
8085 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
8086}
8087
8088extern __inline long long
8089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8090_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
8091{
8092 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
8093}
8094
8095extern __inline unsigned long long
8096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8097_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
8098{
8099 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
8100}
8101
8102extern __inline long long
8103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8104_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
8105{
8106 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
8107}
8108
8109extern __inline long long
8110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8111_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
8112{
8113 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
8114}
8115#else
/* Macro forms of the 64-bit _mm_cvt[t]_roundsd_* conversions.  A and B
   are forwarded to the builtin as written; only the result is cast.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 (A, B))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 (A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))
8133#endif
8134#endif
8135
8136#ifdef __OPTIMIZE__
8137extern __inline unsigned
8138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
8140{
8141 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
8142}
8143
8144extern __inline int
8145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8146_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
8147{
8148 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
8149}
8150
8151extern __inline int
8152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8153_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
8154{
8155 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
8156}
8157
8158extern __inline unsigned
8159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8160_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
8161{
8162 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
8163}
8164
8165extern __inline int
8166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8167_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
8168{
8169 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
8170}
8171
8172extern __inline int
8173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8174_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
8175{
8176 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
8177}
8178#else
/* Macro forms of the 32-bit _mm_cvt[t]_roundsd_* conversions.  A and B
   are forwarded to the builtin as written; only the result is cast.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 (A, B))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 (A, B))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))
8196#endif
8197
8198extern __inline __m512d
8199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200_mm512_movedup_pd (__m512d __A)
8201{
8202 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8203 (__v8df)
0b192937 8204 _mm512_undefined_pd (),
756c5857
AI
8205 (__mmask8) -1);
8206}
8207
8208extern __inline __m512d
8209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8210_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8211{
8212 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8213 (__v8df) __W,
8214 (__mmask8) __U);
8215}
8216
8217extern __inline __m512d
8218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8219_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8220{
8221 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8222 (__v8df)
8223 _mm512_setzero_pd (),
8224 (__mmask8) __U);
8225}
8226
8227extern __inline __m512d
8228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8229_mm512_unpacklo_pd (__m512d __A, __m512d __B)
8230{
8231 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8232 (__v8df) __B,
8233 (__v8df)
0b192937 8234 _mm512_undefined_pd (),
756c5857
AI
8235 (__mmask8) -1);
8236}
8237
8238extern __inline __m512d
8239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8240_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8241{
8242 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8243 (__v8df) __B,
8244 (__v8df) __W,
8245 (__mmask8) __U);
8246}
8247
8248extern __inline __m512d
8249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8250_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8251{
8252 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8253 (__v8df) __B,
8254 (__v8df)
8255 _mm512_setzero_pd (),
8256 (__mmask8) __U);
8257}
8258
8259extern __inline __m512d
8260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8261_mm512_unpackhi_pd (__m512d __A, __m512d __B)
8262{
8263 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8264 (__v8df) __B,
8265 (__v8df)
0b192937 8266 _mm512_undefined_pd (),
756c5857
AI
8267 (__mmask8) -1);
8268}
8269
8270extern __inline __m512d
8271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8272_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8273{
8274 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8275 (__v8df) __B,
8276 (__v8df) __W,
8277 (__mmask8) __U);
8278}
8279
8280extern __inline __m512d
8281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8282_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8283{
8284 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8285 (__v8df) __B,
8286 (__v8df)
8287 _mm512_setzero_pd (),
8288 (__mmask8) __U);
8289}
8290
8291extern __inline __m512
8292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8293_mm512_unpackhi_ps (__m512 __A, __m512 __B)
8294{
8295 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8296 (__v16sf) __B,
8297 (__v16sf)
0b192937 8298 _mm512_undefined_ps (),
756c5857
AI
8299 (__mmask16) -1);
8300}
8301
8302extern __inline __m512
8303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8305{
8306 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8307 (__v16sf) __B,
8308 (__v16sf) __W,
8309 (__mmask16) __U);
8310}
8311
8312extern __inline __m512
8313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8315{
8316 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8317 (__v16sf) __B,
8318 (__v16sf)
8319 _mm512_setzero_ps (),
8320 (__mmask16) __U);
8321}
8322
8323#ifdef __OPTIMIZE__
8324extern __inline __m512d
8325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8326_mm512_cvt_roundps_pd (__m256 __A, const int __R)
8327{
8328 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8329 (__v8df)
0b192937 8330 _mm512_undefined_pd (),
756c5857
AI
8331 (__mmask8) -1, __R);
8332}
8333
8334extern __inline __m512d
8335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8336_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8337 const int __R)
8338{
8339 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8340 (__v8df) __W,
8341 (__mmask8) __U, __R);
8342}
8343
8344extern __inline __m512d
8345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8346_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8347{
8348 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8349 (__v8df)
8350 _mm512_setzero_pd (),
8351 (__mmask8) __U, __R);
8352}
8353
8354extern __inline __m512
8355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8356_mm512_cvt_roundph_ps (__m256i __A, const int __R)
8357{
8358 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8359 (__v16sf)
0b192937 8360 _mm512_undefined_ps (),
756c5857
AI
8361 (__mmask16) -1, __R);
8362}
8363
8364extern __inline __m512
8365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8366_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8367 const int __R)
8368{
8369 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8370 (__v16sf) __W,
8371 (__mmask16) __U, __R);
8372}
8373
8374extern __inline __m512
8375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8377{
8378 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8379 (__v16sf)
8380 _mm512_setzero_ps (),
8381 (__mmask16) __U, __R);
8382}
8383
8384extern __inline __m256i
8385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386_mm512_cvt_roundps_ph (__m512 __A, const int __I)
8387{
8388 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8389 __I,
8390 (__v16hi)
0b192937 8391 _mm256_undefined_si256 (),
756c5857
AI
8392 -1);
8393}
8394
8395extern __inline __m256i
8396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8397_mm512_cvtps_ph (__m512 __A, const int __I)
8398{
8399 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8400 __I,
8401 (__v16hi)
0b192937 8402 _mm256_undefined_si256 (),
756c5857
AI
8403 -1);
8404}
8405
8406extern __inline __m256i
8407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8408_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8409 const int __I)
8410{
8411 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8412 __I,
8413 (__v16hi) __U,
8414 (__mmask16) __W);
8415}
8416
8417extern __inline __m256i
8418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8419_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8420{
8421 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8422 __I,
8423 (__v16hi) __U,
8424 (__mmask16) __W);
8425}
8426
8427extern __inline __m256i
8428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8430{
8431 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8432 __I,
8433 (__v16hi)
8434 _mm256_setzero_si256 (),
8435 (__mmask16) __W);
8436}
8437
8438extern __inline __m256i
8439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8440_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8441{
8442 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8443 __I,
8444 (__v16hi)
8445 _mm256_setzero_si256 (),
8446 (__mmask16) __W);
8447}
8448#else
/* Macro forms of _mm512_cvt_roundps_pd and its mask/maskz variants.  Each
   expands to one __builtin_ia32_cvtps2pd512_mask call.  The whole
   expansion is parenthesized so that the result cast cannot be split
   from the call by an operator applied to the macro (for example a
   trailing subscript).  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (U), (B)))
8457
/* Macro forms of _mm512_cvt_roundph_ps and its mask/maskz variants.  Each
   expands to one __builtin_ia32_vcvtph2ps512_mask call.  The whole
   expansion is parenthesized so that the result cast cannot be split
   from the call by an operator applied to the macro (for example a
   trailing subscript).  */
#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
                                              (__v16sf) _mm512_undefined_ps (), \
                                              -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), (__v16sf) (W), \
                                              (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A), \
                                              (__v16sf) _mm512_setzero_ps (), \
                                              (U), (B)))
8466
/* Macro forms of the _mm512_*cvt[_round]ps_ph family.  Each expands to
   one __builtin_ia32_vcvtps2ph512_mask call.  In the mask forms U is the
   256-bit vector operand and W is the __mmask16.  The argument A is
   parenthesized before the (__m512) cast so that the cast applies to
   the whole argument expression, not just its first operand.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
    (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
    (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
    (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
    (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
    (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
    (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
8485#endif
8486
8487#ifdef __OPTIMIZE__
8488extern __inline __m256
8489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8490_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8491{
8492 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8493 (__v8sf)
0b192937 8494 _mm256_undefined_ps (),
756c5857
AI
8495 (__mmask8) -1, __R);
8496}
8497
8498extern __inline __m256
8499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8501 const int __R)
8502{
8503 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8504 (__v8sf) __W,
8505 (__mmask8) __U, __R);
8506}
8507
8508extern __inline __m256
8509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8510_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8511{
8512 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8513 (__v8sf)
8514 _mm256_setzero_ps (),
8515 (__mmask8) __U, __R);
8516}
8517
075691af
AI
8518extern __inline __m128
8519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8520_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8521{
8522 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8523 (__v2df) __B,
8524 __R);
8525}
8526
8527extern __inline __m128d
8528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8529_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8530{
8531 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8532 (__v4sf) __B,
8533 __R);
8534}
756c5857
AI
8535#else
/* Macro forms of the conversions defined as inline functions in the
   __OPTIMIZE__ branch above.  Each expansion is fully parenthesized so
   it can be used wherever a call to the inline form could, and the
   operands receive the same casts the inline forms apply.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d) (A), \
      (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d) (A), \
      (__v8sf)(__m256) (W), (__mmask8) (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d) (A), \
      (__v8sf) _mm256_setzero_ps (), (__mmask8) (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf)(__m128) (A), \
      (__v2df)(__m128d) (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((__v2df)(__m128d) (A), \
      (__v4sf)(__m128) (B), (C)))
756c5857
AI
8550#endif
8551
8552extern __inline void
8553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8554_mm512_stream_si512 (__m512i * __P, __m512i __A)
8555{
8556 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8557}
8558
8559extern __inline void
8560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8561_mm512_stream_ps (float *__P, __m512 __A)
8562{
8563 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8564}
8565
8566extern __inline void
8567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8568_mm512_stream_pd (double *__P, __m512d __A)
8569{
8570 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8571}
8572
c56a42b9
KY
8573extern __inline __m512i
8574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8575_mm512_stream_load_si512 (void *__P)
8576{
8577 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8578}
8579
9c3c2608
UB
/* Constants for mantissa extraction: normalization interval selector.
   The values are spelled out because the getmant intrinsics combine
   them with the sign selector as (sign << 2) | norm.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,      /* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,     /* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,     /* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8588
/* Constants for mantissa extraction: sign-handling selector.  The
   getmant intrinsics shift this value left by two before OR-ing in the
   normalization selector.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,    /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,   /* sign = 0 */
  _MM_MANT_SIGN_nan = 2     /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8595
756c5857 8596#ifdef __OPTIMIZE__
075691af
AI
8597extern __inline __m128
8598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8599_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8600{
8601 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8602 (__v4sf) __B,
8603 __R);
8604}
8605
68d872d7
SP
8606extern __inline __m128
8607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8609 __m128 __B, const int __R)
8610{
8611 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8612 (__v4sf) __B,
8613 (__v4sf) __W,
8614 (__mmask8) __U, __R);
8615}
8616
8617extern __inline __m128
8618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8619_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8620 const int __R)
8621{
8622 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8623 (__v4sf) __B,
8624 (__v4sf)
8625 _mm_setzero_ps (),
8626 (__mmask8) __U, __R);
8627}
8628
075691af
AI
8629extern __inline __m128d
8630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8632{
8633 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8634 (__v2df) __B,
8635 __R);
8636}
8637
68d872d7
SP
8638extern __inline __m128d
8639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8640_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8641 __m128d __B, const int __R)
8642{
8643 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8644 (__v2df) __B,
8645 (__v2df) __W,
8646 (__mmask8) __U, __R);
8647}
8648
8649extern __inline __m128d
8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8652 const int __R)
8653{
8654 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8655 (__v2df) __B,
8656 (__v2df)
8657 _mm_setzero_pd (),
8658 (__mmask8) __U, __R);
8659}
8660
756c5857
AI
8661extern __inline __m512
8662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663_mm512_getexp_round_ps (__m512 __A, const int __R)
8664{
8665 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8666 (__v16sf)
0b192937 8667 _mm512_undefined_ps (),
756c5857
AI
8668 (__mmask16) -1, __R);
8669}
8670
8671extern __inline __m512
8672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8674 const int __R)
8675{
8676 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8677 (__v16sf) __W,
8678 (__mmask16) __U, __R);
8679}
8680
8681extern __inline __m512
8682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8683_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8684{
8685 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8686 (__v16sf)
8687 _mm512_setzero_ps (),
8688 (__mmask16) __U, __R);
8689}
8690
8691extern __inline __m512d
8692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8693_mm512_getexp_round_pd (__m512d __A, const int __R)
8694{
8695 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8696 (__v8df)
0b192937 8697 _mm512_undefined_pd (),
756c5857
AI
8698 (__mmask8) -1, __R);
8699}
8700
8701extern __inline __m512d
8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8704 const int __R)
8705{
8706 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8707 (__v8df) __W,
8708 (__mmask8) __U, __R);
8709}
8710
8711extern __inline __m512d
8712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8713_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8714{
8715 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8716 (__v8df)
8717 _mm512_setzero_pd (),
8718 (__mmask8) __U, __R);
8719}
8720
756c5857
AI
8721extern __inline __m512d
8722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8723_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8724 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8725{
8726 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8727 (__C << 2) | __B,
0b192937 8728 _mm512_undefined_pd (),
756c5857
AI
8729 (__mmask8) -1, __R);
8730}
8731
8732extern __inline __m512d
8733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8734_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8735 _MM_MANTISSA_NORM_ENUM __B,
8736 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8737{
8738 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8739 (__C << 2) | __B,
8740 (__v8df) __W, __U,
8741 __R);
8742}
8743
8744extern __inline __m512d
8745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8746_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8747 _MM_MANTISSA_NORM_ENUM __B,
8748 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8749{
8750 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8751 (__C << 2) | __B,
8752 (__v8df)
8753 _mm512_setzero_pd (),
8754 __U, __R);
8755}
8756
8757extern __inline __m512
8758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8759_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8760 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8761{
8762 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8763 (__C << 2) | __B,
0b192937 8764 _mm512_undefined_ps (),
756c5857
AI
8765 (__mmask16) -1, __R);
8766}
8767
8768extern __inline __m512
8769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8770_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8771 _MM_MANTISSA_NORM_ENUM __B,
8772 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8773{
8774 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8775 (__C << 2) | __B,
8776 (__v16sf) __W, __U,
8777 __R);
8778}
8779
8780extern __inline __m512
8781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8782_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8783 _MM_MANTISSA_NORM_ENUM __B,
8784 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8785{
8786 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8787 (__C << 2) | __B,
8788 (__v16sf)
8789 _mm512_setzero_ps (),
8790 __U, __R);
8791}
8792
075691af
AI
8793extern __inline __m128d
8794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8795_mm_getmant_round_sd (__m128d __A, __m128d __B,
8796 _MM_MANTISSA_NORM_ENUM __C,
8797 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8798{
8799 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8800 (__v2df) __B,
8801 (__D << 2) | __C,
8802 __R);
8803}
8804
68d872d7
SP
8805extern __inline __m128d
8806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8807_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8808 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8809 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8810{
8811 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8812 (__v2df) __B,
8813 (__D << 2) | __C,
8814 (__v2df) __W,
8815 __U, __R);
8816}
8817
8818extern __inline __m128d
8819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8821 _MM_MANTISSA_NORM_ENUM __C,
8822 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8823{
8824 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8825 (__v2df) __B,
8826 (__D << 2) | __C,
8827 (__v2df)
8828 _mm_setzero_pd(),
8829 __U, __R);
8830}
8831
075691af
AI
8832extern __inline __m128
8833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8834_mm_getmant_round_ss (__m128 __A, __m128 __B,
8835 _MM_MANTISSA_NORM_ENUM __C,
8836 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8837{
8838 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8839 (__v4sf) __B,
8840 (__D << 2) | __C,
8841 __R);
8842}
8843
68d872d7
SP
8844extern __inline __m128
8845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8847 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8848 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8849{
8850 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8851 (__v4sf) __B,
8852 (__D << 2) | __C,
8853 (__v4sf) __W,
8854 __U, __R);
8855}
8856
8857extern __inline __m128
8858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8859_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8860 _MM_MANTISSA_NORM_ENUM __C,
8861 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8862{
8863 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8864 (__v4sf) __B,
8865 (__D << 2) | __C,
8866 (__v4sf)
8867 _mm_setzero_ps(),
8868 __U, __R);
8869}
8870
756c5857
AI
8871#else
/* Macro forms of the _mm512_*getmant_round_pd intrinsics.  B is the
   normalization selector and C the sign selector; they are packed into
   the control immediate as ((C) << 2) | (B).  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)_mm512_undefined_pd(), (__mmask8)-1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)(U), (R)))
/* Macro forms of the _mm512_*getmant_round_ps intrinsics.  B is the
   normalization selector and C the sign selector; they are packed into
   the control immediate as ((C) << 2) | (B).  */
#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)_mm512_undefined_ps(), (__mmask16)-1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)_mm512_setzero_ps(), (__mmask16)(U), (R)))
075691af
AI
/* Macro forms of the _mm_*getmant_round_sd intrinsics.  C is the
   normalization selector and D the sign selector; they are packed into
   the control immediate as ((D) << 2) | (C).  */
#define _mm_getmant_round_sd(X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), (R)))

#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (R)))

#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U), (R)))
8933
075691af
AI
/* Macro forms of the _mm_*getmant_round_ss intrinsics.  C is the
   normalization selector and D the sign selector; they are packed into
   the control immediate as ((D) << 2) | (C).  */
#define _mm_getmant_round_ss(X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_round ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), (R)))

#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (R)))

#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
  ((__m128) __builtin_ia32_getmantss_mask_round ( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U), (R)))
8955
075691af
AI
/* Macro forms of the _mm_*getexp_round_ss intrinsics.  All three
   expansions are fully parenthesized and cast their operands the same
   way the inline-function forms do, so the masked variants behave like
   the unmasked one when used inside a larger expression.  */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128) (A), \
      (__v4sf)(__m128) (B), (R)))

#define _mm_mask_getexp_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128) (A), \
      (__v4sf)(__m128) (B), (__v4sf)(__m128) (W), (__mmask8) (U), (C)))

#define _mm_maskz_getexp_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128) (A), \
      (__v4sf)(__m128) (B), (__v4sf) _mm_setzero_ps (), \
      (__mmask8) (U), (C)))
8964
075691af
AI
/* Macro forms of the _mm_*getexp_round_sd intrinsics.  All three
   expansions are fully parenthesized and cast their operands the same
   way the inline-function forms do, so the masked variants behave like
   the unmasked one when used inside a larger expression.  */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d) (A), \
      (__v2df)(__m128d) (B), (R)))

#define _mm_mask_getexp_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d) (A), \
      (__v2df)(__m128d) (B), (__v2df)(__m128d) (W), (__mmask8) (U), (C)))

#define _mm_maskz_getexp_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d) (A), \
      (__v2df)(__m128d) (B), (__v2df) _mm_setzero_pd (), \
      (__mmask8) (U), (C)))
8973
8974
756c5857
AI
/* Macro forms of the _mm512_*getexp_round_ps intrinsics; each expands to
   one __builtin_ia32_getexpps512_mask call with rounding immediate R.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf)(__m512)(A), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
      (__v16sf)(__m512)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (R)))
8986
/* Macro forms of the _mm512_*getexp_round_pd intrinsics; each expands to
   one __builtin_ia32_getexppd512_mask call with rounding immediate R.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df)(__m512d)(A), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
      (__v8df)(__m512d)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (R)))
8998#endif
8999
9000#ifdef __OPTIMIZE__
9001extern __inline __m512
9002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9003_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
9004{
9005 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
9006 (__v16sf)
9007 _mm512_undefined_ps (),
9008 -1, __R);
756c5857
AI
9009}
9010
9011extern __inline __m512
9012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9013_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
9014 const int __imm, const int __R)
9015{
9016 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
9017 (__v16sf) __A,
9018 (__mmask16) __B, __R);
9019}
9020
9021extern __inline __m512
9022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9023_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
9024 const int __imm, const int __R)
9025{
9026 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
9027 __imm,
9028 (__v16sf)
9029 _mm512_setzero_ps (),
9030 (__mmask16) __A, __R);
9031}
9032
9033extern __inline __m512d
9034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9035_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
9036{
9037 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
9038 (__v8df)
9039 _mm512_undefined_pd (),
9040 -1, __R);
756c5857
AI
9041}
9042
9043extern __inline __m512d
9044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
9046 __m512d __C, const int __imm, const int __R)
9047{
9048 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
9049 (__v8df) __A,
9050 (__mmask8) __B, __R);
9051}
9052
9053extern __inline __m512d
9054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
9056 const int __imm, const int __R)
9057{
9058 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
9059 __imm,
9060 (__v8df)
9061 _mm512_setzero_pd (),
9062 (__mmask8) __A, __R);
9063}
075691af
AI
9064
9065extern __inline __m128
9066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9067_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
9068{
9069 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
9070 (__v4sf) __B, __imm, __R);
9071}
9072
9073extern __inline __m128d
9074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9075_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9076 const int __R)
9077{
9078 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
9079 (__v2df) __B, __imm, __R);
9080}
9081
756c5857
AI
9082#else
/* Macro forms of the _mm512_*roundscale_round_ps intrinsics; each expands
   to one __builtin_ia32_rndscaleps_mask call.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(A), (int)(B), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), (R)))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(C), (int)(D), \
      (__v16sf)(__m512)(A), (__mmask16)(B), (R)))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(B), (int)(C), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), (R)))
/* Macro forms of the _mm512_*roundscale_round_pd intrinsics; each expands
   to one __builtin_ia32_rndscalepd_mask call.  */
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(A), (int)(B), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (R)))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(C), (int)(D), \
      (__v8df)(__m512d)(A), (__mmask8)(B), (R)))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), (R)))
075691af
AI
/* Macro forms of _mm_roundscale_round_ss and _mm_roundscale_round_sd:
   A and B are the two vector operands, C the immediate and R the
   rounding immediate.  */
#define _mm_roundscale_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (R)))
#define _mm_roundscale_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (R)))
756c5857
AI
9115#endif
9116
9117extern __inline __m512
9118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9119_mm512_floor_ps (__m512 __A)
9120{
9121 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9122 _MM_FROUND_FLOOR,
9123 (__v16sf) __A, -1,
9124 _MM_FROUND_CUR_DIRECTION);
9125}
9126
9127extern __inline __m512d
9128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129_mm512_floor_pd (__m512d __A)
9130{
9131 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9132 _MM_FROUND_FLOOR,
9133 (__v8df) __A, -1,
9134 _MM_FROUND_CUR_DIRECTION);
9135}
9136
9137extern __inline __m512
9138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9139_mm512_ceil_ps (__m512 __A)
9140{
9141 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9142 _MM_FROUND_CEIL,
9143 (__v16sf) __A, -1,
9144 _MM_FROUND_CUR_DIRECTION);
9145}
9146
9147extern __inline __m512d
9148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9149_mm512_ceil_pd (__m512d __A)
9150{
9151 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9152 _MM_FROUND_CEIL,
9153 (__v8df) __A, -1,
9154 _MM_FROUND_CUR_DIRECTION);
9155}
9156
9157extern __inline __m512
9158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9159_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9160{
9161 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9162 _MM_FROUND_FLOOR,
9163 (__v16sf) __W, __U,
9164 _MM_FROUND_CUR_DIRECTION);
9165}
9166
9167extern __inline __m512d
9168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9169_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9170{
9171 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9172 _MM_FROUND_FLOOR,
9173 (__v8df) __W, __U,
9174 _MM_FROUND_CUR_DIRECTION);
9175}
9176
9177extern __inline __m512
9178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9180{
9181 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9182 _MM_FROUND_CEIL,
9183 (__v16sf) __W, __U,
9184 _MM_FROUND_CUR_DIRECTION);
9185}
9186
9187extern __inline __m512d
9188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9189_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9190{
9191 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9192 _MM_FROUND_CEIL,
9193 (__v8df) __W, __U,
9194 _MM_FROUND_CUR_DIRECTION);
9195}
9196
756c5857 9197#ifdef __OPTIMIZE__
756c5857
AI
9198extern __inline __m512i
9199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9200_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9201{
9202 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9203 (__v16si) __B, __imm,
9204 (__v16si)
4271e5cb 9205 _mm512_undefined_epi32 (),
756c5857
AI
9206 (__mmask16) -1);
9207}
9208
9209extern __inline __m512i
9210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9211_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9212 __m512i __B, const int __imm)
9213{
9214 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9215 (__v16si) __B, __imm,
9216 (__v16si) __W,
9217 (__mmask16) __U);
9218}
9219
9220extern __inline __m512i
9221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9222_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9223 const int __imm)
9224{
9225 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9226 (__v16si) __B, __imm,
9227 (__v16si)
9228 _mm512_setzero_si512 (),
9229 (__mmask16) __U);
9230}
9231
9232extern __inline __m512i
9233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9234_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9235{
9236 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9237 (__v8di) __B, __imm,
9238 (__v8di)
4271e5cb 9239 _mm512_undefined_epi32 (),
756c5857
AI
9240 (__mmask8) -1);
9241}
9242
9243extern __inline __m512i
9244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9245_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9246 __m512i __B, const int __imm)
9247{
9248 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9249 (__v8di) __B, __imm,
9250 (__v8di) __W,
9251 (__mmask8) __U);
9252}
9253
9254extern __inline __m512i
9255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9257 const int __imm)
9258{
9259 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9260 (__v8di) __B, __imm,
9261 (__v8di)
9262 _mm512_setzero_si512 (),
9263 (__mmask8) __U);
9264}
9265#else
756c5857
AI
/* Macro forms of the _mm512_*alignr_epi32 intrinsics; each expands to one
   __builtin_ia32_alignd512_mask call with immediate C.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
      (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(C), \
      (__v16si)_mm512_undefined_epi32 (), (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
      (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(C), \
      (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
      (__v16si)(__m512i)(X), (__v16si)(__m512i)(Y), (int)(C), \
      (__v16si)_mm512_setzero_si512 (), (__mmask16)(U)))
9280
/* Macro forms of the _mm512_*alignr_epi64 intrinsics; each expands to one
   __builtin_ia32_alignq512_mask call with immediate C.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
      (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(C), \
      (__v8di)_mm512_undefined_epi32 (), (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
      (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(C), \
      (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
      (__v8di)(__m512i)(X), (__v8di)(__m512i)(Y), (int)(C), \
      (__v8di)_mm512_setzero_si512 (), (__mmask8)(U)))
9294#endif
9295
9296extern __inline __mmask16
9297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9298_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9299{
9300 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9301 (__v16si) __B,
9302 (__mmask16) -1);
9303}
9304
9305extern __inline __mmask16
9306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9308{
9309 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9310 (__v16si) __B, __U);
9311}
9312
9313extern __inline __mmask8
9314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9315_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9316{
9317 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9318 (__v8di) __B, __U);
9319}
9320
9321extern __inline __mmask8
9322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9323_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9324{
9325 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9326 (__v8di) __B,
9327 (__mmask8) -1);
9328}
9329
9330extern __inline __mmask16
9331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9333{
9334 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9335 (__v16si) __B,
9336 (__mmask16) -1);
9337}
9338
9339extern __inline __mmask16
9340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9341_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9342{
9343 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9344 (__v16si) __B, __U);
9345}
9346
9347extern __inline __mmask8
9348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9350{
9351 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9352 (__v8di) __B, __U);
9353}
9354
9355extern __inline __mmask8
9356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9357_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9358{
9359 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9360 (__v8di) __B,
9361 (__mmask8) -1);
9362}
9363
d256b866
IT
9364extern __inline __mmask16
9365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9366_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9367{
9368 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9369 (__v16si) __Y, 5,
9370 (__mmask16) -1);
9371}
9372
275be1da
IT
9373extern __inline __mmask16
9374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9375_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9376{
9377 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9378 (__v16si) __Y, 5,
9379 (__mmask16) __M);
9380}
9381
9382extern __inline __mmask16
9383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9384_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9385{
9386 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9387 (__v16si) __Y, 5,
9388 (__mmask16) __M);
9389}
9390
d256b866
IT
9391extern __inline __mmask16
9392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9394{
9395 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9396 (__v16si) __Y, 5,
9397 (__mmask16) -1);
9398}
9399
275be1da
IT
9400extern __inline __mmask8
9401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9402_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9403{
9404 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9405 (__v8di) __Y, 5,
9406 (__mmask8) __M);
9407}
9408
d256b866
IT
9409extern __inline __mmask8
9410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9411_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9412{
9413 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9414 (__v8di) __Y, 5,
9415 (__mmask8) -1);
9416}
9417
275be1da
IT
9418extern __inline __mmask8
9419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9420_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9421{
9422 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9423 (__v8di) __Y, 5,
9424 (__mmask8) __M);
9425}
9426
d256b866
IT
9427extern __inline __mmask8
9428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9429_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9430{
9431 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9432 (__v8di) __Y, 5,
9433 (__mmask8) -1);
9434}
9435
275be1da
IT
9436extern __inline __mmask16
9437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9439{
9440 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9441 (__v16si) __Y, 2,
9442 (__mmask16) __M);
9443}
9444
d256b866
IT
9445extern __inline __mmask16
9446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9447_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9448{
9449 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9450 (__v16si) __Y, 2,
9451 (__mmask16) -1);
9452}
9453
275be1da
IT
9454extern __inline __mmask16
9455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9456_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9457{
9458 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9459 (__v16si) __Y, 2,
9460 (__mmask16) __M);
9461}
9462
d256b866
IT
9463extern __inline __mmask16
9464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9465_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9466{
9467 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9468 (__v16si) __Y, 2,
9469 (__mmask16) -1);
9470}
9471
275be1da
IT
9472extern __inline __mmask8
9473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9475{
9476 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9477 (__v8di) __Y, 2,
9478 (__mmask8) __M);
9479}
9480
d256b866
IT
9481extern __inline __mmask8
9482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9483_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9484{
9485 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9486 (__v8di) __Y, 2,
9487 (__mmask8) -1);
9488}
9489
275be1da
IT
9490extern __inline __mmask8
9491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9492_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9493{
9494 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9495 (__v8di) __Y, 2,
9496 (__mmask8) __M);
9497}
9498
d256b866
IT
9499extern __inline __mmask8
9500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9501_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9502{
9503 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9504 (__v8di) __Y, 2,
9505 (__mmask8) -1);
9506}
9507
275be1da
IT
9508extern __inline __mmask16
9509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9510_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9511{
9512 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9513 (__v16si) __Y, 1,
9514 (__mmask16) __M);
9515}
9516
d256b866
IT
9517extern __inline __mmask16
9518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9519_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9520{
9521 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9522 (__v16si) __Y, 1,
9523 (__mmask16) -1);
9524}
9525
275be1da
IT
9526extern __inline __mmask16
9527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9528_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9529{
9530 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9531 (__v16si) __Y, 1,
9532 (__mmask16) __M);
9533}
9534
d256b866
IT
9535extern __inline __mmask16
9536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9537_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9538{
9539 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9540 (__v16si) __Y, 1,
9541 (__mmask16) -1);
9542}
9543
275be1da
IT
9544extern __inline __mmask8
9545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9546_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9547{
9548 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9549 (__v8di) __Y, 1,
9550 (__mmask8) __M);
9551}
9552
d256b866
IT
9553extern __inline __mmask8
9554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9555_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9556{
9557 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9558 (__v8di) __Y, 1,
9559 (__mmask8) -1);
9560}
9561
275be1da
IT
9562extern __inline __mmask8
9563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9564_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9565{
9566 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9567 (__v8di) __Y, 1,
9568 (__mmask8) __M);
9569}
9570
d256b866
IT
9571extern __inline __mmask8
9572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9574{
9575 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9576 (__v8di) __Y, 1,
9577 (__mmask8) -1);
9578}
9579
9580extern __inline __mmask16
9581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9582_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9583{
9584 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9585 (__v16si) __Y, 4,
9586 (__mmask16) -1);
9587}
9588
275be1da
IT
9589extern __inline __mmask16
9590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9591_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9592{
9593 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9594 (__v16si) __Y, 4,
9595 (__mmask16) __M);
9596}
9597
9598extern __inline __mmask16
9599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9600_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9601{
9602 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9603 (__v16si) __Y, 4,
9604 (__mmask16) __M);
9605}
9606
d256b866
IT
9607extern __inline __mmask16
9608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9609_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9610{
9611 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9612 (__v16si) __Y, 4,
9613 (__mmask16) -1);
9614}
9615
275be1da
IT
9616extern __inline __mmask8
9617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 9618_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
275be1da
IT
9619{
9620 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9621 (__v8di) __Y, 4,
9622 (__mmask8) __M);
9623}
9624
d256b866
IT
9625extern __inline __mmask8
9626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9627_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9628{
9629 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9630 (__v8di) __Y, 4,
9631 (__mmask8) -1);
9632}
9633
275be1da
IT
9634extern __inline __mmask8
9635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9636_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9637{
9638 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9639 (__v8di) __Y, 4,
9640 (__mmask8) __M);
9641}
9642
d256b866
IT
9643extern __inline __mmask8
9644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9645_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9646{
9647 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9648 (__v8di) __Y, 4,
9649 (__mmask8) -1);
9650}
9651
756c5857
AI
/* Integer comparison predicate encodings.  The fixed-predicate wrappers
   defined earlier in this header pass the same values as literals
   (1 for cmplt, 2 for cmple, 4 for cmpneq, 5 for cmpge).  */
#define _MM_CMPINT_EQ 0x0
#define _MM_CMPINT_LT 0x1
#define _MM_CMPINT_LE 0x2
#define _MM_CMPINT_UNUSED 0x3
#define _MM_CMPINT_NE 0x4
/* NLT and GE are two names for the same encoding.  */
#define _MM_CMPINT_NLT 0x5
#define _MM_CMPINT_GE 0x5
/* NLE and GT are two names for the same encoding.  */
#define _MM_CMPINT_NLE 0x6
#define _MM_CMPINT_GT 0x6
9661
9662#ifdef __OPTIMIZE__
d8ea3e7c
AS
9663extern __inline __mmask16
9664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9665_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9666{
9667 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9668 (__mmask8) __B);
9669}
9670
9671extern __inline __mmask16
9672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9673_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9674{
9675 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9676 (__mmask8) __B);
9677}
9678
756c5857
AI
9679extern __inline __mmask8
9680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9681_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9682{
9683 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9684 (__v8di) __Y, __P,
9685 (__mmask8) -1);
9686}
9687
9688extern __inline __mmask16
9689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9690_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9691{
9692 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9693 (__v16si) __Y, __P,
9694 (__mmask16) -1);
9695}
9696
9697extern __inline __mmask8
9698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9699_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9700{
9701 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9702 (__v8di) __Y, __P,
9703 (__mmask8) -1);
9704}
9705
9706extern __inline __mmask16
9707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9708_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9709{
9710 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9711 (__v16si) __Y, __P,
9712 (__mmask16) -1);
9713}
9714
9715extern __inline __mmask8
9716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9717_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9718 const int __R)
9719{
9720 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9721 (__v8df) __Y, __P,
9722 (__mmask8) -1, __R);
9723}
9724
9725extern __inline __mmask16
9726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9727_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9728{
9729 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9730 (__v16sf) __Y, __P,
9731 (__mmask16) -1, __R);
9732}
9733
9734extern __inline __mmask8
9735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9736_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9737 const int __P)
9738{
9739 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9740 (__v8di) __Y, __P,
9741 (__mmask8) __U);
9742}
9743
9744extern __inline __mmask16
9745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9746_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9747 const int __P)
9748{
9749 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9750 (__v16si) __Y, __P,
9751 (__mmask16) __U);
9752}
9753
9754extern __inline __mmask8
9755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9756_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9757 const int __P)
9758{
9759 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9760 (__v8di) __Y, __P,
9761 (__mmask8) __U);
9762}
9763
9764extern __inline __mmask16
9765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9766_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9767 const int __P)
9768{
9769 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9770 (__v16si) __Y, __P,
9771 (__mmask16) __U);
9772}
9773
9774extern __inline __mmask8
9775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9776_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9777 const int __P, const int __R)
9778{
9779 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9780 (__v8df) __Y, __P,
9781 (__mmask8) __U, __R);
9782}
9783
9784extern __inline __mmask16
9785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9786_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9787 const int __P, const int __R)
9788{
9789 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9790 (__v16sf) __Y, __P,
9791 (__mmask16) __U, __R);
9792}
9793
9794extern __inline __mmask8
9795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9797{
9798 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9799 (__v2df) __Y, __P,
9800 (__mmask8) -1, __R);
9801}
9802
9803extern __inline __mmask8
9804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9806 const int __P, const int __R)
9807{
9808 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9809 (__v2df) __Y, __P,
9810 (__mmask8) __M, __R);
9811}
9812
9813extern __inline __mmask8
9814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9816{
9817 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9818 (__v4sf) __Y, __P,
9819 (__mmask8) -1, __R);
9820}
9821
9822extern __inline __mmask8
9823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9825 const int __P, const int __R)
9826{
9827 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9828 (__v4sf) __Y, __P,
9829 (__mmask8) __M, __R);
9830}
9831
9832#else
d8ea3e7c
AS
/* Macro form of _kshiftli_mask16, used when __OPTIMIZE__ is not defined.  */
#define _kshiftli_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
9835
/* Macro form of _kshiftri_mask16, used when __OPTIMIZE__ is not defined.  */
#define _kshiftri_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
9838
756c5857
AI
/* Macro form of _mm512_cmp_epi64_mask, used when __OPTIMIZE__ is not
   defined: forwards X, Y and P to __builtin_ia32_cmpq512_mask with an
   all-ones mask.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X), \
                                           (__v8di) (__m512i) (Y), \
                                           (int) (P), (__mmask8) -1))
9843
/* Macro form of _mm512_cmp_epi32_mask, used when __OPTIMIZE__ is not
   defined: forwards X, Y and P to __builtin_ia32_cmpd512_mask with an
   all-ones mask.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X), \
                                            (__v16si) (__m512i) (Y), \
                                            (int) (P), (__mmask16) -1))
756c5857
AI
9848
/* Macro form of _mm512_cmp_epu64_mask, used when __OPTIMIZE__ is not
   defined: forwards X, Y and P to __builtin_ia32_ucmpq512_mask with an
   all-ones mask.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X), \
                                            (__v8di) (__m512i) (Y), \
                                            (int) (P), (__mmask8) -1))
9853
/* Macro form of _mm512_cmp_epu32_mask, used when __OPTIMIZE__ is not
   defined: forwards X, Y and P to __builtin_ia32_ucmpd512_mask with an
   all-ones mask.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X), \
                                             (__v16si) (__m512i) (Y), \
                                             (int) (P), (__mmask16) -1))
756c5857 9858
/* Macro form of _mm512_cmp_round_pd_mask, used when __OPTIMIZE__ is not
   defined.  Every argument, including R, is parenthesized so that an
   expression argument expands as a single operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), (__mmask8) -1, (R)))
9863
/* Macro form of _mm512_cmp_round_ps_mask, used when __OPTIMIZE__ is not
   defined.  Every argument, including R, is parenthesized so that an
   expression argument expands as a single operand.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), (__mmask16) -1, (R)))
9868
/* Macro form of _mm512_mask_cmp_epi64_mask, used when __OPTIMIZE__ is not
   defined.  M is parenthesized before the __mmask8 cast so the cast
   applies to the whole mask expression (e.g. `m >> 8`), not just its
   first operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X), \
                                           (__v8di) (__m512i) (Y), \
                                           (int) (P), (__mmask8) (M)))
9873
383321ec
UB
/* Macro form of _mm512_mask_cmp_epi32_mask, used when __OPTIMIZE__ is not
   defined.  M is parenthesized before the __mmask16 cast so the cast
   applies to the whole mask expression, not just its first operand.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X), \
                                            (__v16si) (__m512i) (Y), \
                                            (int) (P), (__mmask16) (M)))
756c5857 9878
/* Macro form of _mm512_mask_cmp_epu64_mask, used when __OPTIMIZE__ is not
   defined.  M is parenthesized before the __mmask8 cast so the cast
   applies to the whole mask expression, not just its first operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X), \
                                            (__v8di) (__m512i) (Y), \
                                            (int) (P), (__mmask8) (M)))
9883
383321ec
UB
/* Macro form of _mm512_mask_cmp_epu32_mask, used when __OPTIMIZE__ is not
   defined.  M is parenthesized before the __mmask16 cast so the cast
   applies to the whole mask expression, not just its first operand.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X), \
                                             (__v16si) (__m512i) (Y), \
                                             (int) (P), (__mmask16) (M)))
756c5857 9888
/* Macro form of _mm512_mask_cmp_round_pd_mask, used when __OPTIMIZE__ is
   not defined.  M and R are parenthesized so the __mmask8 cast covers the
   whole mask expression and R expands as a single operand.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X), \
                                            (__v8df) (__m512d) (Y), \
                                            (int) (P), (__mmask8) (M), (R)))
9893
/* Macro form of _mm512_mask_cmp_round_ps_mask, used when __OPTIMIZE__ is
   not defined.  M and R are parenthesized so the __mmask16 cast covers
   the whole mask expression and R expands as a single operand.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (Y), \
                                             (int) (P), (__mmask16) (M), (R)))
9898
/* Macro form of _mm_cmp_round_sd_mask, used when __OPTIMIZE__ is not
   defined.  Every argument, including R, is parenthesized so that an
   expression argument expands as a single operand.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), (__mmask8) -1, (R)))
9903
/* Macro form of _mm_mask_cmp_round_sd_mask, used when __OPTIMIZE__ is not
   defined.  M is cast to __mmask8 as in the inline version, and R is
   parenthesized so it expands as a single operand.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), (__mmask8) (M), (R)))
9908
/* Macro form of _mm_cmp_round_ss_mask, used when __OPTIMIZE__ is not
   defined.  Every argument, including R, is parenthesized so that an
   expression argument expands as a single operand.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), (__mmask8) -1, (R)))
9913
/* Macro form of _mm_mask_cmp_round_ss_mask, used when __OPTIMIZE__ is not
   defined.  M is cast to __mmask8 as in the inline version, and R is
   parenthesized so it expands as a single operand.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), (__mmask8) (M), (R)))
9918#endif
9919
9920#ifdef __OPTIMIZE__
9921extern __inline __m512
9922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9923_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 9924{
b5fd0b71
JJ
9925 __m512 __v1_old = _mm512_undefined_ps ();
9926 __mmask16 __mask = 0xFFFF;
756c5857 9927
b5fd0b71 9928 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9929 __addr,
9930 (__v16si) __index,
b5fd0b71 9931 __mask, __scale);
756c5857
AI
9932}
9933
9934extern __inline __m512
9935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
b5fd0b71 9936_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
0e171d54 9937 __m512i __index, void const *__addr, int __scale)
756c5857 9938{
b5fd0b71 9939 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
756c5857
AI
9940 __addr,
9941 (__v16si) __index,
9942 __mask, __scale);
9943}
9944
9945extern __inline __m512d
9946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9947_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
756c5857 9948{
b5fd0b71
JJ
9949 __m512d __v1_old = _mm512_undefined_pd ();
9950 __mmask8 __mask = 0xFF;
756c5857 9951
b5fd0b71 9952 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
756c5857 9953 __addr,
b5fd0b71 9954 (__v8si) __index, __mask,
756c5857
AI
9955 __scale);
9956}
9957
9958extern __inline __m512d
9959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 9961 __m256i __index, void const *__addr, int __scale)
756c5857
AI
9962{
9963 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9964 __addr,
9965 (__v8si) __index,
9966 __mask, __scale);
9967}
9968
9969extern __inline __m256
9970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9971_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
756c5857 9972{
b5fd0b71
JJ
9973 __m256 __v1_old = _mm256_undefined_ps ();
9974 __mmask8 __mask = 0xFF;
756c5857 9975
b5fd0b71 9976 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
756c5857 9977 __addr,
b5fd0b71 9978 (__v8di) __index, __mask,
756c5857
AI
9979 __scale);
9980}
9981
9982extern __inline __m256
9983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9984_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
0e171d54 9985 __m512i __index, void const *__addr, int __scale)
756c5857
AI
9986{
9987 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9988 __addr,
9989 (__v8di) __index,
9990 __mask, __scale);
9991}
9992
9993extern __inline __m512d
9994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 9995_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
756c5857 9996{
b5fd0b71
JJ
9997 __m512d __v1_old = _mm512_undefined_pd ();
9998 __mmask8 __mask = 0xFF;
756c5857 9999
b5fd0b71 10000 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
756c5857 10001 __addr,
b5fd0b71 10002 (__v8di) __index, __mask,
756c5857
AI
10003 __scale);
10004}
10005
10006extern __inline __m512d
10007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10008_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
0e171d54 10009 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10010{
10011 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10012 __addr,
10013 (__v8di) __index,
10014 __mask, __scale);
10015}
10016
10017extern __inline __m512i
10018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10019_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10020{
b5fd0b71
JJ
10021 __m512i __v1_old = _mm512_undefined_epi32 ();
10022 __mmask16 __mask = 0xFFFF;
756c5857 10023
b5fd0b71 10024 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
756c5857
AI
10025 __addr,
10026 (__v16si) __index,
b5fd0b71 10027 __mask, __scale);
756c5857
AI
10028}
10029
10030extern __inline __m512i
10031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10032_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
0e171d54 10033 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10034{
10035 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10036 __addr,
10037 (__v16si) __index,
10038 __mask, __scale);
10039}
10040
10041extern __inline __m512i
10042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10043_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
756c5857 10044{
b5fd0b71
JJ
10045 __m512i __v1_old = _mm512_undefined_epi32 ();
10046 __mmask8 __mask = 0xFF;
756c5857 10047
b5fd0b71 10048 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
756c5857 10049 __addr,
b5fd0b71 10050 (__v8si) __index, __mask,
756c5857
AI
10051 __scale);
10052}
10053
10054extern __inline __m512i
10055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10056_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10057 __m256i __index, void const *__addr,
756c5857
AI
10058 int __scale)
10059{
10060 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10061 __addr,
10062 (__v8si) __index,
10063 __mask, __scale);
10064}
10065
10066extern __inline __m256i
10067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10068_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
756c5857 10069{
b5fd0b71
JJ
10070 __m256i __v1_old = _mm256_undefined_si256 ();
10071 __mmask8 __mask = 0xFF;
756c5857 10072
b5fd0b71 10073 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
756c5857
AI
10074 __addr,
10075 (__v8di) __index,
b5fd0b71 10076 __mask, __scale);
756c5857
AI
10077}
10078
10079extern __inline __m256i
10080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
0e171d54 10082 __m512i __index, void const *__addr, int __scale)
756c5857
AI
10083{
10084 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10085 __addr,
10086 (__v8di) __index,
10087 __mask, __scale);
10088}
10089
10090extern __inline __m512i
10091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10092_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
756c5857 10093{
b5fd0b71
JJ
10094 __m512i __v1_old = _mm512_undefined_epi32 ();
10095 __mmask8 __mask = 0xFF;
756c5857 10096
b5fd0b71 10097 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
756c5857 10098 __addr,
b5fd0b71 10099 (__v8di) __index, __mask,
756c5857
AI
10100 __scale);
10101}
10102
10103extern __inline __m512i
10104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
0e171d54 10106 __m512i __index, void const *__addr,
756c5857
AI
10107 int __scale)
10108{
10109 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10110 __addr,
10111 (__v8di) __index,
10112 __mask, __scale);
10113}
10114
10115extern __inline void
10116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10117_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
756c5857
AI
10118{
10119 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10120 (__v16si) __index, (__v16sf) __v1, __scale);
10121}
10122
10123extern __inline void
10124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10125_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
756c5857
AI
10126 __m512i __index, __m512 __v1, int __scale)
10127{
10128 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10129 (__v16sf) __v1, __scale);
10130}
10131
10132extern __inline void
10133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10134_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
756c5857
AI
10135 int __scale)
10136{
10137 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10138 (__v8si) __index, (__v8df) __v1, __scale);
10139}
10140
10141extern __inline void
10142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10143_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10144 __m256i __index, __m512d __v1, int __scale)
10145{
10146 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10147 (__v8df) __v1, __scale);
10148}
10149
10150extern __inline void
10151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10152_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
756c5857
AI
10153{
10154 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10155 (__v8di) __index, (__v8sf) __v1, __scale);
10156}
10157
10158extern __inline void
10159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10160_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
756c5857
AI
10161 __m512i __index, __m256 __v1, int __scale)
10162{
10163 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10164 (__v8sf) __v1, __scale);
10165}
10166
10167extern __inline void
10168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10169_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
756c5857
AI
10170 int __scale)
10171{
10172 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10173 (__v8di) __index, (__v8df) __v1, __scale);
10174}
10175
10176extern __inline void
10177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10178_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
756c5857
AI
10179 __m512i __index, __m512d __v1, int __scale)
10180{
10181 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10182 (__v8df) __v1, __scale);
10183}
10184
10185extern __inline void
10186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10187_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10188 __m512i __v1, int __scale)
10189{
10190 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10191 (__v16si) __index, (__v16si) __v1, __scale);
10192}
10193
10194extern __inline void
10195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10196_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
756c5857
AI
10197 __m512i __index, __m512i __v1, int __scale)
10198{
10199 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10200 (__v16si) __v1, __scale);
10201}
10202
10203extern __inline void
10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10205_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
756c5857
AI
10206 __m512i __v1, int __scale)
10207{
10208 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10209 (__v8si) __index, (__v8di) __v1, __scale);
10210}
10211
10212extern __inline void
10213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10214_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10215 __m256i __index, __m512i __v1, int __scale)
10216{
10217 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10218 (__v8di) __v1, __scale);
10219}
10220
10221extern __inline void
10222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10223_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
756c5857
AI
10224 __m256i __v1, int __scale)
10225{
10226 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10227 (__v8di) __index, (__v8si) __v1, __scale);
10228}
10229
10230extern __inline void
10231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10232_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
756c5857
AI
10233 __m512i __index, __m256i __v1, int __scale)
10234{
10235 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10236 (__v8si) __v1, __scale);
10237}
10238
10239extern __inline void
10240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10241_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
756c5857
AI
10242 __m512i __v1, int __scale)
10243{
10244 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10245 (__v8di) __index, (__v8di) __v1, __scale);
10246}
10247
10248extern __inline void
10249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
0e171d54 10250_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
756c5857
AI
10251 __m512i __index, __m512i __v1, int __scale)
10252{
10253 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10254 (__v8di) __v1, __scale);
10255}
10256#else
/* Macro form of _mm512_i32gather_ps, used when __OPTIMIZE__ is not
   defined.  Arguments are parenthesized before casting so that, e.g.,
   `base + n` is offset in the caller's pointer type rather than as
   `(void const *) base + n`; the expansion itself is parenthesized so it
   can be used as an operand.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                          (void const *) (ADDR), \
                                          (__v16si) (__m512i) (INDEX), \
                                          (__mmask16) 0xFFFF, (int) (SCALE)))
10262
/* Macro form of _mm512_mask_i32gather_ps, used when __OPTIMIZE__ is not
   defined.  Every argument is parenthesized before its cast, and the
   expansion is parenthesized as a whole, so expression arguments and
   surrounding operators bind as intended.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) (__m512) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v16si) (__m512i) (INDEX), \
                                          (__mmask16) (MASK), (int) (SCALE)))
10268
/* Macro form of _mm512_i32gather_pd.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))
10274
/* Macro form of _mm512_mask_i32gather_pd.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
10280
/* Macro form of _mm512_i64gather_ps.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))
10286
/* Macro form of _mm512_mask_i64gather_ps.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
10292
/* Macro form of _mm512_i64gather_pd.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))
10298
/* Macro form of _mm512_mask_i64gather_pd.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
10304
/* Macro form of _mm512_i32gather_epi32.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si) \
                                           _mm512_undefined_epi32 (), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) 0xFFFF, \
                                           (int) (SCALE)))
10310
/* Macro form of _mm512_mask_i32gather_epi32.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), \
                                           (int) (SCALE)))
10316
/* Macro form of _mm512_i32gather_epi64.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di) \
                                          _mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))
10322
/* Macro form of _mm512_mask_i32gather_epi64.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
10328
/* Macro form of _mm512_i64gather_epi32.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si) \
                                           _mm256_undefined_si256 (), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) 0xFF, \
                                           (int) (SCALE)))
10334
/* Macro form of _mm512_mask_i64gather_epi32.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (void const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), \
                                           (int) (SCALE)))
10340
/* Macro form of _mm512_i64gather_epi64.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a whole;
   the expansion is wrapped so it acts as a single operand.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di) \
                                          _mm512_undefined_epi32 (), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, \
                                          (int) (SCALE)))
10346
/* Macro form of _mm512_mask_i64gather_epi64.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole; the expansion is wrapped so it acts as a single
   operand.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (void const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), \
                                          (int) (SCALE)))
10352
/* Macro form of _mm512_i32scatter_ps.  Each argument is parenthesized
   before it is cast, so a compound argument such as `p + 1' is
   converted as a whole rather than having the cast bind to `p'
   alone.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
10357
/* Macro form of _mm512_mask_i32scatter_ps.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
10362
/* Macro form of _mm512_i32scatter_pd.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a
   whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10367
/* Macro form of _mm512_mask_i32scatter_pd.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10372
/* Macro form of _mm512_i64scatter_ps.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a
   whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
10377
/* Macro form of _mm512_mask_i64scatter_ps.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  MASK is cast to __mmask8, the same mask type the
   unmasked _mm512_i64scatter_ps macro passes to this builtin.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
10382
/* Macro form of _mm512_i64scatter_pd.  Each argument is parenthesized
   before it is cast, so a compound argument is converted as a
   whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10387
/* Macro form of _mm512_mask_i64scatter_pd.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
10392
/* Macro form of _mm512_i32scatter_epi32.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
10397
/* Macro form of _mm512_mask_i32scatter_epi32.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
10402
/* Macro form of _mm512_i32scatter_epi64.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10407
/* Macro form of _mm512_mask_i32scatter_epi64.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10412
/* Macro form of _mm512_i64scatter_epi32.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
10417
/* Macro form of _mm512_mask_i64scatter_epi32.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
10422
/* Macro form of _mm512_i64scatter_epi64.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10427
/* Macro form of _mm512_mask_i64scatter_epi64.  Each argument is
   parenthesized before it is cast, so a compound argument is converted
   as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
10432#endif
10433
10434extern __inline __m512d
10435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10436_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10437{
10438 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10439 (__v8df) __W,
10440 (__mmask8) __U);
10441}
10442
10443extern __inline __m512d
10444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10445_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10446{
10447 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10448 (__v8df)
10449 _mm512_setzero_pd (),
10450 (__mmask8) __U);
10451}
10452
10453extern __inline void
10454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10455_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10456{
10457 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10458 (__mmask8) __U);
10459}
10460
10461extern __inline __m512
10462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10463_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10464{
10465 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10466 (__v16sf) __W,
10467 (__mmask16) __U);
10468}
10469
10470extern __inline __m512
10471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10472_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10473{
10474 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10475 (__v16sf)
10476 _mm512_setzero_ps (),
10477 (__mmask16) __U);
10478}
10479
10480extern __inline void
10481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10482_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10483{
10484 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10485 (__mmask16) __U);
10486}
10487
10488extern __inline __m512i
10489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10491{
10492 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10493 (__v8di) __W,
10494 (__mmask8) __U);
10495}
10496
10497extern __inline __m512i
10498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10500{
10501 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10502 (__v8di)
10503 _mm512_setzero_si512 (),
10504 (__mmask8) __U);
10505}
10506
10507extern __inline void
10508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10510{
10511 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10512 (__mmask8) __U);
10513}
10514
10515extern __inline __m512i
10516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10517_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10518{
10519 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10520 (__v16si) __W,
10521 (__mmask16) __U);
10522}
10523
10524extern __inline __m512i
10525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10526_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10527{
10528 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10529 (__v16si)
10530 _mm512_setzero_si512 (),
10531 (__mmask16) __U);
10532}
10533
10534extern __inline void
10535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10536_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10537{
10538 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10539 (__mmask16) __U);
10540}
10541
10542extern __inline __m512d
10543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10544_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10545{
10546 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10547 (__v8df) __W,
10548 (__mmask8) __U);
10549}
10550
10551extern __inline __m512d
10552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10553_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10554{
10555 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10556 (__v8df)
10557 _mm512_setzero_pd (),
10558 (__mmask8) __U);
10559}
10560
10561extern __inline __m512d
10562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10563_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10564{
10565 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10566 (__v8df) __W,
10567 (__mmask8) __U);
10568}
10569
10570extern __inline __m512d
10571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10572_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10573{
10574 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10575 (__v8df)
10576 _mm512_setzero_pd (),
10577 (__mmask8) __U);
10578}
10579
10580extern __inline __m512
10581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10582_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10583{
10584 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10585 (__v16sf) __W,
10586 (__mmask16) __U);
10587}
10588
10589extern __inline __m512
10590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10592{
10593 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10594 (__v16sf)
10595 _mm512_setzero_ps (),
10596 (__mmask16) __U);
10597}
10598
10599extern __inline __m512
10600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10602{
10603 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10604 (__v16sf) __W,
10605 (__mmask16) __U);
10606}
10607
10608extern __inline __m512
10609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10610_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10611{
10612 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10613 (__v16sf)
10614 _mm512_setzero_ps (),
10615 (__mmask16) __U);
10616}
10617
10618extern __inline __m512i
10619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10621{
10622 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10623 (__v8di) __W,
10624 (__mmask8) __U);
10625}
10626
10627extern __inline __m512i
10628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10629_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10630{
10631 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10632 (__v8di)
10633 _mm512_setzero_si512 (),
10634 (__mmask8) __U);
10635}
10636
10637extern __inline __m512i
10638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10639_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10640{
10641 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10642 (__v8di) __W,
10643 (__mmask8) __U);
10644}
10645
10646extern __inline __m512i
10647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10648_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10649{
10650 return (__m512i)
10651 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10652 (__v8di)
10653 _mm512_setzero_si512 (),
10654 (__mmask8) __U);
10655}
10656
10657extern __inline __m512i
10658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10659_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10660{
10661 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10662 (__v16si) __W,
10663 (__mmask16) __U);
10664}
10665
10666extern __inline __m512i
10667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10669{
10670 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10671 (__v16si)
10672 _mm512_setzero_si512 (),
10673 (__mmask16) __U);
10674}
10675
10676extern __inline __m512i
10677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10678_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10679{
10680 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10681 (__v16si) __W,
10682 (__mmask16) __U);
10683}
10684
10685extern __inline __m512i
10686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10687_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10688{
10689 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10690 (__v16si)
10691 _mm512_setzero_si512
10692 (), (__mmask16) __U);
10693}
10694
/* Mask arithmetic operations.  Each _k*_mask16 name is an object-like
   macro that expands to the _mm512_k* function of the same
   operation.  */
#define _kand_mask16    _mm512_kand
#define _kandn_mask16   _mm512_kandn
#define _knot_mask16    _mm512_knot
#define _kor_mask16     _mm512_kor
#define _kxnor_mask16   _mm512_kxnor
#define _kxor_mask16    _mm512_kxor
10702
dea06111
AS
10703extern __inline unsigned char
10704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10705_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10706{
10707 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10708 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10709}
10710
10711extern __inline unsigned char
10712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10714{
10715 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10716 (__mmask16) __B);
10717}
10718
10719extern __inline unsigned char
10720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10721_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10722{
10723 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10724 (__mmask16) __B);
10725}
10726
7cdb6e4c
AS
10727extern __inline unsigned int
10728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729_cvtmask16_u32 (__mmask16 __A)
10730{
10731 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10732}
10733
10734extern __inline __mmask16
10735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736_cvtu32_mask16 (unsigned int __A)
10737{
10738 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10739}
10740
10741extern __inline __mmask16
10742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10743_load_mask16 (__mmask16 *__A)
10744{
10745 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10746}
10747
10748extern __inline void
10749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10750_store_mask16 (__mmask16 *__A, __mmask16 __B)
10751{
10752 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10753}
10754
756c5857
AI
10755extern __inline __mmask16
10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757_mm512_kand (__mmask16 __A, __mmask16 __B)
10758{
10759 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10760}
10761
10762extern __inline __mmask16
10763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10764_mm512_kandn (__mmask16 __A, __mmask16 __B)
10765{
6901ea62
AS
10766 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10767 (__mmask16) __B);
756c5857
AI
10768}
10769
10770extern __inline __mmask16
10771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772_mm512_kor (__mmask16 __A, __mmask16 __B)
10773{
10774 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10775}
10776
10777extern __inline int
10778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10779_mm512_kortestz (__mmask16 __A, __mmask16 __B)
10780{
10781 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10782 (__mmask16) __B);
10783}
10784
10785extern __inline int
10786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787_mm512_kortestc (__mmask16 __A, __mmask16 __B)
10788{
10789 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10790 (__mmask16) __B);
10791}
10792
10793extern __inline __mmask16
10794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10795_mm512_kxnor (__mmask16 __A, __mmask16 __B)
10796{
10797 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10798}
10799
10800extern __inline __mmask16
10801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10802_mm512_kxor (__mmask16 __A, __mmask16 __B)
10803{
10804 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10805}
10806
10807extern __inline __mmask16
10808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10809_mm512_knot (__mmask16 __A)
10810{
10811 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10812}
10813
10814extern __inline __mmask16
10815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10816_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10817{
10818 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10819}
10820
6901ea62
AS
10821extern __inline __mmask16
10822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10823_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10824{
10825 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10826}
10827
756c5857
AI
10828#ifdef __OPTIMIZE__
10829extern __inline __m512i
10830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10831_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10832 const int __imm)
10833{
10834 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10835 (__v4si) __D,
10836 __imm,
10837 (__v16si)
10838 _mm512_setzero_si512 (),
10839 __B);
10840}
10841
10842extern __inline __m512
10843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10844_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10845 const int __imm)
10846{
10847 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10848 (__v4sf) __D,
10849 __imm,
10850 (__v16sf)
10851 _mm512_setzero_ps (), __B);
10852}
10853
10854extern __inline __m512i
10855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10856_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10857 __m128i __D, const int __imm)
10858{
10859 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10860 (__v4si) __D,
10861 __imm,
10862 (__v16si) __A,
10863 __B);
10864}
10865
10866extern __inline __m512
10867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10868_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10869 __m128 __D, const int __imm)
10870{
10871 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10872 (__v4sf) __D,
10873 __imm,
10874 (__v16sf) __A, __B);
10875}
10876#else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_insertf32x4_mask on X, Y, the immediate C, the vector
   returned by _mm512_setzero_ps () and the mask A.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
                                             (__v4sf)(__m128) (Y), \
                                             (int) (C), \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(A)))
756c5857
AI
10881
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_inserti32x4_mask on X, Y, the immediate C, the vector
   returned by _mm512_setzero_si512 () and the mask A.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
                                              (__v4si)(__m128i) (Y), \
                                              (int) (C), \
                                              (__v16si)_mm512_setzero_si512 (), \
                                              (__mmask16)(A)))
756c5857
AI
10886
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_insertf32x4_mask on X, Y, the immediate C, the vector
   A and the mask B.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
                                             (__v4sf)(__m128) (Y), \
                                             (int) (C), \
                                             (__v16sf)(__m512) (A), \
                                             (__mmask16)(B)))
756c5857
AI
10891
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_inserti32x4_mask on X, Y, the immediate C, the vector
   A and the mask B.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
                                              (__v4si)(__m128i) (Y), \
                                              (int) (C), \
                                              (__v16si)(__m512i) (A), \
                                              (__mmask16)(B)))
756c5857
AI
10896#endif
10897
10898extern __inline __m512i
10899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900_mm512_max_epi64 (__m512i __A, __m512i __B)
10901{
10902 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10903 (__v8di) __B,
10904 (__v8di)
4271e5cb 10905 _mm512_undefined_epi32 (),
756c5857
AI
10906 (__mmask8) -1);
10907}
10908
10909extern __inline __m512i
10910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10911_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10912{
10913 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10914 (__v8di) __B,
10915 (__v8di)
10916 _mm512_setzero_si512 (),
10917 __M);
10918}
10919
10920extern __inline __m512i
10921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10922_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10923{
10924 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10925 (__v8di) __B,
10926 (__v8di) __W, __M);
10927}
10928
10929extern __inline __m512i
10930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931_mm512_min_epi64 (__m512i __A, __m512i __B)
10932{
10933 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10934 (__v8di) __B,
10935 (__v8di)
4271e5cb 10936 _mm512_undefined_epi32 (),
756c5857
AI
10937 (__mmask8) -1);
10938}
10939
10940extern __inline __m512i
10941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10943{
10944 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10945 (__v8di) __B,
10946 (__v8di) __W, __M);
10947}
10948
10949extern __inline __m512i
10950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10951_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10952{
10953 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10954 (__v8di) __B,
10955 (__v8di)
10956 _mm512_setzero_si512 (),
10957 __M);
10958}
10959
10960extern __inline __m512i
10961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10962_mm512_max_epu64 (__m512i __A, __m512i __B)
10963{
10964 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10965 (__v8di) __B,
10966 (__v8di)
4271e5cb 10967 _mm512_undefined_epi32 (),
756c5857
AI
10968 (__mmask8) -1);
10969}
10970
10971extern __inline __m512i
10972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10974{
10975 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10976 (__v8di) __B,
10977 (__v8di)
10978 _mm512_setzero_si512 (),
10979 __M);
10980}
10981
10982extern __inline __m512i
10983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10985{
10986 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10987 (__v8di) __B,
10988 (__v8di) __W, __M);
10989}
10990
10991extern __inline __m512i
10992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993_mm512_min_epu64 (__m512i __A, __m512i __B)
10994{
10995 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10996 (__v8di) __B,
10997 (__v8di)
4271e5cb 10998 _mm512_undefined_epi32 (),
756c5857
AI
10999 (__mmask8) -1);
11000}
11001
11002extern __inline __m512i
11003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11004_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11005{
11006 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11007 (__v8di) __B,
11008 (__v8di) __W, __M);
11009}
11010
11011extern __inline __m512i
11012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11014{
11015 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11016 (__v8di) __B,
11017 (__v8di)
11018 _mm512_setzero_si512 (),
11019 __M);
11020}
11021
11022extern __inline __m512i
11023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024_mm512_max_epi32 (__m512i __A, __m512i __B)
11025{
11026 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11027 (__v16si) __B,
11028 (__v16si)
4271e5cb 11029 _mm512_undefined_epi32 (),
756c5857
AI
11030 (__mmask16) -1);
11031}
11032
11033extern __inline __m512i
11034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11036{
11037 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11038 (__v16si) __B,
11039 (__v16si)
11040 _mm512_setzero_si512 (),
11041 __M);
11042}
11043
11044extern __inline __m512i
11045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11047{
11048 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11049 (__v16si) __B,
11050 (__v16si) __W, __M);
11051}
11052
11053extern __inline __m512i
11054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11055_mm512_min_epi32 (__m512i __A, __m512i __B)
11056{
11057 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11058 (__v16si) __B,
11059 (__v16si)
4271e5cb 11060 _mm512_undefined_epi32 (),
756c5857
AI
11061 (__mmask16) -1);
11062}
11063
11064extern __inline __m512i
11065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11066_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11067{
11068 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11069 (__v16si) __B,
11070 (__v16si)
11071 _mm512_setzero_si512 (),
11072 __M);
11073}
11074
11075extern __inline __m512i
11076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11078{
11079 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11080 (__v16si) __B,
11081 (__v16si) __W, __M);
11082}
11083
11084extern __inline __m512i
11085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11086_mm512_max_epu32 (__m512i __A, __m512i __B)
11087{
11088 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11089 (__v16si) __B,
11090 (__v16si)
4271e5cb 11091 _mm512_undefined_epi32 (),
756c5857
AI
11092 (__mmask16) -1);
11093}
11094
11095extern __inline __m512i
11096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11097_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11098{
11099 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11100 (__v16si) __B,
11101 (__v16si)
11102 _mm512_setzero_si512 (),
11103 __M);
11104}
11105
11106extern __inline __m512i
11107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11108_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11109{
11110 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11111 (__v16si) __B,
11112 (__v16si) __W, __M);
11113}
11114
11115extern __inline __m512i
11116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117_mm512_min_epu32 (__m512i __A, __m512i __B)
11118{
11119 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11120 (__v16si) __B,
11121 (__v16si)
4271e5cb 11122 _mm512_undefined_epi32 (),
756c5857
AI
11123 (__mmask16) -1);
11124}
11125
11126extern __inline __m512i
11127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11128_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11129{
11130 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11131 (__v16si) __B,
11132 (__v16si)
11133 _mm512_setzero_si512 (),
11134 __M);
11135}
11136
11137extern __inline __m512i
11138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11140{
11141 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11142 (__v16si) __B,
11143 (__v16si) __W, __M);
11144}
11145
11146extern __inline __m512
11147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148_mm512_unpacklo_ps (__m512 __A, __m512 __B)
11149{
11150 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11151 (__v16sf) __B,
11152 (__v16sf)
0b192937 11153 _mm512_undefined_ps (),
756c5857
AI
11154 (__mmask16) -1);
11155}
11156
11157extern __inline __m512
11158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11160{
11161 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11162 (__v16sf) __B,
11163 (__v16sf) __W,
11164 (__mmask16) __U);
11165}
11166
11167extern __inline __m512
11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11170{
11171 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11172 (__v16sf) __B,
11173 (__v16sf)
11174 _mm512_setzero_ps (),
11175 (__mmask16) __U);
11176}
11177
075691af
AI
11178#ifdef __OPTIMIZE__
11179extern __inline __m128d
11180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11181_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
11182{
11183 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
11184 (__v2df) __B,
11185 __R);
11186}
11187
f4ee3a9e
UB
11188extern __inline __m128d
11189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11190_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11191 __m128d __B, const int __R)
11192{
11193 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11194 (__v2df) __B,
11195 (__v2df) __W,
11196 (__mmask8) __U, __R);
11197}
11198
11199extern __inline __m128d
11200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11201_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11202 const int __R)
11203{
11204 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11205 (__v2df) __B,
11206 (__v2df)
11207 _mm_setzero_pd (),
11208 (__mmask8) __U, __R);
11209}
11210
075691af
AI
11211extern __inline __m128
11212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
11214{
11215 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
11216 (__v4sf) __B,
11217 __R);
11218}
11219
f4ee3a9e
UB
11220extern __inline __m128
11221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11222_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11223 __m128 __B, const int __R)
11224{
11225 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11226 (__v4sf) __B,
11227 (__v4sf) __W,
11228 (__mmask8) __U, __R);
11229}
11230
11231extern __inline __m128
11232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11233_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11234 const int __R)
11235{
11236 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11237 (__v4sf) __B,
11238 (__v4sf)
11239 _mm_setzero_ps (),
11240 (__mmask8) __U, __R);
11241}
11242
075691af
AI
11243extern __inline __m128d
11244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11245_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
11246{
11247 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
11248 (__v2df) __B,
11249 __R);
11250}
11251
f4ee3a9e
UB
11252extern __inline __m128d
11253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11254_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11255 __m128d __B, const int __R)
11256{
11257 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11258 (__v2df) __B,
11259 (__v2df) __W,
11260 (__mmask8) __U, __R);
11261}
11262
11263extern __inline __m128d
11264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11265_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11266 const int __R)
11267{
11268 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11269 (__v2df) __B,
11270 (__v2df)
11271 _mm_setzero_pd (),
11272 (__mmask8) __U, __R);
11273}
11274
075691af
AI
11275extern __inline __m128
11276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11277_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
11278{
11279 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
11280 (__v4sf) __B,
11281 __R);
11282}
11283
f4ee3a9e
UB
11284extern __inline __m128
11285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11286_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11287 __m128 __B, const int __R)
11288{
11289 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11290 (__v4sf) __B,
11291 (__v4sf) __W,
11292 (__mmask8) __U, __R);
11293}
11294
11295extern __inline __m128
11296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11297_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11298 const int __R)
11299{
11300 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11301 (__v4sf) __B,
11302 (__v4sf)
11303 _mm_setzero_ps (),
11304 (__mmask8) __U, __R);
11305}
11306
075691af
AI
11307#else
/* Macro forms of the scalar max/min rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Arguments are passed to the builtins
   unchanged; the masked variants reorder them to (A, B, W, U, C) and the
   zero-masked variants substitute _mm_setzero_pd () / _mm_setzero_ps ()
   for W.

   Every expansion is enclosed in parentheses.  Without them the leading
   cast would apply after any postfix operator written behind the macro
   call (e.g. a vector subscript), so the macro and inline-function forms
   would not be interchangeable.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
    ((__m128d) __builtin_ia32_maxsd_mask_round (A, B, W, U, C))

#define _mm_maskz_max_round_sd(U, A, B, C)   \
    ((__m128d) __builtin_ia32_maxsd_mask_round (A, B,			\
						(__v2df) _mm_setzero_pd (), \
						U, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
    ((__m128) __builtin_ia32_maxss_mask_round (A, B, W, U, C))

#define _mm_maskz_max_round_ss(U, A, B, C)   \
    ((__m128) __builtin_ia32_maxss_mask_round (A, B,			\
					       (__v4sf) _mm_setzero_ps (), \
					       U, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
    ((__m128d) __builtin_ia32_minsd_mask_round (A, B, W, U, C))

#define _mm_maskz_min_round_sd(U, A, B, C)   \
    ((__m128d) __builtin_ia32_minsd_mask_round (A, B,			\
						(__v2df) _mm_setzero_pd (), \
						U, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
    ((__m128) __builtin_ia32_minss_mask_round (A, B, W, U, C))

#define _mm_maskz_min_round_ss(U, A, B, C)   \
    ((__m128) __builtin_ia32_minss_mask_round (A, B,			\
					       (__v4sf) _mm_setzero_ps (), \
					       U, C))
11343
075691af
AI
11344#endif
11345
756c5857
AI
11346extern __inline __m512d
11347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11348_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11349{
11350 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11351 (__v8df) __W,
11352 (__mmask8) __U);
11353}
11354
11355extern __inline __m512
11356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11358{
11359 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11360 (__v16sf) __W,
11361 (__mmask16) __U);
11362}
11363
11364extern __inline __m512i
11365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11366_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11367{
11368 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11369 (__v8di) __W,
11370 (__mmask8) __U);
11371}
11372
11373extern __inline __m512i
11374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11375_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11376{
11377 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11378 (__v16si) __W,
11379 (__mmask16) __U);
11380}
11381
075691af
AI
11382#ifdef __OPTIMIZE__
11383extern __inline __m128d
11384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11385_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11386{
11387 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11388 (__v2df) __A,
11389 (__v2df) __B,
11390 __R);
11391}
11392
11393extern __inline __m128
11394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11395_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11396{
11397 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11398 (__v4sf) __A,
11399 (__v4sf) __B,
11400 __R);
11401}
11402
11403extern __inline __m128d
11404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11405_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11406{
11407 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11408 (__v2df) __A,
11409 -(__v2df) __B,
11410 __R);
11411}
11412
11413extern __inline __m128
11414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11415_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11416{
11417 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11418 (__v4sf) __A,
11419 -(__v4sf) __B,
11420 __R);
11421}
11422
11423extern __inline __m128d
11424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11425_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11426{
11427 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11428 -(__v2df) __A,
11429 (__v2df) __B,
11430 __R);
11431}
11432
11433extern __inline __m128
11434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11435_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11436{
11437 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11438 -(__v4sf) __A,
11439 (__v4sf) __B,
11440 __R);
11441}
11442
11443extern __inline __m128d
11444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11445_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11446{
11447 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11448 -(__v2df) __A,
11449 -(__v2df) __B,
11450 __R);
11451}
11452
11453extern __inline __m128
11454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11456{
11457 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11458 -(__v4sf) __A,
11459 -(__v4sf) __B,
11460 __R);
11461}
11462#else
/* Macro forms of the scalar fused multiply-add rounding intrinsics, used
   when __OPTIMIZE__ is not defined.  All eight map onto the vfmaddsd3 /
   vfmaddss3 round builtins; the fmsub, fnmadd and fnmsub variants negate
   the third, the second, or both of those operands respectively.

   Every expansion is enclosed in parentheses.  Without them the leading
   cast would apply after any postfix operator written behind the macro
   call (e.g. a vector subscript), so the macro and inline-function forms
   would not be interchangeable.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
11486#endif
11487
756c5857
AI
11488#ifdef __OPTIMIZE__
11489extern __inline int
11490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11491_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
11492{
11493 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
11494}
11495
11496extern __inline int
11497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11498_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
11499{
11500 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
11501}
11502#else
/* Macro forms of the comi rounding intrinsics, used when __OPTIMIZE__ is
   not defined: all four arguments go straight to the builtin.  */
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss (A, B, C, D))

#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd (A, B, C, D))
11507#endif
11508
11509extern __inline __m512d
11510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511_mm512_sqrt_pd (__m512d __A)
11512{
11513 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11514 (__v8df)
0b192937 11515 _mm512_undefined_pd (),
756c5857
AI
11516 (__mmask8) -1,
11517 _MM_FROUND_CUR_DIRECTION);
11518}
11519
11520extern __inline __m512d
11521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11523{
11524 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11525 (__v8df) __W,
11526 (__mmask8) __U,
11527 _MM_FROUND_CUR_DIRECTION);
11528}
11529
11530extern __inline __m512d
11531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11533{
11534 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11535 (__v8df)
11536 _mm512_setzero_pd (),
11537 (__mmask8) __U,
11538 _MM_FROUND_CUR_DIRECTION);
11539}
11540
11541extern __inline __m512
11542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543_mm512_sqrt_ps (__m512 __A)
11544{
11545 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11546 (__v16sf)
0b192937 11547 _mm512_undefined_ps (),
756c5857
AI
11548 (__mmask16) -1,
11549 _MM_FROUND_CUR_DIRECTION);
11550}
11551
11552extern __inline __m512
11553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11555{
11556 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11557 (__v16sf) __W,
11558 (__mmask16) __U,
11559 _MM_FROUND_CUR_DIRECTION);
11560}
11561
11562extern __inline __m512
11563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11565{
11566 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11567 (__v16sf)
11568 _mm512_setzero_ps (),
11569 (__mmask16) __U,
11570 _MM_FROUND_CUR_DIRECTION);
11571}
11572
11573extern __inline __m512d
11574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11575_mm512_add_pd (__m512d __A, __m512d __B)
11576{
2069d6fc 11577 return (__m512d) ((__v8df)__A + (__v8df)__B);
756c5857
AI
11578}
11579
11580extern __inline __m512d
11581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11582_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11583{
11584 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11585 (__v8df) __B,
11586 (__v8df) __W,
11587 (__mmask8) __U,
11588 _MM_FROUND_CUR_DIRECTION);
11589}
11590
11591extern __inline __m512d
11592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11593_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11594{
11595 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11596 (__v8df) __B,
11597 (__v8df)
11598 _mm512_setzero_pd (),
11599 (__mmask8) __U,
11600 _MM_FROUND_CUR_DIRECTION);
11601}
11602
11603extern __inline __m512
11604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11605_mm512_add_ps (__m512 __A, __m512 __B)
11606{
2069d6fc 11607 return (__m512) ((__v16sf)__A + (__v16sf)__B);
756c5857
AI
11608}
11609
11610extern __inline __m512
11611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11613{
11614 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11615 (__v16sf) __B,
11616 (__v16sf) __W,
11617 (__mmask16) __U,
11618 _MM_FROUND_CUR_DIRECTION);
11619}
11620
11621extern __inline __m512
11622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11624{
11625 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11626 (__v16sf) __B,
11627 (__v16sf)
11628 _mm512_setzero_ps (),
11629 (__mmask16) __U,
11630 _MM_FROUND_CUR_DIRECTION);
11631}
11632
1853f5c7
SP
11633extern __inline __m128d
11634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11635_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11636{
11637 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11638 (__v2df) __B,
11639 (__v2df) __W,
11640 (__mmask8) __U,
11641 _MM_FROUND_CUR_DIRECTION);
11642}
11643
11644extern __inline __m128d
11645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11646_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11647{
11648 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11649 (__v2df) __B,
11650 (__v2df)
11651 _mm_setzero_pd (),
11652 (__mmask8) __U,
11653 _MM_FROUND_CUR_DIRECTION);
11654}
11655
11656extern __inline __m128
11657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11658_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11659{
11660 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11661 (__v4sf) __B,
11662 (__v4sf) __W,
11663 (__mmask8) __U,
11664 _MM_FROUND_CUR_DIRECTION);
11665}
11666
11667extern __inline __m128
11668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11669_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11670{
11671 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11672 (__v4sf) __B,
11673 (__v4sf)
11674 _mm_setzero_ps (),
11675 (__mmask8) __U,
11676 _MM_FROUND_CUR_DIRECTION);
11677}
11678
756c5857
AI
11679extern __inline __m512d
11680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681_mm512_sub_pd (__m512d __A, __m512d __B)
11682{
2069d6fc 11683 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
11684}
11685
11686extern __inline __m512d
11687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11689{
11690 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11691 (__v8df) __B,
11692 (__v8df) __W,
11693 (__mmask8) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11695}
11696
11697extern __inline __m512d
11698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11700{
11701 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11702 (__v8df) __B,
11703 (__v8df)
11704 _mm512_setzero_pd (),
11705 (__mmask8) __U,
11706 _MM_FROUND_CUR_DIRECTION);
11707}
11708
11709extern __inline __m512
11710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11711_mm512_sub_ps (__m512 __A, __m512 __B)
11712{
2069d6fc 11713 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
11714}
11715
11716extern __inline __m512
11717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11719{
11720 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11721 (__v16sf) __B,
11722 (__v16sf) __W,
11723 (__mmask16) __U,
11724 _MM_FROUND_CUR_DIRECTION);
11725}
11726
11727extern __inline __m512
11728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11729_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11730{
11731 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11732 (__v16sf) __B,
11733 (__v16sf)
11734 _mm512_setzero_ps (),
11735 (__mmask16) __U,
11736 _MM_FROUND_CUR_DIRECTION);
11737}
11738
1853f5c7
SP
11739extern __inline __m128d
11740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11741_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11742{
11743 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11744 (__v2df) __B,
11745 (__v2df) __W,
11746 (__mmask8) __U,
11747 _MM_FROUND_CUR_DIRECTION);
11748}
11749
11750extern __inline __m128d
11751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11752_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11753{
11754 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11755 (__v2df) __B,
11756 (__v2df)
11757 _mm_setzero_pd (),
11758 (__mmask8) __U,
11759 _MM_FROUND_CUR_DIRECTION);
11760}
11761
11762extern __inline __m128
11763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11765{
11766 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11767 (__v4sf) __B,
11768 (__v4sf) __W,
11769 (__mmask8) __U,
11770 _MM_FROUND_CUR_DIRECTION);
11771}
11772
11773extern __inline __m128
11774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11775_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11776{
11777 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11778 (__v4sf) __B,
11779 (__v4sf)
11780 _mm_setzero_ps (),
11781 (__mmask8) __U,
11782 _MM_FROUND_CUR_DIRECTION);
11783}
11784
756c5857
AI
11785extern __inline __m512d
11786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787_mm512_mul_pd (__m512d __A, __m512d __B)
11788{
2069d6fc 11789 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
11790}
11791
11792extern __inline __m512d
11793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11794_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11795{
11796 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11797 (__v8df) __B,
11798 (__v8df) __W,
11799 (__mmask8) __U,
11800 _MM_FROUND_CUR_DIRECTION);
11801}
11802
11803extern __inline __m512d
11804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11806{
11807 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11808 (__v8df) __B,
11809 (__v8df)
11810 _mm512_setzero_pd (),
11811 (__mmask8) __U,
11812 _MM_FROUND_CUR_DIRECTION);
11813}
11814
11815extern __inline __m512
11816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11817_mm512_mul_ps (__m512 __A, __m512 __B)
11818{
2069d6fc 11819 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
11820}
11821
11822extern __inline __m512
11823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11825{
11826 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11827 (__v16sf) __B,
11828 (__v16sf) __W,
11829 (__mmask16) __U,
11830 _MM_FROUND_CUR_DIRECTION);
11831}
11832
11833extern __inline __m512
11834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11836{
11837 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11838 (__v16sf) __B,
11839 (__v16sf)
11840 _mm512_setzero_ps (),
11841 (__mmask16) __U,
11842 _MM_FROUND_CUR_DIRECTION);
11843}
11844
f4ee3a9e
UB
11845extern __inline __m128d
11846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11847_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11848 __m128d __B)
11849{
11850 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11851 (__v2df) __B,
11852 (__v2df) __W,
11853 (__mmask8) __U,
11854 _MM_FROUND_CUR_DIRECTION);
11855}
11856
11857extern __inline __m128d
11858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11860{
11861 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11862 (__v2df) __B,
11863 (__v2df)
11864 _mm_setzero_pd (),
11865 (__mmask8) __U,
11866 _MM_FROUND_CUR_DIRECTION);
11867}
11868
11869extern __inline __m128
11870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11871_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11872 __m128 __B)
11873{
11874 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11875 (__v4sf) __B,
11876 (__v4sf) __W,
11877 (__mmask8) __U,
11878 _MM_FROUND_CUR_DIRECTION);
11879}
11880
11881extern __inline __m128
11882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11883_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11884{
11885 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11886 (__v4sf) __B,
11887 (__v4sf)
11888 _mm_setzero_ps (),
11889 (__mmask8) __U,
11890 _MM_FROUND_CUR_DIRECTION);
11891}
11892
756c5857
AI
11893extern __inline __m512d
11894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11895_mm512_div_pd (__m512d __M, __m512d __V)
11896{
2069d6fc 11897 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
11898}
11899
11900extern __inline __m512d
11901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11902_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11903{
11904 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11905 (__v8df) __V,
11906 (__v8df) __W,
11907 (__mmask8) __U,
11908 _MM_FROUND_CUR_DIRECTION);
11909}
11910
11911extern __inline __m512d
11912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11914{
11915 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11916 (__v8df) __V,
11917 (__v8df)
11918 _mm512_setzero_pd (),
11919 (__mmask8) __U,
11920 _MM_FROUND_CUR_DIRECTION);
11921}
11922
11923extern __inline __m512
11924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11925_mm512_div_ps (__m512 __A, __m512 __B)
11926{
2069d6fc 11927 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
11928}
11929
11930extern __inline __m512
11931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11933{
11934 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11935 (__v16sf) __B,
11936 (__v16sf) __W,
11937 (__mmask16) __U,
11938 _MM_FROUND_CUR_DIRECTION);
11939}
11940
11941extern __inline __m512
11942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11943_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11944{
11945 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11946 (__v16sf) __B,
11947 (__v16sf)
11948 _mm512_setzero_ps (),
11949 (__mmask16) __U,
11950 _MM_FROUND_CUR_DIRECTION);
11951}
11952
f4ee3a9e
UB
11953extern __inline __m128d
11954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11955_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11956 __m128d __B)
11957{
11958 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11959 (__v2df) __B,
11960 (__v2df) __W,
11961 (__mmask8) __U,
11962 _MM_FROUND_CUR_DIRECTION);
11963}
11964
11965extern __inline __m128d
11966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11968{
11969 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11970 (__v2df) __B,
11971 (__v2df)
11972 _mm_setzero_pd (),
11973 (__mmask8) __U,
11974 _MM_FROUND_CUR_DIRECTION);
11975}
11976
11977extern __inline __m128
11978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11980 __m128 __B)
11981{
11982 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11983 (__v4sf) __B,
11984 (__v4sf) __W,
11985 (__mmask8) __U,
11986 _MM_FROUND_CUR_DIRECTION);
11987}
11988
11989extern __inline __m128
11990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11991_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11992{
11993 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11994 (__v4sf) __B,
11995 (__v4sf)
11996 _mm_setzero_ps (),
11997 (__mmask8) __U,
11998 _MM_FROUND_CUR_DIRECTION);
11999}
12000
756c5857
AI
12001extern __inline __m512d
12002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003_mm512_max_pd (__m512d __A, __m512d __B)
12004{
12005 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12006 (__v8df) __B,
12007 (__v8df)
0b192937 12008 _mm512_undefined_pd (),
756c5857
AI
12009 (__mmask8) -1,
12010 _MM_FROUND_CUR_DIRECTION);
12011}
12012
12013extern __inline __m512d
12014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12015_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12016{
12017 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12018 (__v8df) __B,
12019 (__v8df) __W,
12020 (__mmask8) __U,
12021 _MM_FROUND_CUR_DIRECTION);
12022}
12023
12024extern __inline __m512d
12025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
12027{
12028 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12029 (__v8df) __B,
12030 (__v8df)
12031 _mm512_setzero_pd (),
12032 (__mmask8) __U,
12033 _MM_FROUND_CUR_DIRECTION);
12034}
12035
12036extern __inline __m512
12037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12038_mm512_max_ps (__m512 __A, __m512 __B)
12039{
12040 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12041 (__v16sf) __B,
12042 (__v16sf)
0b192937 12043 _mm512_undefined_ps (),
756c5857
AI
12044 (__mmask16) -1,
12045 _MM_FROUND_CUR_DIRECTION);
12046}
12047
12048extern __inline __m512
12049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12050_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12051{
12052 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12053 (__v16sf) __B,
12054 (__v16sf) __W,
12055 (__mmask16) __U,
12056 _MM_FROUND_CUR_DIRECTION);
12057}
12058
12059extern __inline __m512
12060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12061_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
12062{
12063 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12064 (__v16sf) __B,
12065 (__v16sf)
12066 _mm512_setzero_ps (),
12067 (__mmask16) __U,
12068 _MM_FROUND_CUR_DIRECTION);
12069}
12070
dc7401c0
SP
12071extern __inline __m128d
12072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12074{
12075 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12076 (__v2df) __B,
12077 (__v2df) __W,
12078 (__mmask8) __U,
12079 _MM_FROUND_CUR_DIRECTION);
12080}
12081
12082extern __inline __m128d
12083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
12085{
12086 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12087 (__v2df) __B,
12088 (__v2df)
12089 _mm_setzero_pd (),
12090 (__mmask8) __U,
12091 _MM_FROUND_CUR_DIRECTION);
12092}
12093
12094extern __inline __m128
12095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12097{
12098 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12099 (__v4sf) __B,
12100 (__v4sf) __W,
12101 (__mmask8) __U,
12102 _MM_FROUND_CUR_DIRECTION);
12103}
12104
12105extern __inline __m128
12106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12107_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
12108{
12109 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12110 (__v4sf) __B,
12111 (__v4sf)
12112 _mm_setzero_ps (),
12113 (__mmask8) __U,
12114 _MM_FROUND_CUR_DIRECTION);
12115}
12116
756c5857
AI
12117extern __inline __m512d
12118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12119_mm512_min_pd (__m512d __A, __m512d __B)
12120{
12121 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12122 (__v8df) __B,
12123 (__v8df)
0b192937 12124 _mm512_undefined_pd (),
756c5857
AI
12125 (__mmask8) -1,
12126 _MM_FROUND_CUR_DIRECTION);
12127}
12128
12129extern __inline __m512d
12130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12131_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12132{
12133 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12134 (__v8df) __B,
12135 (__v8df) __W,
12136 (__mmask8) __U,
12137 _MM_FROUND_CUR_DIRECTION);
12138}
12139
12140extern __inline __m512d
12141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12142_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
12143{
12144 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12145 (__v8df) __B,
12146 (__v8df)
12147 _mm512_setzero_pd (),
12148 (__mmask8) __U,
12149 _MM_FROUND_CUR_DIRECTION);
12150}
12151
12152extern __inline __m512
12153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154_mm512_min_ps (__m512 __A, __m512 __B)
12155{
12156 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12157 (__v16sf) __B,
12158 (__v16sf)
0b192937 12159 _mm512_undefined_ps (),
756c5857
AI
12160 (__mmask16) -1,
12161 _MM_FROUND_CUR_DIRECTION);
12162}
12163
12164extern __inline __m512
12165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12166_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12167{
12168 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12169 (__v16sf) __B,
12170 (__v16sf) __W,
12171 (__mmask16) __U,
12172 _MM_FROUND_CUR_DIRECTION);
12173}
12174
12175extern __inline __m512
12176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12177_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
12178{
12179 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12180 (__v16sf) __B,
12181 (__v16sf)
12182 _mm512_setzero_ps (),
12183 (__mmask16) __U,
12184 _MM_FROUND_CUR_DIRECTION);
12185}
12186
dc7401c0
SP
12187extern __inline __m128d
12188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12189_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12190{
12191 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12192 (__v2df) __B,
12193 (__v2df) __W,
12194 (__mmask8) __U,
12195 _MM_FROUND_CUR_DIRECTION);
12196}
12197
12198extern __inline __m128d
12199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12200_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
12201{
12202 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12203 (__v2df) __B,
12204 (__v2df)
12205 _mm_setzero_pd (),
12206 (__mmask8) __U,
12207 _MM_FROUND_CUR_DIRECTION);
12208}
12209
12210extern __inline __m128
12211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12212_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12213{
12214 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12215 (__v4sf) __B,
12216 (__v4sf) __W,
12217 (__mmask8) __U,
12218 _MM_FROUND_CUR_DIRECTION);
12219}
12220
12221extern __inline __m128
12222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12223_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
12224{
12225 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12226 (__v4sf) __B,
12227 (__v4sf)
12228 _mm_setzero_ps (),
12229 (__mmask8) __U,
12230 _MM_FROUND_CUR_DIRECTION);
12231}
12232
756c5857
AI
12233extern __inline __m512d
12234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235_mm512_scalef_pd (__m512d __A, __m512d __B)
12236{
12237 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12238 (__v8df) __B,
12239 (__v8df)
0b192937 12240 _mm512_undefined_pd (),
756c5857
AI
12241 (__mmask8) -1,
12242 _MM_FROUND_CUR_DIRECTION);
12243}
12244
12245extern __inline __m512d
12246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12247_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12248{
12249 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12250 (__v8df) __B,
12251 (__v8df) __W,
12252 (__mmask8) __U,
12253 _MM_FROUND_CUR_DIRECTION);
12254}
12255
12256extern __inline __m512d
12257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
12259{
12260 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12261 (__v8df) __B,
12262 (__v8df)
12263 _mm512_setzero_pd (),
12264 (__mmask8) __U,
12265 _MM_FROUND_CUR_DIRECTION);
12266}
12267
12268extern __inline __m512
12269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12270_mm512_scalef_ps (__m512 __A, __m512 __B)
12271{
12272 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12273 (__v16sf) __B,
12274 (__v16sf)
0b192937 12275 _mm512_undefined_ps (),
756c5857
AI
12276 (__mmask16) -1,
12277 _MM_FROUND_CUR_DIRECTION);
12278}
12279
12280extern __inline __m512
12281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12283{
12284 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12285 (__v16sf) __B,
12286 (__v16sf) __W,
12287 (__mmask16) __U,
12288 _MM_FROUND_CUR_DIRECTION);
12289}
12290
12291extern __inline __m512
12292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
12294{
12295 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12296 (__v16sf) __B,
12297 (__v16sf)
12298 _mm512_setzero_ps (),
12299 (__mmask16) __U,
12300 _MM_FROUND_CUR_DIRECTION);
12301}
12302
075691af
AI
12303extern __inline __m128d
12304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12305_mm_scalef_sd (__m128d __A, __m128d __B)
12306{
158061a6
OM
12307 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
12308 (__v2df) __B,
12309 (__v2df)
12310 _mm_setzero_pd (),
12311 (__mmask8) -1,
12312 _MM_FROUND_CUR_DIRECTION);
075691af
AI
12313}
12314
12315extern __inline __m128
12316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12317_mm_scalef_ss (__m128 __A, __m128 __B)
12318{
158061a6
OM
12319 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
12320 (__v4sf) __B,
12321 (__v4sf)
12322 _mm_setzero_ps (),
12323 (__mmask8) -1,
12324 _MM_FROUND_CUR_DIRECTION);
075691af
AI
12325}
12326
756c5857
AI
12327extern __inline __m512d
12328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12329_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12330{
12331 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12332 (__v8df) __B,
12333 (__v8df) __C,
12334 (__mmask8) -1,
12335 _MM_FROUND_CUR_DIRECTION);
12336}
12337
12338extern __inline __m512d
12339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12341{
12342 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12343 (__v8df) __B,
12344 (__v8df) __C,
12345 (__mmask8) __U,
12346 _MM_FROUND_CUR_DIRECTION);
12347}
12348
12349extern __inline __m512d
12350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12351_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12352{
12353 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12354 (__v8df) __B,
12355 (__v8df) __C,
12356 (__mmask8) __U,
12357 _MM_FROUND_CUR_DIRECTION);
12358}
12359
12360extern __inline __m512d
12361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12362_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12363{
12364 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12365 (__v8df) __B,
12366 (__v8df) __C,
12367 (__mmask8) __U,
12368 _MM_FROUND_CUR_DIRECTION);
12369}
12370
12371extern __inline __m512
12372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12373_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12374{
12375 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12376 (__v16sf) __B,
12377 (__v16sf) __C,
12378 (__mmask16) -1,
12379 _MM_FROUND_CUR_DIRECTION);
12380}
12381
12382extern __inline __m512
12383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12384_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12385{
12386 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12387 (__v16sf) __B,
12388 (__v16sf) __C,
12389 (__mmask16) __U,
12390 _MM_FROUND_CUR_DIRECTION);
12391}
12392
12393extern __inline __m512
12394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12395_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12396{
12397 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12398 (__v16sf) __B,
12399 (__v16sf) __C,
12400 (__mmask16) __U,
12401 _MM_FROUND_CUR_DIRECTION);
12402}
12403
12404extern __inline __m512
12405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12407{
12408 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12409 (__v16sf) __B,
12410 (__v16sf) __C,
12411 (__mmask16) __U,
12412 _MM_FROUND_CUR_DIRECTION);
12413}
12414
12415extern __inline __m512d
12416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12417_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12418{
12419 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12420 (__v8df) __B,
12421 -(__v8df) __C,
12422 (__mmask8) -1,
12423 _MM_FROUND_CUR_DIRECTION);
12424}
12425
12426extern __inline __m512d
12427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12428_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12429{
12430 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12431 (__v8df) __B,
12432 -(__v8df) __C,
12433 (__mmask8) __U,
12434 _MM_FROUND_CUR_DIRECTION);
12435}
12436
12437extern __inline __m512d
12438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12439_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12440{
12441 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12442 (__v8df) __B,
12443 (__v8df) __C,
12444 (__mmask8) __U,
12445 _MM_FROUND_CUR_DIRECTION);
12446}
12447
12448extern __inline __m512d
12449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12450_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12451{
12452 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12453 (__v8df) __B,
12454 -(__v8df) __C,
12455 (__mmask8) __U,
12456 _MM_FROUND_CUR_DIRECTION);
12457}
12458
12459extern __inline __m512
12460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12461_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12462{
12463 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12464 (__v16sf) __B,
12465 -(__v16sf) __C,
12466 (__mmask16) -1,
12467 _MM_FROUND_CUR_DIRECTION);
12468}
12469
12470extern __inline __m512
12471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12472_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12473{
12474 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12475 (__v16sf) __B,
12476 -(__v16sf) __C,
12477 (__mmask16) __U,
12478 _MM_FROUND_CUR_DIRECTION);
12479}
12480
12481extern __inline __m512
12482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12483_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12484{
12485 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12486 (__v16sf) __B,
12487 (__v16sf) __C,
12488 (__mmask16) __U,
12489 _MM_FROUND_CUR_DIRECTION);
12490}
12491
12492extern __inline __m512
12493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12494_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12495{
12496 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12497 (__v16sf) __B,
12498 -(__v16sf) __C,
12499 (__mmask16) __U,
12500 _MM_FROUND_CUR_DIRECTION);
12501}
12502
12503extern __inline __m512d
12504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12505_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12506{
12507 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12508 (__v8df) __B,
12509 (__v8df) __C,
12510 (__mmask8) -1,
12511 _MM_FROUND_CUR_DIRECTION);
12512}
12513
12514extern __inline __m512d
12515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12516_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12517{
12518 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12519 (__v8df) __B,
12520 (__v8df) __C,
12521 (__mmask8) __U,
12522 _MM_FROUND_CUR_DIRECTION);
12523}
12524
12525extern __inline __m512d
12526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12527_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12528{
12529 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12530 (__v8df) __B,
12531 (__v8df) __C,
12532 (__mmask8) __U,
12533 _MM_FROUND_CUR_DIRECTION);
12534}
12535
12536extern __inline __m512d
12537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12538_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12539{
12540 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12541 (__v8df) __B,
12542 (__v8df) __C,
12543 (__mmask8) __U,
12544 _MM_FROUND_CUR_DIRECTION);
12545}
12546
12547extern __inline __m512
12548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12549_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12550{
12551 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12552 (__v16sf) __B,
12553 (__v16sf) __C,
12554 (__mmask16) -1,
12555 _MM_FROUND_CUR_DIRECTION);
12556}
12557
12558extern __inline __m512
12559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12560_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12561{
12562 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12563 (__v16sf) __B,
12564 (__v16sf) __C,
12565 (__mmask16) __U,
12566 _MM_FROUND_CUR_DIRECTION);
12567}
12568
12569extern __inline __m512
12570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12571_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12572{
12573 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12574 (__v16sf) __B,
12575 (__v16sf) __C,
12576 (__mmask16) __U,
12577 _MM_FROUND_CUR_DIRECTION);
12578}
12579
12580extern __inline __m512
12581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12582_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12583{
12584 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12585 (__v16sf) __B,
12586 (__v16sf) __C,
12587 (__mmask16) __U,
12588 _MM_FROUND_CUR_DIRECTION);
12589}
12590
12591extern __inline __m512d
12592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12593_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12594{
12595 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12596 (__v8df) __B,
12597 -(__v8df) __C,
12598 (__mmask8) -1,
12599 _MM_FROUND_CUR_DIRECTION);
12600}
12601
12602extern __inline __m512d
12603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12604_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12605{
12606 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12607 (__v8df) __B,
12608 -(__v8df) __C,
12609 (__mmask8) __U,
12610 _MM_FROUND_CUR_DIRECTION);
12611}
12612
12613extern __inline __m512d
12614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12615_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12616{
12617 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12618 (__v8df) __B,
12619 (__v8df) __C,
12620 (__mmask8) __U,
12621 _MM_FROUND_CUR_DIRECTION);
12622}
12623
12624extern __inline __m512d
12625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12626_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12627{
12628 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12629 (__v8df) __B,
12630 -(__v8df) __C,
12631 (__mmask8) __U,
12632 _MM_FROUND_CUR_DIRECTION);
12633}
12634
12635extern __inline __m512
12636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12637_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12638{
12639 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12640 (__v16sf) __B,
12641 -(__v16sf) __C,
12642 (__mmask16) -1,
12643 _MM_FROUND_CUR_DIRECTION);
12644}
12645
12646extern __inline __m512
12647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12648_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12649{
12650 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12651 (__v16sf) __B,
12652 -(__v16sf) __C,
12653 (__mmask16) __U,
12654 _MM_FROUND_CUR_DIRECTION);
12655}
12656
12657extern __inline __m512
12658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12659_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12660{
12661 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12662 (__v16sf) __B,
12663 (__v16sf) __C,
12664 (__mmask16) __U,
12665 _MM_FROUND_CUR_DIRECTION);
12666}
12667
12668extern __inline __m512
12669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12670_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12671{
12672 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12673 (__v16sf) __B,
12674 -(__v16sf) __C,
12675 (__mmask16) __U,
12676 _MM_FROUND_CUR_DIRECTION);
12677}
12678
12679extern __inline __m512d
12680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12681_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12682{
12683 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12684 (__v8df) __B,
12685 (__v8df) __C,
12686 (__mmask8) -1,
12687 _MM_FROUND_CUR_DIRECTION);
12688}
12689
12690extern __inline __m512d
12691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12692_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12693{
12694 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12695 (__v8df) __B,
12696 (__v8df) __C,
12697 (__mmask8) __U,
12698 _MM_FROUND_CUR_DIRECTION);
12699}
12700
12701extern __inline __m512d
12702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12703_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12704{
12705 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
12706 (__v8df) __B,
12707 (__v8df) __C,
12708 (__mmask8) __U,
12709 _MM_FROUND_CUR_DIRECTION);
12710}
12711
12712extern __inline __m512d
12713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12714_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12715{
12716 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12717 (__v8df) __B,
12718 (__v8df) __C,
12719 (__mmask8) __U,
12720 _MM_FROUND_CUR_DIRECTION);
12721}
12722
12723extern __inline __m512
12724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12725_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12726{
12727 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12728 (__v16sf) __B,
12729 (__v16sf) __C,
12730 (__mmask16) -1,
12731 _MM_FROUND_CUR_DIRECTION);
12732}
12733
12734extern __inline __m512
12735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12736_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12737{
12738 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12739 (__v16sf) __B,
12740 (__v16sf) __C,
12741 (__mmask16) __U,
12742 _MM_FROUND_CUR_DIRECTION);
12743}
12744
12745extern __inline __m512
12746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12747_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12748{
12749 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
12750 (__v16sf) __B,
12751 (__v16sf) __C,
12752 (__mmask16) __U,
12753 _MM_FROUND_CUR_DIRECTION);
12754}
12755
12756extern __inline __m512
12757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12758_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12759{
12760 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12761 (__v16sf) __B,
12762 (__v16sf) __C,
12763 (__mmask16) __U,
12764 _MM_FROUND_CUR_DIRECTION);
12765}
12766
12767extern __inline __m512d
12768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12769_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12770{
12771 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12772 (__v8df) __B,
12773 -(__v8df) __C,
12774 (__mmask8) -1,
12775 _MM_FROUND_CUR_DIRECTION);
12776}
12777
12778extern __inline __m512d
12779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12780_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12781{
12782 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12783 (__v8df) __B,
12784 (__v8df) __C,
12785 (__mmask8) __U,
12786 _MM_FROUND_CUR_DIRECTION);
12787}
12788
12789extern __inline __m512d
12790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12791_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12792{
12793 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12794 (__v8df) __B,
12795 (__v8df) __C,
12796 (__mmask8) __U,
12797 _MM_FROUND_CUR_DIRECTION);
12798}
12799
12800extern __inline __m512d
12801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12802_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12803{
12804 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12805 (__v8df) __B,
12806 -(__v8df) __C,
12807 (__mmask8) __U,
12808 _MM_FROUND_CUR_DIRECTION);
12809}
12810
12811extern __inline __m512
12812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12813_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12814{
12815 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12816 (__v16sf) __B,
12817 -(__v16sf) __C,
12818 (__mmask16) -1,
12819 _MM_FROUND_CUR_DIRECTION);
12820}
12821
12822extern __inline __m512
12823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12824_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12825{
12826 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12827 (__v16sf) __B,
12828 (__v16sf) __C,
12829 (__mmask16) __U,
12830 _MM_FROUND_CUR_DIRECTION);
12831}
12832
12833extern __inline __m512
12834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12835_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12836{
12837 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12838 (__v16sf) __B,
12839 (__v16sf) __C,
12840 (__mmask16) __U,
12841 _MM_FROUND_CUR_DIRECTION);
12842}
12843
12844extern __inline __m512
12845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12846_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12847{
12848 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12849 (__v16sf) __B,
12850 -(__v16sf) __C,
12851 (__mmask16) __U,
12852 _MM_FROUND_CUR_DIRECTION);
12853}
12854
12855extern __inline __m256i
12856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12857_mm512_cvttpd_epi32 (__m512d __A)
12858{
12859 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12860 (__v8si)
0b192937 12861 _mm256_undefined_si256 (),
756c5857
AI
12862 (__mmask8) -1,
12863 _MM_FROUND_CUR_DIRECTION);
12864}
12865
12866extern __inline __m256i
12867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12868_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12869{
12870 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12871 (__v8si) __W,
12872 (__mmask8) __U,
12873 _MM_FROUND_CUR_DIRECTION);
12874}
12875
12876extern __inline __m256i
12877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12878_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12879{
12880 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12881 (__v8si)
12882 _mm256_setzero_si256 (),
12883 (__mmask8) __U,
12884 _MM_FROUND_CUR_DIRECTION);
12885}
12886
12887extern __inline __m256i
12888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12889_mm512_cvttpd_epu32 (__m512d __A)
12890{
12891 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12892 (__v8si)
0b192937 12893 _mm256_undefined_si256 (),
756c5857
AI
12894 (__mmask8) -1,
12895 _MM_FROUND_CUR_DIRECTION);
12896}
12897
12898extern __inline __m256i
12899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12900_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12901{
12902 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12903 (__v8si) __W,
12904 (__mmask8) __U,
12905 _MM_FROUND_CUR_DIRECTION);
12906}
12907
12908extern __inline __m256i
12909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12910_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12911{
12912 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12913 (__v8si)
12914 _mm256_setzero_si256 (),
12915 (__mmask8) __U,
12916 _MM_FROUND_CUR_DIRECTION);
12917}
12918
12919extern __inline __m256i
12920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12921_mm512_cvtpd_epi32 (__m512d __A)
12922{
12923 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12924 (__v8si)
0b192937 12925 _mm256_undefined_si256 (),
756c5857
AI
12926 (__mmask8) -1,
12927 _MM_FROUND_CUR_DIRECTION);
12928}
12929
12930extern __inline __m256i
12931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12932_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12933{
12934 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12935 (__v8si) __W,
12936 (__mmask8) __U,
12937 _MM_FROUND_CUR_DIRECTION);
12938}
12939
12940extern __inline __m256i
12941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12942_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12943{
12944 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12945 (__v8si)
12946 _mm256_setzero_si256 (),
12947 (__mmask8) __U,
12948 _MM_FROUND_CUR_DIRECTION);
12949}
12950
12951extern __inline __m256i
12952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12953_mm512_cvtpd_epu32 (__m512d __A)
12954{
12955 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12956 (__v8si)
0b192937 12957 _mm256_undefined_si256 (),
756c5857
AI
12958 (__mmask8) -1,
12959 _MM_FROUND_CUR_DIRECTION);
12960}
12961
12962extern __inline __m256i
12963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12964_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12965{
12966 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12967 (__v8si) __W,
12968 (__mmask8) __U,
12969 _MM_FROUND_CUR_DIRECTION);
12970}
12971
12972extern __inline __m256i
12973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12974_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12975{
12976 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12977 (__v8si)
12978 _mm256_setzero_si256 (),
12979 (__mmask8) __U,
12980 _MM_FROUND_CUR_DIRECTION);
12981}
12982
12983extern __inline __m512i
12984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12985_mm512_cvttps_epi32 (__m512 __A)
12986{
12987 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12988 (__v16si)
4271e5cb 12989 _mm512_undefined_epi32 (),
756c5857
AI
12990 (__mmask16) -1,
12991 _MM_FROUND_CUR_DIRECTION);
12992}
12993
12994extern __inline __m512i
12995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12996_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12997{
12998 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12999 (__v16si) __W,
13000 (__mmask16) __U,
13001 _MM_FROUND_CUR_DIRECTION);
13002}
13003
13004extern __inline __m512i
13005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13006_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
13007{
13008 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13009 (__v16si)
13010 _mm512_setzero_si512 (),
13011 (__mmask16) __U,
13012 _MM_FROUND_CUR_DIRECTION);
13013}
13014
13015extern __inline __m512i
13016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13017_mm512_cvttps_epu32 (__m512 __A)
13018{
13019 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13020 (__v16si)
4271e5cb 13021 _mm512_undefined_epi32 (),
756c5857
AI
13022 (__mmask16) -1,
13023 _MM_FROUND_CUR_DIRECTION);
13024}
13025
13026extern __inline __m512i
13027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13028_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13029{
13030 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13031 (__v16si) __W,
13032 (__mmask16) __U,
13033 _MM_FROUND_CUR_DIRECTION);
13034}
13035
13036extern __inline __m512i
13037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13038_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
13039{
13040 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13041 (__v16si)
13042 _mm512_setzero_si512 (),
13043 (__mmask16) __U,
13044 _MM_FROUND_CUR_DIRECTION);
13045}
13046
13047extern __inline __m512i
13048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13049_mm512_cvtps_epi32 (__m512 __A)
13050{
13051 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13052 (__v16si)
4271e5cb 13053 _mm512_undefined_epi32 (),
756c5857
AI
13054 (__mmask16) -1,
13055 _MM_FROUND_CUR_DIRECTION);
13056}
13057
13058extern __inline __m512i
13059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13060_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13061{
13062 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13063 (__v16si) __W,
13064 (__mmask16) __U,
13065 _MM_FROUND_CUR_DIRECTION);
13066}
13067
13068extern __inline __m512i
13069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13070_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
13071{
13072 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13073 (__v16si)
13074 _mm512_setzero_si512 (),
13075 (__mmask16) __U,
13076 _MM_FROUND_CUR_DIRECTION);
13077}
13078
13079extern __inline __m512i
13080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13081_mm512_cvtps_epu32 (__m512 __A)
13082{
13083 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13084 (__v16si)
4271e5cb 13085 _mm512_undefined_epi32 (),
756c5857
AI
13086 (__mmask16) -1,
13087 _MM_FROUND_CUR_DIRECTION);
13088}
13089
13090extern __inline __m512i
13091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13092_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13093{
13094 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13095 (__v16si) __W,
13096 (__mmask16) __U,
13097 _MM_FROUND_CUR_DIRECTION);
13098}
13099
13100extern __inline __m512i
13101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13102_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
13103{
13104 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13105 (__v16si)
13106 _mm512_setzero_si512 (),
13107 (__mmask16) __U,
13108 _MM_FROUND_CUR_DIRECTION);
13109}
13110
dcb2c527
JJ
13111extern __inline double
13112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13113_mm512_cvtsd_f64 (__m512d __A)
13114{
13115 return __A[0];
13116}
13117
13118extern __inline float
13119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13120_mm512_cvtss_f32 (__m512 __A)
13121{
13122 return __A[0];
13123}
13124
756c5857
AI
13125#ifdef __x86_64__
13126extern __inline __m128
13127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13128_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
13129{
13130 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
13131 _MM_FROUND_CUR_DIRECTION);
13132}
13133
13134extern __inline __m128d
13135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13136_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
13137{
13138 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
13139 _MM_FROUND_CUR_DIRECTION);
13140}
13141#endif
13142
13143extern __inline __m128
13144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13145_mm_cvtu32_ss (__m128 __A, unsigned __B)
13146{
13147 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
13148 _MM_FROUND_CUR_DIRECTION);
13149}
13150
13151extern __inline __m512
13152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13153_mm512_cvtepi32_ps (__m512i __A)
13154{
13155 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13156 (__v16sf)
0b192937 13157 _mm512_undefined_ps (),
756c5857
AI
13158 (__mmask16) -1,
13159 _MM_FROUND_CUR_DIRECTION);
13160}
13161
13162extern __inline __m512
13163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13164_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13165{
13166 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13167 (__v16sf) __W,
13168 (__mmask16) __U,
13169 _MM_FROUND_CUR_DIRECTION);
13170}
13171
13172extern __inline __m512
13173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13174_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
13175{
13176 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13177 (__v16sf)
13178 _mm512_setzero_ps (),
13179 (__mmask16) __U,
13180 _MM_FROUND_CUR_DIRECTION);
13181}
13182
13183extern __inline __m512
13184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13185_mm512_cvtepu32_ps (__m512i __A)
13186{
13187 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13188 (__v16sf)
0b192937 13189 _mm512_undefined_ps (),
756c5857
AI
13190 (__mmask16) -1,
13191 _MM_FROUND_CUR_DIRECTION);
13192}
13193
13194extern __inline __m512
13195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13196_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13197{
13198 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13199 (__v16sf) __W,
13200 (__mmask16) __U,
13201 _MM_FROUND_CUR_DIRECTION);
13202}
13203
13204extern __inline __m512
13205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13206_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
13207{
13208 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13209 (__v16sf)
13210 _mm512_setzero_ps (),
13211 (__mmask16) __U,
13212 _MM_FROUND_CUR_DIRECTION);
13213}
13214
13215#ifdef __OPTIMIZE__
13216extern __inline __m512d
13217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13218_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
13219{
13220 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13221 (__v8df) __B,
13222 (__v8di) __C,
13223 __imm,
13224 (__mmask8) -1,
13225 _MM_FROUND_CUR_DIRECTION);
13226}
13227
13228extern __inline __m512d
13229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13230_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
13231 __m512i __C, const int __imm)
13232{
13233 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13234 (__v8df) __B,
13235 (__v8di) __C,
13236 __imm,
13237 (__mmask8) __U,
13238 _MM_FROUND_CUR_DIRECTION);
13239}
13240
13241extern __inline __m512d
13242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13243_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
13244 __m512i __C, const int __imm)
13245{
13246 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
13247 (__v8df) __B,
13248 (__v8di) __C,
13249 __imm,
13250 (__mmask8) __U,
13251 _MM_FROUND_CUR_DIRECTION);
13252}
13253
13254extern __inline __m512
13255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13256_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
13257{
13258 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13259 (__v16sf) __B,
13260 (__v16si) __C,
13261 __imm,
13262 (__mmask16) -1,
13263 _MM_FROUND_CUR_DIRECTION);
13264}
13265
13266extern __inline __m512
13267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13268_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
13269 __m512i __C, const int __imm)
13270{
13271 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13272 (__v16sf) __B,
13273 (__v16si) __C,
13274 __imm,
13275 (__mmask16) __U,
13276 _MM_FROUND_CUR_DIRECTION);
13277}
13278
13279extern __inline __m512
13280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13281_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
13282 __m512i __C, const int __imm)
13283{
13284 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
13285 (__v16sf) __B,
13286 (__v16si) __C,
13287 __imm,
13288 (__mmask16) __U,
13289 _MM_FROUND_CUR_DIRECTION);
13290}
13291
13292extern __inline __m128d
13293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13294_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
13295{
13296 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13297 (__v2df) __B,
13298 (__v2di) __C, __imm,
13299 (__mmask8) -1,
13300 _MM_FROUND_CUR_DIRECTION);
13301}
13302
13303extern __inline __m128d
13304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13305_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
13306 __m128i __C, const int __imm)
13307{
13308 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13309 (__v2df) __B,
13310 (__v2di) __C, __imm,
13311 (__mmask8) __U,
13312 _MM_FROUND_CUR_DIRECTION);
13313}
13314
13315extern __inline __m128d
13316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13317_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
13318 __m128i __C, const int __imm)
13319{
13320 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
13321 (__v2df) __B,
13322 (__v2di) __C,
13323 __imm,
13324 (__mmask8) __U,
13325 _MM_FROUND_CUR_DIRECTION);
13326}
13327
13328extern __inline __m128
13329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13330_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
13331{
13332 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13333 (__v4sf) __B,
13334 (__v4si) __C, __imm,
13335 (__mmask8) -1,
13336 _MM_FROUND_CUR_DIRECTION);
13337}
13338
13339extern __inline __m128
13340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13341_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13342 __m128i __C, const int __imm)
13343{
13344 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13345 (__v4sf) __B,
13346 (__v4si) __C, __imm,
13347 (__mmask8) __U,
13348 _MM_FROUND_CUR_DIRECTION);
13349}
13350
13351extern __inline __m128
13352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13353_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13354 __m128i __C, const int __imm)
13355{
13356 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13357 (__v4sf) __B,
13358 (__v4si) __C, __imm,
13359 (__mmask8) __U,
13360 _MM_FROUND_CUR_DIRECTION);
13361}
13362#else
/* Macro forms of the 512-bit double-precision fixupimm intrinsics, used
   when __OPTIMIZE__ is not defined.  Every argument is cast to the type
   the builtin expects; C is passed as an int immediate.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
                                                (__v8df)(__m512d)(Y), \
                                                (__v8di)(__m512i)(Z), \
                                                (int)(C), \
                                                (__mmask8)(-1), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
                                                (__v8df)(__m512d)(Y), \
                                                (__v8di)(__m512i)(Z), \
                                                (int)(C), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
                                                 (__v8df)(__m512d)(Y), \
                                                 (__v8di)(__m512i)(Z), \
                                                 (int)(C), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))
13377
/* Macro forms of the 512-bit single-precision fixupimm intrinsics, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
                                               (__v16sf)(__m512)(Y), \
                                               (__v16si)(__m512i)(Z), \
                                               (int)(C), \
                                               (__mmask16)(-1), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
                                               (__v16sf)(__m512)(Y), \
                                               (__v16si)(__m512i)(Z), \
                                               (int)(C), \
                                               (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
                                                (__v16sf)(__m512)(Y), \
                                                (__v16si)(__m512i)(Z), \
                                                (int)(C), \
                                                (__mmask16)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
13392
/* Macro forms of the scalar double-precision fixupimm intrinsics, used
   when __OPTIMIZE__ is not defined.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
                                             (__v2df)(__m128d)(Y), \
                                             (__v2di)(__m128i)(Z), \
                                             (int)(C), \
                                             (__mmask8)(-1), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
                                             (__v2df)(__m128d)(Y), \
                                             (__v2di)(__m128i)(Z), \
                                             (int)(C), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
                                              (__v2df)(__m128d)(Y), \
                                              (__v2di)(__m128i)(Z), \
                                              (int)(C), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
13407
/* Macro forms of the scalar single-precision fixupimm intrinsics, used
   when __OPTIMIZE__ is not defined.  */
#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), \
                                            (__v4si)(__m128i)(Z), \
                                            (int)(C), \
                                            (__mmask8)(-1), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), \
                                            (__v4si)(__m128i)(Z), \
                                            (int)(C), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
                                             (__v4sf)(__m128)(Y), \
                                             (__v4si)(__m128i)(Z), \
                                             (int)(C), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
13422#endif
13423
13424#ifdef __x86_64__
13425extern __inline unsigned long long
13426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13427_mm_cvtss_u64 (__m128 __A)
13428{
13429 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13430 __A,
13431 _MM_FROUND_CUR_DIRECTION);
13432}
13433
13434extern __inline unsigned long long
13435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13436_mm_cvttss_u64 (__m128 __A)
13437{
13438 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13439 __A,
13440 _MM_FROUND_CUR_DIRECTION);
13441}
13442
13443extern __inline long long
13444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13445_mm_cvttss_i64 (__m128 __A)
13446{
13447 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13448 _MM_FROUND_CUR_DIRECTION);
13449}
13450#endif /* __x86_64__ */
13451
13452extern __inline unsigned
13453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13454_mm_cvtss_u32 (__m128 __A)
13455{
13456 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13457 _MM_FROUND_CUR_DIRECTION);
13458}
13459
13460extern __inline unsigned
13461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13462_mm_cvttss_u32 (__m128 __A)
13463{
13464 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13465 _MM_FROUND_CUR_DIRECTION);
13466}
13467
13468extern __inline int
13469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13470_mm_cvttss_i32 (__m128 __A)
13471{
13472 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13473 _MM_FROUND_CUR_DIRECTION);
13474}
13475
13476#ifdef __x86_64__
13477extern __inline unsigned long long
13478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13479_mm_cvtsd_u64 (__m128d __A)
13480{
13481 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13482 __A,
13483 _MM_FROUND_CUR_DIRECTION);
13484}
13485
13486extern __inline unsigned long long
13487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13488_mm_cvttsd_u64 (__m128d __A)
13489{
13490 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13491 __A,
13492 _MM_FROUND_CUR_DIRECTION);
13493}
13494
13495extern __inline long long
13496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13497_mm_cvttsd_i64 (__m128d __A)
13498{
13499 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13500 _MM_FROUND_CUR_DIRECTION);
13501}
13502#endif /* __x86_64__ */
13503
13504extern __inline unsigned
13505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13506_mm_cvtsd_u32 (__m128d __A)
13507{
13508 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13509 _MM_FROUND_CUR_DIRECTION);
13510}
13511
13512extern __inline unsigned
13513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13514_mm_cvttsd_u32 (__m128d __A)
13515{
13516 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13517 _MM_FROUND_CUR_DIRECTION);
13518}
13519
13520extern __inline int
13521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13522_mm_cvttsd_i32 (__m128d __A)
13523{
13524 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13525 _MM_FROUND_CUR_DIRECTION);
13526}
13527
13528extern __inline __m512d
13529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13530_mm512_cvtps_pd (__m256 __A)
13531{
13532 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13533 (__v8df)
0b192937 13534 _mm512_undefined_pd (),
756c5857
AI
13535 (__mmask8) -1,
13536 _MM_FROUND_CUR_DIRECTION);
13537}
13538
13539extern __inline __m512d
13540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13541_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13542{
13543 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13544 (__v8df) __W,
13545 (__mmask8) __U,
13546 _MM_FROUND_CUR_DIRECTION);
13547}
13548
13549extern __inline __m512d
13550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13551_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13552{
13553 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13554 (__v8df)
13555 _mm512_setzero_pd (),
13556 (__mmask8) __U,
13557 _MM_FROUND_CUR_DIRECTION);
13558}
13559
13560extern __inline __m512
13561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13562_mm512_cvtph_ps (__m256i __A)
13563{
13564 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13565 (__v16sf)
0b192937 13566 _mm512_undefined_ps (),
756c5857
AI
13567 (__mmask16) -1,
13568 _MM_FROUND_CUR_DIRECTION);
13569}
13570
13571extern __inline __m512
13572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13573_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13574{
13575 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13576 (__v16sf) __W,
13577 (__mmask16) __U,
13578 _MM_FROUND_CUR_DIRECTION);
13579}
13580
13581extern __inline __m512
13582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13583_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13584{
13585 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13586 (__v16sf)
13587 _mm512_setzero_ps (),
13588 (__mmask16) __U,
13589 _MM_FROUND_CUR_DIRECTION);
13590}
13591
13592extern __inline __m256
13593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13594_mm512_cvtpd_ps (__m512d __A)
13595{
13596 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13597 (__v8sf)
0b192937 13598 _mm256_undefined_ps (),
756c5857
AI
13599 (__mmask8) -1,
13600 _MM_FROUND_CUR_DIRECTION);
13601}
13602
13603extern __inline __m256
13604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13605_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13606{
13607 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13608 (__v8sf) __W,
13609 (__mmask8) __U,
13610 _MM_FROUND_CUR_DIRECTION);
13611}
13612
13613extern __inline __m256
13614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13615_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13616{
13617 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13618 (__v8sf)
13619 _mm256_setzero_ps (),
13620 (__mmask8) __U,
13621 _MM_FROUND_CUR_DIRECTION);
13622}
13623
13624#ifdef __OPTIMIZE__
13625extern __inline __m512
13626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13627_mm512_getexp_ps (__m512 __A)
13628{
13629 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13630 (__v16sf)
0b192937 13631 _mm512_undefined_ps (),
756c5857
AI
13632 (__mmask16) -1,
13633 _MM_FROUND_CUR_DIRECTION);
13634}
13635
13636extern __inline __m512
13637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13638_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13639{
13640 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13641 (__v16sf) __W,
13642 (__mmask16) __U,
13643 _MM_FROUND_CUR_DIRECTION);
13644}
13645
13646extern __inline __m512
13647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13648_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13649{
13650 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13651 (__v16sf)
13652 _mm512_setzero_ps (),
13653 (__mmask16) __U,
13654 _MM_FROUND_CUR_DIRECTION);
13655}
13656
13657extern __inline __m512d
13658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13659_mm512_getexp_pd (__m512d __A)
13660{
13661 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13662 (__v8df)
0b192937 13663 _mm512_undefined_pd (),
756c5857
AI
13664 (__mmask8) -1,
13665 _MM_FROUND_CUR_DIRECTION);
13666}
13667
13668extern __inline __m512d
13669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13670_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13671{
13672 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13673 (__v8df) __W,
13674 (__mmask8) __U,
13675 _MM_FROUND_CUR_DIRECTION);
13676}
13677
13678extern __inline __m512d
13679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13680_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13681{
13682 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13683 (__v8df)
13684 _mm512_setzero_pd (),
13685 (__mmask8) __U,
13686 _MM_FROUND_CUR_DIRECTION);
13687}
13688
075691af
AI
13689extern __inline __m128
13690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13691_mm_getexp_ss (__m128 __A, __m128 __B)
13692{
13693 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13694 (__v4sf) __B,
13695 _MM_FROUND_CUR_DIRECTION);
13696}
13697
68d872d7
SP
13698extern __inline __m128
13699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13700_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13701{
13702 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13703 (__v4sf) __B,
13704 (__v4sf) __W,
13705 (__mmask8) __U,
13706 _MM_FROUND_CUR_DIRECTION);
13707}
13708
13709extern __inline __m128
13710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13711_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
13712{
13713 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13714 (__v4sf) __B,
13715 (__v4sf)
13716 _mm_setzero_ps (),
13717 (__mmask8) __U,
13718 _MM_FROUND_CUR_DIRECTION);
13719}
13720
075691af
AI
13721extern __inline __m128d
13722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13723_mm_getexp_sd (__m128d __A, __m128d __B)
13724{
13725 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13726 (__v2df) __B,
13727 _MM_FROUND_CUR_DIRECTION);
13728}
13729
68d872d7
SP
13730extern __inline __m128d
13731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13732_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13733{
13734 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13735 (__v2df) __B,
13736 (__v2df) __W,
13737 (__mmask8) __U,
13738 _MM_FROUND_CUR_DIRECTION);
13739}
13740
13741extern __inline __m128d
13742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13743_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
13744{
13745 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13746 (__v2df) __B,
13747 (__v2df)
13748 _mm_setzero_pd (),
13749 (__mmask8) __U,
13750 _MM_FROUND_CUR_DIRECTION);
13751}
13752
756c5857
AI
13753extern __inline __m512d
13754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13755_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13756 _MM_MANTISSA_SIGN_ENUM __C)
13757{
13758 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13759 (__C << 2) | __B,
0b192937 13760 _mm512_undefined_pd (),
756c5857
AI
13761 (__mmask8) -1,
13762 _MM_FROUND_CUR_DIRECTION);
13763}
13764
13765extern __inline __m512d
13766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13767_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13768 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13769{
13770 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13771 (__C << 2) | __B,
13772 (__v8df) __W, __U,
13773 _MM_FROUND_CUR_DIRECTION);
13774}
13775
13776extern __inline __m512d
13777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13778_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13779 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13780{
13781 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13782 (__C << 2) | __B,
13783 (__v8df)
13784 _mm512_setzero_pd (),
13785 __U,
13786 _MM_FROUND_CUR_DIRECTION);
13787}
13788
13789extern __inline __m512
13790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13791_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13792 _MM_MANTISSA_SIGN_ENUM __C)
13793{
13794 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13795 (__C << 2) | __B,
0b192937 13796 _mm512_undefined_ps (),
756c5857
AI
13797 (__mmask16) -1,
13798 _MM_FROUND_CUR_DIRECTION);
13799}
13800
13801extern __inline __m512
13802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13803_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13804 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13805{
13806 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13807 (__C << 2) | __B,
13808 (__v16sf) __W, __U,
13809 _MM_FROUND_CUR_DIRECTION);
13810}
13811
13812extern __inline __m512
13813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13814_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13815 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13816{
13817 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13818 (__C << 2) | __B,
13819 (__v16sf)
13820 _mm512_setzero_ps (),
13821 __U,
13822 _MM_FROUND_CUR_DIRECTION);
13823}
13824
075691af
AI
13825extern __inline __m128d
13826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13827_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13828 _MM_MANTISSA_SIGN_ENUM __D)
13829{
13830 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13831 (__v2df) __B,
13832 (__D << 2) | __C,
13833 _MM_FROUND_CUR_DIRECTION);
13834}
13835
68d872d7
SP
13836extern __inline __m128d
13837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13838_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
13839 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13840{
13841 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13842 (__v2df) __B,
13843 (__D << 2) | __C,
13844 (__v2df) __W,
13845 __U,
13846 _MM_FROUND_CUR_DIRECTION);
13847}
13848
13849extern __inline __m128d
13850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13851_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
13852 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13853{
13854 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13855 (__v2df) __B,
13856 (__D << 2) | __C,
13857 (__v2df)
13858 _mm_setzero_pd(),
13859 __U,
13860 _MM_FROUND_CUR_DIRECTION);
13861}
13862
075691af
AI
13863extern __inline __m128
13864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13865_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13866 _MM_MANTISSA_SIGN_ENUM __D)
13867{
13868 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13869 (__v4sf) __B,
13870 (__D << 2) | __C,
13871 _MM_FROUND_CUR_DIRECTION);
13872}
13873
68d872d7
SP
13874extern __inline __m128
13875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13876_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
13877 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13878{
13879 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13880 (__v4sf) __B,
13881 (__D << 2) | __C,
13882 (__v4sf) __W,
13883 __U,
13884 _MM_FROUND_CUR_DIRECTION);
13885}
13886
13887extern __inline __m128
13888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13889_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
13890 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13891{
13892 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13893 (__v4sf) __B,
13894 (__D << 2) | __C,
13895 (__v4sf)
13896 _mm_setzero_ps(),
13897 __U,
13898 _MM_FROUND_CUR_DIRECTION);
13899}
13900
756c5857
AI
13901#else
/* Macro forms of the packed double-precision getmant intrinsics, used
   when __OPTIMIZE__ is not defined.  The immediate is assembled as
   ((C) << 2) | (B) and cast to int.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
                                               (int)(((C)<<2) | (B)), \
                                               (__v8df)_mm512_undefined_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
                                               (int)(((C)<<2) | (B)), \
                                               (__v8df)(__m512d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
                                               (int)(((C)<<2) | (B)), \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the packed single-precision getmant intrinsics, used
   when __OPTIMIZE__ is not defined.  The immediate is assembled as
   ((C) << 2) | (B) and cast to int.  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
                                              (int)(((C)<<2) | (B)), \
                                              (__v16sf)_mm512_undefined_ps(), \
                                              (__mmask16)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
                                              (int)(((C)<<2) | (B)), \
                                              (__v16sf)(__m512)(W), \
                                              (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
                                              (int)(((C)<<2) | (B)), \
                                              (__v16sf)_mm512_setzero_ps(), \
                                              (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar double-precision getmant intrinsics, used
   when __OPTIMIZE__ is not defined.  The immediate is assembled as
   ((D) << 2) | (C) and cast to int.  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
                                             (__v2df)(__m128d)(Y), \
                                             (int)(((D)<<2) | (C)), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
                                                  (__v2df)(__m128d)(Y), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), \
                                                  _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
                                                  (__v2df)(__m128d)(Y), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), \
                                                  _MM_FROUND_CUR_DIRECTION))
13963
075691af
AI
/* Macro forms of the scalar single-precision getmant intrinsics, used
   when __OPTIMIZE__ is not defined.  The immediate is assembled as
   ((D) << 2) | (C) and cast to int.  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), \
                                            (int)(((D)<<2) | (C)), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
                                                 (__v4sf)(__m128)(Y), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
                                                 (__v4sf)(__m128)(Y), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))
13985
/* Macro forms of the scalar single-precision getexp intrinsics, used
   when __OPTIMIZE__ is not defined.  Every argument is parenthesised
   and cast to the builtin's operand type, and the whole expansion is
   parenthesised so that the (__m128) cast applies to the call result
   even when the macro is followed by a postfix operator.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getexp_ss(W, U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getexp_ss(U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
13997
/* Macro forms of the scalar double-precision getexp intrinsics, used
   when __OPTIMIZE__ is not defined.  Every argument is parenthesised
   and cast to the builtin's operand type, and the whole expansion is
   parenthesised so that the (__m128d) cast applies to the call result
   even when the macro is followed by a postfix operator.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getexp_sd(W, U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getexp_sd(U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))
14009
756c5857
AI
/* Macro forms of the packed single-precision getexp intrinsics, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A), \
                                             (__v16sf)_mm512_undefined_ps(), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(W), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A), \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
14021
/* Macro forms of the packed double-precision getexp intrinsics, used
   when __OPTIMIZE__ is not defined.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A), \
                                              (__v8df)_mm512_undefined_pd(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A), \
                                              (__v8df)_mm512_setzero_pd(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
14033#endif
14034
14035#ifdef __OPTIMIZE__
14036extern __inline __m512
14037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14038_mm512_roundscale_ps (__m512 __A, const int __imm)
14039{
14040 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
14041 (__v16sf)
14042 _mm512_undefined_ps (),
14043 -1,
756c5857
AI
14044 _MM_FROUND_CUR_DIRECTION);
14045}
14046
14047extern __inline __m512
14048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14049_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
14050 const int __imm)
14051{
14052 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
14053 (__v16sf) __A,
14054 (__mmask16) __B,
14055 _MM_FROUND_CUR_DIRECTION);
14056}
14057
14058extern __inline __m512
14059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14060_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
14061{
14062 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
14063 __imm,
14064 (__v16sf)
14065 _mm512_setzero_ps (),
14066 (__mmask16) __A,
14067 _MM_FROUND_CUR_DIRECTION);
14068}
14069
14070extern __inline __m512d
14071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14072_mm512_roundscale_pd (__m512d __A, const int __imm)
14073{
14074 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
14075 (__v8df)
14076 _mm512_undefined_pd (),
14077 -1,
756c5857
AI
14078 _MM_FROUND_CUR_DIRECTION);
14079}
14080
14081extern __inline __m512d
14082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14083_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
14084 const int __imm)
14085{
14086 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
14087 (__v8df) __A,
14088 (__mmask8) __B,
14089 _MM_FROUND_CUR_DIRECTION);
14090}
14091
14092extern __inline __m512d
14093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14094_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
14095{
14096 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
14097 __imm,
14098 (__v8df)
14099 _mm512_setzero_pd (),
14100 (__mmask8) __A,
14101 _MM_FROUND_CUR_DIRECTION);
14102}
14103
075691af
AI
14104extern __inline __m128
14105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14106_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
14107{
14108 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
14109 (__v4sf) __B, __imm,
14110 _MM_FROUND_CUR_DIRECTION);
14111}
14112
14113extern __inline __m128d
14114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14115_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
14116{
14117 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
14118 (__v2df) __B, __imm,
14119 _MM_FROUND_CUR_DIRECTION);
14120}
14121
756c5857
AI
14122#else
/* Macro forms of the single-precision roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  All expand to
   __builtin_ia32_rndscaleps_mask with _MM_FROUND_CUR_DIRECTION.  */

/* A: input vector, B: immediate.  Undefined third operand, mask -1.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf)_mm512_undefined_ps(), \
     (__mmask16)(-1), \
     _MM_FROUND_CUR_DIRECTION))

/* A: third operand, B: mask, C: input vector, D: immediate.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(C), \
     (int)(D), \
     (__v16sf)(__m512)(A), \
     (__mmask16)(B), \
     _MM_FROUND_CUR_DIRECTION))

/* A: mask, B: input vector, C: immediate.  All-zero third operand.  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the double-precision roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  All expand to
   __builtin_ia32_rndscalepd_mask with _MM_FROUND_CUR_DIRECTION.  */

/* A: input vector, B: immediate.  Undefined third operand, mask -1.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))

/* A: third operand, B: mask, C: input vector, D: immediate.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(C), \
     (int)(D), \
     (__v8df)(__m512d)(A), \
     (__mmask8)(B), \
     _MM_FROUND_CUR_DIRECTION))

/* A: mask, B: input vector, C: immediate.  All-zero third operand.  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(A), \
     _MM_FROUND_CUR_DIRECTION))
075691af
AI
/* Macro forms of the scalar roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  A and B are the two vector operands,
   C is the immediate.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))
756c5857
AI
14155#endif
14156
14157#ifdef __OPTIMIZE__
14158extern __inline __mmask8
14159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14160_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
14161{
14162 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14163 (__v8df) __Y, __P,
14164 (__mmask8) -1,
14165 _MM_FROUND_CUR_DIRECTION);
14166}
14167
14168extern __inline __mmask16
14169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14170_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
14171{
14172 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14173 (__v16sf) __Y, __P,
14174 (__mmask16) -1,
14175 _MM_FROUND_CUR_DIRECTION);
14176}
14177
14178extern __inline __mmask16
14179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14180_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
14181{
14182 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14183 (__v16sf) __Y, __P,
14184 (__mmask16) __U,
14185 _MM_FROUND_CUR_DIRECTION);
14186}
14187
14188extern __inline __mmask8
14189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14190_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
14191{
14192 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14193 (__v8df) __Y, __P,
14194 (__mmask8) __U,
14195 _MM_FROUND_CUR_DIRECTION);
14196}
14197
7e23f4a6
OM
14198extern __inline __mmask8
14199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14200_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
14201{
14202 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14203 (__v8df) __Y, _CMP_EQ_OQ,
14204 (__mmask8) -1,
14205 _MM_FROUND_CUR_DIRECTION);
14206}
14207
14208extern __inline __mmask8
14209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14210_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14211{
14212 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14213 (__v8df) __Y, _CMP_EQ_OQ,
14214 (__mmask8) __U,
14215 _MM_FROUND_CUR_DIRECTION);
14216}
14217
14218extern __inline __mmask8
14219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14220_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
14221{
14222 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14223 (__v8df) __Y, _CMP_LT_OS,
14224 (__mmask8) -1,
14225 _MM_FROUND_CUR_DIRECTION);
14226}
14227
14228extern __inline __mmask8
14229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14230_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14231{
14232 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14233 (__v8df) __Y, _CMP_LT_OS,
14234 (__mmask8) __U,
14235 _MM_FROUND_CUR_DIRECTION);
14236}
14237
14238extern __inline __mmask8
14239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14240_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
14241{
14242 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14243 (__v8df) __Y, _CMP_LE_OS,
14244 (__mmask8) -1,
14245 _MM_FROUND_CUR_DIRECTION);
14246}
14247
14248extern __inline __mmask8
14249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14250_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14251{
14252 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14253 (__v8df) __Y, _CMP_LE_OS,
14254 (__mmask8) __U,
14255 _MM_FROUND_CUR_DIRECTION);
14256}
14257
14258extern __inline __mmask8
14259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14260_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
14261{
14262 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14263 (__v8df) __Y, _CMP_UNORD_Q,
14264 (__mmask8) -1,
14265 _MM_FROUND_CUR_DIRECTION);
14266}
14267
14268extern __inline __mmask8
14269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14270_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14271{
14272 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14273 (__v8df) __Y, _CMP_UNORD_Q,
14274 (__mmask8) __U,
14275 _MM_FROUND_CUR_DIRECTION);
14276}
14277
14278extern __inline __mmask8
14279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14280_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
14281{
14282 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14283 (__v8df) __Y, _CMP_NEQ_UQ,
14284 (__mmask8) -1,
14285 _MM_FROUND_CUR_DIRECTION);
14286}
14287
14288extern __inline __mmask8
14289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14290_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14291{
14292 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14293 (__v8df) __Y, _CMP_NEQ_UQ,
14294 (__mmask8) __U,
14295 _MM_FROUND_CUR_DIRECTION);
14296}
14297
14298extern __inline __mmask8
14299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14300_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
14301{
14302 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14303 (__v8df) __Y, _CMP_NLT_US,
14304 (__mmask8) -1,
14305 _MM_FROUND_CUR_DIRECTION);
14306}
14307
14308extern __inline __mmask8
14309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14310_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14311{
14312 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14313 (__v8df) __Y, _CMP_NLT_US,
14314 (__mmask8) __U,
14315 _MM_FROUND_CUR_DIRECTION);
14316}
14317
14318extern __inline __mmask8
14319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14320_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
14321{
14322 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14323 (__v8df) __Y, _CMP_NLE_US,
14324 (__mmask8) -1,
14325 _MM_FROUND_CUR_DIRECTION);
14326}
14327
14328extern __inline __mmask8
14329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14330_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14331{
14332 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14333 (__v8df) __Y, _CMP_NLE_US,
14334 (__mmask8) __U,
14335 _MM_FROUND_CUR_DIRECTION);
14336}
14337
14338extern __inline __mmask8
14339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14340_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
14341{
14342 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14343 (__v8df) __Y, _CMP_ORD_Q,
14344 (__mmask8) -1,
14345 _MM_FROUND_CUR_DIRECTION);
14346}
14347
14348extern __inline __mmask8
14349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14350_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14351{
14352 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14353 (__v8df) __Y, _CMP_ORD_Q,
14354 (__mmask8) __U,
14355 _MM_FROUND_CUR_DIRECTION);
14356}
14357
14358extern __inline __mmask16
14359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14360_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
14361{
14362 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14363 (__v16sf) __Y, _CMP_EQ_OQ,
14364 (__mmask16) -1,
14365 _MM_FROUND_CUR_DIRECTION);
14366}
14367
14368extern __inline __mmask16
14369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14370_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14371{
14372 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14373 (__v16sf) __Y, _CMP_EQ_OQ,
14374 (__mmask16) __U,
14375 _MM_FROUND_CUR_DIRECTION);
14376}
14377
14378extern __inline __mmask16
14379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14380_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
14381{
14382 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14383 (__v16sf) __Y, _CMP_LT_OS,
14384 (__mmask16) -1,
14385 _MM_FROUND_CUR_DIRECTION);
14386}
14387
14388extern __inline __mmask16
14389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14390_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14391{
14392 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14393 (__v16sf) __Y, _CMP_LT_OS,
14394 (__mmask16) __U,
14395 _MM_FROUND_CUR_DIRECTION);
14396}
14397
14398extern __inline __mmask16
14399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14400_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
14401{
14402 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14403 (__v16sf) __Y, _CMP_LE_OS,
14404 (__mmask16) -1,
14405 _MM_FROUND_CUR_DIRECTION);
14406}
14407
14408extern __inline __mmask16
14409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14410_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14411{
14412 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14413 (__v16sf) __Y, _CMP_LE_OS,
14414 (__mmask16) __U,
14415 _MM_FROUND_CUR_DIRECTION);
14416}
14417
14418extern __inline __mmask16
14419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14420_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
14421{
14422 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14423 (__v16sf) __Y, _CMP_UNORD_Q,
14424 (__mmask16) -1,
14425 _MM_FROUND_CUR_DIRECTION);
14426}
14427
14428extern __inline __mmask16
14429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14430_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14431{
14432 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14433 (__v16sf) __Y, _CMP_UNORD_Q,
14434 (__mmask16) __U,
14435 _MM_FROUND_CUR_DIRECTION);
14436}
14437
14438extern __inline __mmask16
14439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14440_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
14441{
14442 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14443 (__v16sf) __Y, _CMP_NEQ_UQ,
14444 (__mmask16) -1,
14445 _MM_FROUND_CUR_DIRECTION);
14446}
14447
14448extern __inline __mmask16
14449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14450_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14451{
14452 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14453 (__v16sf) __Y, _CMP_NEQ_UQ,
14454 (__mmask16) __U,
14455 _MM_FROUND_CUR_DIRECTION);
14456}
14457
14458extern __inline __mmask16
14459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14460_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
14461{
14462 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14463 (__v16sf) __Y, _CMP_NLT_US,
14464 (__mmask16) -1,
14465 _MM_FROUND_CUR_DIRECTION);
14466}
14467
14468extern __inline __mmask16
14469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14470_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14471{
14472 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14473 (__v16sf) __Y, _CMP_NLT_US,
14474 (__mmask16) __U,
14475 _MM_FROUND_CUR_DIRECTION);
14476}
14477
14478extern __inline __mmask16
14479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14480_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
14481{
14482 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14483 (__v16sf) __Y, _CMP_NLE_US,
14484 (__mmask16) -1,
14485 _MM_FROUND_CUR_DIRECTION);
14486}
14487
14488extern __inline __mmask16
14489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14490_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14491{
14492 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14493 (__v16sf) __Y, _CMP_NLE_US,
14494 (__mmask16) __U,
14495 _MM_FROUND_CUR_DIRECTION);
14496}
14497
14498extern __inline __mmask16
14499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14500_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
14501{
14502 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14503 (__v16sf) __Y, _CMP_ORD_Q,
14504 (__mmask16) -1,
14505 _MM_FROUND_CUR_DIRECTION);
14506}
14507
14508extern __inline __mmask16
14509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14510_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14511{
14512 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14513 (__v16sf) __Y, _CMP_ORD_Q,
14514 (__mmask16) __U,
14515 _MM_FROUND_CUR_DIRECTION);
14516}
14517
756c5857
AI
14518extern __inline __mmask8
14519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14520_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
14521{
14522 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14523 (__v2df) __Y, __P,
14524 (__mmask8) -1,
14525 _MM_FROUND_CUR_DIRECTION);
14526}
14527
14528extern __inline __mmask8
14529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14530_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
14531{
14532 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14533 (__v2df) __Y, __P,
14534 (__mmask8) __M,
14535 _MM_FROUND_CUR_DIRECTION);
14536}
14537
14538extern __inline __mmask8
14539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14540_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
14541{
14542 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14543 (__v4sf) __Y, __P,
14544 (__mmask8) -1,
14545 _MM_FROUND_CUR_DIRECTION);
14546}
14547
14548extern __inline __mmask8
14549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14550_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
14551{
14552 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14553 (__v4sf) __Y, __P,
14554 (__mmask8) __M,
14555 _MM_FROUND_CUR_DIRECTION);
14556}
14557
14558#else
/* Macro forms of the generic 512-bit floating-point compares, used
   when __OPTIMIZE__ is not defined.  X and Y are the vectors, P the
   predicate immediate, M (where present) the mask operand.

   M is parenthesized before being cast: with the previous
   `(__mmask8)M' spelling the cast bound only to the first operand of
   an expression argument such as `a | b'.  */

#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), \
                                            (int)(P), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), \
                                             (int)(P), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), \
                                            (int)(P), \
                                            (__mmask8)(M), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), \
                                             (int)(P), \
                                             (__mmask16)(M), \
                                             _MM_FROUND_CUR_DIRECTION))
14578
/* Macro forms of the scalar floating-point compares, used when
   __OPTIMIZE__ is not defined.  X and Y are the vectors, P the
   predicate immediate, M (where present) the mask operand.

   M is now parenthesized and cast to __mmask8, matching the inline
   definitions; previously it was pasted into the argument list bare.  */

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), \
                                         (int)(P), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
                                         (__v2df)(__m128d)(Y), \
                                         (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), \
                                         (int)(P), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), \
                                         (int)(P), \
                                         (__mmask8)(M), \
                                         _MM_FROUND_CUR_DIRECTION))
14598#endif
14599
2196a885
KY
14600extern __inline __mmask16
14601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14602_mm512_kmov (__mmask16 __A)
14603{
7cdb6e4c 14604 return __builtin_ia32_kmovw (__A);
2196a885
KY
14605}
14606
275be1da
IT
14607extern __inline __m512
14608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14609_mm512_castpd_ps (__m512d __A)
14610{
14611 return (__m512) (__A);
14612}
14613
14614extern __inline __m512i
14615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14616_mm512_castpd_si512 (__m512d __A)
14617{
14618 return (__m512i) (__A);
14619}
14620
14621extern __inline __m512d
14622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14623_mm512_castps_pd (__m512 __A)
14624{
14625 return (__m512d) (__A);
14626}
14627
14628extern __inline __m512i
14629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14630_mm512_castps_si512 (__m512 __A)
14631{
14632 return (__m512i) (__A);
14633}
14634
14635extern __inline __m512
14636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14637_mm512_castsi512_ps (__m512i __A)
14638{
14639 return (__m512) (__A);
14640}
14641
14642extern __inline __m512d
14643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14644_mm512_castsi512_pd (__m512i __A)
14645{
14646 return (__m512d) (__A);
14647}
14648
14649extern __inline __m128d
14650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14651_mm512_castpd512_pd128 (__m512d __A)
14652{
14653 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
14654}
14655
14656extern __inline __m128
14657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14658_mm512_castps512_ps128 (__m512 __A)
14659{
14660 return _mm512_extractf32x4_ps(__A, 0);
14661}
14662
14663extern __inline __m128i
14664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14665_mm512_castsi512_si128 (__m512i __A)
14666{
14667 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
14668}
14669
14670extern __inline __m256d
14671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14672_mm512_castpd512_pd256 (__m512d __A)
14673{
14674 return _mm512_extractf64x4_pd(__A, 0);
14675}
14676
14677extern __inline __m256
14678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14679_mm512_castps512_ps256 (__m512 __A)
14680{
14681 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
14682}
14683
14684extern __inline __m256i
14685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14686_mm512_castsi512_si256 (__m512i __A)
14687{
14688 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
14689}
14690
14691extern __inline __m512d
14692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14693_mm512_castpd128_pd512 (__m128d __A)
14694{
14695 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
14696}
14697
14698extern __inline __m512
14699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14700_mm512_castps128_ps512 (__m128 __A)
14701{
14702 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
14703}
14704
14705extern __inline __m512i
14706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14707_mm512_castsi128_si512 (__m128i __A)
14708{
14709 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
14710}
14711
14712extern __inline __m512d
14713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14714_mm512_castpd256_pd512 (__m256d __A)
14715{
14716 return __builtin_ia32_pd512_256pd (__A);
14717}
14718
14719extern __inline __m512
14720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14721_mm512_castps256_ps512 (__m256 __A)
14722{
14723 return __builtin_ia32_ps512_256ps (__A);
14724}
14725
14726extern __inline __m512i
14727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14728_mm512_castsi256_si512 (__m256i __A)
14729{
14730 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
14731}
14732
14733extern __inline __mmask16
14734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14735_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
14736{
14737 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14738 (__v16si) __B, 0,
14739 (__mmask16) -1);
14740}
14741
14742extern __inline __mmask16
14743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14744_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14745{
14746 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14747 (__v16si) __B, 0, __U);
14748}
14749
14750extern __inline __mmask8
14751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14752_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14753{
14754 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14755 (__v8di) __B, 0, __U);
14756}
14757
14758extern __inline __mmask8
14759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14760_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
14761{
14762 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14763 (__v8di) __B, 0,
14764 (__mmask8) -1);
14765}
14766
14767extern __inline __mmask16
14768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14769_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
14770{
14771 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14772 (__v16si) __B, 6,
14773 (__mmask16) -1);
14774}
14775
14776extern __inline __mmask16
14777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14778_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14779{
14780 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14781 (__v16si) __B, 6, __U);
14782}
14783
14784extern __inline __mmask8
14785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14786_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14787{
14788 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14789 (__v8di) __B, 6, __U);
14790}
14791
14792extern __inline __mmask8
14793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14794_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
14795{
14796 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14797 (__v8di) __B, 6,
14798 (__mmask8) -1);
14799}
14800
167a5b77
JJ
/* Function-body generator for the 32-bit integer reductions that use a
   C operator (op).  Requires a __m512i named __A in scope.  The two
   256-bit parts of __A (extract indices 1 and 0) are combined with op,
   then the two 128-bit parts of that result, then the 128-bit value is
   combined with its { 2, 3, 0, 1 } shuffle, and finally elements 0 and
   1 are combined as scalars.  The expansion ends in a `return', so it
   must be the last statement of the enclosing function.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __hi256 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8si __lo256 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __acc256 = (__m256i) (__hi256 op __lo256); \
  __v4si __hi128 = (__v4si) _mm256_extracti128_si256 (__acc256, 1); \
  __v4si __lo128 = (__v4si) _mm256_extracti128_si256 (__acc256, 0); \
  __v4si __acc128 = __hi128 op __lo128; \
  __v4si __swapped = __builtin_shuffle (__acc128, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __acc64 = __acc128 op __swapped; \
  return __acc64[0] op __acc64[1]
14812
14813extern __inline int
14814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14815_mm512_reduce_add_epi32 (__m512i __A)
14816{
14817 __MM512_REDUCE_OP (+);
14818}
14819
14820extern __inline int
14821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14822_mm512_reduce_mul_epi32 (__m512i __A)
14823{
14824 __MM512_REDUCE_OP (*);
14825}
14826
14827extern __inline int
14828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14829_mm512_reduce_and_epi32 (__m512i __A)
14830{
14831 __MM512_REDUCE_OP (&);
14832}
14833
14834extern __inline int
14835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14836_mm512_reduce_or_epi32 (__m512i __A)
14837{
14838 __MM512_REDUCE_OP (|);
14839}
14840
14841extern __inline int
14842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14843_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
14844{
14845 __A = _mm512_maskz_mov_epi32 (__U, __A);
14846 __MM512_REDUCE_OP (+);
14847}
14848
14849extern __inline int
14850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14851_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
14852{
14853 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
14854 __MM512_REDUCE_OP (*);
14855}
14856
14857extern __inline int
14858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14859_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
14860{
14861 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14862 __MM512_REDUCE_OP (&);
14863}
14864
14865extern __inline int
14866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14867_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
14868{
14869 __A = _mm512_maskz_mov_epi32 (__U, __A);
14870 __MM512_REDUCE_OP (|);
14871}
14872
/* Function-body generator for the 32-bit integer reductions that use
   an intrinsic pair _mm256_<op> / _mm_<op> (op is pasted after the
   prefix).  Requires a __m512i named __A in scope.  The two 256-bit
   parts of __A are combined, then the two 128-bit parts of that
   result, then the value is combined with its { 2, 3, 0, 1 } shuffle
   and once more with a { 1, 0, 1, 0 } shuffle; element 0 of the final
   vector is returned.  The expansion ends in a `return', so it must be
   the last statement of the enclosing function.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __hi256 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
  __m256i __lo256 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __acc256 = _mm256_##op (__hi256, __lo256); \
  __m128i __hi128 = (__m128i) _mm256_extracti128_si256 (__acc256, 1); \
  __m128i __lo128 = (__m128i) _mm256_extracti128_si256 (__acc256, 0); \
  __m128i __acc128 = _mm_##op (__hi128, __lo128); \
  __m128i __swap64 = (__m128i) __builtin_shuffle ((__v4si) __acc128, \
                                                  (__v4si) { 2, 3, 0, 1 }); \
  __m128i __acc64 = _mm_##op (__acc128, __swap64); \
  __m128i __swap32 = (__m128i) __builtin_shuffle ((__v4si) __acc64, \
                                                  (__v4si) { 1, 0, 1, 0 }); \
  __v4si __acc32 = (__v4si) _mm_##op (__acc64, __swap32); \
  return __acc32[0]
14888
14889extern __inline int
14890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14891_mm512_reduce_min_epi32 (__m512i __A)
14892{
14893 __MM512_REDUCE_OP (min_epi32);
14894}
14895
14896extern __inline int
14897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14898_mm512_reduce_max_epi32 (__m512i __A)
14899{
14900 __MM512_REDUCE_OP (max_epi32);
14901}
14902
14903extern __inline unsigned int
14904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14905_mm512_reduce_min_epu32 (__m512i __A)
14906{
14907 __MM512_REDUCE_OP (min_epu32);
14908}
14909
14910extern __inline unsigned int
14911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14912_mm512_reduce_max_epu32 (__m512i __A)
14913{
14914 __MM512_REDUCE_OP (max_epu32);
14915}
14916
14917extern __inline int
14918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14919_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
14920{
14921 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
14922 __MM512_REDUCE_OP (min_epi32);
14923}
14924
14925extern __inline int
14926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14927_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
14928{
14929 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
14930 __MM512_REDUCE_OP (max_epi32);
14931}
14932
14933extern __inline unsigned int
14934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14935_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
14936{
14937 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14938 __MM512_REDUCE_OP (min_epu32);
14939}
14940
14941extern __inline unsigned int
14942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14943_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
14944{
14945 __A = _mm512_maskz_mov_epi32 (__U, __A);
14946 __MM512_REDUCE_OP (max_epu32);
14947}
14948
/* Function-body generator for the single-precision reductions that use
   a C operator (op).  Requires a __m512 named __A in scope.  The two
   256-bit parts of __A (extract indices 1 and 0) are combined with op,
   then the two 128-bit parts of that result, then the 128-bit value is
   combined with its { 2, 3, 0, 1 } shuffle, and finally elements 0 and
   1 are combined as scalars.  The expansion ends in a `return', so it
   must be the last statement of the enclosing function.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __hi256 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __lo256 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __acc256 = __hi256 op __lo256; \
  __m128 __hi128 = _mm256_extractf128_ps (__acc256, 1); \
  __m128 __lo128 = _mm256_extractf128_ps (__acc256, 0); \
  __m128 __acc128 = __hi128 op __lo128; \
  __m128 __swapped = __builtin_shuffle (__acc128, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __acc64 = __acc128 op __swapped; \
  return __acc64[0] op __acc64[1]
14960
/* Return the sum of the float elements of __A, computed by expanding
   __MM512_REDUCE_OP with `+'.  The additions are performed pairwise in
   the order fixed by the macro, not sequentially from element 0.  */
14961extern __inline float
14962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14963_mm512_reduce_add_ps (__m512 __A)
14964{
14965 __MM512_REDUCE_OP (+);
14966}
14967
/* Return the product of the float elements of __A, computed by
   expanding __MM512_REDUCE_OP with `*'.  The multiplications are
   performed pairwise in the order fixed by the macro.  */
14968extern __inline float
14969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14970_mm512_reduce_mul_ps (__m512 __A)
14971{
14972 __MM512_REDUCE_OP (*);
14973}
14974
/* Masked float sum.  __A is first passed through _mm512_maskz_mov_ps
   with mask __U and the result is then summed with __MM512_REDUCE_OP.
   NOTE(review): presumably the maskz form zeroes lanes whose __U bit is
   clear, so they contribute the additive identity -- confirm.  */
14975extern __inline float
14976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14977_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
14978{
14979 __A = _mm512_maskz_mov_ps (__U, __A);
14980 __MM512_REDUCE_OP (+);
14981}
14982
/* Masked float product.  __A is first rebuilt with _mm512_mask_mov_ps
   from an all-1.0f vector under mask __U, then multiplied together with
   __MM512_REDUCE_OP.  1.0f is the multiplicative identity, so
   substituted lanes do not change the product.
   NOTE(review): assumes _mm512_mask_mov_ps keeps the __A lane where the
   __U bit is set and takes the fill value otherwise -- confirm.  */
14983extern __inline float
14984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14985_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
14986{
14987 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
14988 __MM512_REDUCE_OP (*);
14989}
14990
/* Redefine __MM512_REDUCE_OP for float reductions where OP is an
   intrinsic name suffix (used below with min_ps and max_ps); it is
   token-pasted onto _mm256_ and _mm_ to pick the 256-bit and 128-bit
   intrinsic.  The expansion reads a variable named __A from the
   enclosing function and ends in a `return', so it has to be the final
   statement of that function.

   Steps: combine the upper and lower 256-bit halves of __A; combine the
   two 128-bit halves of that; combine with a copy whose element pairs
   {0,1} and {2,3} are swapped; combine with a copy whose elements 0 and
   1 are swapped; return element 0.  */
14991#undef __MM512_REDUCE_OP
14992#define __MM512_REDUCE_OP(op) \
14993 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14994 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14995 __m256 __T3 = _mm256_##op (__T1, __T2); \
14996 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14997 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14998 __m128 __T6 = _mm_##op (__T4, __T5); \
14999 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15000 __m128 __T8 = _mm_##op (__T6, __T7); \
15001 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
15002 __m128 __T10 = _mm_##op (__T8, __T9); \
15003 return __T10[0]
15004
/* Reduce the float elements of __A to one value by repeatedly applying
   _mm256_min_ps / _mm_min_ps through __MM512_REDUCE_OP.  */
15005extern __inline float
15006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15007_mm512_reduce_min_ps (__m512 __A)
15008{
15009 __MM512_REDUCE_OP (min_ps);
15010}
15011
/* Reduce the float elements of __A to one value by repeatedly applying
   _mm256_max_ps / _mm_max_ps through __MM512_REDUCE_OP.  */
15012extern __inline float
15013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15014_mm512_reduce_max_ps (__m512 __A)
15015{
15016 __MM512_REDUCE_OP (max_ps);
15017}
15018
/* Masked float minimum reduction.  __A is first rebuilt with
   _mm512_mask_mov_ps from a vector of +infinity (__builtin_inff) under
   mask __U, then reduced with min_ps.  +infinity serves as the fill
   value so substituted lanes cannot lower the minimum.
   NOTE(review): assumes _mm512_mask_mov_ps keeps the __A lane where the
   __U bit is set and takes the fill value otherwise -- confirm.  */
15019extern __inline float
15020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15021_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
15022{
15023 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
15024 __MM512_REDUCE_OP (min_ps);
15025}
15026
/* Masked float maximum reduction.  __A is first rebuilt with
   _mm512_mask_mov_ps from a vector of -infinity (negated
   __builtin_inff) under mask __U, then reduced with max_ps.  -infinity
   serves as the fill value so substituted lanes cannot raise the
   maximum.
   NOTE(review): assumes _mm512_mask_mov_ps keeps the __A lane where the
   __U bit is set and takes the fill value otherwise -- confirm.  */
15027extern __inline float
15028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15029_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
15030{
15031 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
15032 __MM512_REDUCE_OP (max_ps);
15033}
15034
/* Redefine __MM512_REDUCE_OP for 64-bit integer reductions where OP is
   a plain C binary operator (used below with +, *, & and |).  The
   expansion reads a variable named __A from the enclosing function and
   ends in a `return', so it has to be the final statement of that
   function.

   Steps: extract the upper and lower 256-bit halves of __A as __v4di so
   the operator applies element-wise, and combine them; extract the two
   128-bit halves of that as __v2di and combine them; finally combine
   the two remaining elements as scalars.  */
15035#undef __MM512_REDUCE_OP
15036#define __MM512_REDUCE_OP(op) \
15037 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
15038 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
15039 __m256i __T3 = (__m256i) (__T1 op __T2); \
15040 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
15041 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
15042 __v2di __T6 = __T4 op __T5; \
15043 return __T6[0] op __T6[1]
15044
/* Return the sum of the 64-bit integer elements of __A, computed by
   expanding __MM512_REDUCE_OP with `+'.  */
15045extern __inline long long
15046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15047_mm512_reduce_add_epi64 (__m512i __A)
15048{
15049 __MM512_REDUCE_OP (+);
15050}
15051
/* Return the product of the 64-bit integer elements of __A, computed
   by expanding __MM512_REDUCE_OP with `*'.  */
15052extern __inline long long
15053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15054_mm512_reduce_mul_epi64 (__m512i __A)
15055{
15056 __MM512_REDUCE_OP (*);
15057}
15058
/* Return the bitwise AND of the 64-bit integer elements of __A,
   computed by expanding __MM512_REDUCE_OP with `&'.  */
15059extern __inline long long
15060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15061_mm512_reduce_and_epi64 (__m512i __A)
15062{
15063 __MM512_REDUCE_OP (&);
15064}
15065
/* Return the bitwise OR of the 64-bit integer elements of __A,
   computed by expanding __MM512_REDUCE_OP with `|'.  */
15066extern __inline long long
15067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15068_mm512_reduce_or_epi64 (__m512i __A)
15069{
15070 __MM512_REDUCE_OP (|);
15071}
15072
/* Masked 64-bit integer sum.  __A is first passed through
   _mm512_maskz_mov_epi64 with mask __U and the result is then summed
   with __MM512_REDUCE_OP.
   NOTE(review): presumably the maskz form zeroes lanes whose __U bit is
   clear, so they contribute the additive identity -- confirm.  */
15073extern __inline long long
15074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15075_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
15076{
15077 __A = _mm512_maskz_mov_epi64 (__U, __A);
15078 __MM512_REDUCE_OP (+);
15079}
15080
/* Masked 64-bit integer product.  __A is first rebuilt with
   _mm512_mask_mov_epi64 from an all-1LL vector under mask __U, then
   multiplied together with __MM512_REDUCE_OP.  1 is the multiplicative
   identity, so substituted lanes do not change the product.
   NOTE(review): assumes _mm512_mask_mov_epi64 keeps the __A lane where
   the __U bit is set and takes the fill value otherwise -- confirm.  */
15081extern __inline long long
15082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15083_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
15084{
15085 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
15086 __MM512_REDUCE_OP (*);
15087}
15088
/* Masked 64-bit bitwise AND reduction.  __A is first rebuilt with
   _mm512_mask_mov_epi64 from a vector of ~0LL (all bits set) under mask
   __U, then ANDed together with __MM512_REDUCE_OP.  All-ones is the
   identity for AND, so substituted lanes clear no bits.
   NOTE(review): assumes _mm512_mask_mov_epi64 keeps the __A lane where
   the __U bit is set and takes the fill value otherwise -- confirm.  */
15089extern __inline long long
15090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15091_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
15092{
15093 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15094 __MM512_REDUCE_OP (&);
15095}
15096
/* Masked 64-bit bitwise OR reduction.  __A is first passed through
   _mm512_maskz_mov_epi64 with mask __U and the result is then ORed
   together with __MM512_REDUCE_OP.
   NOTE(review): presumably the maskz form zeroes lanes whose __U bit is
   clear, zero being the identity for OR -- confirm.  */
15097extern __inline long long
15098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15099_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
15100{
15101 __A = _mm512_maskz_mov_epi64 (__U, __A);
15102 __MM512_REDUCE_OP (|);
15103}
15104
/* Redefine __MM512_REDUCE_OP for 64-bit integer reductions where OP is
   an intrinsic name suffix (used below with min/max_epi64 and
   min/max_epu64); it is token-pasted onto _mm512_, so every step stays
   at 512-bit width.  The expansion reads a variable named __A from the
   enclosing function and ends in a `return', so it has to be the final
   statement of that function.

   Steps: combine __A with _mm512_shuffle_i64x2 (__A, __A, 0x4e), whose
   immediate selects the 128-bit chunks in the order 2,3,0,1 (i.e. the
   256-bit halves exchanged); combine with a copy in which adjacent
   pairs of 64-bit elements are exchanged ({2,3,0,1,...}); combine with
   a copy in which neighbouring elements are exchanged ({1,0,3,2,...});
   return element 0 of the result.  */
15105#undef __MM512_REDUCE_OP
15106#define __MM512_REDUCE_OP(op) \
15107 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
15108 __m512i __T2 = _mm512_##op (__A, __T1); \
15109 __m512i __T3 \
15110 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
15111 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
15112 __m512i __T4 = _mm512_##op (__T2, __T3); \
15113 __m512i __T5 \
15114 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
15115 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
15116 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
15117 return __T6[0]
15118
/* Reduce the 64-bit elements of __A to one long long by repeatedly
   applying _mm512_min_epi64 through __MM512_REDUCE_OP.  */
15119extern __inline long long
15120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15121_mm512_reduce_min_epi64 (__m512i __A)
15122{
15123 __MM512_REDUCE_OP (min_epi64);
15124}
15125
/* Reduce the 64-bit elements of __A to one long long by repeatedly
   applying _mm512_max_epi64 through __MM512_REDUCE_OP.  */
15126extern __inline long long
15127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15128_mm512_reduce_max_epi64 (__m512i __A)
15129{
15130 __MM512_REDUCE_OP (max_epi64);
15131}
15132
/* Masked signed 64-bit minimum reduction.  __A is first rebuilt with
   _mm512_mask_mov_epi64 from an all-__LONG_LONG_MAX__ vector under mask
   __U, then reduced with min_epi64.  __LONG_LONG_MAX__ is the identity
   for a signed minimum.
   NOTE(review): assumes _mm512_mask_mov_epi64 keeps the __A lane where
   the __U bit is set and takes the fill value otherwise -- confirm.  */
15133extern __inline long long
15134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15135_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
15136{
15137 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
15138 __U, __A);
15139 __MM512_REDUCE_OP (min_epi64);
15140}
15141
/* Masked signed 64-bit maximum reduction.  __A is first rebuilt with
   _mm512_mask_mov_epi64 from a vector of -__LONG_LONG_MAX__ - 1 (the
   most negative long long) under mask __U, then reduced with max_epi64.
   That fill value is the identity for a signed maximum.
   NOTE(review): assumes _mm512_mask_mov_epi64 keeps the __A lane where
   the __U bit is set and takes the fill value otherwise -- confirm.  */
15142extern __inline long long
15143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15144_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
15145{
15146 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
15147 __U, __A);
15148 __MM512_REDUCE_OP (max_epi64);
15149}
15150
/* Reduce the 64-bit elements of __A to one unsigned long long by
   repeatedly applying _mm512_min_epu64 through __MM512_REDUCE_OP.  */
15151extern __inline unsigned long long
15152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15153_mm512_reduce_min_epu64 (__m512i __A)
15154{
15155 __MM512_REDUCE_OP (min_epu64);
15156}
15157
/* Reduce the 64-bit elements of __A to one unsigned long long by
   repeatedly applying _mm512_max_epu64 through __MM512_REDUCE_OP.  */
15158extern __inline unsigned long long
15159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15160_mm512_reduce_max_epu64 (__m512i __A)
15161{
15162 __MM512_REDUCE_OP (max_epu64);
15163}
15164
/* Masked unsigned 64-bit minimum reduction.  __A is first rebuilt with
   _mm512_mask_mov_epi64 from a vector of ~0LL (all bits set, the
   largest value when read as unsigned) under mask __U, then reduced
   with min_epu64.
   NOTE(review): assumes _mm512_mask_mov_epi64 keeps the __A lane where
   the __U bit is set and takes the fill value otherwise -- confirm.  */
15165extern __inline unsigned long long
15166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15167_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
15168{
15169 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15170 __MM512_REDUCE_OP (min_epu64);
15171}
15172
/* Masked unsigned 64-bit maximum reduction.  __A is first passed
   through _mm512_maskz_mov_epi64 with mask __U, then reduced with
   max_epu64.
   NOTE(review): presumably the maskz form zeroes lanes whose __U bit is
   clear, zero being the identity for an unsigned maximum -- confirm.  */
15173extern __inline unsigned long long
15174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15175_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
15176{
15177 __A = _mm512_maskz_mov_epi64 (__U, __A);
15178 __MM512_REDUCE_OP (max_epu64);
15179}
15180
/* Redefine __MM512_REDUCE_OP for double reductions where OP is a plain
   C binary operator (used below with + and *).  The expansion reads a
   variable named __A from the enclosing function and ends in a
   `return', so it has to be the final statement of that function.

   Steps: extract the upper and lower 256-bit halves of __A and combine
   them; extract the two 128-bit halves of that and combine them;
   finally combine the two remaining elements as scalars.  */
15181#undef __MM512_REDUCE_OP
15182#define __MM512_REDUCE_OP(op) \
15183 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
15184 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
15185 __m256d __T3 = __T1 op __T2; \
15186 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
15187 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
15188 __m128d __T6 = __T4 op __T5; \
15189 return __T6[0] op __T6[1]
15190
/* Return the sum of the double elements of __A, computed by expanding
   __MM512_REDUCE_OP with `+'.  The additions are performed pairwise in
   the order fixed by the macro, not sequentially from element 0.  */
15191extern __inline double
15192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15193_mm512_reduce_add_pd (__m512d __A)
15194{
15195 __MM512_REDUCE_OP (+);
15196}
15197
/* Return the product of the double elements of __A, computed by
   expanding __MM512_REDUCE_OP with `*'.  The multiplications are
   performed pairwise in the order fixed by the macro.  */
15198extern __inline double
15199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15200_mm512_reduce_mul_pd (__m512d __A)
15201{
15202 __MM512_REDUCE_OP (*);
15203}
15204
/* Masked double sum.  __A is first passed through _mm512_maskz_mov_pd
   with mask __U and the result is then summed with __MM512_REDUCE_OP.
   NOTE(review): presumably the maskz form zeroes lanes whose __U bit is
   clear, so they contribute the additive identity -- confirm.  */
15205extern __inline double
15206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15207_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
15208{
15209 __A = _mm512_maskz_mov_pd (__U, __A);
15210 __MM512_REDUCE_OP (+);
15211}
15212
/* Masked double product.  __A is first rebuilt with _mm512_mask_mov_pd
   from an all-1.0 vector under mask __U, then multiplied together with
   __MM512_REDUCE_OP.  1.0 is the multiplicative identity, so
   substituted lanes do not change the product.
   NOTE(review): assumes _mm512_mask_mov_pd keeps the __A lane where the
   __U bit is set and takes the fill value otherwise -- confirm.  */
15213extern __inline double
15214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15215_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
15216{
15217 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
15218 __MM512_REDUCE_OP (*);
15219}
15220
/* Redefine __MM512_REDUCE_OP for double reductions where OP is an
   intrinsic name suffix (used below with min_pd and max_pd); it is
   token-pasted onto _mm256_ and _mm_ to pick the 256-bit and 128-bit
   intrinsic.  The expansion reads a variable named __A from the
   enclosing function and ends in a `return', so it has to be the final
   statement of that function.

   Steps: combine the upper and lower 256-bit halves of __A; combine the
   two 128-bit halves of that; combine with a copy whose two elements
   are swapped; return element 0.  */
15221#undef __MM512_REDUCE_OP
15222#define __MM512_REDUCE_OP(op) \
15223 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
15224 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
15225 __m256d __T3 = _mm256_##op (__T1, __T2); \
15226 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
15227 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
15228 __m128d __T6 = _mm_##op (__T4, __T5); \
15229 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
15230 __m128d __T8 = _mm_##op (__T6, __T7); \
15231 return __T8[0]
15232
/* Reduce the double elements of __A to one value by repeatedly applying
   _mm256_min_pd / _mm_min_pd through __MM512_REDUCE_OP.  */
15233extern __inline double
15234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15235_mm512_reduce_min_pd (__m512d __A)
15236{
15237 __MM512_REDUCE_OP (min_pd);
15238}
15239
/* Reduce the double elements of __A to one value by repeatedly applying
   _mm256_max_pd / _mm_max_pd through __MM512_REDUCE_OP.  */
15240extern __inline double
15241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15242_mm512_reduce_max_pd (__m512d __A)
15243{
15244 __MM512_REDUCE_OP (max_pd);
15245}
15246
/* Masked double minimum reduction.  __A is first rebuilt with
   _mm512_mask_mov_pd from a vector of +infinity (__builtin_inf) under
   mask __U, then reduced with min_pd.  +infinity serves as the fill
   value so substituted lanes cannot lower the minimum.
   NOTE(review): assumes _mm512_mask_mov_pd keeps the __A lane where the
   __U bit is set and takes the fill value otherwise -- confirm.  */
15247extern __inline double
15248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15249_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
15250{
15251 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
15252 __MM512_REDUCE_OP (min_pd);
15253}
15254
/* Masked double maximum reduction.  __A is first rebuilt with
   _mm512_mask_mov_pd from a vector of -infinity (negated __builtin_inf)
   under mask __U, then reduced with max_pd.  -infinity serves as the
   fill value so substituted lanes cannot raise the maximum.
   NOTE(review): assumes _mm512_mask_mov_pd keeps the __A lane where the
   __U bit is set and takes the fill value otherwise -- confirm.  */
15255extern __inline double
15256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15257_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
15258{
15259 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
15260 __MM512_REDUCE_OP (max_pd);
15261}
15262
/* The reduction helper macro is internal to this header; drop it so it
   does not remain defined for code that includes the header.  */
15263#undef __MM512_REDUCE_OP
15264
756c5857
AI
/* If the top of this header had to enable the avx512f target itself
   (marked by __DISABLE_AVX512F__), undo that: clear the marker and pop
   the options saved by the matching `#pragma GCC push_options'.  */
15265#ifdef __DISABLE_AVX512F__
15266#undef __DISABLE_AVX512F__
15267#pragma GCC pop_options
15268#endif /* __DISABLE_AVX512F__ */
15269
15270#endif /* _AVX512FINTRIN_H_INCLUDED */
This page took 3.078271 seconds and 5 git commands to generate.