]> gcc.gnu.org Git - gcc.git/blob - gcc/config/i386/sse.md
Enable avx512 embedded broadcast for vpternlog.
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE2
25 UNSPEC_MOVDI_TO_SSE
26
27 ;; SSE3
28 UNSPEC_LDDQU
29
30 ;; SSSE3
31 UNSPEC_PSIGN
32 UNSPEC_PALIGNR
33
34 ;; For SSE4A support
35 UNSPEC_EXTRQI
36 UNSPEC_EXTRQ
37 UNSPEC_INSERTQI
38 UNSPEC_INSERTQ
39
40 ;; For SSE4.1 support
41 UNSPEC_INSERTPS
42 UNSPEC_DP
43 UNSPEC_MOVNTDQA
44 UNSPEC_MPSADBW
45 UNSPEC_PHMINPOSUW
46 UNSPEC_PTEST
47
48 ;; For SSE4.2 support
49 UNSPEC_PCMPESTR
50 UNSPEC_PCMPISTR
51
52 ;; For FMA4 support
53 UNSPEC_FMADDSUB
54 UNSPEC_XOP_UNSIGNED_CMP
55 UNSPEC_XOP_TRUEFALSE
56 UNSPEC_FRCZ
57
58 ;; For AES support
59 UNSPEC_AESENC
60 UNSPEC_AESENCLAST
61 UNSPEC_AESDEC
62 UNSPEC_AESDECLAST
63 UNSPEC_AESIMC
64 UNSPEC_AESKEYGENASSIST
65
66 ;; For PCLMUL support
67 UNSPEC_PCLMUL
68
69 ;; For AVX support
70 UNSPEC_PCMP
71 UNSPEC_VPERMIL
72 UNSPEC_VPERMIL2
73 UNSPEC_VPERMIL2F128
74 UNSPEC_CAST
75 UNSPEC_VTESTP
76 UNSPEC_VCVTPH2PS
77 UNSPEC_VCVTPS2PH
78
79 ;; For AVX2 support
80 UNSPEC_VPERMVAR
81 UNSPEC_VPERMTI
82 UNSPEC_GATHER
83 UNSPEC_VSIBADDR
84
85 ;; For AVX512F support
86 UNSPEC_VPERMT2
87 UNSPEC_UNSIGNED_FIX_NOTRUNC
88 UNSPEC_UNSIGNED_PCMP
89 UNSPEC_TESTM
90 UNSPEC_TESTNM
91 UNSPEC_SCATTER
92 UNSPEC_RCP14
93 UNSPEC_RSQRT14
94 UNSPEC_FIXUPIMM
95 UNSPEC_VTERNLOG
96 UNSPEC_GETEXP
97 UNSPEC_GETMANT
98 UNSPEC_ALIGN
99 UNSPEC_CONFLICT
100 UNSPEC_COMPRESS
101 UNSPEC_COMPRESS_STORE
102 UNSPEC_EXPAND
103 UNSPEC_MASKED_EQ
104 UNSPEC_MASKED_GT
105
106 ;; Mask operations
107 UNSPEC_MASKOP
108 UNSPEC_KORTEST
109 UNSPEC_KTEST
110 ;; Mask load
111 UNSPEC_MASKLOAD
112
113 ;; For the embedded-rounding feature
114 UNSPEC_EMBEDDED_ROUNDING
115
116 ;; For AVX512PF support
117 UNSPEC_GATHER_PREFETCH
118 UNSPEC_SCATTER_PREFETCH
119
120 ;; For AVX512ER support
121 UNSPEC_EXP2
122 UNSPEC_RCP28
123 UNSPEC_RSQRT28
124
125 ;; For SHA support
126 UNSPEC_SHA1MSG1
127 UNSPEC_SHA1MSG2
128 UNSPEC_SHA1NEXTE
129 UNSPEC_SHA1RNDS4
130 UNSPEC_SHA256MSG1
131 UNSPEC_SHA256MSG2
132 UNSPEC_SHA256RNDS2
133
134 ;; For AVX512BW support
135 UNSPEC_DBPSADBW
136 UNSPEC_PMADDUBSW512
137 UNSPEC_PMADDWD512
138 UNSPEC_PSHUFHW
139 UNSPEC_PSHUFLW
140 UNSPEC_CVTINT2MASK
141
142 ;; For AVX512DQ support
143 UNSPEC_REDUCE
144 UNSPEC_FPCLASS
145 UNSPEC_RANGE
146
147 ;; For AVX512IFMA support
148 UNSPEC_VPMADD52LUQ
149 UNSPEC_VPMADD52HUQ
150
151 ;; For AVX512VBMI support
152 UNSPEC_VPMULTISHIFT
153
154 ;; For AVX5124FMAPS/AVX5124VNNIW support
155 UNSPEC_VP4FMADD
156 UNSPEC_VP4FNMADD
157 UNSPEC_VP4DPWSSD
158 UNSPEC_VP4DPWSSDS
159
160 ;; For GFNI support
161 UNSPEC_GF2P8AFFINEINV
162 UNSPEC_GF2P8AFFINE
163 UNSPEC_GF2P8MUL
164
165 ;; For AVX512VBMI2 support
166 UNSPEC_VPSHLD
167 UNSPEC_VPSHRD
168 UNSPEC_VPSHRDV
169 UNSPEC_VPSHLDV
170
171 ;; For AVX512VNNI support
172 UNSPEC_VPMADDUBSWACCD
173 UNSPEC_VPMADDUBSWACCSSD
174 UNSPEC_VPMADDWDACCD
175 UNSPEC_VPMADDWDACCSSD
176
177 ;; For VAES support
178 UNSPEC_VAESDEC
179 UNSPEC_VAESDECLAST
180 UNSPEC_VAESENC
181 UNSPEC_VAESENCLAST
182
183 ;; For VPCLMULQDQ support
184 UNSPEC_VPCLMULQDQ
185
186 ;; For AVX512BITALG support
187 UNSPEC_VPSHUFBIT
188
189 ;; For VP2INTERSECT support
190 UNSPEC_VP2INTERSECT
191
192 ;; For AVX512BF16 support
193 UNSPEC_VCVTNE2PS2BF16
194 UNSPEC_VCVTNEPS2BF16
195 UNSPEC_VDPBF16PS
196 ])
197
198 (define_c_enum "unspecv" [
199 UNSPECV_LDMXCSR
200 UNSPECV_STMXCSR
201 UNSPECV_CLFLUSH
202 UNSPECV_MONITOR
203 UNSPECV_MWAIT
204 UNSPECV_VZEROALL
205
206 ;; For KEYLOCKER
207 UNSPECV_LOADIWKEY
208 UNSPECV_AESDEC128KLU8
209 UNSPECV_AESENC128KLU8
210 UNSPECV_AESDEC256KLU8
211 UNSPECV_AESENC256KLU8
212 UNSPECV_AESDECWIDE128KLU8
213 UNSPECV_AESENCWIDE128KLU8
214 UNSPECV_AESDECWIDE256KLU8
215 UNSPECV_AESENCWIDE256KLU8
216 UNSPECV_ENCODEKEY128U32
217 UNSPECV_ENCODEKEY256U32
218 ])
219
220 ;; All vector modes including V?TImode, used in move patterns.
221 (define_mode_iterator VMOVE
222 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
223 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
224 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
225 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
226 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
227 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
228 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
229
230 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as baseline.
231 (define_mode_iterator V48_AVX512VL
232 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
233 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
234 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
235 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
236
237 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as baseline.
238 (define_mode_iterator VI12_AVX512VL
239 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
240 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
241
242 ;; Same iterator, but without assuming TARGET_AVX512BW as baseline
243 (define_mode_iterator VI12_AVX512VLBW
244 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
245 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
246 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
247
248 (define_mode_iterator VI1_AVX512VL
249 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
250
251 ;; All vector modes
252 (define_mode_iterator V
253 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
254 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
255 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
256 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
257 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
258 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
259
260 ;; All 128bit vector modes
261 (define_mode_iterator V_128
262 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
263
264 ;; All 256bit vector modes
265 (define_mode_iterator V_256
266 [V32QI V16HI V8SI V4DI V8SF V4DF])
267
268 ;; All 128bit and 256bit vector modes
269 (define_mode_iterator V_128_256
270 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
271
272 ;; All 512bit vector modes
273 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
274
275 ;; All 256bit and 512bit vector modes
276 (define_mode_iterator V_256_512
277 [V32QI V16HI V8SI V4DI V8SF V4DF
278 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
279 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
280
281 ;; All vector float modes
282 (define_mode_iterator VF
283 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
284 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
285
286 ;; 128- and 256-bit float vector modes
287 (define_mode_iterator VF_128_256
288 [(V8SF "TARGET_AVX") V4SF
289 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
290
291 ;; All SFmode vector float modes
292 (define_mode_iterator VF1
293 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
294
295 (define_mode_iterator VF1_AVX2
296 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
297
298 ;; 128- and 256-bit SF vector modes
299 (define_mode_iterator VF1_128_256
300 [(V8SF "TARGET_AVX") V4SF])
301
302 (define_mode_iterator VF1_128_256VL
303 [V8SF (V4SF "TARGET_AVX512VL")])
304
305 ;; All DFmode vector float modes
306 (define_mode_iterator VF2
307 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
308
309 ;; 128- and 256-bit DF vector modes
310 (define_mode_iterator VF2_128_256
311 [(V4DF "TARGET_AVX") V2DF])
312
313 (define_mode_iterator VF2_512_256
314 [(V8DF "TARGET_AVX512F") V4DF])
315
316 (define_mode_iterator VF2_512_256VL
317 [V8DF (V4DF "TARGET_AVX512VL")])
318
319 ;; All 128bit vector float modes
320 (define_mode_iterator VF_128
321 [V4SF (V2DF "TARGET_SSE2")])
322
323 ;; All 256bit vector float modes
324 (define_mode_iterator VF_256
325 [V8SF V4DF])
326
327 ;; All 512bit vector float modes
328 (define_mode_iterator VF_512
329 [V16SF V8DF])
330
331 (define_mode_iterator VI48_AVX512VL
332 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
333 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
334
335 (define_mode_iterator VI1248_AVX512VLBW
336 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
337 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
338 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
339 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
340 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
341 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
342
343 (define_mode_iterator VF_AVX512VL
344 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
345 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
346
347 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
348 (define_mode_iterator VF1_AVX512ER_128_256
349 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
350
351 (define_mode_iterator VF2_AVX512VL
352 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
353
354 (define_mode_iterator VF1_AVX512VL
355 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
356
357 ;; All vector integer modes
358 (define_mode_iterator VI
359 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
360 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
361 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
362 (V8SI "TARGET_AVX") V4SI
363 (V4DI "TARGET_AVX") V2DI])
364
365 (define_mode_iterator VI_AVX2
366 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
367 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
368 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
369 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
370
371 ;; All QImode vector integer modes
372 (define_mode_iterator VI1
373 [(V32QI "TARGET_AVX") V16QI])
374
375 ;; All 128bit vector modes, plus their 256bit counterparts when TARGET_AVX
376 (define_mode_iterator V_AVX
377 [V16QI V8HI V4SI V2DI V4SF V2DF
378 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
379 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
380 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
381
382 (define_mode_iterator VI48_AVX
383 [V4SI V2DI
384 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
385
386 (define_mode_iterator VI8
387 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
388
389 (define_mode_iterator VI8_FVL
390 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
391
392 (define_mode_iterator VI8_AVX512VL
393 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
394
395 (define_mode_iterator VI8_256_512
396 [V8DI (V4DI "TARGET_AVX512VL")])
397
398 (define_mode_iterator VI1_AVX2
399 [(V32QI "TARGET_AVX2") V16QI])
400
401 (define_mode_iterator VI1_AVX512
402 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
403
404 (define_mode_iterator VI1_AVX512F
405 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
406
407 (define_mode_iterator VI12_256_512_AVX512VL
408 [V64QI (V32QI "TARGET_AVX512VL")
409 V32HI (V16HI "TARGET_AVX512VL")])
410
411 (define_mode_iterator VI2_AVX2
412 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
413
414 (define_mode_iterator VI2_AVX512F
415 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
416
417 (define_mode_iterator VI4_AVX
418 [(V8SI "TARGET_AVX") V4SI])
419
420 (define_mode_iterator VI4_AVX2
421 [(V8SI "TARGET_AVX2") V4SI])
422
423 (define_mode_iterator VI4_AVX512F
424 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
425
426 (define_mode_iterator VI4_AVX512VL
427 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
428
429 (define_mode_iterator VI48_AVX512F_AVX512VL
430 [V4SI V8SI (V16SI "TARGET_AVX512F")
431 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
432
433 (define_mode_iterator VI2_AVX512VL
434 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
435
436 (define_mode_iterator VI1_AVX512VL_F
437 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
438
439 (define_mode_iterator VI8_AVX2_AVX512BW
440 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
441
442 (define_mode_iterator VI8_AVX2
443 [(V4DI "TARGET_AVX2") V2DI])
444
445 (define_mode_iterator VI8_AVX2_AVX512F
446 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
447
448 (define_mode_iterator VI8_AVX_AVX512F
449 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
450
451 (define_mode_iterator VI4_128_8_256
452 [V4SI V4DI])
453
454 ;; All V8D* modes
455 (define_mode_iterator V8FI
456 [V8DF V8DI])
457
458 ;; All V16S* modes
459 (define_mode_iterator V16FI
460 [V16SF V16SI])
461
462 ;; ??? We should probably use TImode instead.
463 (define_mode_iterator VIMAX_AVX2_AVX512BW
464 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
465
466 ;; Assumes TARGET_AVX512BW as baseline
467 (define_mode_iterator VIMAX_AVX512VL
468 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
469
470 (define_mode_iterator VIMAX_AVX2
471 [(V2TI "TARGET_AVX2") V1TI])
472
473 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
474 (define_mode_iterator SSESCALARMODE
475 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
476
477 (define_mode_iterator VI12_AVX2
478 [(V32QI "TARGET_AVX2") V16QI
479 (V16HI "TARGET_AVX2") V8HI])
480
481 (define_mode_iterator VI12_AVX2_AVX512BW
482 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
483 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
484
485 (define_mode_iterator VI24_AVX2
486 [(V16HI "TARGET_AVX2") V8HI
487 (V8SI "TARGET_AVX2") V4SI])
488
489 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
490 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
491 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
492 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
493
494 (define_mode_iterator VI124_AVX2
495 [(V32QI "TARGET_AVX2") V16QI
496 (V16HI "TARGET_AVX2") V8HI
497 (V8SI "TARGET_AVX2") V4SI])
498
499 (define_mode_iterator VI2_AVX2_AVX512BW
500 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
501
502 (define_mode_iterator VI248_AVX512VL
503 [V32HI V16SI V8DI
504 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
505 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
506 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
507
508 (define_mode_iterator VI248_AVX512VLBW
509 [(V32HI "TARGET_AVX512BW")
510 (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
511 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
512 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
513 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
514
515 (define_mode_iterator VI48_AVX2
516 [(V8SI "TARGET_AVX2") V4SI
517 (V4DI "TARGET_AVX2") V2DI])
518
519 (define_mode_iterator VI248_AVX2
520 [(V16HI "TARGET_AVX2") V8HI
521 (V8SI "TARGET_AVX2") V4SI
522 (V4DI "TARGET_AVX2") V2DI])
523
524 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
525 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
526 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
527 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
528
529 (define_mode_iterator VI248_AVX512BW
530 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
531
532 (define_mode_iterator VI248_AVX512BW_AVX512VL
533 [(V32HI "TARGET_AVX512BW")
534 (V4DI "TARGET_AVX512VL") V16SI V8DI])
535
536 ;; Assumes TARGET_AVX512VL as baseline
537 (define_mode_iterator VI248_AVX512BW_1
538 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
539 V8SI V4SI
540 V2DI])
541
542 (define_mode_iterator VI248_AVX512BW_2
543 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
544 V8SI V4SI
545 V4DI V2DI])
546
547 (define_mode_iterator VI48_AVX512F
548 [(V16SI "TARGET_AVX512F") V8SI V4SI
549 (V8DI "TARGET_AVX512F") V4DI V2DI])
550
551 (define_mode_iterator VI48_AVX_AVX512F
552 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
553 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
554
555 (define_mode_iterator VI12_AVX_AVX512F
556 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
557 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
558
559 (define_mode_iterator V48_AVX2
560 [V4SF V2DF
561 V8SF V4DF
562 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
563 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
564
565 (define_mode_iterator VI1_AVX512VLBW
566 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
567 (V16QI "TARGET_AVX512VL")])
568
569 (define_mode_attr avx512
570 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
571 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
572 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
573 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
574 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
575 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
576
577 (define_mode_attr v_Yw
578 [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
579 (V8HI "Yw") (V16HI "Yw") (V32HI "v")
580 (V4SI "v") (V8SI "v") (V16SI "v")
581 (V2DI "v") (V4DI "v") (V8DI "v")
582 (V4SF "v") (V8SF "v") (V16SF "v")
583 (V2DF "v") (V4DF "v") (V8DF "v")
584 (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")])
585
586 (define_mode_attr sse2_avx_avx512f
587 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
588 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
589 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
590 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
591 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
592 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
593
594 (define_mode_attr sse2_avx2
595 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
596 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
597 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
598 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
599 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
600
601 (define_mode_attr ssse3_avx2
602 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
603 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
604 (V4SI "ssse3") (V8SI "avx2")
605 (V2DI "ssse3") (V4DI "avx2")
606 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
607
608 (define_mode_attr sse4_1_avx2
609 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
610 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
611 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
612 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
613
614 (define_mode_attr avx_avx2
615 [(V4SF "avx") (V2DF "avx")
616 (V8SF "avx") (V4DF "avx")
617 (V4SI "avx2") (V2DI "avx2")
618 (V8SI "avx2") (V4DI "avx2")])
619
620 (define_mode_attr vec_avx2
621 [(V16QI "vec") (V32QI "avx2")
622 (V8HI "vec") (V16HI "avx2")
623 (V4SI "vec") (V8SI "avx2")
624 (V2DI "vec") (V4DI "avx2")])
625
626 (define_mode_attr avx2_avx512
627 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
628 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
629 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
630 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
631 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
632
633 (define_mode_attr shuffletype
634 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
635 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
636 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
637 (V32HI "i") (V16HI "i") (V8HI "i")
638 (V64QI "i") (V32QI "i") (V16QI "i")
639 (V4TI "i") (V2TI "i") (V1TI "i")])
640
641 (define_mode_attr ssequartermode
642 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
643
644 (define_mode_attr ssequarterinsnmode
645 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
646
647 (define_mode_attr vecmemsuffix
648 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
649 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
650
651 (define_mode_attr ssedoublemodelower
652 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
653 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
654 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
655
656 (define_mode_attr ssedoublemode
657 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
658 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
659 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
660 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
661 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
662 (V4DI "V8DI") (V8DI "V16DI")])
663
664 (define_mode_attr ssebytemode
665 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
666 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
667
668 ;; All 128bit vector integer modes
669 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
670
671 ;; All 256bit vector integer modes
672 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
673
674 ;; All 128 and 256bit vector integer modes
675 (define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
676
677 ;; Various 128bit vector integer mode combinations
678 (define_mode_iterator VI12_128 [V16QI V8HI])
679 (define_mode_iterator VI14_128 [V16QI V4SI])
680 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
681 (define_mode_iterator VI24_128 [V8HI V4SI])
682 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
683 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
684 (define_mode_iterator VI248_512 [V32HI V16SI V8DI])
685 (define_mode_iterator VI48_128 [V4SI V2DI])
686 (define_mode_iterator VI148_512 [V64QI V16SI V8DI])
687 (define_mode_iterator VI148_256 [V32QI V8SI V4DI])
688 (define_mode_iterator VI148_128 [V16QI V4SI V2DI])
689
690 ;; Various 256bit and 512 vector integer mode combinations
691 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
692 (define_mode_iterator VI124_256_AVX512F_AVX512BW
693 [V32QI V16HI V8SI
694 (V64QI "TARGET_AVX512BW")
695 (V32HI "TARGET_AVX512BW")
696 (V16SI "TARGET_AVX512F")])
697 (define_mode_iterator VI48_256 [V8SI V4DI])
698 (define_mode_iterator VI48_512 [V16SI V8DI])
699 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
700 (define_mode_iterator VI_AVX512BW
701 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
702
703 ;; Int-float size matches
704 (define_mode_iterator VI4F_128 [V4SI V4SF])
705 (define_mode_iterator VI8F_128 [V2DI V2DF])
706 (define_mode_iterator VI4F_256 [V8SI V8SF])
707 (define_mode_iterator VI8F_256 [V4DI V4DF])
708 (define_mode_iterator VI4F_256_512
709 [V8SI V8SF
710 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
711 (define_mode_iterator VI48F_256_512
712 [V8SI V8SF
713 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
714 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
715 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
716 (define_mode_iterator VF48_I1248
717 [V16SI V16SF V8DI V8DF V32HI V64QI])
718 (define_mode_iterator VI48F
719 [V16SI V16SF V8DI V8DF
720 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
721 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
722 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
723 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
724 (define_mode_iterator VI12_VI48F_AVX512VLBW
725 [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
726 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
727 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
728 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
729 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
730 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
731 (V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
732 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
733 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
734
735 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
736
737 (define_mode_iterator VF_AVX512
738 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
739 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
740 V16SF V8DF])
741
742 (define_mode_attr avx512bcst
743 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
744 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
745 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
746 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
747 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
748 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
749
750 ;; Mapping from float mode to required SSE level
751 (define_mode_attr sse
752 [(SF "sse") (DF "sse2")
753 (V4SF "sse") (V2DF "sse2")
754 (V16SF "avx512f") (V8SF "avx")
755 (V8DF "avx512f") (V4DF "avx")])
756
757 (define_mode_attr sse2
758 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
759 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
760
761 (define_mode_attr sse3
762 [(V16QI "sse3") (V32QI "avx")])
763
764 (define_mode_attr sse4_1
765 [(V4SF "sse4_1") (V2DF "sse4_1")
766 (V8SF "avx") (V4DF "avx")
767 (V8DF "avx512f")
768 (V4DI "avx") (V2DI "sse4_1")
769 (V8SI "avx") (V4SI "sse4_1")
770 (V16QI "sse4_1") (V32QI "avx")
771 (V8HI "sse4_1") (V16HI "avx")])
772
773 (define_mode_attr avxsizesuffix
774 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
775 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
776 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
777 (V16SF "512") (V8DF "512")
778 (V8SF "256") (V4DF "256")
779 (V4SF "") (V2DF "")])
780
781 ;; SSE instruction mode
782 (define_mode_attr sseinsnmode
783 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
784 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
785 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
786 (V16SF "V16SF") (V8DF "V8DF")
787 (V8SF "V8SF") (V4DF "V4DF")
788 (V4SF "V4SF") (V2DF "V2DF")
789 (TI "TI")])
790
791 ;; SSE constant -1 constraint
792 (define_mode_attr sseconstm1
793 [(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC")
794 (V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC")
795 (V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC")
796 (V16SF "BF") (V8DF "BF")
797 (V8SF "BF") (V4DF "BF")
798 (V4SF "BF") (V2DF "BF")])
799
800 ;; Mapping of vector modes to corresponding mask size
801 (define_mode_attr avx512fmaskmode
802 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
803 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
804 (V16SI "HI") (V8SI "QI") (V4SI "QI")
805 (V8DI "QI") (V4DI "QI") (V2DI "QI")
806 (V16SF "HI") (V8SF "QI") (V4SF "QI")
807 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
808
809 ;; Mapping of vector modes to the lower-case names of their mask modes
810 (define_mode_attr avx512fmaskmodelower
811 [(V64QI "di") (V32QI "si") (V16QI "hi")
812 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
813 (V16SI "hi") (V8SI "qi") (V4SI "qi")
814 (V8DI "qi") (V4DI "qi") (V2DI "qi")
815 (V16SF "hi") (V8SF "qi") (V4SF "qi")
816 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
817
818 ;; Mapping of vector modes to corresponding mask half size
819 (define_mode_attr avx512fmaskhalfmode
820 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
821 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
822 (V16SI "QI") (V8SI "QI") (V4SI "QI")
823 (V8DI "QI") (V4DI "QI") (V2DI "QI")
824 (V16SF "QI") (V8SF "QI") (V4SF "QI")
825 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
826
827 ;; Mapping of vector float modes to an integer mode of the same size
828 (define_mode_attr sseintvecmode
829 [(V16SF "V16SI") (V8DF "V8DI")
830 (V8SF "V8SI") (V4DF "V4DI")
831 (V4SF "V4SI") (V2DF "V2DI")
832 (V16SI "V16SI") (V8DI "V8DI")
833 (V8SI "V8SI") (V4DI "V4DI")
834 (V4SI "V4SI") (V2DI "V2DI")
835 (V16HI "V16HI") (V8HI "V8HI")
836 (V32HI "V32HI") (V64QI "V64QI")
837 (V32QI "V32QI") (V16QI "V16QI")])
838
839 (define_mode_attr sseintvecmode2
840 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
841 (V8SF "OI") (V4SF "TI")])
842
843 (define_mode_attr sseintvecmodelower
844 [(V16SF "v16si") (V8DF "v8di")
845 (V8SF "v8si") (V4DF "v4di")
846 (V4SF "v4si") (V2DF "v2di")
847 (V8SI "v8si") (V4DI "v4di")
848 (V4SI "v4si") (V2DI "v2di")
849 (V16HI "v16hi") (V8HI "v8hi")
850 (V32QI "v32qi") (V16QI "v16qi")])
851
852 ;; Mapping of vector modes to a vector mode of double size
853 (define_mode_attr ssedoublevecmode
854 [(V64QI "V128QI") (V32HI "V64HI") (V16SI "V32SI") (V8DI "V16DI")
855 (V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
856 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
857 (V16SF "V32SF") (V8DF "V16DF")
858 (V8SF "V16SF") (V4DF "V8DF")
859 (V4SF "V8SF") (V2DF "V4DF")])
860
861 ;; Mapping of vector modes to a vector mode of half size
862 ;; instead of V1DI/V1DF, DI/DF are used for V2DI/V2DF although they are scalar.
863 (define_mode_attr ssehalfvecmode
864 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
865 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
866 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V2DI "DI")
867 (V16SF "V8SF") (V8DF "V4DF")
868 (V8SF "V4SF") (V4DF "V2DF")
869 (V4SF "V2SF") (V2DF "DF")])
870
871 (define_mode_attr ssehalfvecmodelower
872 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
873 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
874 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
875 (V16SF "v8sf") (V8DF "v4df")
876 (V8SF "v4sf") (V4DF "v2df")
877 (V4SF "v2sf")])
878
879 ;; Mapping of vector modes to packed single mode of the same size
880 (define_mode_attr ssePSmode
881 [(V16SI "V16SF") (V8DF "V16SF")
882 (V16SF "V16SF") (V8DI "V16SF")
883 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
884 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
885 (V8SI "V8SF") (V4SI "V4SF")
886 (V4DI "V8SF") (V2DI "V4SF")
887 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
888 (V8SF "V8SF") (V4SF "V4SF")
889 (V4DF "V8SF") (V2DF "V4SF")])
890
891 (define_mode_attr ssePSmode2
892 [(V8DI "V8SF") (V4DI "V4SF")])
893
894 ;; Mapping of vector modes back to the scalar modes
895 (define_mode_attr ssescalarmode
896 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
897 (V32HI "HI") (V16HI "HI") (V8HI "HI")
898 (V16SI "SI") (V8SI "SI") (V4SI "SI")
899 (V8DI "DI") (V4DI "DI") (V2DI "DI")
900 (V16SF "SF") (V8SF "SF") (V4SF "SF")
901 (V8DF "DF") (V4DF "DF") (V2DF "DF")
902 (V4TI "TI") (V2TI "TI")])
903
904 ;; Mapping of vector modes back to the scalar modes
905 (define_mode_attr ssescalarmodelower
906 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
907 (V32HI "hi") (V16HI "hi") (V8HI "hi")
908 (V16SI "si") (V8SI "si") (V4SI "si")
909 (V8DI "di") (V4DI "di") (V2DI "di")
910 (V16SF "sf") (V8SF "sf") (V4SF "sf")
911 (V8DF "df") (V4DF "df") (V2DF "df")
912 (V4TI "ti") (V2TI "ti")])
913
914 ;; Mapping of vector modes to the 128bit modes
915 (define_mode_attr ssexmmmode
916 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
917 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
918 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
919 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
920 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
921 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
922
923 ;; Pointer size override for scalar modes (Intel asm dialect)
924 (define_mode_attr iptr
925 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
926 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
927 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
928 (V16SF "k") (V8DF "q")
929 (V8SF "k") (V4DF "q")
930 (V4SF "k") (V2DF "q")
931 (SF "k") (DF "q")])
932
933 ;; Mapping of vector modes to VPTERNLOG suffix
934 (define_mode_attr ternlogsuffix
935 [(V8DI "q") (V4DI "q") (V2DI "q")
936 (V8DF "q") (V4DF "q") (V2DF "q")
937 (V16SI "d") (V8SI "d") (V4SI "d")
938 (V16SF "d") (V8SF "d") (V4SF "d")
939 (V32HI "d") (V16HI "d") (V8HI "d")
940 (V64QI "d") (V32QI "d") (V16QI "d")])
941
942 ;; Number of scalar elements in each vector type
943 (define_mode_attr ssescalarnum
944 [(V64QI "64") (V16SI "16") (V8DI "8")
945 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
946 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
947 (V16SF "16") (V8DF "8")
948 (V8SF "8") (V4DF "4")
949 (V4SF "4") (V2DF "2")])
950
951 ;; Index mask (element count minus one) of scalar elements in each vector type
952 (define_mode_attr ssescalarnummask
953 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
954 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
955 (V8SF "7") (V4DF "3")
956 (V4SF "3") (V2DF "1")])
957
958 (define_mode_attr ssescalarsize
959 [(V4TI "64") (V2TI "64") (V1TI "64")
960 (V8DI "64") (V4DI "64") (V2DI "64")
961 (V64QI "8") (V32QI "8") (V16QI "8")
962 (V32HI "16") (V16HI "16") (V8HI "16")
963 (V16SI "32") (V8SI "32") (V4SI "32")
964 (V16SF "32") (V8SF "32") (V4SF "32")
965 (V8DF "64") (V4DF "64") (V2DF "64")])
966
967 ;; SSE prefix for integer vector modes
968 (define_mode_attr sseintprefix
969 [(V2DI "p") (V2DF "")
970 (V4DI "p") (V4DF "")
971 (V8DI "p") (V8DF "")
972 (V4SI "p") (V4SF "")
973 (V8SI "p") (V8SF "")
974 (V16SI "p") (V16SF "")
975 (V16QI "p") (V8HI "p")
976 (V32QI "p") (V16HI "p")
977 (V64QI "p") (V32HI "p")])
978
979 ;; SSE scalar suffix for vector modes
980 (define_mode_attr ssescalarmodesuffix
981 [(SF "ss") (DF "sd")
982 (V16SF "ss") (V8DF "sd")
983 (V8SF "ss") (V4DF "sd")
984 (V4SF "ss") (V2DF "sd")
985 (V16SI "d") (V8DI "q")
986 (V8SI "d") (V4DI "q")
987 (V4SI "d") (V2DI "q")])
988
989 ;; Pack/unpack vector modes
990 (define_mode_attr sseunpackmode
991 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
992 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
993 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
994
995 (define_mode_attr ssepackmode
996 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
997 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
998 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
999
1000 ;; Mapping of the max integer size for xop rotate immediate constraint
1001 (define_mode_attr sserotatemax
1002 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
1003
1004 ;; Mapping of mode to cast intrinsic name
1005 (define_mode_attr castmode
1006 [(V8SI "si") (V8SF "ps") (V4DF "pd")
1007 (V16SI "si") (V16SF "ps") (V8DF "pd")])
1008
1009 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
1010 ;; i64x4 or f64x4 for 512bit modes.
1011 (define_mode_attr i128
1012 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
1013 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
1014 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
1015
1016 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
1017 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
1018 (define_mode_attr i128vldq
1019 [(V8SF "f32x4") (V4DF "f64x2")
1020 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
1021
1022 ;; Mix-n-match
1023 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
1024 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
1025
1026 ;; Mapping for dbpsabbw modes
1027 (define_mode_attr dbpsadbwmode
1028 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
1029
1030 ;; Mapping suffixes for broadcast
1031 (define_mode_attr bcstscalarsuff
1032 [(V64QI "b") (V32QI "b") (V16QI "b")
1033 (V32HI "w") (V16HI "w") (V8HI "w")
1034 (V16SI "d") (V8SI "d") (V4SI "d")
1035 (V8DI "q") (V4DI "q") (V2DI "q")
1036 (V16SF "ss") (V8SF "ss") (V4SF "ss")
1037 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
1038
1039 ;; Tie mode of assembler operand to mode iterator
1040 (define_mode_attr xtg_mode
1041 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
1042 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
1043 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
1044
1045 ;; Half mask mode for unpacks
1046 (define_mode_attr HALFMASKMODE
1047 [(DI "SI") (SI "HI")])
1048
1049 ;; Double mask mode for packs
1050 (define_mode_attr DOUBLEMASKMODE
1051 [(HI "SI") (SI "DI")])
1052
1053
1054 ;; Include define_subst patterns for instructions with mask
1055 (include "subst.md")
1056
1057 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1058
1059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1060 ;;
1061 ;; Move patterns
1062 ;;
1063 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1064
1065 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1066 ;; This is essential for maintaining stable calling conventions.
1067
1068 (define_expand "mov<mode>"
1069 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1070 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1071 "TARGET_SSE"
1072 {
1073 ix86_expand_vector_move (<MODE>mode, operands);
1074 DONE;
1075 })
1076
;; Alternative 0 loads the zero constant, alternative 1 the all-ones
;; constant; alternative 2 is a register/memory load and 3 is a store.
;; The "enabled" attr gates the all-ones alternative on SSE2/AVX2 for
;; 16/32-byte modes respectively.
1077 (define_insn "mov<mode>_internal"
1078 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1079 "=v,v ,v ,m")
1080 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1081 " C,<sseconstm1>,vm,v"))]
1082 "TARGET_SSE
1083 && (register_operand (operands[0], <MODE>mode)
1084 || register_operand (operands[1], <MODE>mode))"
1085 {
1086 switch (get_attr_type (insn))
1087 {
1088 case TYPE_SSELOG1:
1089 return standard_sse_constant_opcode (insn, operands);
1090
1091 case TYPE_SSEMOV:
1092 return ix86_output_ssemov (insn, operands);
1093
1094 default:
1095 gcc_unreachable ();
1096 }
1097 }
1098 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1099 (set_attr "prefix" "maybe_vex")
1100 (set (attr "mode")
1101 (cond [(match_test "TARGET_AVX")
1102 (const_string "<sseinsnmode>")
1103 (ior (not (match_test "TARGET_SSE2"))
1104 (match_test "optimize_function_for_size_p (cfun)"))
1105 (const_string "V4SF")
1106 (and (match_test "<MODE>mode == V2DFmode")
1107 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1108 (const_string "V4SF")
1109 (and (eq_attr "alternative" "3")
1110 (match_test "TARGET_SSE_TYPELESS_STORES"))
1111 (const_string "V4SF")
1112 (and (eq_attr "alternative" "0")
1113 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1114 (const_string "TI")
1115 ]
1116 (const_string "<sseinsnmode>")))
1117 (set (attr "enabled")
1118 (cond [(and (match_test "<MODE_SIZE> == 16")
1119 (eq_attr "alternative" "1"))
1120 (symbol_ref "TARGET_SSE2")
1121 (and (match_test "<MODE_SIZE> == 32")
1122 (eq_attr "alternative" "1"))
1123 (symbol_ref "TARGET_AVX2")
1124 ]
1125 (symbol_ref "true")))])
1126
1127 ;; If mem_addr points to a memory region with less than whole vector size bytes
1128 ;; of accessible memory and k is a mask that would prevent reading the inaccessible
1129 ;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it from being transformed
1130 ;; to vpblendd.  See pr97642.
1131 (define_expand "<avx512>_load<mode>_mask"
1132 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1133 (vec_merge:V48_AVX512VL
1134 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
1135 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
1136 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1137 "TARGET_AVX512F"
1138 {
/* A constant mask is necessarily all-ones here (register_or_constm1),
   so the masked load degenerates to a plain move.  */
1139 if (CONST_INT_P (operands[3]))
1140 {
1141 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1142 DONE;
1143 }
1144 else if (MEM_P (operands[1]))
1145 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1146 gen_rtvec(1, operands[1]),
1147 UNSPEC_MASKLOAD);
1148 })
1149
1150 (define_insn "*<avx512>_load<mode>_mask"
1151 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1152 (vec_merge:V48_AVX512VL
1153 (unspec:V48_AVX512VL
1154 [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
1155 UNSPEC_MASKLOAD)
1156 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
1157 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1158 "TARGET_AVX512F"
1159 {
1160 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1161 {
1162 if (misaligned_operand (operands[1], <MODE>mode))
1163 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1164 else
1165 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1166 }
1167 else
1168 {
1169 if (misaligned_operand (operands[1], <MODE>mode))
1170 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1171 else
1172 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1173 }
1174 }
1175 [(set_attr "type" "ssemov")
1176 (set_attr "prefix" "evex")
1177 (set_attr "mode" "<sseinsnmode>")])
1178
;; An UNSPEC_MASKLOAD load whose mask has gone away is just a normal
;; load; split it back into a plain move.
1179 (define_insn_and_split "*<avx512>_load<mode>"
1180 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1181 (unspec:V48_AVX512VL
1182 [(match_operand:V48_AVX512VL 1 "memory_operand")]
1183 UNSPEC_MASKLOAD))]
1184 "TARGET_AVX512F"
1185 "#"
1186 "&& 1"
1187 [(set (match_dup 0) (match_dup 1))])
1188
;; As for the V48 masked-load patterns above, but for byte/word element
;; vectors, which require AVX512BW.
1189 (define_expand "<avx512>_load<mode>_mask"
1190 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
1191 (vec_merge:VI12_AVX512VL
1192 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
1193 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
1194 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1195 "TARGET_AVX512BW"
1196 {
/* A constant mask is necessarily all-ones (register_or_constm1),
   so the masked load degenerates to a plain move.  */
1197 if (CONST_INT_P (operands[3]))
1198 {
1199 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1200 DONE;
1201 }
1202 else if (MEM_P (operands[1]))
1203 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1204 gen_rtvec(1, operands[1]),
1205 UNSPEC_MASKLOAD);
1206
1207 })
1208
1209 (define_insn "*<avx512>_load<mode>_mask"
1210 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1211 (vec_merge:VI12_AVX512VL
1212 (unspec:VI12_AVX512VL
1213 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1214 UNSPEC_MASKLOAD)
1215 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
1216 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1217 "TARGET_AVX512BW"
1218 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1219 [(set_attr "type" "ssemov")
1220 (set_attr "prefix" "evex")
1221 (set_attr "mode" "<sseinsnmode>")])
1222
;; Mask-less UNSPEC_MASKLOAD is a normal load again.
1223 (define_insn_and_split "*<avx512>_load<mode>"
1224 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1225 (unspec:VI12_AVX512VL
1226 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1227 UNSPEC_MASKLOAD))]
1228 "TARGET_AVX512BW"
1229 "#"
1230 "&& 1"
1231 [(set (match_dup 0) (match_dup 1))])
1232
;; Masked scalar move: element 0 is taken from operand 2 under mask bit 4
;; (merging with operand 3 or zeroing), upper elements come from operand 1.
1233 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1234 [(set (match_operand:VF_128 0 "register_operand" "=v")
1235 (vec_merge:VF_128
1236 (vec_merge:VF_128
1237 (match_operand:VF_128 2 "register_operand" "v")
1238 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1239 (match_operand:QI 4 "register_operand" "Yk"))
1240 (match_operand:VF_128 1 "register_operand" "v")
1241 (const_int 1)))]
1242 "TARGET_AVX512F"
1243 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1244 [(set_attr "type" "ssemov")
1245 (set_attr "prefix" "evex")
1246 (set_attr "mode" "<ssescalarmode>")])
1247
;; Masked scalar load; operand 4 (the zero vector merged into the upper
;; elements) is created here so the insn below can match it.
1248 (define_expand "avx512f_load<mode>_mask"
1249 [(set (match_operand:<ssevecmode> 0 "register_operand")
1250 (vec_merge:<ssevecmode>
1251 (vec_merge:<ssevecmode>
1252 (vec_duplicate:<ssevecmode>
1253 (match_operand:MODEF 1 "memory_operand"))
1254 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1255 (match_operand:QI 3 "register_operand"))
1256 (match_dup 4)
1257 (const_int 1)))]
1258 "TARGET_AVX512F"
1259 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
1260
;; Masked scalar load: the low element comes from memory (merged with
;; operand 2 or zeroed under mask bit 0), the upper elements are zeroed.
1261 (define_insn "*avx512f_load<mode>_mask"
1262 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1263 (vec_merge:<ssevecmode>
1264 (vec_merge:<ssevecmode>
1265 (vec_duplicate:<ssevecmode>
1266 (match_operand:MODEF 1 "memory_operand" "m"))
1267 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1268 (match_operand:QI 3 "register_operand" "Yk"))
1269 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1270 (const_int 1)))]
1271 "TARGET_AVX512F"
;; The Intel-dialect half previously printed "%{3%}" — a literal '3'
;; instead of substituting mask operand 3; both dialects use %{%3%}.
1272 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1273 [(set_attr "type" "ssemov")
1274 (set_attr "prefix" "evex")
1275 (set_attr "memory" "load")
1276 (set_attr "mode" "<MODE>")])
1277
;; Masked scalar store: element 0 of operand 1 is written to memory only
;; when mask bit 0 (operand 2) is set; otherwise memory is unchanged.
1278 (define_insn "avx512f_store<mode>_mask"
1279 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1280 (if_then_else:MODEF
1281 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1282 (const_int 1))
1283 (vec_select:MODEF
1284 (match_operand:<ssevecmode> 1 "register_operand" "v")
1285 (parallel [(const_int 0)]))
1286 (match_dup 0)))]
1287 "TARGET_AVX512F"
1288 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1289 [(set_attr "type" "ssemov")
1290 (set_attr "prefix" "evex")
1291 (set_attr "memory" "store")
1292 (set_attr "mode" "<MODE>")])
1293
;; Masked blend.  Alternative 0 ties the merge source to the destination,
;; so a mask-merged move suffices; when operand 1 is a different register
;; (alternative 1), an explicit vblendm is emitted instead.
1294 (define_insn "<avx512>_blendm<mode>"
1295 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1296 (vec_merge:V48_AVX512VL
1297 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1298 (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1299 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1300 "TARGET_AVX512F"
1301 {
1302 if (REG_P (operands[1])
1303 && REGNO (operands[1]) != REGNO (operands[0]))
1304 return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
1305
1306 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1307 {
1308 if (misaligned_operand (operands[2], <MODE>mode))
1309 return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1310 else
1311 return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1312 }
1313 else
1314 {
1315 if (misaligned_operand (operands[2], <MODE>mode))
1316 return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1317 else
1318 return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1319 }
1320 }
1321 [(set_attr "type" "ssemov")
1322 (set_attr "prefix" "evex")
1323 (set_attr "mode" "<sseinsnmode>")])
1324
;; Byte/word-element variant of the blend above (needs AVX512BW).
1325 (define_insn "<avx512>_blendm<mode>"
1326 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1327 (vec_merge:VI12_AVX512VL
1328 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1329 (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1330 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1331 "TARGET_AVX512BW"
1332 "@
1333 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1334 vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1335 [(set_attr "type" "ssemov")
1336 (set_attr "prefix" "evex")
1337 (set_attr "mode" "<sseinsnmode>")])
1338
;; Masked vector store: only the elements selected by operand 2 are
;; written; the rest of the memory destination is left untouched.
1339 (define_insn "<avx512>_store<mode>_mask"
1340 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1341 (vec_merge:V48_AVX512VL
1342 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1343 (match_dup 0)
1344 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1345 "TARGET_AVX512F"
1346 {
1347 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1348 {
1349 if (misaligned_operand (operands[0], <MODE>mode))
1350 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1351 else
1352 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1353 }
1354 else
1355 {
1356 if (misaligned_operand (operands[0], <MODE>mode))
1357 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1358 else
1359 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1360 }
1361 }
1362 [(set_attr "type" "ssemov")
1363 (set_attr "prefix" "evex")
1364 (set_attr "memory" "store")
1365 (set_attr "mode" "<sseinsnmode>")])
1366
;; Byte/word-element masked store (needs AVX512BW).
1367 (define_insn "<avx512>_store<mode>_mask"
1368 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1369 (vec_merge:VI12_AVX512VL
1370 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1371 (match_dup 0)
1372 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1373 "TARGET_AVX512BW"
1374 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1375 [(set_attr "type" "ssemov")
1376 (set_attr "prefix" "evex")
1377 (set_attr "memory" "store")
1378 (set_attr "mode" "<sseinsnmode>")])
1379
;; Copy the low quadword of operand 1 and zero the high quadword.
1380 (define_insn "sse2_movq128"
1381 [(set (match_operand:V2DI 0 "register_operand" "=v")
1382 (vec_concat:V2DI
1383 (vec_select:DI
1384 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1385 (parallel [(const_int 0)]))
1386 (const_int 0)))]
1387 "TARGET_SSE2"
1388 "%vmovq\t{%1, %0|%0, %q1}"
1389 [(set_attr "type" "ssemov")
1390 (set_attr "prefix" "maybe_vex")
1391 (set_attr "mode" "TI")])
1392
1393 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1394 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1395 ;; from memory, we'd prefer to load the memory directly into the %xmm
1396 ;; register. To facilitate this happy circumstance, this pattern won't
1397 ;; split until after register allocation. If the 64-bit value didn't
1398 ;; come from memory, this is the best we can do. This is much better
1399 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1400 ;; from there.
1401
1402 (define_insn_and_split "movdi_to_sse"
1403 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1404 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1405 UNSPEC_MOVDI_TO_SSE))
1406 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1407 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1408 "#"
1409 "&& reload_completed"
1410 [(const_int 0)]
1411 {
1412 if (register_operand (operands[1], DImode))
1413 {
1414 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1415 Assemble the 64-bit DImode value in an xmm register. */
1416 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1417 gen_lowpart (SImode, operands[1])));
1418 if (TARGET_SSE4_1)
1419 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1420 gen_highpart (SImode, operands[1]),
1421 GEN_INT (2)));
1422 else
1423 {
/* Without SSE4.1 pinsrd, build the high part in the scratch register
   and interleave it with the low part.  */
1424 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1425 gen_highpart (SImode, operands[1])));
1426 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1427 operands[2]));
1428 }
1429 }
1430 else if (memory_operand (operands[1], DImode))
1431 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1432 operands[1], const0_rtx));
1433 else
1434 gcc_unreachable ();
1435 DONE;
1436 }
1437 [(set_attr "isa" "sse4,*,*")])
1438
;; A V4SF load that is known to zero-extend a scalar becomes a masked
;; vec_duplicate merged with zero (a movss-style load).
1439 (define_split
1440 [(set (match_operand:V4SF 0 "register_operand")
1441 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1442 "TARGET_SSE && reload_completed"
1443 [(set (match_dup 0)
1444 (vec_merge:V4SF
1445 (vec_duplicate:V4SF (match_dup 1))
1446 (match_dup 2)
1447 (const_int 1)))]
1448 {
1449 operands[1] = gen_lowpart (SFmode, operands[1]);
1450 operands[2] = CONST0_RTX (V4SFmode);
1451 })
1452
;; Likewise for V2DF: load the scalar and concatenate a zero high half.
1453 (define_split
1454 [(set (match_operand:V2DF 0 "register_operand")
1455 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1456 "TARGET_SSE2 && reload_completed"
1457 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1458 {
1459 operands[1] = gen_lowpart (DFmode, operands[1]);
1460 operands[2] = CONST0_RTX (DFmode);
1461 })
1462
1463 (define_expand "movmisalign<mode>"
1464 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1465 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1466 "TARGET_SSE"
1467 {
1468 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1469 DONE;
1470 })
1471
1472 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1473 (define_peephole2
1474 [(set (match_operand:V2DF 0 "sse_reg_operand")
1475 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1476 (match_operand:DF 4 "const0_operand")))
1477 (set (match_operand:V2DF 2 "sse_reg_operand")
1478 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1479 (parallel [(const_int 0)]))
1480 (match_operand:DF 3 "memory_operand")))]
1481 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1482 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1483 [(set (match_dup 2) (match_dup 5))]
1484 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1485
;; As above, but the first insn is a plain scalar DF load into the same
;; register the second insn merges into.
1486 (define_peephole2
1487 [(set (match_operand:DF 0 "sse_reg_operand")
1488 (match_operand:DF 1 "memory_operand"))
1489 (set (match_operand:V2DF 2 "sse_reg_operand")
1490 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1491 (match_operand:DF 3 "memory_operand")))]
1492 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1493 && REGNO (operands[4]) == REGNO (operands[2])
1494 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1495 [(set (match_dup 2) (match_dup 5))]
1496 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1497
1498 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1499 (define_peephole2
1500 [(set (match_operand:DF 0 "memory_operand")
1501 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1502 (parallel [(const_int 0)])))
1503 (set (match_operand:DF 2 "memory_operand")
1504 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1505 (parallel [(const_int 1)])))]
1506 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1507 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1508 [(set (match_dup 4) (match_dup 1))]
1509 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1510
;; Unaligned integer vector load via lddqu/vlddqu.
1511 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1512 [(set (match_operand:VI1 0 "register_operand" "=x")
1513 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1514 UNSPEC_LDDQU))]
1515 "TARGET_SSE3"
1516 "%vlddqu\t{%1, %0|%0, %1}"
1517 [(set_attr "type" "ssemov")
1518 (set_attr "movu" "1")
1519 (set (attr "prefix_data16")
1520 (if_then_else
1521 (match_test "TARGET_AVX")
1522 (const_string "*")
1523 (const_string "0")))
1524 (set (attr "prefix_rep")
1525 (if_then_else
1526 (match_test "TARGET_AVX")
1527 (const_string "*")
1528 (const_string "1")))
1529 (set_attr "prefix" "maybe_vex")
1530 (set_attr "mode" "<sseinsnmode>")])
1531
;; Non-temporal scalar integer store from a general-purpose register.
1532 (define_insn "sse2_movnti<mode>"
1533 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1534 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1535 UNSPEC_MOVNT))]
1536 "TARGET_SSE2"
1537 "movnti\t{%1, %0|%0, %1}"
1538 [(set_attr "type" "ssemov")
1539 (set_attr "prefix_data16" "0")
1540 (set_attr "mode" "<MODE>")])
1541
;; Non-temporal float vector store (movntps/movntpd).
1542 (define_insn "<sse>_movnt<mode>"
1543 [(set (match_operand:VF 0 "memory_operand" "=m")
1544 (unspec:VF
1545 [(match_operand:VF 1 "register_operand" "v")]
1546 UNSPEC_MOVNT))]
1547 "TARGET_SSE"
1548 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1549 [(set_attr "type" "ssemov")
1550 (set_attr "prefix" "maybe_vex")
1551 (set_attr "mode" "<MODE>")])
1552
;; Non-temporal integer vector store (movntdq).
1553 (define_insn "<sse2>_movnt<mode>"
1554 [(set (match_operand:VI8 0 "memory_operand" "=m")
1555 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1556 UNSPEC_MOVNT))]
1557 "TARGET_SSE2"
1558 "%vmovntdq\t{%1, %0|%0, %1}"
1559 [(set_attr "type" "ssecvt")
1560 (set (attr "prefix_data16")
1561 (if_then_else
1562 (match_test "TARGET_AVX")
1563 (const_string "*")
1564 (const_string "1")))
1565 (set_attr "prefix" "maybe_vex")
1566 (set_attr "mode" "<sseinsnmode>")])
1567
1568 ; Expand patterns for non-temporal stores. At the moment, only those
1569 ; that directly map to insns are defined; it would be possible to
1570 ; define patterns for other modes that would expand to several insns.
1571
1572 ;; Modes handled by storent patterns.
1573 (define_mode_iterator STORENT_MODE
1574 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1575 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1576 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1577 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1578 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1579
1580 (define_expand "storent<mode>"
1581 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1582 (unspec:STORENT_MODE
1583 [(match_operand:STORENT_MODE 1 "register_operand")]
1584 UNSPEC_MOVNT))]
1585 "TARGET_SSE")
1586
1587 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1588 ;;
1589 ;; Mask operations
1590 ;;
1591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1592
1593 ;; All integer modes with AVX512BW/DQ.
1594 (define_mode_iterator SWI1248_AVX512BWDQ
1595 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1596
1597 ;; All integer modes with AVX512BW, where HImode operation
1598 ;; can be used instead of QImode.
1599 (define_mode_iterator SWI1248_AVX512BW
1600 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1601
1602 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1603 (define_mode_iterator SWI1248_AVX512BWDQ2
1604 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1605 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1606
;; Mask-register move; at most one operand may be memory.
1607 (define_expand "kmov<mskmodesuffix>"
1608 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1609 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1610 "TARGET_AVX512F
1611 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1612
;; Mask-register logic (kand/kior/kxor).  Without AVX512DQ there is no
;; byte form, so QImode operations are emitted as the word (k...w) insn.
1613 (define_insn "k<code><mode>"
1614 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1615 (any_logic:SWI1248_AVX512BW
1616 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1617 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1618 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1619 "TARGET_AVX512F"
1620 {
1621 if (get_attr_mode (insn) == MODE_HI)
1622 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1623 else
1624 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1625 }
1626 [(set_attr "type" "msklog")
1627 (set_attr "prefix" "vex")
1628 (set (attr "mode")
1629 (cond [(and (match_test "<MODE>mode == QImode")
1630 (not (match_test "TARGET_AVX512DQ")))
1631 (const_string "HI")
1632 ]
1633 (const_string "<MODE>")))])
1634
;; Rewrite a plain logic op on mask registers (which would clobber flags)
;; into the flag-free UNSPEC_MASKOP form above.
1635 (define_split
1636 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1637 (any_logic:SWI1248_AVX512BW
1638 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1639 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1640 (clobber (reg:CC FLAGS_REG))]
1641 "TARGET_AVX512F && reload_completed"
1642 [(parallel
1643 [(set (match_dup 0)
1644 (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
1645 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1646
;; kandn: dest = ~op1 & op2.  QImode falls back to kandnw without DQ.
1647 (define_insn "kandn<mode>"
1648 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1649 (and:SWI1248_AVX512BW
1650 (not:SWI1248_AVX512BW
1651 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1652 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1653 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1654 "TARGET_AVX512F"
1655 {
1656 if (get_attr_mode (insn) == MODE_HI)
1657 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1658 else
1659 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1660 }
1661 [(set_attr "type" "msklog")
1662 (set_attr "prefix" "vex")
1663 (set (attr "mode")
1664 (cond [(and (match_test "<MODE>mode == QImode")
1665 (not (match_test "TARGET_AVX512DQ")))
1666 (const_string "HI")
1667 ]
1668 (const_string "<MODE>")))])
1669
;; Rewrite plain andn on mask registers into the UNSPEC_MASKOP form.
1670 (define_split
1671 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1672 (and:SWI1248_AVX512BW
1673 (not:SWI1248_AVX512BW
1674 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
1675 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1676 (clobber (reg:CC FLAGS_REG))]
1677 "TARGET_AVX512F && reload_completed"
1678 [(parallel
1679 [(set (match_dup 0)
1680 (and:SWI1248_AVX512BW
1681 (not:SWI1248_AVX512BW (match_dup 1))
1682 (match_dup 2)))
1683 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1684
;; kxnor: dest = ~(op1 ^ op2).
1685 (define_insn "kxnor<mode>"
1686 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1687 (not:SWI1248_AVX512BW
1688 (xor:SWI1248_AVX512BW
1689 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1690 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1691 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1692 "TARGET_AVX512F"
1693 {
1694 if (get_attr_mode (insn) == MODE_HI)
1695 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1696 else
1697 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1698 }
1699 [(set_attr "type" "msklog")
1700 (set_attr "prefix" "vex")
1701 (set (attr "mode")
1702 (cond [(and (match_test "<MODE>mode == QImode")
1703 (not (match_test "TARGET_AVX512DQ")))
1704 (const_string "HI")
1705 ]
1706 (const_string "<MODE>")))])
1707
;; knot: bitwise complement of a mask register.
1708 (define_insn "knot<mode>"
1709 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1710 (not:SWI1248_AVX512BW
1711 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1712 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1713 "TARGET_AVX512F"
1714 {
1715 if (get_attr_mode (insn) == MODE_HI)
1716 return "knotw\t{%1, %0|%0, %1}";
1717 else
1718 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1719 }
1720 [(set_attr "type" "msklog")
1721 (set_attr "prefix" "vex")
1722 (set (attr "mode")
1723 (cond [(and (match_test "<MODE>mode == QImode")
1724 (not (match_test "TARGET_AVX512DQ")))
1725 (const_string "HI")
1726 ]
1727 (const_string "<MODE>")))])
1728
;; Rewrite plain not on a mask register into the UNSPEC_MASKOP form.
1729 (define_split
1730 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1731 (not:SWI1248_AVX512BW
1732 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
1733 "TARGET_AVX512F && reload_completed"
1734 [(parallel
1735 [(set (match_dup 0)
1736 (not:SWI1248_AVX512BW (match_dup 1)))
1737 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1738
;; knotd already zero-extends SImode into the 64-bit mask register.
;; (The stray ";" after the template below is an empty md comment.)
1739 (define_insn "*knotsi_1_zext"
1740 [(set (match_operand:DI 0 "register_operand" "=k")
1741 (zero_extend:DI
1742 (not:SI (match_operand:SI 1 "register_operand" "k"))))
1743 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1744 "TARGET_AVX512BW"
1745 "knotd\t{%1, %0|%0, %1}";
1746 [(set_attr "type" "msklog")
1747 (set_attr "prefix" "vex")
1748 (set_attr "mode" "SI")])
1749
;; UNSPEC_MASKOP form of the zero-extending knot above.
1750 (define_split
1751 [(set (match_operand:DI 0 "mask_reg_operand")
1752 (zero_extend:DI
1753 (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
1754 "TARGET_AVX512BW && reload_completed"
1755 [(parallel
1756 [(set (match_dup 0)
1757 (zero_extend:DI
1758 (not:SI (match_dup 1))))
1759 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1760
;; kadd: add two mask registers.
1761 (define_insn "kadd<mode>"
1762 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1763 (plus:SWI1248_AVX512BWDQ2
1764 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1765 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1766 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1767 "TARGET_AVX512F"
1768 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1769 [(set_attr "type" "msklog")
1770 (set_attr "prefix" "vex")
1771 (set_attr "mode" "<MODE>")])
1772
1773 ;; Mask variant shift mnemonics
1774 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1775
;; kshiftl/kshiftr with an immediate shift count.
1776 (define_insn "k<code><mode>"
1777 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1778 (any_lshift:SWI1248_AVX512BWDQ
1779 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1780 (match_operand 2 "const_0_to_255_operand" "n")))
1781 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1782 "TARGET_AVX512F"
1783 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1784 [(set_attr "type" "msklog")
1785 (set_attr "prefix" "vex")
1786 (set_attr "mode" "<MODE>")])
1787
;; Rewrite a plain mask-register shift into the UNSPEC_MASKOP form.
1788 (define_split
1789 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1790 (any_lshift:SWI1248_AVX512BW
1791 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1792 (match_operand 2 "const_int_operand")))
1793 (clobber (reg:CC FLAGS_REG))]
1794 "TARGET_AVX512F && reload_completed"
1795 [(parallel
1796 [(set (match_dup 0)
1797 (any_lshift:SWI1248_AVX512BW
1798 (match_dup 1)
1799 (match_dup 2)))
1800 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1801
;; ktest: set flags from two mask registers.
1802 (define_insn "ktest<mode>"
1803 [(set (reg:CC FLAGS_REG)
1804 (unspec:CC
1805 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1806 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1807 UNSPEC_KTEST))]
1808 "TARGET_AVX512F"
1809 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1810 [(set_attr "mode" "<MODE>")
1811 (set_attr "type" "msklog")
1812 (set_attr "prefix" "vex")])
1813
;; kortest: set flags from the OR of two mask registers.
1814 (define_insn "kortest<mode>"
1815 [(set (reg:CC FLAGS_REG)
1816 (unspec:CC
1817 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1818 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1819 UNSPEC_KORTEST))]
1820 "TARGET_AVX512F"
1821 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1822 [(set_attr "mode" "<MODE>")
1823 (set_attr "type" "msklog")
1824 (set_attr "prefix" "vex")])
1825
;; kunpckbw: concatenate two QImode masks into a HImode mask
;; (operand 1 in the high byte, operand 2 in the low byte).
1826 (define_insn "kunpckhi"
1827 [(set (match_operand:HI 0 "register_operand" "=k")
1828 (ior:HI
1829 (ashift:HI
1830 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1831 (const_int 8))
1832 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1833 "TARGET_AVX512F"
1834 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1835 [(set_attr "mode" "HI")
1836 (set_attr "type" "msklog")
1837 (set_attr "prefix" "vex")])
1838
;; kunpckwd: concatenate two HImode masks into an SImode mask.
;; Attributes now match kunpckhi above (type/prefix were missing here).
1839 (define_insn "kunpcksi"
1840 [(set (match_operand:SI 0 "register_operand" "=k")
1841 (ior:SI
1842 (ashift:SI
1843 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1844 (const_int 16))
1845 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1846 "TARGET_AVX512BW"
1847 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1848 [(set_attr "mode" "SI")
 (set_attr "type" "msklog")
 (set_attr "prefix" "vex")])
1849
;; kunpckdq: concatenate two SImode masks into a DImode mask.
1850 (define_insn "kunpckdi"
1851 [(set (match_operand:DI 0 "register_operand" "=k")
1852 (ior:DI
1853 (ashift:DI
1854 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1855 (const_int 32))
1856 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1857 "TARGET_AVX512BW"
1858 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1859 [(set_attr "mode" "DI")
 (set_attr "type" "msklog")
 (set_attr "prefix" "vex")])
1860
1861
1862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1863 ;;
1864 ;; Parallel floating point arithmetic
1865 ;;
1866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1867
;; Expand vector FP abs/neg; ix86_expand_fp_absneg_operator emits the
;; bitwise-logic form (AND/XOR with a sign-bit mask constant).
1868 (define_expand "<code><mode>2"
1869 [(set (match_operand:VF 0 "register_operand")
1870 (absneg:VF
1871 (match_operand:VF 1 "register_operand")))]
1872 "TARGET_SSE"
1873 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1874
;; abs/neg kept as absneg RTL until after reload, then split into the
;; corresponding bitwise logic insn (<absneg_op>) with the mask constant
;; in operand 2.  The C body canonicalizes operand order: for AVX, any
;; memory operand is moved to the second source; for non-AVX, the
;; operand that matches the destination must be operand 1.
1875 (define_insn_and_split "*<code><mode>2"
1876 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1877 (absneg:VF
1878 (match_operand:VF 1 "vector_operand" "0,xBm,v,m")))
1879 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1880 "TARGET_SSE"
1881 "#"
1882 "&& reload_completed"
1883 [(set (match_dup 0)
1884 (<absneg_op>:VF (match_dup 1) (match_dup 2)))]
1885 {
1886 if (TARGET_AVX)
1887 {
1888 if (MEM_P (operands[1]))
1889 std::swap (operands[1], operands[2]);
1890 }
1891 else
1892 {
1893 if (operands_match_p (operands[0], operands[2]))
1894 std::swap (operands[1], operands[2]);
1895 }
1896 }
1897 [(set_attr "isa" "noavx,noavx,avx,avx")])
1898
;; -|x| (negated absolute value): split after reload into an IOR with
;; the sign-bit mask held in operand 2; same operand canonicalization
;; as the abs/neg split above.
1899 (define_insn_and_split "*nabs<mode>2"
1900 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1901 (neg:VF
1902 (abs:VF
1903 (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
1904 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1905 "TARGET_SSE"
1906 "#"
1907 "&& reload_completed"
1908 [(set (match_dup 0)
1909 (ior:VF (match_dup 1) (match_dup 2)))]
1910 {
1911 if (TARGET_AVX)
1912 {
1913 if (MEM_P (operands[1]))
1914 std::swap (operands[1], operands[2]);
1915 }
1916 else
1917 {
1918 if (operands_match_p (operands[0], operands[2]))
1919 std::swap (operands[1], operands[2]);
1920 }
1921 }
1922 [(set_attr "isa" "noavx,noavx,avx,avx")])
1923
;; Masked conditional add/sub (vec_merge under an AVX512 mask register);
;; forwards to the masked add/sub pattern.  512-bit always; narrower
;; modes need AVX512VL.
1924 (define_expand "cond_<insn><mode>"
1925 [(set (match_operand:VF 0 "register_operand")
1926 (vec_merge:VF
1927 (plusminus:VF
1928 (match_operand:VF 2 "vector_operand")
1929 (match_operand:VF 3 "vector_operand"))
1930 (match_operand:VF 4 "nonimm_or_0_operand")
1931 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
1932 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
1933 {
1934 emit_insn (gen_<insn><mode>3_mask (operands[0],
1935 operands[2],
1936 operands[3],
1937 operands[4],
1938 operands[1]));
1939 DONE;
1940 })
1941
;; Vector FP add/sub expander; fixes up operand placement for the
;; two-address non-AVX forms.
1942 (define_expand "<insn><mode>3<mask_name><round_name>"
1943 [(set (match_operand:VF 0 "register_operand")
1944 (plusminus:VF
1945 (match_operand:VF 1 "<round_nimm_predicate>")
1946 (match_operand:VF 2 "<round_nimm_predicate>")))]
1947 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1948 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1949
;; Vector FP add/sub insn; alternative 0 is the legacy two-address SSE
;; form, alternative 1 the VEX/EVEX form with optional masking,
;; rounding, and embedded-broadcast source.
1950 (define_insn "*<insn><mode>3<mask_name><round_name>"
1951 [(set (match_operand:VF 0 "register_operand" "=x,v")
1952 (plusminus:VF
1953 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
1954 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1955 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1956 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1957 "@
1958 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1959 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1960 [(set_attr "isa" "noavx,avx")
1961 (set_attr "type" "sseadd")
1962 (set_attr "prefix" "<bcst_mask_prefix3>")
1963 (set_attr "mode" "<MODE>")])
1964
1965 ;; Standard scalar operation patterns which preserve the rest of the
1966 ;; vector for combiner.
;; Scalar add/sub on element 0 only; the vec_merge keeps elements 1..n
;; of operand 1, matching the addss/addsd semantics.
1967 (define_insn "*<sse>_vm<insn><mode>3"
1968 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1969 (vec_merge:VF_128
1970 (vec_duplicate:VF_128
1971 (plusminus:<ssescalarmode>
1972 (vec_select:<ssescalarmode>
1973 (match_operand:VF_128 1 "register_operand" "0,v")
1974 (parallel [(const_int 0)]))
1975 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1976 (match_dup 1)
1977 (const_int 1)))]
1978 "TARGET_SSE"
1979 "@
1980 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1981 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1982 [(set_attr "isa" "noavx,avx")
1983 (set_attr "type" "sseadd")
1984 (set_attr "prefix" "orig,vex")
1985 (set_attr "mode" "<ssescalarmode>")])
1986
;; Builtin-level scalar add/sub (whole-vector operands, element 0
;; merged) with optional scalar masking and rounding.
1987 (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
1988 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1989 (vec_merge:VF_128
1990 (plusminus:VF_128
1991 (match_operand:VF_128 1 "register_operand" "0,v")
1992 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1993 (match_dup 1)
1994 (const_int 1)))]
1995 "TARGET_SSE"
1996 "@
1997 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1998 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1999 [(set_attr "isa" "noavx,avx")
2000 (set_attr "type" "sseadd")
2001 (set_attr "prefix" "<round_scalar_prefix>")
2002 (set_attr "mode" "<ssescalarmode>")])
2003
;; Masked conditional multiply (vec_merge under an AVX512 mask);
;; forwards to the masked mul pattern.
2004 (define_expand "cond_mul<mode>"
2005 [(set (match_operand:VF 0 "register_operand")
2006 (vec_merge:VF
2007 (mult:VF
2008 (match_operand:VF 2 "vector_operand")
2009 (match_operand:VF 3 "vector_operand"))
2010 (match_operand:VF 4 "nonimm_or_0_operand")
2011 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2012 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2013 {
2014 emit_insn (gen_mul<mode>3_mask (operands[0],
2015 operands[2],
2016 operands[3],
2017 operands[4],
2018 operands[1]));
2019 DONE;
2020 })
2021
;; Vector FP multiply expander.
2022 (define_expand "mul<mode>3<mask_name><round_name>"
2023 [(set (match_operand:VF 0 "register_operand")
2024 (mult:VF
2025 (match_operand:VF 1 "<round_nimm_predicate>")
2026 (match_operand:VF 2 "<round_nimm_predicate>")))]
2027 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2028 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
2029
;; Vector FP multiply insn (commutative "%" on operand 1); legacy SSE
;; and VEX/EVEX alternatives with mask/round/broadcast support.
2030 (define_insn "*mul<mode>3<mask_name><round_name>"
2031 [(set (match_operand:VF 0 "register_operand" "=x,v")
2032 (mult:VF
2033 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
2034 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2035 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
2036 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2037 "@
2038 mul<ssemodesuffix>\t{%2, %0|%0, %2}
2039 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2040 [(set_attr "isa" "noavx,avx")
2041 (set_attr "type" "ssemul")
2042 (set_attr "prefix" "<bcst_mask_prefix3>")
2043 (set_attr "btver2_decode" "direct,double")
2044 (set_attr "mode" "<MODE>")])
2045
2046 ;; Standard scalar operation patterns which preserve the rest of the
2047 ;; vector for combiner.
;; Scalar mul/div on element 0 only, remaining elements taken from
;; operand 1 (mulss/divss semantics).
2048 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
2049 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2050 (vec_merge:VF_128
2051 (vec_duplicate:VF_128
2052 (multdiv:<ssescalarmode>
2053 (vec_select:<ssescalarmode>
2054 (match_operand:VF_128 1 "register_operand" "0,v")
2055 (parallel [(const_int 0)]))
2056 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
2057 (match_dup 1)
2058 (const_int 1)))]
2059 "TARGET_SSE"
2060 "@
2061 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2062 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2063 [(set_attr "isa" "noavx,avx")
2064 (set_attr "type" "sse<multdiv_mnemonic>")
2065 (set_attr "prefix" "orig,vex")
2066 (set_attr "btver2_decode" "direct,double")
2067 (set_attr "mode" "<ssescalarmode>")])
2068
;; Builtin-level scalar mul/div (whole-vector operands, element 0
;; merged) with optional scalar masking and rounding.
2069 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
2070 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2071 (vec_merge:VF_128
2072 (multdiv:VF_128
2073 (match_operand:VF_128 1 "register_operand" "0,v")
2074 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2075 (match_dup 1)
2076 (const_int 1)))]
2077 "TARGET_SSE"
2078 "@
2079 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2080 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
2081 [(set_attr "isa" "noavx,avx")
2082 (set_attr "type" "sse<multdiv_mnemonic>")
2083 (set_attr "prefix" "<round_scalar_prefix>")
2084 (set_attr "btver2_decode" "direct,double")
2085 (set_attr "mode" "<ssescalarmode>")])
2086
;; Double-precision vector division: straightforward expansion, no
;; reciprocal-approximation path.
2087 (define_expand "div<mode>3"
2088 [(set (match_operand:VF2 0 "register_operand")
2089 (div:VF2 (match_operand:VF2 1 "register_operand")
2090 (match_operand:VF2 2 "vector_operand")))]
2091 "TARGET_SSE2")
2092
;; Single-precision vector division: under the unsafe-math flag set
;; (and -mrecip), expand to the Newton-Raphson reciprocal sequence via
;; ix86_emit_swdivsf instead of a real divide.
2093 (define_expand "div<mode>3"
2094 [(set (match_operand:VF1 0 "register_operand")
2095 (div:VF1 (match_operand:VF1 1 "register_operand")
2096 (match_operand:VF1 2 "vector_operand")))]
2097 "TARGET_SSE"
2098 {
2099 if (TARGET_SSE_MATH
2100 && TARGET_RECIP_VEC_DIV
2101 && !optimize_insn_for_size_p ()
2102 && flag_finite_math_only && !flag_trapping_math
2103 && flag_unsafe_math_optimizations)
2104 {
2105 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
2106 DONE;
2107 }
2108 })
2109
;; Masked conditional divide; forwards to the masked div pattern.
2110 (define_expand "cond_div<mode>"
2111 [(set (match_operand:VF 0 "register_operand")
2112 (vec_merge:VF
2113 (div:VF
2114 (match_operand:VF 2 "register_operand")
2115 (match_operand:VF 3 "vector_operand"))
2116 (match_operand:VF 4 "nonimm_or_0_operand")
2117 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2118 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2119 {
2120 emit_insn (gen_<sse>_div<mode>3_mask (operands[0],
2121 operands[2],
2122 operands[3],
2123 operands[4],
2124 operands[1]));
2125 DONE;
2126 })
2127
;; Vector FP divide insn; non-commutative, so operand 1 must be a
;; register.  Mask/round/broadcast variants on the EVEX alternative.
2128 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
2129 [(set (match_operand:VF 0 "register_operand" "=x,v")
2130 (div:VF
2131 (match_operand:VF 1 "register_operand" "0,v")
2132 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2133 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2134 "@
2135 div<ssemodesuffix>\t{%2, %0|%0, %2}
2136 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2137 [(set_attr "isa" "noavx,avx")
2138 (set_attr "type" "ssediv")
2139 (set_attr "prefix" "<bcst_mask_prefix3>")
2140 (set_attr "mode" "<MODE>")])
2141
;; rcpps: SSE/AVX approximate reciprocal, single precision only,
;; 128/256-bit vectors.
2142 (define_insn "<sse>_rcp<mode>2"
2143 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2144 (unspec:VF1_128_256
2145 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2146 "TARGET_SSE"
2147 "%vrcpps\t{%1, %0|%0, %1}"
2148 [(set_attr "type" "sse")
2149 (set_attr "atom_sse_attr" "rcp")
2150 (set_attr "btver2_sse_attr" "rcp")
2151 (set_attr "prefix" "maybe_vex")
2152 (set_attr "mode" "<MODE>")])
2153
;; rcpss, builtin form: element 0 gets the approximate reciprocal of
;; operand 1's element 0 (%k1 selects it in the template), the rest
;; comes from operand 2.
2154 (define_insn "sse_vmrcpv4sf2"
2155 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2156 (vec_merge:V4SF
2157 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2158 UNSPEC_RCP)
2159 (match_operand:V4SF 2 "register_operand" "0,x")
2160 (const_int 1)))]
2161 "TARGET_SSE"
2162 "@
2163 rcpss\t{%1, %0|%0, %k1}
2164 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2165 [(set_attr "isa" "noavx,avx")
2166 (set_attr "type" "sse")
2167 (set_attr "atom_sse_attr" "rcp")
2168 (set_attr "btver2_sse_attr" "rcp")
2169 (set_attr "prefix" "orig,vex")
2170 (set_attr "mode" "SF")])
2171
;; rcpss, combiner form: scalar SF input duplicated then merged, for
;; matches produced from scalar code.
2172 (define_insn "*sse_vmrcpv4sf2"
2173 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2174 (vec_merge:V4SF
2175 (vec_duplicate:V4SF
2176 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2177 UNSPEC_RCP))
2178 (match_operand:V4SF 2 "register_operand" "0,x")
2179 (const_int 1)))]
2180 "TARGET_SSE"
2181 "@
2182 rcpss\t{%1, %0|%0, %1}
2183 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2184 [(set_attr "isa" "noavx,avx")
2185 (set_attr "type" "sse")
2186 (set_attr "atom_sse_attr" "rcp")
2187 (set_attr "btver2_sse_attr" "rcp")
2188 (set_attr "prefix" "orig,vex")
2189 (set_attr "mode" "SF")])
2190
;; vrcp14ps/pd: AVX512 reciprocal approximation (2^-14 relative error
;; per the ISA definition of the unspec), with optional masking.
2191 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
2192 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2193 (unspec:VF_AVX512VL
2194 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2195 UNSPEC_RCP14))]
2196 "TARGET_AVX512F"
2197 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2198 [(set_attr "type" "sse")
2199 (set_attr "prefix" "evex")
2200 (set_attr "mode" "<MODE>")])
2201
;; vrcp14ss/sd: scalar rcp14 of operand 1's element 0, upper elements
;; from operand 2.
2202 (define_insn "srcp14<mode>"
2203 [(set (match_operand:VF_128 0 "register_operand" "=v")
2204 (vec_merge:VF_128
2205 (unspec:VF_128
2206 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2207 UNSPEC_RCP14)
2208 (match_operand:VF_128 2 "register_operand" "v")
2209 (const_int 1)))]
2210 "TARGET_AVX512F"
2211 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2212 [(set_attr "type" "sse")
2213 (set_attr "prefix" "evex")
2214 (set_attr "mode" "<MODE>")])
2215
;; Masked scalar rcp14: the inner vec_merge applies mask operand 4 with
;; merge (operand 3 = "0") or zero (operand 3 = "C") masking, the outer
;; one keeps elements 1..n from operand 2.
2216 (define_insn "srcp14<mode>_mask"
2217 [(set (match_operand:VF_128 0 "register_operand" "=v")
2218 (vec_merge:VF_128
2219 (vec_merge:VF_128
2220 (unspec:VF_128
2221 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2222 UNSPEC_RCP14)
2223 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2224 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2225 (match_operand:VF_128 2 "register_operand" "v")
2226 (const_int 1)))]
2227 "TARGET_AVX512F"
2228 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2229 [(set_attr "type" "sse")
2230 (set_attr "prefix" "evex")
2231 (set_attr "mode" "<MODE>")])
2232
;; Double-precision vector sqrt: direct expansion.
2233 (define_expand "sqrt<mode>2"
2234 [(set (match_operand:VF2 0 "register_operand")
2235 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2236 "TARGET_SSE2")
2237
;; Single-precision vector sqrt: under the unsafe-math flag set (and
;; -mrecip), expand to the rsqrt Newton-Raphson sequence via
;; ix86_emit_swsqrtsf (recip=false computes sqrt, not 1/sqrt).
2238 (define_expand "sqrt<mode>2"
2239 [(set (match_operand:VF1 0 "register_operand")
2240 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2241 "TARGET_SSE"
2242 {
2243 if (TARGET_SSE_MATH
2244 && TARGET_RECIP_VEC_SQRT
2245 && !optimize_insn_for_size_p ()
2246 && flag_finite_math_only && !flag_trapping_math
2247 && flag_unsafe_math_optimizations)
2248 {
2249 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2250 DONE;
2251 }
2252 })
2253
;; sqrtps/pd insn; EVEX alternative supports masking and rounding.
2254 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2255 [(set (match_operand:VF 0 "register_operand" "=x,v")
2256 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2257 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2258 "@
2259 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2260 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2261 [(set_attr "isa" "noavx,avx")
2262 (set_attr "type" "sse")
2263 (set_attr "atom_sse_attr" "sqrt")
2264 (set_attr "btver2_sse_attr" "sqrt")
2265 (set_attr "prefix" "maybe_vex")
2266 (set_attr "mode" "<MODE>")])
2267
;; sqrtss/sd, builtin form: sqrt of operand 1's element 0 merged into
;; operand 2; optional scalar masking/rounding.
2268 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2269 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2270 (vec_merge:VF_128
2271 (sqrt:VF_128
2272 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2273 (match_operand:VF_128 2 "register_operand" "0,v")
2274 (const_int 1)))]
2275 "TARGET_SSE"
2276 "@
2277 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2278 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2279 [(set_attr "isa" "noavx,avx")
2280 (set_attr "type" "sse")
2281 (set_attr "atom_sse_attr" "sqrt")
2282 (set_attr "prefix" "<round_scalar_prefix>")
2283 (set_attr "btver2_sse_attr" "sqrt")
2284 (set_attr "mode" "<ssescalarmode>")])
2285
;; sqrtss/sd, combiner form: scalar input duplicated then merged.
2286 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2287 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2288 (vec_merge:VF_128
2289 (vec_duplicate:VF_128
2290 (sqrt:<ssescalarmode>
2291 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2292 (match_operand:VF_128 2 "register_operand" "0,v")
2293 (const_int 1)))]
2294 "TARGET_SSE"
2295 "@
2296 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2297 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2298 [(set_attr "isa" "noavx,avx")
2299 (set_attr "type" "sse")
2300 (set_attr "atom_sse_attr" "sqrt")
2301 (set_attr "prefix" "<round_scalar_prefix>")
2302 (set_attr "btver2_sse_attr" "sqrt")
2303 (set_attr "mode" "<ssescalarmode>")])
2304
;; rsqrt expander: always lowered through ix86_emit_swsqrtsf with
;; recip=true (approximate 1/sqrt plus refinement as that helper emits).
2305 (define_expand "rsqrt<mode>2"
2306 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2307 (unspec:VF1_AVX512ER_128_256
2308 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2309 UNSPEC_RSQRT))]
2310 "TARGET_SSE && TARGET_SSE_MATH"
2311 {
2312 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2313 DONE;
2314 })
2315
;; rsqrtps: SSE/AVX approximate reciprocal square root, single
;; precision, 128/256-bit.
2316 (define_insn "<sse>_rsqrt<mode>2"
2317 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2318 (unspec:VF1_128_256
2319 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2320 "TARGET_SSE"
2321 "%vrsqrtps\t{%1, %0|%0, %1}"
2322 [(set_attr "type" "sse")
2323 (set_attr "prefix" "maybe_vex")
2324 (set_attr "mode" "<MODE>")])
2325
;; vrsqrt14ps/pd: AVX512 approximate reciprocal sqrt with optional
;; masking.
2326 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2327 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2328 (unspec:VF_AVX512VL
2329 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2330 UNSPEC_RSQRT14))]
2331 "TARGET_AVX512F"
2332 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2333 [(set_attr "type" "sse")
2334 (set_attr "prefix" "evex")
2335 (set_attr "mode" "<MODE>")])
2336
;; vrsqrt14ss/sd: scalar rsqrt14 of operand 1's element 0, upper
;; elements from operand 2.
2337 (define_insn "rsqrt14<mode>"
2338 [(set (match_operand:VF_128 0 "register_operand" "=v")
2339 (vec_merge:VF_128
2340 (unspec:VF_128
2341 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2342 UNSPEC_RSQRT14)
2343 (match_operand:VF_128 2 "register_operand" "v")
2344 (const_int 1)))]
2345 "TARGET_AVX512F"
2346 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2347 [(set_attr "type" "sse")
2348 (set_attr "prefix" "evex")
2349 (set_attr "mode" "<MODE>")])
2350
;; Masked scalar rsqrt14: inner vec_merge applies merge/zero masking
;; (operands 3/4), outer vec_merge preserves elements 1..n of operand 2.
2351 (define_insn "rsqrt14_<mode>_mask"
2352 [(set (match_operand:VF_128 0 "register_operand" "=v")
2353 (vec_merge:VF_128
2354 (vec_merge:VF_128
2355 (unspec:VF_128
2356 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2357 UNSPEC_RSQRT14)
2358 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2359 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2360 (match_operand:VF_128 2 "register_operand" "v")
2361 (const_int 1)))]
2362 "TARGET_AVX512F"
2363 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2364 [(set_attr "type" "sse")
2365 (set_attr "prefix" "evex")
2366 (set_attr "mode" "<MODE>")])
2367
;; rsqrtss, builtin form: element 0 of operand 1 (%k1 in the template),
;; remaining elements from operand 2.
2368 (define_insn "sse_vmrsqrtv4sf2"
2369 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2370 (vec_merge:V4SF
2371 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2372 UNSPEC_RSQRT)
2373 (match_operand:V4SF 2 "register_operand" "0,x")
2374 (const_int 1)))]
2375 "TARGET_SSE"
2376 "@
2377 rsqrtss\t{%1, %0|%0, %k1}
2378 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2379 [(set_attr "isa" "noavx,avx")
2380 (set_attr "type" "sse")
2381 (set_attr "prefix" "orig,vex")
2382 (set_attr "mode" "SF")])
2383
;; rsqrtss, combiner form: scalar SF input duplicated then merged.
2384 (define_insn "*sse_vmrsqrtv4sf2"
2385 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2386 (vec_merge:V4SF
2387 (vec_duplicate:V4SF
2388 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2389 UNSPEC_RSQRT))
2390 (match_operand:V4SF 2 "register_operand" "0,x")
2391 (const_int 1)))]
2392 "TARGET_SSE"
2393 "@
2394 rsqrtss\t{%1, %0|%0, %1}
2395 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2396 [(set_attr "isa" "noavx,avx")
2397 (set_attr "type" "sse")
2398 (set_attr "prefix" "orig,vex")
2399 (set_attr "mode" "SF")])
2400
;; Masked conditional max/min; forwards to the masked smax/smin pattern.
2401 (define_expand "cond_<code><mode>"
2402 [(set (match_operand:VF 0 "register_operand")
2403 (vec_merge:VF
2404 (smaxmin:VF
2405 (match_operand:VF 2 "vector_operand")
2406 (match_operand:VF 3 "vector_operand"))
2407 (match_operand:VF 4 "nonimm_or_0_operand")
2408 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2409 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2410 {
2411 emit_insn (gen_<code><mode>3_mask (operands[0],
2412 operands[2],
2413 operands[3],
2414 operands[4],
2415 operands[1]))
2416 DONE;
2417 })
2418
;; smax/smin expander.  When -0.0 or NaN semantics matter (no
;; finite-math-only, or signed zeros), use the non-commutative IEEE
;; pattern so operand order is preserved; otherwise fall through to the
;; commutative RTL form.
2419 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2420 [(set (match_operand:VF 0 "register_operand")
2421 (smaxmin:VF
2422 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2423 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2424 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2425 {
2426 if (!flag_finite_math_only || flag_signed_zeros)
2427 {
2428 operands[1] = force_reg (<MODE>mode, operands[1]);
2429 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2430 (operands[0], operands[1], operands[2]
2431 <mask_operand_arg34>
2432 <round_saeonly_mask_arg3>));
2433 DONE;
2434 }
2435 else
2436 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2437 })
2438
2439 ;; These versions of the min/max patterns are intentionally ignorant of
2440 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2441 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2442 ;; are undefined in this condition, we're certain this is correct.
2443
2444 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2445 [(set (match_operand:VF 0 "register_operand" "=x,v")
2446 (smaxmin:VF
2447 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2448 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2449 "TARGET_SSE
2450 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2451 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2452 "@
2453 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2454 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2455 [(set_attr "isa" "noavx,avx")
2456 (set_attr "type" "sseadd")
2457 (set_attr "btver2_sse_attr" "maxmin")
2458 (set_attr "prefix" "<mask_prefix3>")
2459 (set_attr "mode" "<MODE>")])
2460
2461 ;; These versions of the min/max patterns implement exactly the operations
2462 ;;   min = (op1 < op2 ? op1 : op2)
2463 ;;   max = (!(op1 < op2) ? op1 : op2)
2464 ;; Their operands are not commutative, and thus they may be used in the
2465 ;; presence of -0.0 and NaN.
2466
2467 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2468 [(set (match_operand:VF 0 "register_operand" "=x,v")
2469 (unspec:VF
2470 [(match_operand:VF 1 "register_operand" "0,v")
2471 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2472 IEEE_MAXMIN))]
2473 "TARGET_SSE
2474 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2475 "@
2476 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2477 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2478 [(set_attr "isa" "noavx,avx")
2479 (set_attr "type" "sseadd")
2480 (set_attr "btver2_sse_attr" "maxmin")
2481 (set_attr "prefix" "<mask_prefix3>")
2482 (set_attr "mode" "<MODE>")])
2483
2484 ;; Standard scalar operation patterns which preserve the rest of the
2485 ;; vector for combiner.
;; Scalar IEEE max/min on element 0 only, elements 1..n from operand 1.
2486 (define_insn "*ieee_<ieee_maxmin><mode>3"
2487 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2488 (vec_merge:VF_128
2489 (vec_duplicate:VF_128
2490 (unspec:<ssescalarmode>
2491 [(vec_select:<ssescalarmode>
2492 (match_operand:VF_128 1 "register_operand" "0,v")
2493 (parallel [(const_int 0)]))
2494 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2495 IEEE_MAXMIN))
2496 (match_dup 1)
2497 (const_int 1)))]
2498 "TARGET_SSE"
2499 "@
2500 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2501 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2502 [(set_attr "isa" "noavx,avx")
2503 (set_attr "type" "sseadd")
2504 (set_attr "btver2_sse_attr" "maxmin")
2505 (set_attr "prefix" "orig,vex")
2506 (set_attr "mode" "<ssescalarmode>")])
2507
;; Builtin-level scalar max/min (maxss/minss etc.) with optional scalar
;; masking and SAE.
2508 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2509 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2510 (vec_merge:VF_128
2511 (smaxmin:VF_128
2512 (match_operand:VF_128 1 "register_operand" "0,v")
2513 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2514 (match_dup 1)
2515 (const_int 1)))]
2516 "TARGET_SSE"
2517 "@
2518 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2519 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2520 [(set_attr "isa" "noavx,avx")
2521 (set_attr "type" "sse")
2522 (set_attr "btver2_sse_attr" "maxmin")
2523 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2524 (set_attr "mode" "<ssescalarmode>")])
2525
;; vec_merge selector for addsub: bits set where the MINUS result is
;; taken (even lanes): V2DF 0b01, V4SF/V4DF 0b0101, V8SF 0b01010101.
2526 (define_mode_attr addsub_cst [(V4DF "5") (V2DF "1")
2527 (V4SF "5") (V8SF "85")])
2528
;; SSE3/AVX addsubps/pd: subtract in even lanes, add in odd lanes,
;; expressed as a vec_merge of the MINUS and PLUS of the same operands.
2529 (define_insn "vec_addsub<mode>3"
2530 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2531 (vec_merge:VF_128_256
2532 (minus:VF_128_256
2533 (match_operand:VF_128_256 1 "register_operand" "0,x")
2534 (match_operand:VF_128_256 2 "vector_operand" "xBm, xm"))
2535 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2536 (const_int <addsub_cst>)))]
2537 "TARGET_SSE3"
2538 "@
2539 addsub<ssemodesuffix>\t{%2, %0|%0, %2}
2540 vaddsub<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2541 [(set_attr "isa" "noavx,avx")
2542 (set_attr "type" "sseadd")
2543 (set (attr "atom_unit")
2544 (if_then_else
2545 (match_test "<MODE>mode == V2DFmode")
2546 (const_string "complex")
2547 (const_string "*")))
2548 (set_attr "prefix" "orig,vex")
2549 (set (attr "prefix_rep")
2550 (if_then_else
2551 (and (match_test "<MODE>mode == V4SFmode")
2552 (eq_attr "alternative" "0"))
2553 (const_string "1")
2554 (const_string "*")))
2555 (set_attr "mode" "<MODE>")])
2556
;; Canonicalize a vec_merge of (minus, plus) over equal operand pairs
;; into the addsub form above (minus first).
2557 (define_split
2558 [(set (match_operand:VF_128_256 0 "register_operand")
2559 (match_operator:VF_128_256 6 "addsub_vm_operator"
2560 [(minus:VF_128_256
2561 (match_operand:VF_128_256 1 "register_operand")
2562 (match_operand:VF_128_256 2 "vector_operand"))
2563 (plus:VF_128_256
2564 (match_operand:VF_128_256 3 "vector_operand")
2565 (match_operand:VF_128_256 4 "vector_operand"))
2566 (match_operand 5 "const_int_operand")]))]
2567 "TARGET_SSE3
2568 && can_create_pseudo_p ()
2569 && ((rtx_equal_p (operands[1], operands[3])
2570 && rtx_equal_p (operands[2], operands[4]))
2571 || (rtx_equal_p (operands[1], operands[4])
2572 && rtx_equal_p (operands[2], operands[3])))"
2573 [(set (match_dup 0)
2574 (vec_merge:VF_128_256
2575 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2576 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2577 (match_dup 5)))])
2578
;; Same, but with PLUS first in the vec_merge; swapping the arms
;; requires complementing the selector mask (done in the C body).
2579 (define_split
2580 [(set (match_operand:VF_128_256 0 "register_operand")
2581 (match_operator:VF_128_256 6 "addsub_vm_operator"
2582 [(plus:VF_128_256
2583 (match_operand:VF_128_256 1 "vector_operand")
2584 (match_operand:VF_128_256 2 "vector_operand"))
2585 (minus:VF_128_256
2586 (match_operand:VF_128_256 3 "register_operand")
2587 (match_operand:VF_128_256 4 "vector_operand"))
2588 (match_operand 5 "const_int_operand")]))]
2589 "TARGET_SSE3
2590 && can_create_pseudo_p ()
2591 && ((rtx_equal_p (operands[1], operands[3])
2592 && rtx_equal_p (operands[2], operands[4]))
2593 || (rtx_equal_p (operands[1], operands[4])
2594 && rtx_equal_p (operands[2], operands[3])))"
2595 [(set (match_dup 0)
2596 (vec_merge:VF_128_256
2597 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2598 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2599 (match_dup 5)))]
2600 {
2601 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2602 operands[5]
2603 = GEN_INT (~INTVAL (operands[5])
2604 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2605 })
2606
;; Canonicalize the vec_select-over-vec_concat form (minus first) into
;; the addsub vec_merge; the C body rebuilds the selector from the
;; parallel's element indices (index < NUNITS selects the MINUS half).
2607 (define_split
2608 [(set (match_operand:VF_128_256 0 "register_operand")
2609 (match_operator:VF_128_256 7 "addsub_vs_operator"
2610 [(vec_concat:<ssedoublemode>
2611 (minus:VF_128_256
2612 (match_operand:VF_128_256 1 "register_operand")
2613 (match_operand:VF_128_256 2 "vector_operand"))
2614 (plus:VF_128_256
2615 (match_operand:VF_128_256 3 "vector_operand")
2616 (match_operand:VF_128_256 4 "vector_operand")))
2617 (match_parallel 5 "addsub_vs_parallel"
2618 [(match_operand 6 "const_int_operand")])]))]
2619 "TARGET_SSE3
2620 && can_create_pseudo_p ()
2621 && ((rtx_equal_p (operands[1], operands[3])
2622 && rtx_equal_p (operands[2], operands[4]))
2623 || (rtx_equal_p (operands[1], operands[4])
2624 && rtx_equal_p (operands[2], operands[3])))"
2625 [(set (match_dup 0)
2626 (vec_merge:VF_128_256
2627 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2628 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2629 (match_dup 5)))]
2630 {
2631 int i, nelt = XVECLEN (operands[5], 0);
2632 HOST_WIDE_INT ival = 0;
2633
2634 for (i = 0; i < nelt; i++)
2635 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2636 ival |= HOST_WIDE_INT_1 << i;
2637
2638 operands[5] = GEN_INT (ival);
2639 })
2640
;; Same with PLUS first in the vec_concat: an index >= NUNITS now
;; refers to the MINUS half, so the selector test is inverted.
2641 (define_split
2642 [(set (match_operand:VF_128_256 0 "register_operand")
2643 (match_operator:VF_128_256 7 "addsub_vs_operator"
2644 [(vec_concat:<ssedoublemode>
2645 (plus:VF_128_256
2646 (match_operand:VF_128_256 1 "vector_operand")
2647 (match_operand:VF_128_256 2 "vector_operand"))
2648 (minus:VF_128_256
2649 (match_operand:VF_128_256 3 "register_operand")
2650 (match_operand:VF_128_256 4 "vector_operand")))
2651 (match_parallel 5 "addsub_vs_parallel"
2652 [(match_operand 6 "const_int_operand")])]))]
2653 "TARGET_SSE3
2654 && can_create_pseudo_p ()
2655 && ((rtx_equal_p (operands[1], operands[3])
2656 && rtx_equal_p (operands[2], operands[4]))
2657 || (rtx_equal_p (operands[1], operands[4])
2658 && rtx_equal_p (operands[2], operands[3])))"
2659 [(set (match_dup 0)
2660 (vec_merge:VF_128_256
2661 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2662 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2663 (match_dup 5)))]
2664 {
2665 int i, nelt = XVECLEN (operands[5], 0);
2666 HOST_WIDE_INT ival = 0;
2667
2668 for (i = 0; i < nelt; i++)
2669 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2670 ival |= HOST_WIDE_INT_1 << i;
2671
2672 operands[5] = GEN_INT (ival);
2673 })
2674
;; vhaddpd/vhsubpd for V4DF: horizontal add/sub within each 128-bit
;; lane — result lanes are {op1[0]±op1[1], op2[0]±op2[1],
;; op1[2]±op1[3], op2[2]±op2[3]}.
2675 (define_insn "avx_h<insn>v4df3"
2676 [(set (match_operand:V4DF 0 "register_operand" "=x")
2677 (vec_concat:V4DF
2678 (vec_concat:V2DF
2679 (plusminus:DF
2680 (vec_select:DF
2681 (match_operand:V4DF 1 "register_operand" "x")
2682 (parallel [(const_int 0)]))
2683 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2684 (plusminus:DF
2685 (vec_select:DF
2686 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2687 (parallel [(const_int 0)]))
2688 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2689 (vec_concat:V2DF
2690 (plusminus:DF
2691 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2692 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2693 (plusminus:DF
2694 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2695 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2696 "TARGET_AVX"
2697 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2698 [(set_attr "type" "sseadd")
2699 (set_attr "prefix" "vex")
2700 (set_attr "mode" "V4DF")])
2701
;; haddpd expander: result = {op1[0]+op1[1], op2[0]+op2[1]} with fixed
;; element order.
2702 (define_expand "sse3_haddv2df3"
2703 [(set (match_operand:V2DF 0 "register_operand")
2704 (vec_concat:V2DF
2705 (plus:DF
2706 (vec_select:DF
2707 (match_operand:V2DF 1 "register_operand")
2708 (parallel [(const_int 0)]))
2709 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2710 (plus:DF
2711 (vec_select:DF
2712 (match_operand:V2DF 2 "vector_operand")
2713 (parallel [(const_int 0)]))
2714 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2715 "TARGET_SSE3")
2716
;; haddpd insn form with free element selectors: since addition is
;; commutative, any selector pair is accepted as long as the two
;; indices within each operand differ (enforced in the condition).
2717 (define_insn "*sse3_haddv2df3"
2718 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2719 (vec_concat:V2DF
2720 (plus:DF
2721 (vec_select:DF
2722 (match_operand:V2DF 1 "register_operand" "0,x")
2723 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2724 (vec_select:DF
2725 (match_dup 1)
2726 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2727 (plus:DF
2728 (vec_select:DF
2729 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2730 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2731 (vec_select:DF
2732 (match_dup 2)
2733 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2734 "TARGET_SSE3
2735 && INTVAL (operands[3]) != INTVAL (operands[4])
2736 && INTVAL (operands[5]) != INTVAL (operands[6])"
2737 "@
2738 haddpd\t{%2, %0|%0, %2}
2739 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2740 [(set_attr "isa" "noavx,avx")
2741 (set_attr "type" "sseadd")
2742 (set_attr "prefix" "orig,vex")
2743 (set_attr "mode" "V2DF")])
2744
2745 (define_insn "sse3_hsubv2df3"
2746 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2747 (vec_concat:V2DF
2748 (minus:DF
2749 (vec_select:DF
2750 (match_operand:V2DF 1 "register_operand" "0,x")
2751 (parallel [(const_int 0)]))
2752 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2753 (minus:DF
2754 (vec_select:DF
2755 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2756 (parallel [(const_int 0)]))
2757 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2758 "TARGET_SSE3"
2759 "@
2760 hsubpd\t{%2, %0|%0, %2}
2761 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2762 [(set_attr "isa" "noavx,avx")
2763 (set_attr "type" "sseadd")
2764 (set_attr "prefix" "orig,vex")
2765 (set_attr "mode" "V2DF")])
2766
2767 (define_insn "*sse3_haddv2df3_low"
2768 [(set (match_operand:DF 0 "register_operand" "=x,x")
2769 (plus:DF
2770 (vec_select:DF
2771 (match_operand:V2DF 1 "register_operand" "0,x")
2772 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2773 (vec_select:DF
2774 (match_dup 1)
2775 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2776 "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD
2777 && INTVAL (operands[2]) != INTVAL (operands[3])"
2778 "@
2779 haddpd\t{%0, %0|%0, %0}
2780 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2781 [(set_attr "isa" "noavx,avx")
2782 (set_attr "type" "sseadd1")
2783 (set_attr "prefix" "orig,vex")
2784 (set_attr "mode" "V2DF")])
2785
2786 (define_insn "*sse3_hsubv2df3_low"
2787 [(set (match_operand:DF 0 "register_operand" "=x,x")
2788 (minus:DF
2789 (vec_select:DF
2790 (match_operand:V2DF 1 "register_operand" "0,x")
2791 (parallel [(const_int 0)]))
2792 (vec_select:DF
2793 (match_dup 1)
2794 (parallel [(const_int 1)]))))]
2795 "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD"
2796 "@
2797 hsubpd\t{%0, %0|%0, %0}
2798 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2799 [(set_attr "isa" "noavx,avx")
2800 (set_attr "type" "sseadd1")
2801 (set_attr "prefix" "orig,vex")
2802 (set_attr "mode" "V2DF")])
2803
2804 (define_insn "avx_h<insn>v8sf3"
2805 [(set (match_operand:V8SF 0 "register_operand" "=x")
2806 (vec_concat:V8SF
2807 (vec_concat:V4SF
2808 (vec_concat:V2SF
2809 (plusminus:SF
2810 (vec_select:SF
2811 (match_operand:V8SF 1 "register_operand" "x")
2812 (parallel [(const_int 0)]))
2813 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2814 (plusminus:SF
2815 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2816 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2817 (vec_concat:V2SF
2818 (plusminus:SF
2819 (vec_select:SF
2820 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2821 (parallel [(const_int 0)]))
2822 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2823 (plusminus:SF
2824 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2825 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2826 (vec_concat:V4SF
2827 (vec_concat:V2SF
2828 (plusminus:SF
2829 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2830 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2831 (plusminus:SF
2832 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2833 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2834 (vec_concat:V2SF
2835 (plusminus:SF
2836 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2837 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2838 (plusminus:SF
2839 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2840 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2841 "TARGET_AVX"
2842 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2843 [(set_attr "type" "sseadd")
2844 (set_attr "prefix" "vex")
2845 (set_attr "mode" "V8SF")])
2846
2847 (define_insn "sse3_h<insn>v4sf3"
2848 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2849 (vec_concat:V4SF
2850 (vec_concat:V2SF
2851 (plusminus:SF
2852 (vec_select:SF
2853 (match_operand:V4SF 1 "register_operand" "0,x")
2854 (parallel [(const_int 0)]))
2855 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2856 (plusminus:SF
2857 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2858 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2859 (vec_concat:V2SF
2860 (plusminus:SF
2861 (vec_select:SF
2862 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2863 (parallel [(const_int 0)]))
2864 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2865 (plusminus:SF
2866 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2867 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2868 "TARGET_SSE3"
2869 "@
2870 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2871 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2872 [(set_attr "isa" "noavx,avx")
2873 (set_attr "type" "sseadd")
2874 (set_attr "atom_unit" "complex")
2875 (set_attr "prefix" "orig,vex")
2876 (set_attr "prefix_rep" "1,*")
2877 (set_attr "mode" "V4SF")])
2878
;; 128-bit FP modes whose plus-reduction is expanded by
;; ix86_expand_reduc below.
2879 (define_mode_iterator REDUC_SSE_PLUS_MODE
2880 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2881
;; Sum all elements of operand 1 into the scalar operand 0:
;; a log2(nelts) shuffle/add sequence, then extract element 0.
2882 (define_expand "reduc_plus_scal_<mode>"
2883 [(plus:REDUC_SSE_PLUS_MODE
2884 (match_operand:<ssescalarmode> 0 "register_operand")
2885 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2886 ""
2887 {
2888 rtx tmp = gen_reg_rtx (<MODE>mode);
2889 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2890 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2891 const0_rtx));
2892 DONE;
2893 })
2894
;; Byte-sum reduction: fold the high 64 bits onto the low half with a
;; V1TI shift + add, then let psadbw against zero sum the eight bytes
;; (the sum lands in the low word), and extract byte 0.
2895 (define_expand "reduc_plus_scal_v16qi"
2896 [(plus:V16QI
2897 (match_operand:QI 0 "register_operand")
2898 (match_operand:V16QI 1 "register_operand"))]
2899 "TARGET_SSE2"
2900 {
2901 rtx tmp = gen_reg_rtx (V1TImode);
2902 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2903 GEN_INT (64)));
2904 rtx tmp2 = gen_reg_rtx (V16QImode);
2905 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2906 rtx tmp3 = gen_reg_rtx (V16QImode);
2907 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2908 rtx tmp4 = gen_reg_rtx (V2DImode);
2909 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2910 tmp4 = gen_lowpart (V16QImode, tmp4);
2911 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
2912 DONE;
2913 })
2914
;; Wider modes whose plus-reduction is done by halving: extract the
;; high half, add it to the low half, and recurse on the half mode.
2915 (define_mode_iterator REDUC_PLUS_MODE
2916 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2917 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2918 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2919
2920 (define_expand "reduc_plus_scal_<mode>"
2921 [(plus:REDUC_PLUS_MODE
2922 (match_operand:<ssescalarmode> 0 "register_operand")
2923 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2924 ""
2925 {
2926 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2927 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2928 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2929 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2930 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2931 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2932 DONE;
2933 })
2934
2935 ;; Modes handled by reduc_sm{in,ax}* patterns.
2936 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2937 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2938 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2939 (V2DI "TARGET_SSE4_2")])
2940
;; Signed min/max reduction for 128-bit modes via ix86_expand_reduc.
2941 (define_expand "reduc_<code>_scal_<mode>"
2942 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2943 (match_operand:<ssescalarmode> 0 "register_operand")
2944 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2945 ""
2946 {
2947 rtx tmp = gen_reg_rtx (<MODE>mode);
2948 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2949 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2950 const0_rtx));
2951 DONE;
2952 })
2953
2954 (define_mode_iterator REDUC_SMINMAX_MODE
2955 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2956 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2957 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2958 (V64QI "TARGET_AVX512BW")
2959 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2960 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2961 (V8DF "TARGET_AVX512F")])
2962
;; Signed min/max reduction for 256/512-bit modes: min/max the high
;; half against the low half, then recurse on the half mode.
2963 (define_expand "reduc_<code>_scal_<mode>"
2964 [(smaxmin:REDUC_SMINMAX_MODE
2965 (match_operand:<ssescalarmode> 0 "register_operand")
2966 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2967 ""
2968 {
2969 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2970 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2971 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2972 emit_insn (gen_<code><ssehalfvecmodelower>3
2973 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2974 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2975 DONE;
2976 })
2977
;; Unsigned min/max reduction, 512-bit AVX512 modes: same halving
;; strategy as above.
2978 (define_expand "reduc_<code>_scal_<mode>"
2979 [(umaxmin:VI_AVX512BW
2980 (match_operand:<ssescalarmode> 0 "register_operand")
2981 (match_operand:VI_AVX512BW 1 "register_operand"))]
2982 "TARGET_AVX512F"
2983 {
2984 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2985 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2986 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2987 emit_insn (gen_<code><ssehalfvecmodelower>3
2988 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2989 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2990 DONE;
2991 })
2992
;; Unsigned min/max reduction, 256-bit integer modes: halve once,
;; then finish the 128-bit half with ix86_expand_reduc and extract
;; element 0 (no recursive expander exists for the unsigned 128-bit
;; case).
2993 (define_expand "reduc_<code>_scal_<mode>"
2994 [(umaxmin:VI_256
2995 (match_operand:<ssescalarmode> 0 "register_operand")
2996 (match_operand:VI_256 1 "register_operand"))]
2997 "TARGET_AVX2"
2998 {
2999 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
3000 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]))
3001 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
3002 emit_insn (gen_<code><ssehalfvecmodelower>3
3003 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
3004 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
3005 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
3006 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
3007 (operands[0], tmp3, const0_rtx));
3008 DONE;
3009 })
3010
;; V8HI unsigned-min reduction; SSE4.1 provides phminposuw-backed
;; uminv8hi3 used by ix86_expand_reduc.
3011 (define_expand "reduc_umin_scal_v8hi"
3012 [(umin:V8HI
3013 (match_operand:HI 0 "register_operand")
3014 (match_operand:V8HI 1 "register_operand"))]
3015 "TARGET_SSE4_1"
3016 {
3017 rtx tmp = gen_reg_rtx (V8HImode);
3018 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
3019 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
3020 DONE;
3021 })
3022
;; AVX512DQ vreduceps/vreducepd: packed range reduction controlled by
;; the imm8 in operand 2; supports masking and SAE rounding via the
;; subst attributes in the name.
3023 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
3024 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3025 (unspec:VF_AVX512VL
3026 [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3027 (match_operand:SI 2 "const_0_to_255_operand")]
3028 UNSPEC_REDUCE))]
3029 "TARGET_AVX512DQ"
3030 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
3031 [(set_attr "type" "sse")
3032 (set_attr "prefix" "evex")
3033 (set_attr "mode" "<MODE>")])
3034
;; AVX512DQ vreducess/vreducesd: scalar variant — the result's low
;; element is reduced, upper elements are merged from operand 1
;; (the vec_merge with mask 1 below).
3035 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
3036 [(set (match_operand:VF_128 0 "register_operand" "=v")
3037 (vec_merge:VF_128
3038 (unspec:VF_128
3039 [(match_operand:VF_128 1 "register_operand" "v")
3040 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
3041 (match_operand:SI 3 "const_0_to_255_operand")]
3042 UNSPEC_REDUCE)
3043 (match_dup 1)
3044 (const_int 1)))]
3045 "TARGET_AVX512DQ"
3046 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
3047 [(set_attr "type" "sse")
3048 (set_attr "prefix" "evex")
3049 (set_attr "mode" "<MODE>")])
3050
3051 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3052 ;;
3053 ;; Parallel floating point comparisons
3054 ;;
3055 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3056
;; vcmpps/vcmppd with an explicit imm8 comparison predicate (0..31).
3057 (define_insn "avx_cmp<mode>3"
3058 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
3059 (unspec:VF_128_256
3060 [(match_operand:VF_128_256 1 "register_operand" "x")
3061 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
3062 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3063 UNSPEC_PCMP))]
3064 "TARGET_AVX"
3065 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3066 [(set_attr "type" "ssecmp")
3067 (set_attr "length_immediate" "1")
3068 (set_attr "prefix" "vex")
3069 (set_attr "mode" "<MODE>")])
3070
;; An AVX512VL mask compare whose result is only used to select
;; all-ones vs zero can be done as a plain AVX vector compare (which
;; already produces all-ones/zero lanes); split it pre-reload.  The
;; int-vector destination is reached through a lowpart subreg.
3071 (define_insn_and_split "*avx_cmp<mode>3_1"
3072 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3073 (vec_merge:<sseintvecmode>
3074 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3075 (match_operand:<sseintvecmode> 2 "const0_operand")
3076 (unspec:<avx512fmaskmode>
3077 [(match_operand:VF_128_256 3 "register_operand")
3078 (match_operand:VF_128_256 4 "nonimmediate_operand")
3079 (match_operand:SI 5 "const_0_to_31_operand")]
3080 UNSPEC_PCMP)))]
3081 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3082 "#"
3083 "&& 1"
3084 [(set (match_dup 6)
3085 (unspec:VF_128_256
3086 [(match_dup 3)
3087 (match_dup 4)
3088 (match_dup 5)]
3089 UNSPEC_PCMP))
3090 (set (match_dup 0) (match_dup 7))]
3091 {
3092 operands[6] = gen_reg_rtx (<MODE>mode);
3093 operands[7]
3094 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
3095 })
3096
;; Same as *avx_cmp<mode>3_1 but with the mask negated: XOR-ing the
;; imm8 predicate with 4 inverts an AVX/AVX512 FP comparison, so the
;; NOT is folded into the predicate.
3097 (define_insn_and_split "*avx_cmp<mode>3_2"
3098 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3099 (vec_merge:<sseintvecmode>
3100 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3101 (match_operand:<sseintvecmode> 2 "const0_operand")
3102 (not:<avx512fmaskmode>
3103 (unspec:<avx512fmaskmode>
3104 [(match_operand:VF_128_256 3 "register_operand")
3105 (match_operand:VF_128_256 4 "nonimmediate_operand")
3106 (match_operand:SI 5 "const_0_to_31_operand")]
3107 UNSPEC_PCMP))))]
3108 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3109 "#"
3110 "&& 1"
3111 [(set (match_dup 6)
3112 (unspec:VF_128_256
3113 [(match_dup 3)
3114 (match_dup 4)
3115 (match_dup 5)]
3116 UNSPEC_PCMP))
3117 (set (match_dup 0) (match_dup 7))]
3118 {
3119 operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);
3120 operands[6] = gen_reg_rtx (<MODE>mode);
3121 operands[7]
3122 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
3123 })
3124
;; Like *avx_cmp<mode>3_1 but the destination is already the FP mode
;; (float all-ones/zero), so no subreg or temporary is needed.
3125 (define_insn_and_split "*avx_cmp<mode>3_3"
3126 [(set (match_operand:VF_128_256 0 "register_operand")
3127 (vec_merge:VF_128_256
3128 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3129 (match_operand:VF_128_256 2 "const0_operand")
3130 (unspec:<avx512fmaskmode>
3131 [(match_operand:VF_128_256 3 "register_operand")
3132 (match_operand:VF_128_256 4 "nonimmediate_operand")
3133 (match_operand:SI 5 "const_0_to_31_operand")]
3134 UNSPEC_PCMP)))]
3135 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3136 "#"
3137 "&& 1"
3138 [(set (match_dup 0)
3139 (unspec:VF_128_256
3140 [(match_dup 3)
3141 (match_dup 4)
3142 (match_dup 5)]
3143 UNSPEC_PCMP))])
3144
;; Negated-mask version of *avx_cmp<mode>3_3; again the inversion is
;; done by XOR-ing the predicate immediate with 4.
3145 (define_insn_and_split "*avx_cmp<mode>3_4"
3146 [(set (match_operand:VF_128_256 0 "register_operand")
3147 (vec_merge:VF_128_256
3148 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3149 (match_operand:VF_128_256 2 "const0_operand")
3150 (not:<avx512fmaskmode>
3151 (unspec:<avx512fmaskmode>
3152 [(match_operand:VF_128_256 3 "register_operand")
3153 (match_operand:VF_128_256 4 "nonimmediate_operand")
3154 (match_operand:SI 5 "const_0_to_31_operand")]
3155 UNSPEC_PCMP))))]
3156 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3157 "#"
3158 "&& 1"
3159 [(set (match_dup 0)
3160 (unspec:VF_128_256
3161 [(match_dup 3)
3162 (match_dup 4)
3163 (match_dup 5)]
3164 UNSPEC_PCMP))]
3165 "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
3166
;; A blend controlled by a compare of an int vector against zero with
;; predicate LT (1) or NLT/GE (5) depends only on the sign bits, so it
;; can become a (v)blendv on operand 3.  For the GE form the two data
;; operands are swapped instead of inverting the compare.
3167 (define_insn_and_split "*avx_cmp<mode>3_lt"
3168 [(set (match_operand:VF_128_256 0 "register_operand")
3169 (vec_merge:VF_128_256
3170 (match_operand:VF_128_256 1 "vector_operand")
3171 (match_operand:VF_128_256 2 "vector_operand")
3172 (unspec:<avx512fmaskmode>
3173 [(match_operand:<sseintvecmode> 3 "register_operand")
3174 (match_operand:<sseintvecmode> 4 "const0_operand")
3175 (match_operand:SI 5 "const_0_to_7_operand")]
3176 UNSPEC_PCMP)))]
3177 "TARGET_AVX512VL && ix86_pre_reload_split ()
3178 /* LT or GE 0 */
3179 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
3180 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
3181 "#"
3182 "&& 1"
3183 [(set (match_dup 0)
3184 (unspec:VF_128_256
3185 [(match_dup 2)
3186 (match_dup 1)
3187 (lt:VF_128_256
3188 (match_dup 3)
3189 (match_dup 4))]
3190 UNSPEC_BLENDV))]
3191 {
3192 if (INTVAL (operands[5]) == 5)
3193 std::swap (operands[1], operands[2]);
3194 })
3195
;; Integer-destination variant of *avx_cmp<mode>3_lt: the blendv insn
;; operates on the byte-vector mode, so all data operands are
;; lowparted to <ssebytemode> around the blend.
3196 (define_insn_and_split "*avx_cmp<mode>3_ltint"
3197 [(set (match_operand:VI48_AVX 0 "register_operand")
3198 (vec_merge:VI48_AVX
3199 (match_operand:VI48_AVX 1 "vector_operand")
3200 (match_operand:VI48_AVX 2 "vector_operand")
3201 (unspec:<avx512fmaskmode>
3202 [(match_operand:VI48_AVX 3 "register_operand")
3203 (match_operand:VI48_AVX 4 "const0_operand")
3204 (match_operand:SI 5 "const_0_to_7_operand")]
3205 UNSPEC_PCMP)))]
3206 "TARGET_AVX512VL && ix86_pre_reload_split ()
3207 /* LT or GE 0 */
3208 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
3209 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
3210 "#"
3211 "&& 1"
3212 [(set (match_dup 0)
3213 (unspec:<ssebytemode>
3214 [(match_dup 2)
3215 (match_dup 1)
3216 (subreg:<ssebytemode>
3217 (lt:VI48_AVX
3218 (match_dup 3)
3219 (match_dup 4)) 0)]
3220 UNSPEC_BLENDV))]
3221 {
3222 if (INTVAL (operands[5]) == 5)
3223 std::swap (operands[1], operands[2]);
3224 operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
3225 operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]);
3226 operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]);
3227 })
3228
;; Scalar vcmpss/vcmpsd with imm8 predicate: the compare result goes
;; into the low element only, upper elements merged from operand 1.
3229 (define_insn "avx_vmcmp<mode>3"
3230 [(set (match_operand:VF_128 0 "register_operand" "=x")
3231 (vec_merge:VF_128
3232 (unspec:VF_128
3233 [(match_operand:VF_128 1 "register_operand" "x")
3234 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
3235 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3236 UNSPEC_PCMP)
3237 (match_dup 1)
3238 (const_int 1)))]
3239 "TARGET_AVX"
3240 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
3241 [(set_attr "type" "ssecmp")
3242 (set_attr "length_immediate" "1")
3243 (set_attr "prefix" "vex")
3244 (set_attr "mode" "<ssescalarmode>")])
3245
;; cmpps/cmppd through a comparison operator (%D3 prints the
;; predicate suffix).  This variant is restricted to commutative
;; comparisons, which allows the "%0" early-commute constraint on
;; operand 1.
3246 (define_insn "*<sse>_maskcmp<mode>3_comm"
3247 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3248 (match_operator:VF_128_256 3 "sse_comparison_operator"
3249 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
3250 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3251 "TARGET_SSE
3252 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
3253 "@
3254 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3255 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3256 [(set_attr "isa" "noavx,avx")
3257 (set_attr "type" "ssecmp")
3258 (set_attr "length_immediate" "1")
3259 (set_attr "prefix" "orig,vex")
3260 (set_attr "mode" "<MODE>")])
3261
;; General (non-commutative) cmpps/cmppd: operand order is fixed.
3262 (define_insn "<sse>_maskcmp<mode>3"
3263 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3264 (match_operator:VF_128_256 3 "sse_comparison_operator"
3265 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3266 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3267 "TARGET_SSE"
3268 "@
3269 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3270 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3271 [(set_attr "isa" "noavx,avx")
3272 (set_attr "type" "ssecmp")
3273 (set_attr "length_immediate" "1")
3274 (set_attr "prefix" "orig,vex")
3275 (set_attr "mode" "<MODE>")])
3276
;; Scalar cmpss/cmpsd through a comparison operator; low element gets
;; the compare mask, upper elements merged from operand 1.
3277 (define_insn "<sse>_vmmaskcmp<mode>3"
3278 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3279 (vec_merge:VF_128
3280 (match_operator:VF_128 3 "sse_comparison_operator"
3281 [(match_operand:VF_128 1 "register_operand" "0,x")
3282 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3283 (match_dup 1)
3284 (const_int 1)))]
3285 "TARGET_SSE"
3286 "@
3287 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3288 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3289 [(set_attr "isa" "noavx,avx")
3290 (set_attr "type" "ssecmp")
3291 (set_attr "length_immediate" "1,*")
3292 (set_attr "prefix" "orig,vex")
3293 (set_attr "mode" "<ssescalarmode>")])
3294
;; Per-mode immediate-range predicate for the compare imm8:
;; FP vector modes accept 0..31, integer vector modes accept 0..7.
3295 (define_mode_attr cmp_imm_predicate
3296 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3297 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3298 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3299 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3300 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3301 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3302 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3303 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3304 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
3305
;; AVX512 vcmpps/pd / vpcmpd/q into a mask register, with optional
;; zero-masking merge and SAE rounding via the subst names.
3306 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3307 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3308 (unspec:<avx512fmaskmode>
3309 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
3310 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3311 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3312 UNSPEC_PCMP))]
3313 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3314 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3315 [(set_attr "type" "ssecmp")
3316 (set_attr "length_immediate" "1")
3317 (set_attr "prefix" "evex")
3318 (set_attr "mode" "<sseinsnmode>")])
3319
;; Fold (not (compare ...)) into the compare itself: XOR-ing the imm8
;; predicate with 4 yields the inverted comparison.
3320 (define_insn_and_split "*<avx512>_cmp<mode>3"
3321 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3322 (not:<avx512fmaskmode>
3323 (unspec:<avx512fmaskmode>
3324 [(match_operand:V48_AVX512VL 1 "register_operand")
3325 (match_operand:V48_AVX512VL 2 "nonimmediate_operand")
3326 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3327 UNSPEC_PCMP)))]
3328 "TARGET_AVX512F && ix86_pre_reload_split ()"
3329 "#"
3330 "&& 1"
3331 [(set (match_dup 0)
3332 (unspec:<avx512fmaskmode>
3333 [(match_dup 1)
3334 (match_dup 2)
3335 (match_dup 4)]
3336 UNSPEC_PCMP))]
3337 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3338
;; AVX512BW vpcmpb/w (byte/word element modes) into a mask register.
3339 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3340 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3341 (unspec:<avx512fmaskmode>
3342 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3343 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3344 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3345 UNSPEC_PCMP))]
3346 "TARGET_AVX512BW"
3347 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3348 [(set_attr "type" "ssecmp")
3349 (set_attr "length_immediate" "1")
3350 (set_attr "prefix" "evex")
3351 (set_attr "mode" "<sseinsnmode>")])
3352
;; Iterate over both the signed and unsigned integer compare unspecs.
3353 (define_int_iterator UNSPEC_PCMP_ITER
3354 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
3355
;; NOT-of-compare splitter for byte/word modes, covering both signed
;; and unsigned compares; the inversion is folded into the predicate
;; immediate by XOR-ing it with 4.
3356 (define_insn_and_split "*<avx512>_cmp<mode>3"
3357 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3358 (not:<avx512fmaskmode>
3359 (unspec:<avx512fmaskmode>
3360 [(match_operand:VI12_AVX512VL 1 "register_operand")
3361 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3362 (match_operand:SI 3 "<cmp_imm_predicate>")]
3363 UNSPEC_PCMP_ITER)))]
3364 "TARGET_AVX512BW && ix86_pre_reload_split ()"
3365 "#"
3366 "&& 1"
3367 [(set (match_dup 0)
3368 (unspec:<avx512fmaskmode>
3369 [(match_dup 1)
3370 (match_dup 2)
3371 (match_dup 4)]
3372 UNSPEC_PCMP_ITER))]
3373 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3374
;; Unsigned vpcmpub/uw for byte/word element modes (AVX512BW).
3375 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3376 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3377 (unspec:<avx512fmaskmode>
3378 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3379 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3380 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3381 UNSPEC_UNSIGNED_PCMP))]
3382 "TARGET_AVX512BW"
3383 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3384 [(set_attr "type" "ssecmp")
3385 (set_attr "length_immediate" "1")
3386 (set_attr "prefix" "evex")
3387 (set_attr "mode" "<sseinsnmode>")])
3388
;; Unsigned vpcmpud/uq for dword/qword element modes (AVX512F).
3389 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3390 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3391 (unspec:<avx512fmaskmode>
3392 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3393 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3394 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3395 UNSPEC_UNSIGNED_PCMP))]
3396 "TARGET_AVX512F"
3397 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3398 [(set_attr "type" "ssecmp")
3399 (set_attr "length_immediate" "1")
3400 (set_attr "prefix" "evex")
3401 (set_attr "mode" "<sseinsnmode>")])
3402
;; NOT-of-unsigned-compare splitter for dword/qword modes; predicate
;; inverted via XOR 4 as above.
3403 (define_insn_and_split "*<avx512>_ucmp<mode>3"
3404 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3405 (not:<avx512fmaskmode>
3406 (unspec:<avx512fmaskmode>
3407 [(match_operand:VI48_AVX512VL 1 "register_operand")
3408 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
3409 (match_operand:SI 3 "const_0_to_7_operand")]
3410 UNSPEC_UNSIGNED_PCMP)))]
3411 "TARGET_AVX512F && ix86_pre_reload_split ()"
3412 "#"
3413 "&& 1"
3414 [(set (match_dup 0)
3415 (unspec:<avx512fmaskmode>
3416 [(match_dup 1)
3417 (match_dup 2)
3418 (match_dup 4)]
3419 UNSPEC_UNSIGNED_PCMP))]
3420 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3421
;; Mask used by the PR96906 split below to select which predicate
;; immediates it may handle: signed compares allow only bits {0,4},
;; unsigned compares additionally allow bit 2 (LE/NLE).
3422 (define_int_attr pcmp_signed_mask
3423 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
3424
3425 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
3426 ;; For signed comparisons, handle only EQ (0) and NEQ (4);
3427 ;; for unsigned comparisons, additionally handle LE (2) and NLE (6), which are equivalent to EQ and NEQ here.
3428
;; PR96906: (us_minus (a, b)) EQ/NEQ 0 is equivalent to an unsigned
;; a LE/NLE b, so drop the saturating subtraction and emit vpcmpu*
;; directly.  If operand 1 is a MEM it is swapped into the second
;; position and the predicate becomes NLT (GE); bit 2 of the original
;; predicate (NEQ vs EQ) inverts the new predicate via XOR 4.
3429 (define_split
3430 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3431 (unspec:<avx512fmaskmode>
3432 [(us_minus:VI12_AVX512VL
3433 (match_operand:VI12_AVX512VL 1 "vector_operand")
3434 (match_operand:VI12_AVX512VL 2 "vector_operand"))
3435 (match_operand:VI12_AVX512VL 3 "const0_operand")
3436 (match_operand:SI 4 "const_0_to_7_operand")]
3437 UNSPEC_PCMP_ITER))]
3438 "TARGET_AVX512BW
3439 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
3440 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
3441 [(const_int 0)]
3442 {
3443 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
3444 int cmp_predicate = 2; /* LE */
3445 if (MEM_P (operands[1]))
3446 {
3447 std::swap (operands[1], operands[2]);
3448 cmp_predicate = 5; /* NLT (GE) */
3449 }
3450 if ((INTVAL (operands[4]) & 4) != 0)
3451 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
3452 emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
3453 GEN_INT (cmp_predicate)));
3454 DONE;
3455 })
3456
;; Scalar vcmpss/sd into a mask register; only bit 0 of the mask is
;; meaningful (the AND with const_int 1 below).
3457 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3458 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3459 (and:<avx512fmaskmode>
3460 (unspec:<avx512fmaskmode>
3461 [(match_operand:VF_128 1 "register_operand" "v")
3462 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3463 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3464 UNSPEC_PCMP)
3465 (const_int 1)))]
3466 "TARGET_AVX512F"
3467 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3468 [(set_attr "type" "ssecmp")
3469 (set_attr "length_immediate" "1")
3470 (set_attr "prefix" "evex")
3471 (set_attr "mode" "<ssescalarmode>")])
3472
;; Masked variant: the compare result is additionally ANDed with
;; bit 0 of the input mask (operand 4), emitted as a {%k} zero-mask.
3473 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3474 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3475 (and:<avx512fmaskmode>
3476 (unspec:<avx512fmaskmode>
3477 [(match_operand:VF_128 1 "register_operand" "v")
3478 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3479 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3480 UNSPEC_PCMP)
3481 (and:<avx512fmaskmode>
3482 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3483 (const_int 1))))]
3484 "TARGET_AVX512F"
3485 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3486 [(set_attr "type" "ssecmp")
3487 (set_attr "length_immediate" "1")
3488 (set_attr "prefix" "evex")
3489 (set_attr "mode" "<ssescalarmode>")])
3490
;; (v)comiss/sd and (v)ucomiss/sd: compare the low elements of the
;; two vector operands and set the FP flags in FLAGS_REG.  %v emits
;; the AVX "v" prefix when appropriate.
3491 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3492 [(set (reg:CCFP FLAGS_REG)
3493 (compare:CCFP
3494 (vec_select:MODEF
3495 (match_operand:<ssevecmode> 0 "register_operand" "v")
3496 (parallel [(const_int 0)]))
3497 (vec_select:MODEF
3498 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3499 (parallel [(const_int 0)]))))]
3500 "SSE_FLOAT_MODE_P (<MODE>mode)"
3501 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3502 [(set_attr "type" "ssecomi")
3503 (set_attr "prefix" "maybe_vex")
3504 (set_attr "prefix_rep" "0")
;; comisd needs the 0x66 data-size prefix; comiss does not.
3505 (set (attr "prefix_data16")
3506 (if_then_else (eq_attr "mode" "DF")
3507 (const_string "1")
3508 (const_string "0")))
3509 (set_attr "mode" "<MODE>")])
3510
;; vec_cmp expanders: operand 1 is the comparison operator, operands
;; 2/3 its arguments.  Mask-register destinations go through
;; ix86_expand_mask_vec_cmp; vector destinations through
;; ix86_expand_int_vec_cmp / ix86_expand_fp_vec_cmp.

;; AVX512 dword/qword/FP modes -> mask register.
3511 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3512 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3513 (match_operator:<avx512fmaskmode> 1 ""
3514 [(match_operand:V48_AVX512VL 2 "register_operand")
3515 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3516 "TARGET_AVX512F"
3517 {
3518 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3519 operands[2], operands[3]);
3520 gcc_assert (ok);
3521 DONE;
3522 })
3523
;; AVX512BW byte/word modes -> mask register.
3524 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3525 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3526 (match_operator:<avx512fmaskmode> 1 ""
3527 [(match_operand:VI12_AVX512VL 2 "register_operand")
3528 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3529 "TARGET_AVX512BW"
3530 {
3531 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3532 operands[2], operands[3]);
3533 gcc_assert (ok);
3534 DONE;
3535 })
3536
;; 256-bit integer compare -> int vector (AVX2).
3537 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3538 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3539 (match_operator:<sseintvecmode> 1 ""
3540 [(match_operand:VI_256 2 "register_operand")
3541 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3542 "TARGET_AVX2"
3543 {
3544 bool ok = ix86_expand_int_vec_cmp (operands);
3545 gcc_assert (ok);
3546 DONE;
3547 })
3548
;; 128-bit byte/word/dword integer compare -> int vector (SSE2).
3549 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3550 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3551 (match_operator:<sseintvecmode> 1 ""
3552 [(match_operand:VI124_128 2 "register_operand")
3553 (match_operand:VI124_128 3 "vector_operand")]))]
3554 "TARGET_SSE2"
3555 {
3556 bool ok = ix86_expand_int_vec_cmp (operands);
3557 gcc_assert (ok);
3558 DONE;
3559 })
3560
;; V2DI compare -> V2DI; needs SSE4.2 (pcmpgtq).
3561 (define_expand "vec_cmpv2div2di"
3562 [(set (match_operand:V2DI 0 "register_operand")
3563 (match_operator:V2DI 1 ""
3564 [(match_operand:V2DI 2 "register_operand")
3565 (match_operand:V2DI 3 "vector_operand")]))]
3566 "TARGET_SSE4_2"
3567 {
3568 bool ok = ix86_expand_int_vec_cmp (operands);
3569 gcc_assert (ok);
3570 DONE;
3571 })
3572
;; 256-bit FP compare -> int vector (AVX).
3573 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3574 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3575 (match_operator:<sseintvecmode> 1 ""
3576 [(match_operand:VF_256 2 "register_operand")
3577 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3578 "TARGET_AVX"
3579 {
3580 bool ok = ix86_expand_fp_vec_cmp (operands);
3581 gcc_assert (ok);
3582 DONE;
3583 })
3584
;; 128-bit FP compare -> int vector (SSE).
3585 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3586 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3587 (match_operator:<sseintvecmode> 1 ""
3588 [(match_operand:VF_128 2 "register_operand")
3589 (match_operand:VF_128 3 "vector_operand")]))]
3590 "TARGET_SSE"
3591 {
3592 bool ok = ix86_expand_fp_vec_cmp (operands);
3593 gcc_assert (ok);
3594 DONE;
3595 })
3596
;; Unsigned vector comparison expanders (vec_cmpu*), mirroring the
;; signed ones above; ix86_expand_mask_vec_cmp / ix86_expand_int_vec_cmp
;; pick the unsigned instruction forms from the operator code.
3597 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3598 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3599 (match_operator:<avx512fmaskmode> 1 ""
3600 [(match_operand:VI48_AVX512VL 2 "register_operand")
3601 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3602 "TARGET_AVX512F"
3603 {
3604 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3605 operands[2], operands[3]);
3606 gcc_assert (ok);
3607 DONE;
3608 })
3609
;; Byte/word element unsigned mask compares require AVX512BW.
3610 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3611 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3612 (match_operator:<avx512fmaskmode> 1 ""
3613 [(match_operand:VI12_AVX512VL 2 "register_operand")
3614 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3615 "TARGET_AVX512BW"
3616 {
3617 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3618 operands[2], operands[3]);
3619 gcc_assert (ok);
3620 DONE;
3621 })
3622
;; 256-bit unsigned integer compares need AVX2.
3623 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3624 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3625 (match_operator:<sseintvecmode> 1 ""
3626 [(match_operand:VI_256 2 "register_operand")
3627 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3628 "TARGET_AVX2"
3629 {
3630 bool ok = ix86_expand_int_vec_cmp (operands);
3631 gcc_assert (ok);
3632 DONE;
3633 })
3634
;; 128-bit byte/word/dword unsigned compares need only SSE2.
3635 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3636 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3637 (match_operator:<sseintvecmode> 1 ""
3638 [(match_operand:VI124_128 2 "register_operand")
3639 (match_operand:VI124_128 3 "vector_operand")]))]
3640 "TARGET_SSE2"
3641 {
3642 bool ok = ix86_expand_int_vec_cmp (operands);
3643 gcc_assert (ok);
3644 DONE;
3645 })
3646
;; Unsigned V2DI compares are synthesized from pcmpgtq (SSE4.2).
3647 (define_expand "vec_cmpuv2div2di"
3648 [(set (match_operand:V2DI 0 "register_operand")
3649 (match_operator:V2DI 1 ""
3650 [(match_operand:V2DI 2 "register_operand")
3651 (match_operand:V2DI 3 "vector_operand")]))]
3652 "TARGET_SSE4_2"
3653 {
3654 bool ok = ix86_expand_int_vec_cmp (operands);
3655 gcc_assert (ok);
3656 DONE;
3657 })
3658
;; V2DI equality only needs pcmpeqq, available from SSE4.1.
3659 (define_expand "vec_cmpeqv2div2di"
3660 [(set (match_operand:V2DI 0 "register_operand")
3661 (match_operator:V2DI 1 ""
3662 [(match_operand:V2DI 2 "register_operand")
3663 (match_operand:V2DI 3 "vector_operand")]))]
3664 "TARGET_SSE4_1"
3665 {
3666 bool ok = ix86_expand_int_vec_cmp (operands);
3667 gcc_assert (ok);
3668 DONE;
3669 })
3670
;; vcond expanders: op0 = (op4 <op3> op5) ? op1 : op2, with an FP
;; comparison and a data mode of matching element count.  All lower to
;; ix86_expand_fp_vcond.
3671 (define_expand "vcond<V_512:mode><VF_512:mode>"
3672 [(set (match_operand:V_512 0 "register_operand")
3673 (if_then_else:V_512
3674 (match_operator 3 ""
3675 [(match_operand:VF_512 4 "nonimmediate_operand")
3676 (match_operand:VF_512 5 "nonimmediate_operand")])
3677 (match_operand:V_512 1 "general_operand")
3678 (match_operand:V_512 2 "general_operand")))]
3679 "TARGET_AVX512F
3680 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3681 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3682 {
3683 bool ok = ix86_expand_fp_vcond (operands);
3684 gcc_assert (ok);
3685 DONE;
3686 })
3687
;; 256-bit variant, available from AVX.
3688 (define_expand "vcond<V_256:mode><VF_256:mode>"
3689 [(set (match_operand:V_256 0 "register_operand")
3690 (if_then_else:V_256
3691 (match_operator 3 ""
3692 [(match_operand:VF_256 4 "nonimmediate_operand")
3693 (match_operand:VF_256 5 "nonimmediate_operand")])
3694 (match_operand:V_256 1 "general_operand")
3695 (match_operand:V_256 2 "general_operand")))]
3696 "TARGET_AVX
3697 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3698 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3699 {
3700 bool ok = ix86_expand_fp_vcond (operands);
3701 gcc_assert (ok);
3702 DONE;
3703 })
3704
;; 128-bit variant; V2DF cases inside VF_128 are gated on SSE2.
3705 (define_expand "vcond<V_128:mode><VF_128:mode>"
3706 [(set (match_operand:V_128 0 "register_operand")
3707 (if_then_else:V_128
3708 (match_operator 3 ""
3709 [(match_operand:VF_128 4 "vector_operand")
3710 (match_operand:VF_128 5 "vector_operand")])
3711 (match_operand:V_128 1 "general_operand")
3712 (match_operand:V_128 2 "general_operand")))]
3713 "TARGET_SSE
3714 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3715 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3716 {
3717 bool ok = ix86_expand_fp_vcond (operands);
3718 gcc_assert (ok);
3719 DONE;
3720 })
3721
;; vcond_mask expanders: op0 = op3 ? op1 : op2, where op3 is an
;; already-computed mask.  For AVX512 targets op3 is a mask register
;; and vec_merge matches the masked-move patterns directly; for the
;; older targets op3 is a vector mask and we go through
;; ix86_expand_sse_movcc (blend / and-andn-or sequences).
3722 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3723 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3724 (vec_merge:V48_AVX512VL
3725 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3726 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3727 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3728 "TARGET_AVX512F")
3729
;; Byte/word element masked selects require AVX512BW.
3730 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3731 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3732 (vec_merge:VI12_AVX512VL
3733 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3734 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3735 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3736 "TARGET_AVX512BW")
3737
3738 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3739 ;; and their condition can be folded late into a constant, we need to
3740 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3741 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3742 V8SI V4DI])
3743
3744 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3745 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3746 (vec_merge:VI_256_AVX2
3747 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3748 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3749 (match_operand:<sseintvecmode> 3 "register_operand")))]
3750 "TARGET_AVX"
3751 {
3752 ix86_expand_sse_movcc (operands[0], operands[3],
3753 operands[1], operands[2]);
3754 DONE;
3755 })
3756
;; 128-bit integer masked select, SSE2 and up.
3757 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3758 [(set (match_operand:VI124_128 0 "register_operand")
3759 (vec_merge:VI124_128
3760 (match_operand:VI124_128 1 "vector_operand")
3761 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3762 (match_operand:<sseintvecmode> 3 "register_operand")))]
3763 "TARGET_SSE2"
3764 {
3765 ix86_expand_sse_movcc (operands[0], operands[3],
3766 operands[1], operands[2]);
3767 DONE;
3768 })
3769
;; V2DI masked select, paired with the SSE4.2 vec_cmpv2div2di above.
3770 (define_expand "vcond_mask_v2div2di"
3771 [(set (match_operand:V2DI 0 "register_operand")
3772 (vec_merge:V2DI
3773 (match_operand:V2DI 1 "vector_operand")
3774 (match_operand:V2DI 2 "nonimm_or_0_operand")
3775 (match_operand:V2DI 3 "register_operand")))]
3776 "TARGET_SSE4_2"
3777 {
3778 ix86_expand_sse_movcc (operands[0], operands[3],
3779 operands[1], operands[2]);
3780 DONE;
3781 })
3782
;; 256-bit FP masked select (vblendvps/vblendvpd or logic fallback).
3783 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3784 [(set (match_operand:VF_256 0 "register_operand")
3785 (vec_merge:VF_256
3786 (match_operand:VF_256 1 "nonimmediate_operand")
3787 (match_operand:VF_256 2 "nonimm_or_0_operand")
3788 (match_operand:<sseintvecmode> 3 "register_operand")))]
3789 "TARGET_AVX"
3790 {
3791 ix86_expand_sse_movcc (operands[0], operands[3],
3792 operands[1], operands[2]);
3793 DONE;
3794 })
3795
;; 128-bit FP masked select, SSE and up.
3796 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3797 [(set (match_operand:VF_128 0 "register_operand")
3798 (vec_merge:VF_128
3799 (match_operand:VF_128 1 "vector_operand")
3800 (match_operand:VF_128 2 "nonimm_or_0_operand")
3801 (match_operand:<sseintvecmode> 3 "register_operand")))]
3802 "TARGET_SSE"
3803 {
3804 ix86_expand_sse_movcc (operands[0], operands[3],
3805 operands[1], operands[2]);
3806 DONE;
3807 })
3808
3809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3810 ;;
3811 ;; Parallel floating point logical operations
3812 ;;
3813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3814
;; FP and-not (op0 = ~op1 & op2) for 128/256-bit vectors.  The output
;; template is built at run time: base suffix comes from the insn's
;; "mode" attribute, and on AVX512F-without-DQ the integer vpandn[dq]
;; form is substituted because vandnp[sd] does not exist there.
3815 (define_insn "<sse>_andnot<mode>3<mask_name>"
3816 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3817 (and:VF_128_256
3818 (not:VF_128_256
3819 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3820 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3821 "TARGET_SSE && <mask_avx512vl_condition>"
3822 {
3823 char buf[128];
3824 const char *ops;
3825 const char *suffix;
3826
;; Pick the two- or three-operand template by alternative.
3827 switch (which_alternative)
3828 {
3829 case 0:
3830 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3831 break;
3832 case 1:
3833 case 2:
3834 case 3:
3835 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3836 break;
3837 default:
3838 gcc_unreachable ();
3839 }
3840
;; Pick the mnemonic suffix (and possibly switch to the integer form)
;; from the computed insn mode.
3841 switch (get_attr_mode (insn))
3842 {
3843 case MODE_V8SF:
3844 case MODE_V4SF:
3845 suffix = "ps";
3846 break;
3847 case MODE_OI:
3848 case MODE_TI:
3849 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3850 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3851 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3852 break;
3853 default:
3854 suffix = "<ssemodesuffix>";
3855 }
3856
3857 snprintf (buf, sizeof (buf), ops, suffix);
3858 output_asm_insn (buf, operands);
3859 return "";
3860 }
3861 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3862 (set_attr "type" "sselog")
3863 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3864 (set (attr "mode")
;; Masked without DQ, or the plain-AVX512F alternative, must use the
;; integer vector mode so the vpandn form is selected above.
3865 (cond [(and (match_test "<mask_applied>")
3866 (and (eq_attr "alternative" "1")
3867 (match_test "!TARGET_AVX512DQ")))
3868 (const_string "<sseintvecmode2>")
3869 (eq_attr "alternative" "3")
3870 (const_string "<sseintvecmode2>")
3871 (match_test "TARGET_AVX")
3872 (const_string "<MODE>")
3873 (match_test "optimize_function_for_size_p (cfun)")
3874 (const_string "V4SF")
3875 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3876 (const_string "V4SF")
3877 ]
3878 (const_string "<MODE>")))])
3879
;; 512-bit FP and-not.  With AVX512DQ the native vandnps/vandnpd is
;; emitted; plain AVX512F falls back to the integer vpandnd/vpandnq.
3880 (define_insn "<sse>_andnot<mode>3<mask_name>"
3881 [(set (match_operand:VF_512 0 "register_operand" "=v")
3882 (and:VF_512
3883 (not:VF_512
3884 (match_operand:VF_512 1 "register_operand" "v"))
3885 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3886 "TARGET_AVX512F"
3887 {
3888 char buf[128];
3889 const char *ops;
3890 const char *suffix;
3891
3892 suffix = "<ssemodesuffix>";
3893 ops = "";
3894
3895 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3896 if (!TARGET_AVX512DQ)
3897 {
3898 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
;; "p" turns vandn... into vpandn... in the template below.
3899 ops = "p";
3900 }
3901
3902 snprintf (buf, sizeof (buf),
3903 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3904 ops, suffix);
3905 output_asm_insn (buf, operands);
3906 return "";
3907 }
3908 [(set_attr "type" "sselog")
3909 (set_attr "prefix" "evex")
3910 (set (attr "mode")
3911 (if_then_else (match_test "TARGET_AVX512DQ")
3912 (const_string "<sseinsnmode>")
3913 (const_string "XI")))])
3914
;; Expanders for FP AND/IOR/XOR; ix86_fixup_binary_operands_no_copy
;; canonicalizes operands (e.g. forces at most one memory operand).
3915 (define_expand "<code><mode>3<mask_name>"
3916 [(set (match_operand:VF_128_256 0 "register_operand")
3917 (any_logic:VF_128_256
3918 (match_operand:VF_128_256 1 "vector_operand")
3919 (match_operand:VF_128_256 2 "vector_operand")))]
3920 "TARGET_SSE && <mask_avx512vl_condition>"
3921 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3922
;; 512-bit variant.
3923 (define_expand "<code><mode>3<mask_name>"
3924 [(set (match_operand:VF_512 0 "register_operand")
3925 (any_logic:VF_512
3926 (match_operand:VF_512 1 "nonimmediate_operand")
3927 (match_operand:VF_512 2 "nonimmediate_operand")))]
3928 "TARGET_AVX512F"
3929 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3930
;; FP AND/IOR/XOR insn for 128/256-bit vectors.  Same template-building
;; scheme as <sse>_andnot<mode>3 above: base suffix from the insn mode,
;; with a fallback to the integer vp<logic>[dq] form on AVX512F
;; without DQ.
3931 (define_insn "*<code><mode>3<mask_name>"
3932 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3933 (any_logic:VF_128_256
3934 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3935 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3936 "TARGET_SSE && <mask_avx512vl_condition>
3937 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3938 {
3939 char buf[128];
3940 const char *ops;
3941 const char *suffix;
3942
;; Two-operand SSE form vs three-operand VEX/EVEX form.
3943 switch (which_alternative)
3944 {
3945 case 0:
3946 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3947 break;
3948 case 1:
3949 case 2:
3950 case 3:
3951 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3952 break;
3953 default:
3954 gcc_unreachable ();
3955 }
3956
3957 switch (get_attr_mode (insn))
3958 {
3959 case MODE_V8SF:
3960 case MODE_V4SF:
3961 suffix = "ps";
3962 break;
3963 case MODE_OI:
3964 case MODE_TI:
3965 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3966 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3967 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3968 break;
3969 default:
3970 suffix = "<ssemodesuffix>";
3971 }
3972
3973 snprintf (buf, sizeof (buf), ops, suffix);
3974 output_asm_insn (buf, operands);
3975 return "";
3976 }
3977 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3978 (set_attr "type" "sselog")
3979 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3980 (set (attr "mode")
;; Same mode selection logic as the andnot pattern above.
3981 (cond [(and (match_test "<mask_applied>")
3982 (and (eq_attr "alternative" "1")
3983 (match_test "!TARGET_AVX512DQ")))
3984 (const_string "<sseintvecmode2>")
3985 (eq_attr "alternative" "3")
3986 (const_string "<sseintvecmode2>")
3987 (match_test "TARGET_AVX")
3988 (const_string "<MODE>")
3989 (match_test "optimize_function_for_size_p (cfun)")
3990 (const_string "V4SF")
3991 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3992 (const_string "V4SF")
3993 ]
3994 (const_string "<MODE>")))])
3995
;; 512-bit FP AND/IOR/XOR.  Native v<logic>ps/pd with AVX512DQ,
;; otherwise the integer vp<logic>d/q form.
3996 (define_insn "*<code><mode>3<mask_name>"
3997 [(set (match_operand:VF_512 0 "register_operand" "=v")
3998 (any_logic:VF_512
3999 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
4000 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
4001 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4002 {
4003 char buf[128];
4004 const char *ops;
4005 const char *suffix;
4006
4007 suffix = "<ssemodesuffix>";
4008 ops = "";
4009
4010 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
4011 if (!TARGET_AVX512DQ)
4012 {
4013 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
;; "p" turns v<logic>... into vp<logic>... in the template below.
4014 ops = "p";
4015 }
4016
4017 snprintf (buf, sizeof (buf),
4018 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
4019 ops, suffix);
4020 output_asm_insn (buf, operands);
4021 return "";
4022 }
4023 [(set_attr "type" "sselog")
4024 (set_attr "prefix" "evex")
4025 (set (attr "mode")
4026 (if_then_else (match_test "TARGET_AVX512DQ")
4027 (const_string "<sseinsnmode>")
4028 (const_string "XI")))])
4029
;; copysign: op0 = (op1 & ~signmask) | (op2 & signmask), using the
;; sign-bit constant built by ix86_build_signbit_mask.
4030 (define_expand "copysign<mode>3"
4031 [(set (match_dup 4)
4032 (and:VF
4033 (not:VF (match_dup 3))
4034 (match_operand:VF 1 "vector_operand")))
4035 (set (match_dup 5)
4036 (and:VF (match_dup 3)
4037 (match_operand:VF 2 "vector_operand")))
4038 (set (match_operand:VF 0 "register_operand")
4039 (ior:VF (match_dup 4) (match_dup 5)))]
4040 "TARGET_SSE"
4041 {
4042 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
4043
4044 operands[4] = gen_reg_rtx (<MODE>mode);
4045 operands[5] = gen_reg_rtx (<MODE>mode);
4046 })
4047
;; xorsign: op0 = op1 ^ (op2 & signmask), i.e. flip op1's sign by
;; op2's sign (cheaper than copysign when op1's sign is irrelevant).
4048 (define_expand "xorsign<mode>3"
4049 [(set (match_dup 4)
4050 (and:VF (match_dup 3)
4051 (match_operand:VF 2 "vector_operand")))
4052 (set (match_operand:VF 0 "register_operand")
4053 (xor:VF (match_dup 4)
4054 (match_operand:VF 1 "vector_operand")))]
4055 "TARGET_SSE"
4056 {
4057 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
4058
4059 operands[4] = gen_reg_rtx (<MODE>mode);
4060 })
4061
;; signbit: logical shift right by (bits-1) on the integer view of the
;; vector leaves just the sign bit in each lane.
4062 (define_expand "signbit<mode>2"
4063 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4064 (lshiftrt:<sseintvecmode>
4065 (subreg:<sseintvecmode>
4066 (match_operand:VF1_AVX2 1 "register_operand") 0)
4067 (match_dup 2)))]
4068 "TARGET_SSE2"
4069 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
4070
4071 ;; Also define scalar versions. These are used for abs, neg, and
4072 ;; conditional move. Using subregs into vector modes causes register
4073 ;; allocation lossage. These patterns do not allow memory operands
4074 ;; because the native instructions read the full 128-bits.
4075
;; Scalar (SF/DF) and-not done with the full-width vector instruction;
;; register-only operands because the insn reads all 128 bits.
;; Alternative 3 uses zmm registers (%g operands) on plain AVX512F.
4076 (define_insn "*andnot<mode>3"
4077 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4078 (and:MODEF
4079 (not:MODEF
4080 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
4081 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4082 "SSE_FLOAT_MODE_P (<MODE>mode)"
4083 {
4084 char buf[128];
4085 const char *ops;
4086 const char *suffix
4087 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4088
4089 switch (which_alternative)
4090 {
4091 case 0:
4092 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
4093 break;
4094 case 1:
4095 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4096 break;
4097 case 2:
;; AVX512VL: vandnps/pd needs DQ; otherwise use integer vpandn[dq].
4098 if (TARGET_AVX512DQ)
4099 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4100 else
4101 {
4102 suffix = <MODE>mode == DFmode ? "q" : "d";
4103 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4104 }
4105 break;
4106 case 3:
;; Plain AVX512F: operate on the full zmm registers via %g.
4107 if (TARGET_AVX512DQ)
4108 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4109 else
4110 {
4111 suffix = <MODE>mode == DFmode ? "q" : "d";
4112 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4113 }
4114 break;
4115 default:
4116 gcc_unreachable ();
4117 }
4118
4119 snprintf (buf, sizeof (buf), ops, suffix);
4120 output_asm_insn (buf, operands);
4121 return "";
4122 }
4123 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4124 (set_attr "type" "sselog")
4125 (set_attr "prefix" "orig,vex,evex,evex")
4126 (set (attr "mode")
4127 (cond [(eq_attr "alternative" "2")
4128 (if_then_else (match_test "TARGET_AVX512DQ")
4129 (const_string "<ssevecmode>")
4130 (const_string "TI"))
4131 (eq_attr "alternative" "3")
4132 (if_then_else (match_test "TARGET_AVX512DQ")
4133 (const_string "<avx512fvecmode>")
4134 (const_string "XI"))
4135 (match_test "TARGET_AVX")
4136 (const_string "<ssevecmode>")
4137 (match_test "optimize_function_for_size_p (cfun)")
4138 (const_string "V4SF")
4139 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4140 (const_string "V4SF")
4141 ]
4142 (const_string "<ssevecmode>")))])
4143
;; TFmode (128-bit) and-not via the vector logic instructions.
;; Alternatives 2/3 are EVEX-only registers: 2 uses xmm16+ (AVX512VL),
;; 3 widens to zmm (%g operands) on plain AVX512F; both need the
;; integer pandnq form since there is no EVEX andnps for TFmode.
4144 (define_insn "*andnottf3"
4145 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
4146 (and:TF
4147 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
4148 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
4149 "TARGET_SSE"
4150 {
4151 char buf[128];
4152 const char *ops;
4153 const char *tmp
4154 = (which_alternative >= 2 ? "pandnq"
4155 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
4156
4157 switch (which_alternative)
4158 {
4159 case 0:
4160 ops = "%s\t{%%2, %%0|%%0, %%2}";
4161 break;
4162 case 1:
4163 case 2:
4164 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4165 break;
4166 case 3:
4167 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4168 break;
4169 default:
4170 gcc_unreachable ();
4171 }
4172
4173 snprintf (buf, sizeof (buf), ops, tmp);
4174 output_asm_insn (buf, operands);
4175 return "";
4176 }
4177 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4178 (set_attr "type" "sselog")
4179 (set (attr "prefix_data16")
;; Legacy-encoded pandn needs the 0x66 prefix.
4180 (if_then_else
4181 (and (eq_attr "alternative" "0")
4182 (eq_attr "mode" "TI"))
4183 (const_string "1")
4184 (const_string "*")))
4185 (set_attr "prefix" "orig,vex,evex,evex")
4186 (set (attr "mode")
4187 (cond [(eq_attr "alternative" "2")
4188 (const_string "TI")
4189 (eq_attr "alternative" "3")
4190 (const_string "XI")
4191 (match_test "TARGET_AVX")
4192 (const_string "TI")
4193 (ior (not (match_test "TARGET_SSE2"))
4194 (match_test "optimize_function_for_size_p (cfun)"))
4195 (const_string "V4SF")
4196 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4197 (const_string "V4SF")
4198 ]
4199 (const_string "TI")))])
4200
;; Scalar (SF/DF) AND/IOR/XOR using the full-width vector instruction;
;; register-only because the insn reads all 128 bits.  Alternative 3
;; widens to zmm (%g operands) on plain AVX512F.
4201 (define_insn "*<code><mode>3"
4202 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4203 (any_logic:MODEF
4204 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
4205 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4206 "SSE_FLOAT_MODE_P (<MODE>mode)"
4207 {
4208 char buf[128];
4209 const char *ops;
4210 const char *suffix
4211 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4212
4213 switch (which_alternative)
4214 {
4215 case 0:
4216 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
4217 break;
4218 case 2:
;; AVX512VL without DQ: use the integer vp<logic>[dq] form.
4219 if (!TARGET_AVX512DQ)
4220 {
4221 suffix = <MODE>mode == DFmode ? "q" : "d";
4222 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4223 break;
4224 }
4225 /* FALLTHRU */
4226 case 1:
4227 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4228 break;
4229 case 3:
4230 if (TARGET_AVX512DQ)
4231 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4232 else
4233 {
4234 suffix = <MODE>mode == DFmode ? "q" : "d";
4235 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4236 }
4237 break;
4238 default:
4239 gcc_unreachable ();
4240 }
4241
4242 snprintf (buf, sizeof (buf), ops, suffix);
4243 output_asm_insn (buf, operands);
4244 return "";
4245 }
4246 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4247 (set_attr "type" "sselog")
4248 (set_attr "prefix" "orig,vex,evex,evex")
4249 (set (attr "mode")
4250 (cond [(eq_attr "alternative" "2")
4251 (if_then_else (match_test "TARGET_AVX512DQ")
4252 (const_string "<ssevecmode>")
4253 (const_string "TI"))
4254 (eq_attr "alternative" "3")
4255 (if_then_else (match_test "TARGET_AVX512DQ")
4256 (const_string "<avx512fvecmode>")
4257 (const_string "XI"))
4258 (match_test "TARGET_AVX")
4259 (const_string "<ssevecmode>")
4260 (match_test "optimize_function_for_size_p (cfun)")
4261 (const_string "V4SF")
4262 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4263 (const_string "V4SF")
4264 ]
4265 (const_string "<ssevecmode>")))])
4266
;; TFmode AND/IOR/XOR expander; operand canonicalization as above.
4267 (define_expand "<code>tf3"
4268 [(set (match_operand:TF 0 "register_operand")
4269 (any_logic:TF
4270 (match_operand:TF 1 "vector_operand")
4271 (match_operand:TF 2 "vector_operand")))]
4272 "TARGET_SSE"
4273 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
4274
;; TFmode AND/IOR/XOR via the vector logic instructions, mirroring
;; *andnottf3 above: alternatives 2/3 are EVEX-only and use the
;; integer p<logic>q form; alternative 3 widens to zmm (%g operands)
;; on plain AVX512F.
4275 (define_insn "*<code>tf3"
4276 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
4277 (any_logic:TF
4278 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
4279 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
4280 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4281 {
4282 char buf[128];
4283 const char *ops;
4284 const char *tmp
4285 = (which_alternative >= 2 ? "p<logic>q"
4286 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
4287
4288 switch (which_alternative)
4289 {
4290 case 0:
4291 ops = "%s\t{%%2, %%0|%%0, %%2}";
4292 break;
4293 case 1:
4294 case 2:
4295 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4296 break;
4297 case 3:
4298 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4299 break;
4300 default:
4301 gcc_unreachable ();
4302 }
4303
4304 snprintf (buf, sizeof (buf), ops, tmp);
4305 output_asm_insn (buf, operands);
4306 return "";
4307 }
4308 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4309 (set_attr "type" "sselog")
4310 (set (attr "prefix_data16")
4311 (if_then_else
4312 (and (eq_attr "alternative" "0")
4313 (eq_attr "mode" "TI"))
4314 (const_string "1")
4315 (const_string "*")))
4316 (set_attr "prefix" "orig,vex,evex,evex")
4317 (set (attr "mode")
4318 (cond [(eq_attr "alternative" "2")
4319 (const_string "TI")
4320 (eq_attr "alternative" "3")
;; Alternative 3 operates on the full zmm registers (%g operands), so
;; its insn mode is XI, matching *andnottf3 — "QI" here was a bug.
4321 (const_string "XI")
4322 (match_test "TARGET_AVX")
4323 (const_string "TI")
4324 (ior (not (match_test "TARGET_SSE2"))
4325 (match_test "optimize_function_for_size_p (cfun)"))
4326 (const_string "V4SF")
4327 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4328 (const_string "V4SF")
4329 ]
4330 (const_string "TI")))])
4331
4332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4333 ;;
4334 ;; FMA floating point multiply/accumulate instructions. These include
4335 ;; scalar versions of the instructions as well as vector versions.
4336 ;;
4337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4338
4339 ;; The standard names for scalar FMA are only available with SSE math enabled.
4340 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
4341 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
4342 ;; and TARGET_FMA4 are both false.
4343 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
4344 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
4345 ;; GAS to allow proper prefix selection. However, for the moment all hardware
4346 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA patterns; scalar modes additionally
;; require SSE math to be the active FP model.
4347 (define_mode_iterator FMAMODEM
4348 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4349 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4350 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4351 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4352 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4353 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4354 (V16SF "TARGET_AVX512F")
4355 (V8DF "TARGET_AVX512F")])
4356
;; Standard names: fma = a*b+c, fms = a*b-c, fnma = -a*b+c,
;; fnms = -a*b-c; all match the anonymous insns below.
4357 (define_expand "fma<mode>4"
4358 [(set (match_operand:FMAMODEM 0 "register_operand")
4359 (fma:FMAMODEM
4360 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4361 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4362 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
4363
4364 (define_expand "fms<mode>4"
4365 [(set (match_operand:FMAMODEM 0 "register_operand")
4366 (fma:FMAMODEM
4367 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4368 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4369 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4370
4371 (define_expand "fnma<mode>4"
4372 [(set (match_operand:FMAMODEM 0 "register_operand")
4373 (fma:FMAMODEM
4374 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4375 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4376 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
4377
4378 (define_expand "fnms<mode>4"
4379 [(set (match_operand:FMAMODEM 0 "register_operand")
4380 (fma:FMAMODEM
4381 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4382 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4383 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4384
4385 ;; The builtins for intrinsics are not constrained by SSE math enabled.
4386 (define_mode_iterator FMAMODE_AVX512
4387 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4388 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4389 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4390 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4391 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4392 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4393 (V16SF "TARGET_AVX512F")
4394 (V8DF "TARGET_AVX512F")])
4395
;; Modes handled by the legacy FMA/FMA4 insn patterns (no 512-bit).
4396 (define_mode_iterator FMAMODE
4397 [SF DF V4SF V2DF V8SF V4DF])
4398
;; Intrinsic-level expanders used by the i386 builtins; same four
;; sign combinations as the standard names above.
4399 (define_expand "fma4i_fmadd_<mode>"
4400 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4401 (fma:FMAMODE_AVX512
4402 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4403 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4404 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4405
4406 (define_expand "fma4i_fmsub_<mode>"
4407 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4408 (fma:FMAMODE_AVX512
4409 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4410 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4411 (neg:FMAMODE_AVX512
4412 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4413
4414 (define_expand "fma4i_fnmadd_<mode>"
4415 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4416 (fma:FMAMODE_AVX512
4417 (neg:FMAMODE_AVX512
4418 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4419 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4420 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4421
4422 (define_expand "fma4i_fnmsub_<mode>"
4423 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4424 (fma:FMAMODE_AVX512
4425 (neg:FMAMODE_AVX512
4426 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4427 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4428 (neg:FMAMODE_AVX512
4429 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4430
;; Zero-masked FMA: route to the maskz_1 insn with an all-zeros merge
;; operand, so masked-off lanes become zero.
4431 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
4432 [(match_operand:VF_AVX512VL 0 "register_operand")
4433 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4434 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4435 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4436 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4437 "TARGET_AVX512F && <round_mode512bit_condition>"
4438 {
4439 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
4440 operands[0], operands[1], operands[2], operands[3],
4441 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4442 DONE;
4443 })
4444
;; FMA/FMA4 fused multiply-add.  FMA alternatives pick the 132/213/231
;; form by which operand is tied to the destination; FMA4 has a
;; 4-operand non-destructive encoding instead.
4445 (define_insn "*fma_fmadd_<mode>"
4446 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4447 (fma:FMAMODE
4448 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4449 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4450 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4451 "TARGET_FMA || TARGET_FMA4"
4452 "@
4453 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4454 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4455 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4456 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4457 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4458 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4459 (set_attr "type" "ssemuladd")
4460 (set_attr "mode" "<MODE>")])
4461
4462 ;; Suppose AVX-512F as baseline
4463 (define_mode_iterator VF_SF_AVX512VL
4464 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4465 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
4466
;; EVEX-encoded fmadd with optional zero-masking (sd_maskz) and
;; rounding/broadcast operand substitutions; 132/213/231 form chosen
;; by the tied operand as in *fma_fmadd_<mode>.
4467 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4468 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4469 (fma:VF_SF_AVX512VL
4470 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4471 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4472 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4473 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4474 "@
4475 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4476 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4477 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4478 [(set_attr "type" "ssemuladd")
4479 (set_attr "mode" "<MODE>")])
4480
;; Conditional (mask-merged) FMA for the vectorizer: compute the FMA into
;; a temporary, then vec_merge it with the fallback operand 5 under mask
;; operand 1.  Mask-register combine patterns fuse this later.
4481 (define_expand "cond_fma<mode>"
4482 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4483 (vec_merge:VF_AVX512VL
4484 (fma:VF_AVX512VL
4485 (match_operand:VF_AVX512VL 2 "vector_operand")
4486 (match_operand:VF_AVX512VL 3 "vector_operand")
4487 (match_operand:VF_AVX512VL 4 "vector_operand"))
4488 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4489 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4490 "TARGET_AVX512F"
4491 {
4492 rtx tmp = gen_reg_rtx (<MODE>mode);
4493 emit_insn (gen_fma<mode>4 (tmp,
4494 operands[2],
4495 operands[3],
4496 operands[4]));
4497 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
4498 tmp,
4499 operands[5],
4500 operands[1]));
4501 DONE;
4502 })
4503
;; Merge-masked FMA: result elements where mask operand 4 is clear are
;; taken from operand 1, which is tied to the destination; hence only the
;; 132/213 forms (operand 1 multiplicand) are available here.
4504 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4505 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4506 (vec_merge:VF_AVX512VL
4507 (fma:VF_AVX512VL
4508 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4509 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4510 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4511 (match_dup 1)
4512 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4513 "TARGET_AVX512F && <round_mode512bit_condition>"
4514 "@
4515 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4516 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4517 [(set_attr "type" "ssemuladd")
4518 (set_attr "mode" "<MODE>")])
4519
;; Merge-masked FMA where the merge source is the addend (operand 3, tied
;; to the destination) -- the 231 form.
4520 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4521 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4522 (vec_merge:VF_AVX512VL
4523 (fma:VF_AVX512VL
4524 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4525 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4526 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4527 (match_dup 3)
4528 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4529 "TARGET_AVX512F"
4530 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4531 [(set_attr "type" "ssemuladd")
4532 (set_attr "mode" "<MODE>")])
4533
;; FMA3/FMA4 fused multiply-subtract: op0 = op1 * op2 - op3
;; (FMA of a negated addend).
4534 (define_insn "*fma_fmsub_<mode>"
4535 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4536 (fma:FMAMODE
4537 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4538 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4539 (neg:FMAMODE
4540 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4541 "TARGET_FMA || TARGET_FMA4"
4542 "@
4543 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4544 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4545 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4546 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4547 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4548 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4549 (set_attr "type" "ssemuladd")
4550 (set_attr "mode" "<MODE>")])
4551
;; Zero-masked FMS expander: forwards to the maskz_1 pattern with a
;; zero vector as the merge source.
4552 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4553 [(match_operand:VF_AVX512VL 0 "register_operand")
4554 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4555 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4556 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4557 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4558 "TARGET_AVX512F && <round_mode512bit_condition>"
4559 {
4560 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4561 operands[0], operands[1], operands[2], operands[3],
4562 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4563 DONE;
4564 })
4565
;; AVX512 FMS with optional zero-masking / embedded rounding; memory
;; operands may use embedded broadcast via the bcst_* substitutions.
4566 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4567 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4568 (fma:VF_SF_AVX512VL
4569 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4570 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4571 (neg:VF_SF_AVX512VL
4572 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4573 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4574 "@
4575 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4576 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4577 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4578 [(set_attr "type" "ssemuladd")
4579 (set_attr "mode" "<MODE>")])
4580
;; Conditional (mask-merged) FMS: compute op2*op3-op4 into a temporary,
;; then merge with fallback operand 5 under mask operand 1.
4581 (define_expand "cond_fms<mode>"
4582 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4583 (vec_merge:VF_AVX512VL
4584 (fma:VF_AVX512VL
4585 (match_operand:VF_AVX512VL 2 "vector_operand")
4586 (match_operand:VF_AVX512VL 3 "vector_operand")
4587 (neg:VF_AVX512VL
4588 (match_operand:VF_AVX512VL 4 "vector_operand")))
4589 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4590 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4591 "TARGET_AVX512F"
4592 {
4593 rtx tmp = gen_reg_rtx (<MODE>mode);
4594 emit_insn (gen_fms<mode>4 (tmp,
4595 operands[2],
4596 operands[3],
4597 operands[4]));
4598 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
4599 tmp,
4600 operands[5],
4601 operands[1]));
4602 DONE;
4603 })
4604
;; Merge-masked FMS; unmasked elements come from operand 1 (tied to the
;; destination), so only the 132/213 forms apply.
4605 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4606 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4607 (vec_merge:VF_AVX512VL
4608 (fma:VF_AVX512VL
4609 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4610 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4611 (neg:VF_AVX512VL
4612 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4613 (match_dup 1)
4614 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4615 "TARGET_AVX512F"
4616 "@
4617 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4618 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4619 [(set_attr "type" "ssemuladd")
4620 (set_attr "mode" "<MODE>")])
4621
;; Merge-masked FMS where the merge source is the subtrahend (operand 3,
;; tied to the destination) -- the 231 form.
4622 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4623 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4624 (vec_merge:VF_AVX512VL
4625 (fma:VF_AVX512VL
4626 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4627 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4628 (neg:VF_AVX512VL
4629 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4630 (match_dup 3)
4631 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4632 "TARGET_AVX512F && <round_mode512bit_condition>"
4633 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4634 [(set_attr "type" "ssemuladd")
4635 (set_attr "mode" "<MODE>")])
4636
;; FMA3/FMA4 fused negate-multiply-add: op0 = -(op1 * op2) + op3.
4637 (define_insn "*fma_fnmadd_<mode>"
4638 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4639 (fma:FMAMODE
4640 (neg:FMAMODE
4641 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4642 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4643 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4644 "TARGET_FMA || TARGET_FMA4"
4645 "@
4646 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4647 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4648 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4649 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4650 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4651 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4652 (set_attr "type" "ssemuladd")
4653 (set_attr "mode" "<MODE>")])
4654
;; Zero-masked FNMA expander: forwards to the maskz_1 pattern with a
;; zero vector as the merge source.
4655 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4656 [(match_operand:VF_AVX512VL 0 "register_operand")
4657 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4658 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4659 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4660 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4661 "TARGET_AVX512F && <round_mode512bit_condition>"
4662 {
4663 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4664 operands[0], operands[1], operands[2], operands[3],
4665 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4666 DONE;
4667 })
4668
;; AVX512 FNMA with optional zero-masking / embedded rounding and
;; embedded-broadcast memory operands.
4669 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4670 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4671 (fma:VF_SF_AVX512VL
4672 (neg:VF_SF_AVX512VL
4673 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4674 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4675 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4676 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4677 "@
4678 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4679 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4680 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4681 [(set_attr "type" "ssemuladd")
4682 (set_attr "mode" "<MODE>")])
4683
;; Conditional (mask-merged) FNMA: compute -(op2*op3)+op4 into a
;; temporary, then merge with fallback operand 5 under mask operand 1.
4684 (define_expand "cond_fnma<mode>"
4685 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4686 (vec_merge:VF_AVX512VL
4687 (fma:VF_AVX512VL
4688 (neg:VF_AVX512VL
4689 (match_operand:VF_AVX512VL 2 "vector_operand"))
4690 (match_operand:VF_AVX512VL 3 "vector_operand")
4691 (match_operand:VF_AVX512VL 4 "vector_operand"))
4692 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4693 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4694 "TARGET_AVX512F"
4695 {
4696 rtx tmp = gen_reg_rtx (<MODE>mode);
4697 emit_insn (gen_fnma<mode>4 (tmp,
4698 operands[2],
4699 operands[3],
4700 operands[4]));
4701 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
4702 tmp,
4703 operands[5],
4704 operands[1]));
4705 DONE;
4706 })
4707
;; Merge-masked FNMA; unmasked elements come from operand 1 (tied to the
;; destination), so only the 132/213 forms apply.
4708 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4709 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4710 (vec_merge:VF_AVX512VL
4711 (fma:VF_AVX512VL
4712 (neg:VF_AVX512VL
4713 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4714 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4715 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4716 (match_dup 1)
4717 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4718 "TARGET_AVX512F && <round_mode512bit_condition>"
4719 "@
4720 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4721 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4722 [(set_attr "type" "ssemuladd")
4723 (set_attr "mode" "<MODE>")])
4724
;; Merge-masked FNMA where the merge source is the addend (operand 3,
;; tied to the destination) -- the 231 form.
4725 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4726 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4727 (vec_merge:VF_AVX512VL
4728 (fma:VF_AVX512VL
4729 (neg:VF_AVX512VL
4730 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4731 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4732 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4733 (match_dup 3)
4734 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4735 "TARGET_AVX512F && <round_mode512bit_condition>"
4736 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4737 [(set_attr "type" "ssemuladd")
4738 (set_attr "mode" "<MODE>")])
4739
;; FMA3/FMA4 fused negate-multiply-subtract: op0 = -(op1 * op2) - op3.
;; This pattern is enabled only for FMA/FMA4, so the FMA3 alternatives
;; must use plain operand templates like the sibling *fma_fmadd/_fmsub/
;; _fnmadd patterns: the <round_sd_mask_op4>/<sd_mask_op4> substitution
;; operands belong exclusively to the AVX512
;; <sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name> pattern
;; below and must not appear here.
4740 (define_insn "*fma_fnmsub_<mode>"
4741 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4742 (fma:FMAMODE
4743 (neg:FMAMODE
4744 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4745 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4746 (neg:FMAMODE
4747 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4748 "TARGET_FMA || TARGET_FMA4"
4749 "@
4750 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4751 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4752 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4753 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4754 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4755 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4756 (set_attr "type" "ssemuladd")
4757 (set_attr "mode" "<MODE>")])
4758
;; Zero-masked FNMS expander: forwards to the maskz_1 pattern with a
;; zero vector as the merge source.
4759 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4760 [(match_operand:VF_AVX512VL 0 "register_operand")
4761 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4762 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4763 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4764 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4765 "TARGET_AVX512F && <round_mode512bit_condition>"
4766 {
4767 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4768 operands[0], operands[1], operands[2], operands[3],
4769 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4770 DONE;
4771 })
4772
;; AVX512 FNMS with optional zero-masking / embedded rounding and
;; embedded-broadcast memory operands.
4773 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4774 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4775 (fma:VF_SF_AVX512VL
4776 (neg:VF_SF_AVX512VL
4777 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4778 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4779 (neg:VF_SF_AVX512VL
4780 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4781 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4782 "@
4783 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4784 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4785 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4786 [(set_attr "type" "ssemuladd")
4787 (set_attr "mode" "<MODE>")])
4788
;; Conditional (mask-merged) FNMS: compute -(op2*op3)-op4 into a
;; temporary, then merge with fallback operand 5 under mask operand 1.
4789 (define_expand "cond_fnms<mode>"
4790 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4791 (vec_merge:VF_AVX512VL
4792 (fma:VF_AVX512VL
4793 (neg:VF_AVX512VL
4794 (match_operand:VF_AVX512VL 2 "vector_operand"))
4795 (match_operand:VF_AVX512VL 3 "vector_operand")
4796 (neg:VF_AVX512VL
4797 (match_operand:VF_AVX512VL 4 "vector_operand")))
4798 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4799 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4800 "TARGET_AVX512F"
4801 {
4802 rtx tmp = gen_reg_rtx (<MODE>mode);
4803 emit_insn (gen_fnms<mode>4 (tmp,
4804 operands[2],
4805 operands[3],
4806 operands[4]));
4807 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
4808 tmp,
4809 operands[5],
4810 operands[1]));
4811 DONE;
4812 })
4813
;; Merge-masked FNMS; unmasked elements come from operand 1 (tied to the
;; destination), so only the 132/213 forms apply.
4814 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4815 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4816 (vec_merge:VF_AVX512VL
4817 (fma:VF_AVX512VL
4818 (neg:VF_AVX512VL
4819 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4820 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4821 (neg:VF_AVX512VL
4822 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4823 (match_dup 1)
4824 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4825 "TARGET_AVX512F && <round_mode512bit_condition>"
4826 "@
4827 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4828 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4829 [(set_attr "type" "ssemuladd")
4830 (set_attr "mode" "<MODE>")])
4831
;; Merge-masked FNMS where the merge source is the subtrahend (operand 3,
;; tied to the destination) -- the 231 form.
4832 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4833 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4834 (vec_merge:VF_AVX512VL
4835 (fma:VF_AVX512VL
4836 (neg:VF_AVX512VL
4837 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4838 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4839 (neg:VF_AVX512VL
4840 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4841 (match_dup 3)
4842 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4843 "TARGET_AVX512F"
4844 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4845 [(set_attr "type" "ssemuladd")
4846 (set_attr "mode" "<MODE>")])
4847
4848 ;; FMA parallel floating point multiply addsub and subadd operations.
4849
4850 ;; It would be possible to represent these without the UNSPEC as
4851 ;;
4852 ;; (vec_merge
4853 ;; (fma op1 op2 op3)
4854 ;; (fma op1 op2 (neg op3))
4855 ;; (merge-const))
4856 ;;
4857 ;; But this doesn't seem useful in practice.
4858
;; Vectorizer entry for fmaddsub (even lanes subtract, odd lanes add).
;; NOTE(review): the AVX512 leg of the condition has no explicit
;; TARGET_AVX512F; it appears to rely on the VF iterator gating 512-bit
;; modes and on AVX512VL implying AVX512F -- confirm against upstream.
4859 (define_expand "vec_fmaddsub<mode>4"
4860 [(set (match_operand:VF 0 "register_operand")
4861 (unspec:VF
4862 [(match_operand:VF 1 "nonimmediate_operand")
4863 (match_operand:VF 2 "nonimmediate_operand")
4864 (match_operand:VF 3 "nonimmediate_operand")]
4865 UNSPEC_FMADDSUB))]
4866 "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")
4867
;; Vectorizer entry for fmsubadd: same UNSPEC_FMADDSUB with the third
;; operand negated (even lanes add, odd lanes subtract).
4868 (define_expand "vec_fmsubadd<mode>4"
4869 [(set (match_operand:VF 0 "register_operand")
4870 (unspec:VF
4871 [(match_operand:VF 1 "nonimmediate_operand")
4872 (match_operand:VF 2 "nonimmediate_operand")
4873 (neg:VF
4874 (match_operand:VF 3 "nonimmediate_operand"))]
4875 UNSPEC_FMADDSUB))]
4876 "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")
4877
;; Builtin-facing fmaddsub expander (used by the intrinsics).
4878 (define_expand "fmaddsub_<mode>"
4879 [(set (match_operand:VF 0 "register_operand")
4880 (unspec:VF
4881 [(match_operand:VF 1 "nonimmediate_operand")
4882 (match_operand:VF 2 "nonimmediate_operand")
4883 (match_operand:VF 3 "nonimmediate_operand")]
4884 UNSPEC_FMADDSUB))]
4885 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4886
;; Zero-masked fmaddsub expander: forwards to the maskz_1 pattern with a
;; zero vector as the merge source.
4887 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4888 [(match_operand:VF_AVX512VL 0 "register_operand")
4889 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4890 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4891 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4892 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4893 "TARGET_AVX512F"
4894 {
4895 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4896 operands[0], operands[1], operands[2], operands[3],
4897 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4898 DONE;
4899 })
4900
;; FMA3/FMA4 fmaddsub for 128/256-bit modes: odd lanes op1*op2+op3,
;; even lanes op1*op2-op3.
4901 (define_insn "*fma_fmaddsub_<mode>"
4902 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4903 (unspec:VF_128_256
4904 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4905 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4906 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4907 UNSPEC_FMADDSUB))]
4908 "TARGET_FMA || TARGET_FMA4"
4909 "@
4910 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4911 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4912 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4913 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4914 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4915 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4916 (set_attr "type" "ssemuladd")
4917 (set_attr "mode" "<MODE>")])
4918
;; AVX512 fmaddsub with optional zero-masking / embedded rounding.  Note:
;; plain <round_nimm_predicate>/<round_constraint> here -- no embedded
;; broadcast for the UNSPEC_FMADDSUB patterns.
4919 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4920 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4921 (unspec:VF_SF_AVX512VL
4922 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4923 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4924 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4925 UNSPEC_FMADDSUB))]
4926 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4927 "@
4928 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4929 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4930 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4931 [(set_attr "type" "ssemuladd")
4932 (set_attr "mode" "<MODE>")])
4933
;; Merge-masked fmaddsub; unmasked elements come from operand 1 (tied to
;; the destination), so only the 132/213 forms apply.
4934 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4935 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4936 (vec_merge:VF_AVX512VL
4937 (unspec:VF_AVX512VL
4938 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4939 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4940 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4941 UNSPEC_FMADDSUB)
4942 (match_dup 1)
4943 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4944 "TARGET_AVX512F"
4945 "@
4946 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4947 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4948 [(set_attr "type" "ssemuladd")
4949 (set_attr "mode" "<MODE>")])
4950
;; Merge-masked fmaddsub, 231 form: merge source is operand 3 (tied to
;; the destination).
4951 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4952 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4953 (vec_merge:VF_AVX512VL
4954 (unspec:VF_AVX512VL
4955 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4956 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4957 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4958 UNSPEC_FMADDSUB)
4959 (match_dup 3)
4960 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4961 "TARGET_AVX512F"
4962 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4963 [(set_attr "type" "ssemuladd")
4964 (set_attr "mode" "<MODE>")])
4965
;; FMA3/FMA4 fmsubadd for 128/256-bit modes: the negated third operand
;; inside UNSPEC_FMADDSUB flips the lane pattern relative to fmaddsub.
4966 (define_insn "*fma_fmsubadd_<mode>"
4967 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4968 (unspec:VF_128_256
4969 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4970 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4971 (neg:VF_128_256
4972 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4973 UNSPEC_FMADDSUB))]
4974 "TARGET_FMA || TARGET_FMA4"
4975 "@
4976 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4977 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4978 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4979 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4980 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4981 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4982 (set_attr "type" "ssemuladd")
4983 (set_attr "mode" "<MODE>")])
4984
;; AVX512 fmsubadd with optional zero-masking / embedded rounding.
4985 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4986 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4987 (unspec:VF_SF_AVX512VL
4988 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4989 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4990 (neg:VF_SF_AVX512VL
4991 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4992 UNSPEC_FMADDSUB))]
4993 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4994 "@
4995 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4996 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4997 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4998 [(set_attr "type" "ssemuladd")
4999 (set_attr "mode" "<MODE>")])
5000
;; Merge-masked fmsubadd; unmasked elements come from operand 1 (tied to
;; the destination), so only the 132/213 forms apply.
5001 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
5002 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
5003 (vec_merge:VF_AVX512VL
5004 (unspec:VF_AVX512VL
5005 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
5006 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5007 (neg:VF_AVX512VL
5008 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
5009 UNSPEC_FMADDSUB)
5010 (match_dup 1)
5011 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5012 "TARGET_AVX512F"
5013 "@
5014 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5015 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5016 [(set_attr "type" "ssemuladd")
5017 (set_attr "mode" "<MODE>")])
5018
;; Merge-masked fmsubadd, 231 form: merge source is operand 3 (tied to
;; the destination).
5019 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
5020 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
5021 (vec_merge:VF_AVX512VL
5022 (unspec:VF_AVX512VL
5023 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
5024 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5025 (neg:VF_AVX512VL
5026 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
5027 UNSPEC_FMADDSUB)
5028 (match_dup 3)
5029 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5030 "TARGET_AVX512F"
5031 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5032 [(set_attr "type" "ssemuladd")
5033 (set_attr "mode" "<MODE>")])
5034
5035 ;; FMA3 floating point scalar intrinsics. These merge result with
5036 ;; high-order elements from the destination register.
5037
;; Scalar FMA intrinsic expander: only element 0 is computed; the upper
;; elements are taken from operand 1 via the vec_merge with (const_int 1).
5038 (define_expand "fmai_vmfmadd_<mode><round_name>"
5039 [(set (match_operand:VF_128 0 "register_operand")
5040 (vec_merge:VF_128
5041 (fma:VF_128
5042 (match_operand:VF_128 1 "register_operand")
5043 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
5044 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
5045 (match_dup 1)
5046 (const_int 1)))]
5047 "TARGET_FMA")
5048
;; Scalar FMS intrinsic expander; upper elements from operand 1.
5049 (define_expand "fmai_vmfmsub_<mode><round_name>"
5050 [(set (match_operand:VF_128 0 "register_operand")
5051 (vec_merge:VF_128
5052 (fma:VF_128
5053 (match_operand:VF_128 1 "register_operand")
5054 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
5055 (neg:VF_128
5056 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
5057 (match_dup 1)
5058 (const_int 1)))]
5059 "TARGET_FMA")
5060
;; Scalar FNMA intrinsic expander; the negation is placed on operand 2
;; so operand 1 (the upper-element source) stays unnegated.
5061 (define_expand "fmai_vmfnmadd_<mode><round_name>"
5062 [(set (match_operand:VF_128 0 "register_operand")
5063 (vec_merge:VF_128
5064 (fma:VF_128
5065 (neg:VF_128
5066 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
5067 (match_operand:VF_128 1 "register_operand")
5068 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
5069 (match_dup 1)
5070 (const_int 1)))]
5071 "TARGET_FMA")
5072
;; Scalar FNMS intrinsic expander; upper elements from operand 1.
5073 (define_expand "fmai_vmfnmsub_<mode><round_name>"
5074 [(set (match_operand:VF_128 0 "register_operand")
5075 (vec_merge:VF_128
5076 (fma:VF_128
5077 (neg:VF_128
5078 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
5079 (match_operand:VF_128 1 "register_operand")
5080 (neg:VF_128
5081 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
5082 (match_dup 1)
5083 (const_int 1)))]
5084 "TARGET_FMA")
5085
;; Scalar FMA insn: low element = op1*op2+op3, upper elements from op1
;; (tied to the destination).  <iptr> prints the scalar memory size.
5086 (define_insn "*fmai_fmadd_<mode>"
5087 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5088 (vec_merge:VF_128
5089 (fma:VF_128
5090 (match_operand:VF_128 1 "register_operand" "0,0")
5091 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
5092 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5093 (match_dup 1)
5094 (const_int 1)))]
5095 "TARGET_FMA || TARGET_AVX512F"
5096 "@
5097 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5098 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5099 [(set_attr "type" "ssemuladd")
5100 (set_attr "mode" "<MODE>")])
5101
;; Scalar FMS insn: low element = op1*op2-op3, upper elements from op1.
5102 (define_insn "*fmai_fmsub_<mode>"
5103 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5104 (vec_merge:VF_128
5105 (fma:VF_128
5106 (match_operand:VF_128 1 "register_operand" "0,0")
5107 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5108 (neg:VF_128
5109 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5110 (match_dup 1)
5111 (const_int 1)))]
5112 "TARGET_FMA || TARGET_AVX512F"
5113 "@
5114 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5115 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5116 [(set_attr "type" "ssemuladd")
5117 (set_attr "mode" "<MODE>")])
5118
;; Scalar FNMA insn: low element = -(op2*op1)+op3, upper elements from
;; op1 (tied to the destination).
5119 (define_insn "*fmai_fnmadd_<mode><round_name>"
5120 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5121 (vec_merge:VF_128
5122 (fma:VF_128
5123 (neg:VF_128
5124 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5125 (match_operand:VF_128 1 "register_operand" "0,0")
5126 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5127 (match_dup 1)
5128 (const_int 1)))]
5129 "TARGET_FMA || TARGET_AVX512F"
5130 "@
5131 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5132 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5133 [(set_attr "type" "ssemuladd")
5134 (set_attr "mode" "<MODE>")])
5135
;; Scalar FNMS insn: low element = -(op2*op1)-op3, upper elements from
;; op1 (tied to the destination).
5136 (define_insn "*fmai_fnmsub_<mode><round_name>"
5137 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5138 (vec_merge:VF_128
5139 (fma:VF_128
5140 (neg:VF_128
5141 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5142 (match_operand:VF_128 1 "register_operand" "0,0")
5143 (neg:VF_128
5144 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5145 (match_dup 1)
5146 (const_int 1)))]
5147 "TARGET_FMA || TARGET_AVX512F"
5148 "@
5149 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5150 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5151 [(set_attr "type" "ssemuladd")
5152 (set_attr "mode" "<MODE>")])
5153
;; Masked scalar FMA, merge-masking from operand 1: the inner vec_merge
;; applies mask bit 0, the outer one keeps upper elements from op1.
5154 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
5155 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5156 (vec_merge:VF_128
5157 (vec_merge:VF_128
5158 (fma:VF_128
5159 (match_operand:VF_128 1 "register_operand" "0,0")
5160 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5161 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5162 (match_dup 1)
5163 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5164 (match_dup 1)
5165 (const_int 1)))]
5166 "TARGET_AVX512F"
5167 "@
5168 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5169 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5170 [(set_attr "type" "ssemuladd")
5171 (set_attr "mode" "<MODE>")])
5172
;; Masked scalar FMA, 231 form: merge-masking and upper elements both
;; come from operand 3 (tied to the destination).  The AT&T operand
;; order is %2, %1, %0, so the Intel (reversed) side must print
;; %<iptr>1, %<iptr>2 -- printing operand 3 there would emit the
;; destination-tied addend instead of the multiplicand (compare the
;; _mask pattern above, whose Intel side mirrors its AT&T operands).
5173 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
5174 [(set (match_operand:VF_128 0 "register_operand" "=v")
5175 (vec_merge:VF_128
5176 (vec_merge:VF_128
5177 (fma:VF_128
5178 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5179 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
5180 (match_operand:VF_128 3 "register_operand" "0"))
5181 (match_dup 3)
5182 (match_operand:QI 4 "register_operand" "Yk"))
5183 (match_dup 3)
5184 (const_int 1)))]
5185 "TARGET_AVX512F"
5186 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
5187 [(set_attr "type" "ssemuladd")
5188 (set_attr "mode" "<MODE>")])
5189
;; Expander for the zero-masked scalar FMA: forwards to the _maskz_1
;; pattern, supplying CONST0_RTX as the value merged in for masked-off
;; element 0 (zero-masking semantics).
5190 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
5191 [(match_operand:VF_128 0 "register_operand")
5192 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
5193 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
5194 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
5195 (match_operand:QI 4 "register_operand")]
5196 "TARGET_AVX512F"
5197 {
5198 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
5199 operands[0], operands[1], operands[2], operands[3],
5200 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
5201 DONE;
5202 })
5203
;; Zero-masked scalar FMA: element 0 is op1*op2+op3 when mask bit 0 is
;; set, otherwise zero (operand 4 is const0); upper elements come from
;; operand 1.  %{z%} in the template selects zero-masking in the output.
5204 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
5205 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5206 (vec_merge:VF_128
5207 (vec_merge:VF_128
5208 (fma:VF_128
5209 (match_operand:VF_128 1 "register_operand" "0,0")
5210 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5211 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5212 (match_operand:VF_128 4 "const0_operand" "C,C")
5213 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5214 (match_dup 1)
5215 (const_int 1)))]
5216 "TARGET_AVX512F"
5217 "@
5218 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5219 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5220 [(set_attr "type" "ssemuladd")
5221 (set_attr "mode" "<MODE>")])
5222
;; Masked scalar FMSUB (merge-masking): element 0 is op1*op2-op3 (note the
;; negated addend) when mask bit 0 is set, otherwise op1; upper elements
;; come from operand 1.
5223 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
5224 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5225 (vec_merge:VF_128
5226 (vec_merge:VF_128
5227 (fma:VF_128
5228 (match_operand:VF_128 1 "register_operand" "0,0")
5229 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5230 (neg:VF_128
5231 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5232 (match_dup 1)
5233 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5234 (match_dup 1)
5235 (const_int 1)))]
5236 "TARGET_AVX512F"
5237 "@
5238 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5239 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5240 [(set_attr "type" "ssemuladd")
5241 (set_attr "mode" "<MODE>")])
5242
;; Masked scalar FMSUB, 231 form: the (negated) addend operand 3 is also
;; the destination; fall-back and upper elements come from operand 3.
5243 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
5244 [(set (match_operand:VF_128 0 "register_operand" "=v")
5245 (vec_merge:VF_128
5246 (vec_merge:VF_128
5247 (fma:VF_128
5248 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5249 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
5250 (neg:VF_128
5251 (match_operand:VF_128 3 "register_operand" "0")))
5252 (match_dup 3)
5253 (match_operand:QI 4 "register_operand" "Yk"))
5254 (match_dup 3)
5255 (const_int 1)))]
5256 "TARGET_AVX512F"
5257 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5258 [(set_attr "type" "ssemuladd")
5259 (set_attr "mode" "<MODE>")])
5260
;; Zero-masked scalar FMSUB: element 0 is op1*op2-op3 when mask bit 0 is
;; set, otherwise zero (operand 4 is const0); upper elements come from
;; operand 1.
5261 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
5262 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5263 (vec_merge:VF_128
5264 (vec_merge:VF_128
5265 (fma:VF_128
5266 (match_operand:VF_128 1 "register_operand" "0,0")
5267 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5268 (neg:VF_128
5269 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5270 (match_operand:VF_128 4 "const0_operand" "C,C")
5271 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5272 (match_dup 1)
5273 (const_int 1)))]
5274 "TARGET_AVX512F"
5275 "@
5276 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5277 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5278 [(set_attr "type" "ssemuladd")
5279 (set_attr "mode" "<MODE>")])
5280
;; Masked scalar FNMADD (merge-masking): element 0 is -(op2*op1)+op3 when
;; mask bit 0 is set, otherwise op1; upper elements come from operand 1.
5281 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
5282 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5283 (vec_merge:VF_128
5284 (vec_merge:VF_128
5285 (fma:VF_128
5286 (neg:VF_128
5287 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5288 (match_operand:VF_128 1 "register_operand" "0,0")
5289 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5290 (match_dup 1)
5291 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5292 (match_dup 1)
5293 (const_int 1)))]
5294 "TARGET_AVX512F"
5295 "@
5296 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5297 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5298 [(set_attr "type" "ssemuladd")
5299 (set_attr "mode" "<MODE>")])
5300
;; Masked scalar FNMADD, 231 form: the addend operand 3 is also the
;; destination; fall-back and upper elements come from operand 3.
5301 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
5302 [(set (match_operand:VF_128 0 "register_operand" "=v")
5303 (vec_merge:VF_128
5304 (vec_merge:VF_128
5305 (fma:VF_128
5306 (neg:VF_128
5307 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5308 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5309 (match_operand:VF_128 3 "register_operand" "0"))
5310 (match_dup 3)
5311 (match_operand:QI 4 "register_operand" "Yk"))
5312 (match_dup 3)
5313 (const_int 1)))]
5314 "TARGET_AVX512F"
5315 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5316 [(set_attr "type" "ssemuladd")
5317 (set_attr "mode" "<MODE>")])
5318
;; Zero-masked scalar FNMADD: element 0 is -(op2*op1)+op3 when mask bit 0
;; is set, otherwise zero; upper elements come from operand 1.
5319 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
5320 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5321 (vec_merge:VF_128
5322 (vec_merge:VF_128
5323 (fma:VF_128
5324 (neg:VF_128
5325 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5326 (match_operand:VF_128 1 "register_operand" "0,0")
5327 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5328 (match_operand:VF_128 4 "const0_operand" "C,C")
5329 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5330 (match_dup 1)
5331 (const_int 1)))]
5332 "TARGET_AVX512F"
5333 "@
5334 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5335 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5336 [(set_attr "type" "ssemuladd")
5337 (set_attr "mode" "<MODE>")])
5338
;; Masked scalar FNMSUB (merge-masking): element 0 is -(op2*op1)-op3 when
;; mask bit 0 is set, otherwise op1; upper elements come from operand 1.
5339 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
5340 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5341 (vec_merge:VF_128
5342 (vec_merge:VF_128
5343 (fma:VF_128
5344 (neg:VF_128
5345 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5346 (match_operand:VF_128 1 "register_operand" "0,0")
5347 (neg:VF_128
5348 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5349 (match_dup 1)
5350 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5351 (match_dup 1)
5352 (const_int 1)))]
5353 "TARGET_AVX512F"
5354 "@
5355 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5356 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5357 [(set_attr "type" "ssemuladd")
5358 (set_attr "mode" "<MODE>")])
5359
;; Masked scalar FNMSUB, 231 form: the (negated) addend operand 3 is also
;; the destination; fall-back and upper elements come from operand 3.
5360 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
5361 [(set (match_operand:VF_128 0 "register_operand" "=v")
5362 (vec_merge:VF_128
5363 (vec_merge:VF_128
5364 (fma:VF_128
5365 (neg:VF_128
5366 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5367 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5368 (neg:VF_128
5369 (match_operand:VF_128 3 "register_operand" "0")))
5370 (match_dup 3)
5371 (match_operand:QI 4 "register_operand" "Yk"))
5372 (match_dup 3)
5373 (const_int 1)))]
5374 "TARGET_AVX512F"
5375 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5376 [(set_attr "type" "ssemuladd")
5377 (set_attr "mode" "<MODE>")])
5378
;; Zero-masked scalar FNMSUB: element 0 is -(op2*op1)-op3 when mask bit 0
;; is set, otherwise zero; upper elements come from operand 1.
5379 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
5380 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5381 (vec_merge:VF_128
5382 (vec_merge:VF_128
5383 (fma:VF_128
5384 (neg:VF_128
5385 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5386 (match_operand:VF_128 1 "register_operand" "0,0")
5387 (neg:VF_128
5388 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5389 (match_operand:VF_128 4 "const0_operand" "C,C")
5390 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5391 (match_dup 1)
5392 (const_int 1)))]
5393 "TARGET_AVX512F"
5394 "@
5395 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5396 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5397 [(set_attr "type" "ssemuladd")
5398 (set_attr "mode" "<MODE>")])
5399
5400 ;; FMA4 floating point scalar intrinsics. These write the
5401 ;; entire destination register, with the high-order elements zeroed.
5402
;; FMA4 scalar FMADD expander: builds the vec_merge with a zero vector
;; (operand 4, created in the preparation statement) so that the upper
;; elements of the destination are zeroed, per FMA4 scalar semantics.
5403 (define_expand "fma4i_vmfmadd_<mode>"
5404 [(set (match_operand:VF_128 0 "register_operand")
5405 (vec_merge:VF_128
5406 (fma:VF_128
5407 (match_operand:VF_128 1 "nonimmediate_operand")
5408 (match_operand:VF_128 2 "nonimmediate_operand")
5409 (match_operand:VF_128 3 "nonimmediate_operand"))
5410 (match_dup 4)
5411 (const_int 1)))]
5412 "TARGET_FMA4"
5413 "operands[4] = CONST0_RTX (<MODE>mode);")
5414
;; FMA4 scalar FMADD insn: element 0 = op1*op2+op3, upper elements zeroed
;; (vec_merge with const0 operand 4).  Two alternatives allow the memory
;; operand in either multiplicand/addend position.
5415 (define_insn "*fma4i_vmfmadd_<mode>"
5416 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5417 (vec_merge:VF_128
5418 (fma:VF_128
5419 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5420 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5421 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5422 (match_operand:VF_128 4 "const0_operand")
5423 (const_int 1)))]
5424 "TARGET_FMA4"
5425 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5426 [(set_attr "type" "ssemuladd")
5427 (set_attr "mode" "<MODE>")])
5428
;; FMA4 scalar FMSUB insn: element 0 = op1*op2-op3 (negated addend),
;; upper elements zeroed.
5429 (define_insn "*fma4i_vmfmsub_<mode>"
5430 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5431 (vec_merge:VF_128
5432 (fma:VF_128
5433 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5434 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5435 (neg:VF_128
5436 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5437 (match_operand:VF_128 4 "const0_operand")
5438 (const_int 1)))]
5439 "TARGET_FMA4"
5440 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5441 [(set_attr "type" "ssemuladd")
5442 (set_attr "mode" "<MODE>")])
5443
;; FMA4 scalar FNMADD insn: element 0 = -(op1*op2)+op3 (negated
;; multiplicand), upper elements zeroed.
5444 (define_insn "*fma4i_vmfnmadd_<mode>"
5445 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5446 (vec_merge:VF_128
5447 (fma:VF_128
5448 (neg:VF_128
5449 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5450 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5451 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5452 (match_operand:VF_128 4 "const0_operand")
5453 (const_int 1)))]
5454 "TARGET_FMA4"
5455 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5456 [(set_attr "type" "ssemuladd")
5457 (set_attr "mode" "<MODE>")])
5458
;; FMA4 scalar FNMSUB insn: element 0 = -(op1*op2)-op3 (both multiplicand
;; and addend negated), upper elements zeroed.
5459 (define_insn "*fma4i_vmfnmsub_<mode>"
5460 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5461 (vec_merge:VF_128
5462 (fma:VF_128
5463 (neg:VF_128
5464 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5465 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5466 (neg:VF_128
5467 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5468 (match_operand:VF_128 4 "const0_operand")
5469 (const_int 1)))]
5470 "TARGET_FMA4"
5471 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5472 [(set_attr "type" "ssemuladd")
5473 (set_attr "mode" "<MODE>")])
5474
5475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5476 ;;
5477 ;; Parallel single-precision floating point conversion operations
5478 ;;
5479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5480
;; cvtpi2ps: convert V2SI to SF and merge into the low two elements of a
;; V4SF (vec_merge mask 3).  Alternative 0 is the native MMX instruction;
;; alternatives 1/2 keep everything in SSE registers and are split after
;; reload into cvtdq2ps plus a shuffle that re-merges the converted low
;; half with the untouched high half of operand 1.
5481 (define_insn_and_split "sse_cvtpi2ps"
5482 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5483 (vec_merge:V4SF
5484 (vec_duplicate:V4SF
5485 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5486 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5487 (const_int 3)))
5488 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5489 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5490 "@
5491 cvtpi2ps\t{%2, %0|%0, %2}
5492 #
5493 #"
5494 "TARGET_SSE2 && reload_completed
5495 && SSE_REG_P (operands[2])"
5496 [(const_int 0)]
5497 {
5498 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5499 GET_MODE (operands[2]));
5500 /* Generate SSE2 cvtdq2ps.  */
5501 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5502
5503 /* Merge operands[3] with operands[0].  */
5504 rtx mask, op1;
5505 if (TARGET_AVX)
5506 {
5507 mask = gen_rtx_PARALLEL (VOIDmode,
5508 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5509 GEN_INT (6), GEN_INT (7)));
5510 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5511 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5512 emit_insn (gen_rtx_SET (operands[0], op2));
5513 }
5514 else
5515 {
5516 /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
5517 mask = gen_rtx_PARALLEL (VOIDmode,
5518 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5519 GEN_INT (4), GEN_INT (5)));
5520 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5521 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5522 emit_insn (gen_rtx_SET (operands[0], op2));
5523
5524 /* Swap bits 0:63 with bits 64:127.  */
5525 mask = gen_rtx_PARALLEL (VOIDmode,
5526 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5527 GEN_INT (0), GEN_INT (1)));
5528 rtx dest = lowpart_subreg (V4SImode, operands[0],
5529 GET_MODE (operands[0]));
5530 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5531 emit_insn (gen_rtx_SET (dest, op1));
5532 }
5533 DONE;
5534 }
5535 [(set_attr "mmx_isa" "native,sse_noavx,avx")
5536 (set_attr "type" "ssecvt")
5537 (set_attr "mode" "V4SF")])
5538
;; cvtps2pi: round-convert the low two SF elements of operand 1 to V2SI.
;; Alternative 0 is the native MMX instruction; alternative 1 (SSE regs)
;; is split after reload into a zero-extended V4SF copy followed by
;; sse2_fix_notruncv4sfv4si on a V4SI view of the destination.
5539 (define_insn_and_split "sse_cvtps2pi"
5540 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5541 (vec_select:V2SI
5542 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
5543 UNSPEC_FIX_NOTRUNC)
5544 (parallel [(const_int 0) (const_int 1)])))]
5545 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5546 "@
5547 cvtps2pi\t{%1, %0|%0, %q1}
5548 #"
5549 "TARGET_SSE2 && reload_completed
5550 && SSE_REG_P (operands[0])"
5551 [(const_int 0)]
5552 {
5553 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5554 GET_MODE (operands[1]));
5555 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5556 GET_MODE (operands[0]));
5557
5558 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5559 emit_insn (gen_rtx_SET (tmp, op1));
5560
5561 rtx dest = lowpart_subreg (V4SImode, operands[0],
5562 GET_MODE (operands[0]));
5563 emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
5564 DONE;
5565 }
5566 [(set_attr "isa" "*,sse2")
5567 (set_attr "mmx_isa" "native,*")
5568 (set_attr "type" "ssecvt")
5569 (set_attr "unit" "mmx,*")
5570 (set_attr "mode" "DI")])
5571
;; cvttps2pi: truncating variant of sse_cvtps2pi above (fix: instead of
;; UNSPEC_FIX_NOTRUNC); the SSE-register alternative splits into
;; fix_truncv4sfv4si2 after the same zero-extending copy.
5572 (define_insn_and_split "sse_cvttps2pi"
5573 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5574 (vec_select:V2SI
5575 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
5576 (parallel [(const_int 0) (const_int 1)])))]
5577 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5578 "@
5579 cvttps2pi\t{%1, %0|%0, %q1}
5580 #"
5581 "TARGET_SSE2 && reload_completed
5582 && SSE_REG_P (operands[0])"
5583 [(const_int 0)]
5584 {
5585 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5586 GET_MODE (operands[1]));
5587 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5588 GET_MODE (operands[0]));
5589
5590 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5591 emit_insn (gen_rtx_SET (tmp, op1));
5592
5593 rtx dest = lowpart_subreg (V4SImode, operands[0],
5594 GET_MODE (operands[0]));
5595 emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
5596 DONE;
5597 }
5598 [(set_attr "isa" "*,sse2")
5599 (set_attr "mmx_isa" "native,*")
5600 (set_attr "type" "ssecvt")
5601 (set_attr "unit" "mmx,*")
5602 (set_attr "prefix_rep" "0")
5603 (set_attr "mode" "SF")])
5604
;; cvtsi2ss: convert a 32/64-bit integer (SWI48 iterator) to SF and insert
;; it into element 0 of operand 1; upper elements are preserved.
;; Alternatives 0/1 are the legacy two-operand form, alternative 2 the
;; VEX/EVEX three-operand form.  length_vex/prefix_rex attributes account
;; for the REX.W needed by the DImode variants.
5605 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5606 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5607 (vec_merge:V4SF
5608 (vec_duplicate:V4SF
5609 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5610 (match_operand:V4SF 1 "register_operand" "0,0,v")
5611 (const_int 1)))]
5612 "TARGET_SSE"
5613 "@
5614 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5615 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5616 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5617 [(set_attr "isa" "noavx,noavx,avx")
5618 (set_attr "type" "sseicvt")
5619 (set_attr "athlon_decode" "vector,double,*")
5620 (set_attr "amdfam10_decode" "vector,double,*")
5621 (set_attr "bdver1_decode" "double,direct,*")
5622 (set_attr "btver2_decode" "double,double,double")
5623 (set_attr "znver1_decode" "double,double,double")
5624 (set (attr "length_vex")
5625 (if_then_else
5626 (and (match_test "<MODE>mode == DImode")
5627 (eq_attr "alternative" "2"))
5628 (const_string "4")
5629 (const_string "*")))
5630 (set (attr "prefix_rex")
5631 (if_then_else
5632 (and (match_test "<MODE>mode == DImode")
5633 (eq_attr "alternative" "0,1"))
5634 (const_string "1")
5635 (const_string "*")))
5636 (set_attr "prefix" "orig,orig,maybe_evex")
5637 (set_attr "mode" "SF")])
5638
;; cvtss2si: round-convert element 0 of a V4SF to a 32/64-bit integer
;; (UNSPEC_FIX_NOTRUNC = rounding, not truncating, conversion).
5639 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5640 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5641 (unspec:SWI48
5642 [(vec_select:SF
5643 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5644 (parallel [(const_int 0)]))]
5645 UNSPEC_FIX_NOTRUNC))]
5646 "TARGET_SSE"
5647 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5648 [(set_attr "type" "sseicvt")
5649 (set_attr "athlon_decode" "double,vector")
5650 (set_attr "bdver1_decode" "double,double")
5651 (set_attr "prefix_rep" "1")
5652 (set_attr "prefix" "maybe_vex")
5653 (set_attr "mode" "<MODE>")])
5654
;; cvtss2si on a bare SF operand (no vec_select wrapper) — matches the
;; form combine produces when the scalar has already been extracted.
5655 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5656 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5657 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5658 UNSPEC_FIX_NOTRUNC))]
5659 "TARGET_SSE"
5660 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
5661 [(set_attr "type" "sseicvt")
5662 (set_attr "athlon_decode" "double,vector")
5663 (set_attr "amdfam10_decode" "double,double")
5664 (set_attr "bdver1_decode" "double,double")
5665 (set_attr "prefix_rep" "1")
5666 (set_attr "prefix" "maybe_vex")
5667 (set_attr "mode" "<MODE>")])
5668
;; cvttss2si: truncating conversion (fix:) of element 0 of a V4SF to a
;; 32/64-bit integer; supports SAE via the round_saeonly substitutions.
5669 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5670 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5671 (fix:SWI48
5672 (vec_select:SF
5673 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5674 (parallel [(const_int 0)]))))]
5675 "TARGET_SSE"
5676 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5677 [(set_attr "type" "sseicvt")
5678 (set_attr "athlon_decode" "double,vector")
5679 (set_attr "amdfam10_decode" "double,double")
5680 (set_attr "bdver1_decode" "double,double")
5681 (set_attr "prefix_rep" "1")
5682 (set_attr "prefix" "maybe_vex")
5683 (set_attr "mode" "<MODE>")])
5684
;; vcvtusi2ss/sd with a 32-bit unsigned integer source: converts operand 2
;; and inserts it into element 0 of operand 1, preserving upper elements.
5685 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5686 [(set (match_operand:VF_128 0 "register_operand" "=v")
5687 (vec_merge:VF_128
5688 (vec_duplicate:VF_128
5689 (unsigned_float:<ssescalarmode>
5690 (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5691 (match_operand:VF_128 1 "register_operand" "v")
5692 (const_int 1)))]
5693 "TARGET_AVX512F && <round_modev4sf_condition>"
5694 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5695 [(set_attr "type" "sseicvt")
5696 (set_attr "prefix" "evex")
5697 (set_attr "mode" "<ssescalarmode>")])
5698
;; vcvtusi2ss/sd with a 64-bit unsigned integer source; requires
;; TARGET_64BIT for the REX.W-encoded {q} form.
5699 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5700 [(set (match_operand:VF_128 0 "register_operand" "=v")
5701 (vec_merge:VF_128
5702 (vec_duplicate:VF_128
5703 (unsigned_float:<ssescalarmode>
5704 (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5705 (match_operand:VF_128 1 "register_operand" "v")
5706 (const_int 1)))]
5707 "TARGET_AVX512F && TARGET_64BIT"
5708 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5709 [(set_attr "type" "sseicvt")
5710 (set_attr "prefix" "evex")
5711 (set_attr "mode" "<ssescalarmode>")])
5712
;; cvtdq2ps: signed-int vector -> float vector conversion, with optional
;; AVX512 masking/rounding via the mask_name/round_name substitutions.
5713 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5714 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5715 (float:VF1
5716 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5717 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5718 "@
5719 cvtdq2ps\t{%1, %0|%0, %1}
5720 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5721 [(set_attr "isa" "noavx,avx")
5722 (set_attr "type" "ssecvt")
5723 (set_attr "prefix" "maybe_vex")
5724 (set_attr "mode" "<sseinsnmode>")])
5725
;; vcvtudq2ps: unsigned-int vector -> float vector conversion
;; (AVX512F/AVX512VL only; there is no legacy SSE/AVX encoding).
5726 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5727 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5728 (unsigned_float:VF1_AVX512VL
5729 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5730 "TARGET_AVX512F"
5731 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5732 [(set_attr "type" "ssecvt")
5733 (set_attr "prefix" "evex")
5734 (set_attr "mode" "<MODE>")])
5735
;; Expander for unsigned int -> float: uses the native vcvtudq2ps when
;; AVX512 (512-bit, or VL for the narrower widths) is available, and
;; otherwise falls back to the multi-instruction expansion in
;; ix86_expand_vector_convert_uns_vsivsf.
5736 (define_expand "floatuns<sseintvecmodelower><mode>2"
5737 [(match_operand:VF1 0 "register_operand")
5738 (match_operand:<sseintvecmode> 1 "register_operand")]
5739 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5740 {
5741 if (<MODE>mode == V16SFmode)
5742 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5743 else
5744 if (TARGET_AVX512VL)
5745 {
5746 if (<MODE>mode == V4SFmode)
5747 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5748 else
5749 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5750 }
5751 else
5752 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5753
5754 DONE;
5755 })
5756
5757
5758 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SImode vector mode to the lower-case name of the SFmode
;; vector of the same element count, used to build pattern names below.
5759 (define_mode_attr sf2simodelower
5760 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
5761
;; cvtps2dq: rounding (non-truncating) float -> signed int vector
;; conversion for 128/256-bit modes; prefix_data16 is only counted for
;; the non-VEX encoding.
5762 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5763 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5764 (unspec:VI4_AVX
5765 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5766 UNSPEC_FIX_NOTRUNC))]
5767 "TARGET_SSE2 && <mask_mode512bit_condition>"
5768 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5769 [(set_attr "type" "ssecvt")
5770 (set (attr "prefix_data16")
5771 (if_then_else
5772 (match_test "TARGET_AVX")
5773 (const_string "*")
5774 (const_string "1")))
5775 (set_attr "prefix" "maybe_vex")
5776 (set_attr "mode" "<sseinsnmode>")])
5777
;; 512-bit vcvtps2dq (rounding float -> signed int) with optional masking
;; and embedded rounding control.
5778 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5779 [(set (match_operand:V16SI 0 "register_operand" "=v")
5780 (unspec:V16SI
5781 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5782 UNSPEC_FIX_NOTRUNC))]
5783 "TARGET_AVX512F"
5784 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5785 [(set_attr "type" "ssecvt")
5786 (set_attr "prefix" "evex")
5787 (set_attr "mode" "XI")])
5788
;; vcvtps2udq: rounding float -> unsigned int vector conversion
;; (AVX512F, with VL for the narrower modes via VI4_AVX512VL).
5789 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5790 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5791 (unspec:VI4_AVX512VL
5792 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5793 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5794 "TARGET_AVX512F"
5795 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5796 [(set_attr "type" "ssecvt")
5797 (set_attr "prefix" "evex")
5798 (set_attr "mode" "<sseinsnmode>")])
5799
;; vcvtps2qq: rounding float -> signed 64-bit int conversion for 256/512
;; destinations (source is the half-width PS mode).
5800 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5801 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5802 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5803 UNSPEC_FIX_NOTRUNC))]
5804 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5805 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5806 [(set_attr "type" "ssecvt")
5807 (set_attr "prefix" "evex")
5808 (set_attr "mode" "<sseinsnmode>")])
5809
;; 128-bit vcvtps2qq: converts the low two SF elements of a V4SF to V2DI;
;; %q1 prints the 64-bit memory reference for the half-width source.
5810 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5811 [(set (match_operand:V2DI 0 "register_operand" "=v")
5812 (unspec:V2DI
5813 [(vec_select:V2SF
5814 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5815 (parallel [(const_int 0) (const_int 1)]))]
5816 UNSPEC_FIX_NOTRUNC))]
5817 "TARGET_AVX512DQ && TARGET_AVX512VL"
5818 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5819 [(set_attr "type" "ssecvt")
5820 (set_attr "prefix" "evex")
5821 (set_attr "mode" "TI")])
5822
;; vcvtps2uqq: unsigned counterpart of avx512dq_cvtps2qq above.
5823 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5824 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5825 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5826 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5827 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5828 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5829 [(set_attr "type" "ssecvt")
5830 (set_attr "prefix" "evex")
5831 (set_attr "mode" "<sseinsnmode>")])
5832
;; 128-bit vcvtps2uqq: unsigned counterpart of avx512dq_cvtps2qqv2di,
;; converting the low two SF elements of a V4SF to V2DI.
5833 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5834 [(set (match_operand:V2DI 0 "register_operand" "=v")
5835 (unspec:V2DI
5836 [(vec_select:V2SF
5837 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5838 (parallel [(const_int 0) (const_int 1)]))]
5839 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5840 "TARGET_AVX512DQ && TARGET_AVX512VL"
5841 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5842 [(set_attr "type" "ssecvt")
5843 (set_attr "prefix" "evex")
5844 (set_attr "mode" "TI")])
5845
;; 512-bit vcvttps2dq / vcvttps2udq: truncating float -> signed/unsigned
;; int (any_fix iterator selects the signedness and the <fixsuffix>).
5846 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5847 [(set (match_operand:V16SI 0 "register_operand" "=v")
5848 (any_fix:V16SI
5849 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5850 "TARGET_AVX512F"
5851 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5852 [(set_attr "type" "ssecvt")
5853 (set_attr "prefix" "evex")
5854 (set_attr "mode" "XI")])
5855
;; 256-bit vcvttps2dq: truncating float -> signed int, optionally masked
;; under AVX512VL.
5856 (define_insn "fix_truncv8sfv8si2<mask_name>"
5857 [(set (match_operand:V8SI 0 "register_operand" "=v")
5858 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5859 "TARGET_AVX && <mask_avx512vl_condition>"
5860 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5861 [(set_attr "type" "ssecvt")
5862 (set_attr "prefix" "<mask_prefix>")
5863 (set_attr "mode" "OI")])
5864
;; 128-bit cvttps2dq / vcvttps2dq: truncating float -> signed int,
;; optionally masked under AVX512VL.  The F3 (prefix_rep) prefix is part
;; of the legacy encoding only; under VEX/EVEX neither prefix byte is
;; counted, hence the TARGET_AVX-conditional length attributes.
5865 (define_insn "fix_truncv4sfv4si2<mask_name>"
5866 [(set (match_operand:V4SI 0 "register_operand" "=v")
5867 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5868 "TARGET_SSE2 && <mask_avx512vl_condition>"
5869 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5870 [(set_attr "type" "ssecvt")
5871 (set (attr "prefix_rep")
5872 (if_then_else
5873 (match_test "TARGET_AVX")
5874 (const_string "*")
5875 (const_string "1")))
5876 (set (attr "prefix_data16")
5877 (if_then_else
5878 (match_test "TARGET_AVX")
5879 (const_string "*")
5880 (const_string "0")))
5881 (set_attr "prefix" "<mask_prefix2>")
5882 (set_attr "mode" "TI")])
5884
;; Expander for truncating float -> unsigned int: V16SF uses the native
;; vcvttps2udq; narrower modes are emulated by biasing the input
;; (ix86_expand_adjust_ufix_to_sfix_si), doing a signed truncation, and
;; xoring in the correction mask.
5885 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5886 [(match_operand:<sseintvecmode> 0 "register_operand")
5887 (match_operand:VF1 1 "register_operand")]
5888 "TARGET_SSE2"
5889 {
5890 if (<MODE>mode == V16SFmode)
5891 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5892 operands[1]));
5893 else
5894 {
5895 rtx tmp[3];
5896 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5897 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5898 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5899 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5900 }
5901 DONE;
5902 })
5903
5904 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5905 ;;
5906 ;; Parallel double-precision floating point conversion operations
5907 ;;
5908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5909
;; cvtpi2pd / cvtdq2pd: convert V2SI to V2DF.  Alternative 0 keeps the
;; value in SSE registers (cvtdq2pd); alternative 1 is the native MMX
;; instruction (deprioritized with "?!").
5910 (define_insn "sse2_cvtpi2pd"
5911 [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
5912 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
5913 "TARGET_SSE2"
5914 "@
5915 %vcvtdq2pd\t{%1, %0|%0, %1}
5916 cvtpi2pd\t{%1, %0|%0, %1}"
5917 [(set_attr "mmx_isa" "*,native")
5918 (set_attr "type" "ssecvt")
5919 (set_attr "unit" "*,mmx")
5920 (set_attr "prefix_data16" "*,1")
5921 (set_attr "prefix" "maybe_vex,*")
5922 (set_attr "mode" "V2DF")])
5923
;; Standard-named expander mapping V2SI -> V2DF conversion onto the
;; sse2_cvtpi2pd pattern above when MMX values live in SSE registers.
5924 (define_expand "floatv2siv2df2"
5925 [(set (match_operand:V2DF 0 "register_operand")
5926 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
5927 "TARGET_MMX_WITH_SSE")
5928
;; vcvtudq2pd: unsigned V2SI -> V2DF conversion; needs AVX512VL for the
;; 128-bit EVEX encoding.
5929 (define_insn "floatunsv2siv2df2"
5930 [(set (match_operand:V2DF 0 "register_operand" "=v")
5931 (unsigned_float:V2DF
5932 (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
5933 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5934 "vcvtudq2pd\t{%1, %0|%0, %1}"
5935 [(set_attr "type" "ssecvt")
5936 (set_attr "prefix" "evex")
5937 (set_attr "mode" "V2DF")])
5938
;; cvtpd2pi / cvtpd2dq: rounding V2DF -> V2SI conversion.  The SSE
;; alternative emits the {x} suffix under AVX to disambiguate the
;; 128-bit source form of vcvtpd2dq.
5939 (define_insn "sse2_cvtpd2pi"
5940 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5941 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
5942 UNSPEC_FIX_NOTRUNC))]
5943 "TARGET_SSE2"
5944 "@
5945 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5946 cvtpd2pi\t{%1, %0|%0, %1}"
5947 [(set_attr "mmx_isa" "*,native")
5948 (set_attr "type" "ssecvt")
5949 (set_attr "unit" "*,mmx")
5950 (set_attr "amdfam10_decode" "double")
5951 (set_attr "athlon_decode" "vector")
5952 (set_attr "bdver1_decode" "double")
5953 (set_attr "prefix_data16" "*,1")
5954 (set_attr "prefix" "maybe_vex,*")
5955 (set_attr "mode" "TI")])
5956
;; cvttpd2pi / cvttpd2dq: truncating V2DF -> V2SI conversion; same
;; structure as sse2_cvtpd2pi above but with fix: (truncation).
5957 (define_insn "sse2_cvttpd2pi"
5958 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5959 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
5960 "TARGET_SSE2"
5961 "@
5962 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5963 cvttpd2pi\t{%1, %0|%0, %1}"
5964 [(set_attr "mmx_isa" "*,native")
5965 (set_attr "type" "ssecvt")
5966 (set_attr "unit" "*,mmx")
5967 (set_attr "amdfam10_decode" "double")
5968 (set_attr "athlon_decode" "vector")
5969 (set_attr "bdver1_decode" "double")
5970 (set_attr "prefix_data16" "*,1")
5971 (set_attr "prefix" "maybe_vex,*")
5972 (set_attr "mode" "TI")])
5973
;; Standard-named expander mapping truncating V2DF -> V2SI conversion
;; onto the sse2_cvttpd2pi pattern when MMX values live in SSE registers.
5974 (define_expand "fix_truncv2dfv2si2"
5975 [(set (match_operand:V2SI 0 "register_operand")
5976 (fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
5977 "TARGET_MMX_WITH_SSE")
5978
;; vcvttpd2udq{x}: truncating V2DF -> unsigned V2SI; needs AVX512VL,
;; {x} marks the 128-bit source form.
5979 (define_insn "fixuns_truncv2dfv2si2"
5980 [(set (match_operand:V2SI 0 "register_operand" "=v")
5981 (unsigned_fix:V2SI
5982 (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
5983 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5984 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
5985 [(set_attr "type" "ssecvt")
5986 (set_attr "prefix" "evex")
5987 (set_attr "mode" "TI")])
5988
;; Scalar SImode integer -> DF conversion merged into the low element of
;; operand 1 (vec_merge with mask 1).  Alternatives: legacy two-operand
;; cvtsi2sd (reg/mem source) and three-operand VEX/EVEX vcvtsi2sd.
5989 (define_insn "sse2_cvtsi2sd"
5990 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5991 (vec_merge:V2DF
5992 (vec_duplicate:V2DF
5993 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5994 (match_operand:V2DF 1 "register_operand" "0,0,v")
5995 (const_int 1)))]
5996 "TARGET_SSE2"
5997 "@
5998 cvtsi2sd{l}\t{%2, %0|%0, %2}
5999 cvtsi2sd{l}\t{%2, %0|%0, %2}
6000 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
6001 [(set_attr "isa" "noavx,noavx,avx")
6002 (set_attr "type" "sseicvt")
6003 (set_attr "athlon_decode" "double,direct,*")
6004 (set_attr "amdfam10_decode" "vector,double,*")
6005 (set_attr "bdver1_decode" "double,direct,*")
6006 (set_attr "btver2_decode" "double,double,double")
6007 (set_attr "znver1_decode" "double,double,double")
6008 (set_attr "prefix" "orig,orig,maybe_evex")
6009 (set_attr "mode" "DF")])
6010
;; DImode variant of the above (64-bit only); the <round_*> iterator
;; substitutions add the EVEX embedded-rounding form.
6011 (define_insn "sse2_cvtsi2sdq<round_name>"
6012 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6013 (vec_merge:V2DF
6014 (vec_duplicate:V2DF
6015 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
6016 (match_operand:V2DF 1 "register_operand" "0,0,v")
6017 (const_int 1)))]
6018 "TARGET_SSE2 && TARGET_64BIT"
6019 "@
6020 cvtsi2sd{q}\t{%2, %0|%0, %2}
6021 cvtsi2sd{q}\t{%2, %0|%0, %2}
6022 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
6023 [(set_attr "isa" "noavx,noavx,avx")
6024 (set_attr "type" "sseicvt")
6025 (set_attr "athlon_decode" "double,direct,*")
6026 (set_attr "amdfam10_decode" "vector,double,*")
6027 (set_attr "bdver1_decode" "double,direct,*")
6028 (set_attr "length_vex" "*,*,4")
6029 (set_attr "prefix_rex" "1,1,*")
6030 (set_attr "prefix" "orig,orig,maybe_evex")
6031 (set_attr "mode" "DF")])
6032
;; Scalar SS/SD -> signed/unsigned SI/DI conversions.  The rounding
;; (vcvt*2*si) forms use UNSPEC_*FIX_NOTRUNC; the truncating (vcvtt*)
;; forms use (unsigned_)fix RTL.  SWI48 iterates SImode/DImode results.

;; Low SF element -> unsigned SWI48 with rounding (AVX512F vcvtss2usi).
6033 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
6034 [(set (match_operand:SWI48 0 "register_operand" "=r")
6035 (unspec:SWI48
6036 [(vec_select:SF
6037 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
6038 (parallel [(const_int 0)]))]
6039 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6040 "TARGET_AVX512F"
6041 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
6042 [(set_attr "type" "sseicvt")
6043 (set_attr "prefix" "evex")
6044 (set_attr "mode" "<MODE>")])
6045
;; Low SF element -> unsigned SWI48, truncating (vcvttss2usi, SAE only).
6046 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
6047 [(set (match_operand:SWI48 0 "register_operand" "=r")
6048 (unsigned_fix:SWI48
6049 (vec_select:SF
6050 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
6051 (parallel [(const_int 0)]))))]
6052 "TARGET_AVX512F"
6053 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
6054 [(set_attr "type" "sseicvt")
6055 (set_attr "prefix" "evex")
6056 (set_attr "mode" "<MODE>")])
6057
;; Low DF element -> unsigned SWI48 with rounding (vcvtsd2usi).
6058 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
6059 [(set (match_operand:SWI48 0 "register_operand" "=r")
6060 (unspec:SWI48
6061 [(vec_select:DF
6062 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
6063 (parallel [(const_int 0)]))]
6064 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6065 "TARGET_AVX512F"
6066 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
6067 [(set_attr "type" "sseicvt")
6068 (set_attr "prefix" "evex")
6069 (set_attr "mode" "<MODE>")])
6070
;; Low DF element -> unsigned SWI48, truncating (vcvttsd2usi, SAE only).
6071 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
6072 [(set (match_operand:SWI48 0 "register_operand" "=r")
6073 (unsigned_fix:SWI48
6074 (vec_select:DF
6075 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
6076 (parallel [(const_int 0)]))))]
6077 "TARGET_AVX512F"
6078 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
6079 [(set_attr "type" "sseicvt")
6080 (set_attr "prefix" "evex")
6081 (set_attr "mode" "<MODE>")])
6082
;; Low DF element -> signed SWI48 with rounding; %v emits the v-prefix
;; only when AVX is enabled.
6083 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
6084 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
6085 (unspec:SWI48
6086 [(vec_select:DF
6087 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
6088 (parallel [(const_int 0)]))]
6089 UNSPEC_FIX_NOTRUNC))]
6090 "TARGET_SSE2"
6091 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
6092 [(set_attr "type" "sseicvt")
6093 (set_attr "athlon_decode" "double,vector")
6094 (set_attr "bdver1_decode" "double,double")
6095 (set_attr "btver2_decode" "double,double")
6096 (set_attr "prefix_rep" "1")
6097 (set_attr "prefix" "maybe_vex")
6098 (set_attr "mode" "<MODE>")])
6099
;; As above but the source is a plain DF operand (scalar in reg/mem)
;; rather than the low element of a V2DF register.
6100 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
6101 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
6102 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
6103 UNSPEC_FIX_NOTRUNC))]
6104 "TARGET_SSE2"
6105 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
6106 [(set_attr "type" "sseicvt")
6107 (set_attr "athlon_decode" "double,vector")
6108 (set_attr "amdfam10_decode" "double,double")
6109 (set_attr "bdver1_decode" "double,double")
6110 (set_attr "prefix_rep" "1")
6111 (set_attr "prefix" "maybe_vex")
6112 (set_attr "mode" "<MODE>")])
6113
;; Low DF element -> signed SWI48, truncating (cvttsd2si / vcvttsd2si).
6114 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
6115 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
6116 (fix:SWI48
6117 (vec_select:DF
6118 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
6119 (parallel [(const_int 0)]))))]
6120 "TARGET_SSE2"
6121 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
6122 [(set_attr "type" "sseicvt")
6123 (set_attr "athlon_decode" "double,vector")
6124 (set_attr "amdfam10_decode" "double,double")
6125 (set_attr "bdver1_decode" "double,double")
6126 (set_attr "btver2_decode" "double,double")
6127 (set_attr "prefix_rep" "1")
6128 (set_attr "prefix" "maybe_vex")
6129 (set_attr "mode" "<MODE>")])
6130
6131 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a DF vector mode to the SI vector mode with half as many bits
;; (same element count), for widening int->double conversions.
6132 (define_mode_attr si2dfmode
6133 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of si2dfmode, for use in pattern names.
6134 (define_mode_attr si2dfmodelower
6135 [(V8DF "v8si") (V4DF "v4si")])
6136
;; Signed SI-vector -> DF-vector conversion (vcvtdq2pd), optionally
;; masked for the AVX512 modes.
6137 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
6138 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6139 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
6140 "TARGET_AVX && <mask_mode512bit_condition>"
6141 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6142 [(set_attr "type" "ssecvt")
6143 (set_attr "prefix" "maybe_vex")
6144 (set_attr "mode" "<MODE>")])
6145
;; Signed/unsigned QI-vector... no: 64-bit integer vector -> DF vector
;; (vcvtqq2pd / vcvtuqq2pd), with masking and embedded rounding
;; (AVX512DQ).  any_float covers both float and unsigned_float.
6146 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
6147 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
6148 (any_float:VF2_AVX512VL
6149 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
6150 "TARGET_AVX512DQ"
6151 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6152 [(set_attr "type" "ssecvt")
6153 (set_attr "prefix" "evex")
6154 (set_attr "mode" "<MODE>")])
6155
6156 ;; For float<floatunssuffix><sselondveclower><mode> insn patterns
;; Mnemonic suffix for vcvt(u)qq2ps: the V4SF result needs the {y}
;; (256-bit source) disambiguator; V8SF does not.
6157 (define_mode_attr qq2pssuff
6158 [(V8SF "") (V4SF "{y}")])
6159
;; SF vector mode -> the DI vector mode with the same element count.
6160 (define_mode_attr sselongvecmode
6161 [(V8SF "V8DI") (V4SF "V4DI")])
6162
;; Lower-case spelling of sselongvecmode, for pattern names.
6163 (define_mode_attr sselongvecmodelower
6164 [(V8SF "v8di") (V4SF "v4di")])
6165
;; Insn "mode" attribute value for the qq<->ps conversion patterns.
6166 (define_mode_attr sseintvecmode3
6167 [(V8SF "XI") (V4SF "OI")
6168 (V8DF "OI") (V4DF "TI")])
6169
;; Signed/unsigned 64-bit integer vector -> SF vector
;; (vcvtqq2ps / vcvtuqq2ps), masked/rounded forms included (AVX512DQ).
6170 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
6171 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
6172 (any_float:VF1_128_256VL
6173 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
6174 "TARGET_AVX512DQ && <round_modev8sf_condition>"
6175 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6176 [(set_attr "type" "ssecvt")
6177 (set_attr "prefix" "evex")
6178 (set_attr "mode" "<MODE>")])
6179
;; V2DI -> V2SF conversion; the V2SF result is widened to V4SF by
;; concatenating a zero V2SF upper half (operand 2, created here).
6180 (define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
6181 [(set (match_operand:V4SF 0 "register_operand" "=v")
6182 (vec_concat:V4SF
6183 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6184 (match_dup 2)))]
6185 "TARGET_AVX512DQ && TARGET_AVX512VL"
6186 "operands[2] = CONST0_RTX (V2SFmode);")
6187
;; Matching insn for the expander above; the {x} suffix selects the
;; 128-bit source form of vcvt(u)qq2ps.
6188 (define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
6189 [(set (match_operand:V4SF 0 "register_operand" "=v")
6190 (vec_concat:V4SF
6191 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6192 (match_operand:V2SF 2 "const0_operand" "C")))]
6193 "TARGET_AVX512DQ && TARGET_AVX512VL"
6194 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
6195 [(set_attr "type" "ssecvt")
6196 (set_attr "prefix" "evex")
6197 (set_attr "mode" "V4SF")])
6198
;; Middle-end entry point for V2DI -> V2SF: views the V2SF destination
;; as V4SF and defers to the avx512dq expander above.
6199 (define_expand "float<floatunssuffix>v2div2sf2"
6200 [(set (match_operand:V2SF 0 "register_operand")
6201 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
6202 "TARGET_AVX512DQ && TARGET_AVX512VL"
6203 {
6204 operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
6205 emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
6206 (operands[0], operands[1]));
6207 DONE;
6208 })
6209
;; Helper mode attrs for vec_pack<floatprefix>_float_<mode>: the concat
;; pattern name, the temporary SF vector mode, and the per-half
;; conversion pattern name.
6210 (define_mode_attr vpckfloat_concat_mode
6211 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
6212 (define_mode_attr vpckfloat_temp_mode
6213 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
6214 (define_mode_attr vpckfloat_op_mode
6215 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
6216
;; Convert two DI vectors to SF and pack the results into one SF
;; vector.  The V2DI case converts each half into the low part of a
;; V4SF temp and joins them with movlhps; wider cases use vec_concat.
6217 (define_expand "vec_pack<floatprefix>_float_<mode>"
6218 [(match_operand:<ssePSmode> 0 "register_operand")
6219 (any_float:<ssePSmode>
6220 (match_operand:VI8_AVX512VL 1 "register_operand"))
6221 (match_operand:VI8_AVX512VL 2 "register_operand")]
6222 "TARGET_AVX512DQ"
6223 {
6224 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
6225 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
6226 rtx (*gen) (rtx, rtx);
6227
6228 if (<MODE>mode == V2DImode)
6229 gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
6230 else
6231 gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
6232 emit_insn (gen (r1, operands[1]));
6233 emit_insn (gen (r2, operands[2]));
6234 if (<MODE>mode == V2DImode)
6235 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
6236 else
6237 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
6238 r1, r2));
6239 DONE;
6240 })
6241
;; Masked V2DI -> V2SF conversion, merge-masking form: unselected lanes
;; come from the low half of operand 2; the upper V2SF half of the V4SF
;; result is zero (operand 4, created here).
6242 (define_expand "float<floatunssuffix>v2div2sf2_mask"
6243 [(set (match_operand:V4SF 0 "register_operand" "=v")
6244 (vec_concat:V4SF
6245 (vec_merge:V2SF
6246 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6247 (vec_select:V2SF
6248 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6249 (parallel [(const_int 0) (const_int 1)]))
6250 (match_operand:QI 3 "register_operand" "Yk"))
6251 (match_dup 4)))]
6252 "TARGET_AVX512DQ && TARGET_AVX512VL"
6253 "operands[4] = CONST0_RTX (V2SFmode);")
6254
;; Insn matching the masked expander above; %N2 emits {z} when
;; operand 2 is the zero constant (zero-masking).
6255 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
6256 [(set (match_operand:V4SF 0 "register_operand" "=v")
6257 (vec_concat:V4SF
6258 (vec_merge:V2SF
6259 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6260 (vec_select:V2SF
6261 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6262 (parallel [(const_int 0) (const_int 1)]))
6263 (match_operand:QI 3 "register_operand" "Yk"))
6264 (match_operand:V2SF 4 "const0_operand" "C")))]
6265 "TARGET_AVX512DQ && TARGET_AVX512VL"
6266 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6267 [(set_attr "type" "ssecvt")
6268 (set_attr "prefix" "evex")
6269 (set_attr "mode" "V4SF")])
6270
;; Zero-masking variant: masked-off lanes are zeroed (merge source is
;; the zero vector), emitted with an explicit {z} modifier.
6271 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
6272 [(set (match_operand:V4SF 0 "register_operand" "=v")
6273 (vec_concat:V4SF
6274 (vec_merge:V2SF
6275 (any_float:V2SF (match_operand:V2DI 1
6276 "nonimmediate_operand" "vm"))
6277 (match_operand:V2SF 3 "const0_operand" "C")
6278 (match_operand:QI 2 "register_operand" "Yk"))
6279 (match_operand:V2SF 4 "const0_operand" "C")))]
6280 "TARGET_AVX512DQ && TARGET_AVX512VL"
6281 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6282 [(set_attr "type" "ssecvt")
6283 (set_attr "prefix" "evex")
6284 (set_attr "mode" "V4SF")])
6285
;; Unsigned SI-vector -> DF-vector conversion (vcvtudq2pd), with
;; optional masking (AVX512F).
6286 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
6287 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
6288 (unsigned_float:VF2_512_256VL
6289 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
6290 "TARGET_AVX512F"
6291 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6292 [(set_attr "type" "ssecvt")
6293 (set_attr "prefix" "evex")
6294 (set_attr "mode" "<MODE>")])
6295
;; 128-bit unsigned variant: converts the low two SI elements of a
;; V4SI operand to V2DF (%q1 prints the 64-bit memory form).
6296 (define_insn "ufloatv2siv2df2<mask_name>"
6297 [(set (match_operand:V2DF 0 "register_operand" "=v")
6298 (unsigned_float:V2DF
6299 (vec_select:V2SI
6300 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6301 (parallel [(const_int 0) (const_int 1)]))))]
6302 "TARGET_AVX512VL"
6303 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6304 [(set_attr "type" "ssecvt")
6305 (set_attr "prefix" "evex")
6306 (set_attr "mode" "V2DF")])
6307
;; Signed conversion of the low eight SI elements of a V16SI operand
;; to V8DF (%t1 prints the 256-bit lowpart).
6308 (define_insn "avx512f_cvtdq2pd512_2"
6309 [(set (match_operand:V8DF 0 "register_operand" "=v")
6310 (float:V8DF
6311 (vec_select:V8SI
6312 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
6313 (parallel [(const_int 0) (const_int 1)
6314 (const_int 2) (const_int 3)
6315 (const_int 4) (const_int 5)
6316 (const_int 6) (const_int 7)]))))]
6317 "TARGET_AVX512F"
6318 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
6319 [(set_attr "type" "ssecvt")
6320 (set_attr "prefix" "evex")
6321 (set_attr "mode" "V8DF")])
6322
;; Signed conversion of the low four SI elements of a V8SI operand to
;; V4DF (%x1 prints the 128-bit lowpart).
6323 (define_insn "avx_cvtdq2pd256_2"
6324 [(set (match_operand:V4DF 0 "register_operand" "=v")
6325 (float:V4DF
6326 (vec_select:V4SI
6327 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
6328 (parallel [(const_int 0) (const_int 1)
6329 (const_int 2) (const_int 3)]))))]
6330 "TARGET_AVX"
6331 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
6332 [(set_attr "type" "ssecvt")
6333 (set_attr "prefix" "maybe_evex")
6334 (set_attr "mode" "V4DF")])
6335
;; Signed conversion of the low two SI elements of a V4SI operand to
;; V2DF; masking available under AVX512VL.
6336 (define_insn "sse2_cvtdq2pd<mask_name>"
6337 [(set (match_operand:V2DF 0 "register_operand" "=v")
6338 (float:V2DF
6339 (vec_select:V2SI
6340 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6341 (parallel [(const_int 0) (const_int 1)]))))]
6342 "TARGET_SSE2 && <mask_avx512vl_condition>"
6343 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6344 [(set_attr "type" "ssecvt")
6345 (set_attr "prefix" "maybe_vex")
6346 (set_attr "mode" "V2DF")])
6347
;; V8DF -> V8SI conversion with rounding (vcvtpd2dq), masked/rounded
;; AVX512F form.
6348 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
6349 [(set (match_operand:V8SI 0 "register_operand" "=v")
6350 (unspec:V8SI
6351 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
6352 UNSPEC_FIX_NOTRUNC))]
6353 "TARGET_AVX512F"
6354 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6355 [(set_attr "type" "ssecvt")
6356 (set_attr "prefix" "evex")
6357 (set_attr "mode" "OI")])
6358
;; V4DF -> V4SI with rounding; {y} marks the 256-bit source form.
6359 (define_insn "avx_cvtpd2dq256<mask_name>"
6360 [(set (match_operand:V4SI 0 "register_operand" "=v")
6361 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6362 UNSPEC_FIX_NOTRUNC))]
6363 "TARGET_AVX && <mask_avx512vl_condition>"
6364 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6365 [(set_attr "type" "ssecvt")
6366 (set_attr "prefix" "<mask_prefix>")
6367 (set_attr "mode" "OI")])
6368
;; V4DF -> V4SI widened to V8SI by concatenating a zero upper half
;; (operand 2, created here).
6369 (define_expand "avx_cvtpd2dq256_2"
6370 [(set (match_operand:V8SI 0 "register_operand")
6371 (vec_concat:V8SI
6372 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
6373 UNSPEC_FIX_NOTRUNC)
6374 (match_dup 2)))]
6375 "TARGET_AVX"
6376 "operands[2] = CONST0_RTX (V4SImode);")
6377
;; Insn matching the expander above; writing the 128-bit lowpart %x0
;; zeroes the upper half, giving the zero vec_concat for free.
6378 (define_insn "*avx_cvtpd2dq256_2"
6379 [(set (match_operand:V8SI 0 "register_operand" "=v")
6380 (vec_concat:V8SI
6381 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6382 UNSPEC_FIX_NOTRUNC)
6383 (match_operand:V4SI 2 "const0_operand")))]
6384 "TARGET_AVX"
6385 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
6386 [(set_attr "type" "ssecvt")
6387 (set_attr "prefix" "vex")
6388 (set_attr "btver2_decode" "vector")
6389 (set_attr "mode" "OI")])
6390
;; V2DF -> V2SI with rounding, zero-extended into a V4SI result (the
;; instruction clears the upper two lanes, modeled by the vec_concat
;; with a zero V2SI).
6391 (define_insn "sse2_cvtpd2dq"
6392 [(set (match_operand:V4SI 0 "register_operand" "=v")
6393 (vec_concat:V4SI
6394 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
6395 UNSPEC_FIX_NOTRUNC)
6396 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6397 "TARGET_SSE2"
6398 {
6399 if (TARGET_AVX)
6400 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
6401 else
6402 return "cvtpd2dq\t{%1, %0|%0, %1}";
6403 }
6404 [(set_attr "type" "ssecvt")
6405 (set_attr "prefix_rep" "1")
6406 (set_attr "prefix_data16" "0")
6407 (set_attr "prefix" "maybe_vex")
6408 (set_attr "mode" "TI")
6409 (set_attr "amdfam10_decode" "double")
6410 (set_attr "athlon_decode" "vector")
6411 (set_attr "bdver1_decode" "double")])
6412
;; Merge-masked V2DF -> V2SI with rounding: unselected lanes come from
;; the low half of operand 2 (%N2 prints {z} for a zero merge source).
6413 (define_insn "sse2_cvtpd2dq_mask"
6414 [(set (match_operand:V4SI 0 "register_operand" "=v")
6415 (vec_concat:V4SI
6416 (vec_merge:V2SI
6417 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6418 UNSPEC_FIX_NOTRUNC)
6419 (vec_select:V2SI
6420 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6421 (parallel [(const_int 0) (const_int 1)]))
6422 (match_operand:QI 3 "register_operand" "Yk"))
6423 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6424 "TARGET_AVX512VL"
6425 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6426 [(set_attr "type" "ssecvt")
6427 (set_attr "prefix" "evex")
6428 (set_attr "mode" "TI")])
6429
;; Zero-masking variant: masked-off lanes are zeroed; emitted with an
;; explicit {z} modifier.
6430 (define_insn "*sse2_cvtpd2dq_mask_1"
6431 [(set (match_operand:V4SI 0 "register_operand" "=v")
6432 (vec_concat:V4SI
6433 (vec_merge:V2SI
6434 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6435 UNSPEC_FIX_NOTRUNC)
6436 (const_vector:V2SI [(const_int 0) (const_int 0)])
6437 (match_operand:QI 2 "register_operand" "Yk"))
6438 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6439 "TARGET_AVX512VL"
6440 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6441 [(set_attr "type" "ssecvt")
6442 (set_attr "prefix" "evex")
6443 (set_attr "mode" "TI")])
6444
6445 ;; For ufix_notrunc* insn patterns
;; vcvtpd2udq mnemonic suffix: {y} disambiguates the 256-bit source.
6446 (define_mode_attr pd2udqsuff
6447 [(V8DF "") (V4DF "{y}")])
6448
;; DF vector -> unsigned SI vector with rounding (vcvtpd2udq),
;; masked/rounded AVX512F forms.
6449 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6450 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6451 (unspec:<si2dfmode>
6452 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6453 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6454 "TARGET_AVX512F"
6455 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6456 [(set_attr "type" "ssecvt")
6457 (set_attr "prefix" "evex")
6458 (set_attr "mode" "<sseinsnmode>")])
6459
;; 128-bit V2DF -> unsigned V2SI with rounding, upper result lanes
;; zeroed (modeled by vec_concat with a zero V2SI).
6460 (define_insn "ufix_notruncv2dfv2si2"
6461 [(set (match_operand:V4SI 0 "register_operand" "=v")
6462 (vec_concat:V4SI
6463 (unspec:V2SI
6464 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6465 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6466 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6467 "TARGET_AVX512VL"
6468 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6469 [(set_attr "type" "ssecvt")
6470 (set_attr "prefix" "evex")
6471 (set_attr "mode" "TI")])
6472
;; Merge-masked variant of the above (%N2 prints {z} for a zero merge
;; source).
6473 (define_insn "ufix_notruncv2dfv2si2_mask"
6474 [(set (match_operand:V4SI 0 "register_operand" "=v")
6475 (vec_concat:V4SI
6476 (vec_merge:V2SI
6477 (unspec:V2SI
6478 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6479 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6480 (vec_select:V2SI
6481 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6482 (parallel [(const_int 0) (const_int 1)]))
6483 (match_operand:QI 3 "register_operand" "Yk"))
6484 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6485 "TARGET_AVX512VL"
6486 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6487 [(set_attr "type" "ssecvt")
6488 (set_attr "prefix" "evex")
6489 (set_attr "mode" "TI")])
6490
;; Zero-masking variant (explicit {z} modifier).
6491 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6492 [(set (match_operand:V4SI 0 "register_operand" "=v")
6493 (vec_concat:V4SI
6494 (vec_merge:V2SI
6495 (unspec:V2SI
6496 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6497 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6498 (const_vector:V2SI [(const_int 0) (const_int 0)])
6499 (match_operand:QI 2 "register_operand" "Yk"))
6500 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6501 "TARGET_AVX512VL"
6502 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6503 [(set_attr "type" "ssecvt")
6504 (set_attr "prefix" "evex")
6505 (set_attr "mode" "TI")])
6506
;; Truncating V8DF -> signed/unsigned V8SI (vcvttpd2dq / vcvttpd2udq),
;; masked/SAE AVX512F forms; any_fix covers fix and unsigned_fix.
6507 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6508 [(set (match_operand:V8SI 0 "register_operand" "=v")
6509 (any_fix:V8SI
6510 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6511 "TARGET_AVX512F"
6512 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6513 [(set_attr "type" "ssecvt")
6514 (set_attr "prefix" "evex")
6515 (set_attr "mode" "OI")])
6516
;; Truncating V2DF -> unsigned V2SI, upper result lanes zeroed
;; (vec_concat with a zero V2SI).
6517 (define_insn "ufix_truncv2dfv2si2"
6518 [(set (match_operand:V4SI 0 "register_operand" "=v")
6519 (vec_concat:V4SI
6520 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6521 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6522 "TARGET_AVX512VL"
6523 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6524 [(set_attr "type" "ssecvt")
6525 (set_attr "prefix" "evex")
6526 (set_attr "mode" "TI")])
6527
;; Merge-masked variant (%N2 prints {z} for a zero merge source).
6528 (define_insn "ufix_truncv2dfv2si2_mask"
6529 [(set (match_operand:V4SI 0 "register_operand" "=v")
6530 (vec_concat:V4SI
6531 (vec_merge:V2SI
6532 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6533 (vec_select:V2SI
6534 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6535 (parallel [(const_int 0) (const_int 1)]))
6536 (match_operand:QI 3 "register_operand" "Yk"))
6537 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6538 "TARGET_AVX512VL"
6539 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6540 [(set_attr "type" "ssecvt")
6541 (set_attr "prefix" "evex")
6542 (set_attr "mode" "TI")])
6543
;; Zero-masking variant (explicit {z} modifier).
6544 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6545 [(set (match_operand:V4SI 0 "register_operand" "=v")
6546 (vec_concat:V4SI
6547 (vec_merge:V2SI
6548 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6549 (const_vector:V2SI [(const_int 0) (const_int 0)])
6550 (match_operand:QI 2 "register_operand" "Yk"))
6551 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6552 "TARGET_AVX512VL"
6553 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6554 [(set_attr "type" "ssecvt")
6555 (set_attr "prefix" "evex")
6556 (set_attr "mode" "TI")])
6557
;; Truncating V4DF -> signed V4SI (vcvttpd2dq{y}); masking needs
;; AVX512VL.
6558 (define_insn "fix_truncv4dfv4si2<mask_name>"
6559 [(set (match_operand:V4SI 0 "register_operand" "=v")
6560 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6561 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6562 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6563 [(set_attr "type" "ssecvt")
6564 (set_attr "prefix" "maybe_evex")
6565 (set_attr "mode" "OI")])
6566
;; Truncating V4DF -> unsigned V4SI (vcvttpd2udq{y}, AVX512VL only).
6567 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6568 [(set (match_operand:V4SI 0 "register_operand" "=v")
6569 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6570 "TARGET_AVX512VL && TARGET_AVX512F"
6571 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6572 [(set_attr "type" "ssecvt")
6573 (set_attr "prefix" "maybe_evex")
6574 (set_attr "mode" "OI")])
6575
;; Truncating DF vector -> same-width signed/unsigned 64-bit integer
;; vector (vcvttpd2qq / vcvttpd2uqq, AVX512DQ).
6576 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6577 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6578 (any_fix:<sseintvecmode>
6579 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6580 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6581 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6582 [(set_attr "type" "ssecvt")
6583 (set_attr "prefix" "evex")
6584 (set_attr "mode" "<sseintvecmode2>")])
6585
;; DF vector -> signed 64-bit integer vector with rounding
;; (vcvtpd2qq, AVX512DQ).
6586 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6587 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6588 (unspec:<sseintvecmode>
6589 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6590 UNSPEC_FIX_NOTRUNC))]
6591 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6592 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6593 [(set_attr "type" "ssecvt")
6594 (set_attr "prefix" "evex")
6595 (set_attr "mode" "<sseintvecmode2>")])
6596
;; DF vector -> unsigned 64-bit integer vector with rounding
;; (vcvtpd2uqq, AVX512DQ).
6597 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6598 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6599 (unspec:<sseintvecmode>
6600 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6601 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6602 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6603 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6604 [(set_attr "type" "ssecvt")
6605 (set_attr "prefix" "evex")
6606 (set_attr "mode" "<sseintvecmode2>")])
6607
;; Truncating SF vector -> double-width signed/unsigned 64-bit integer
;; vector (vcvttps2qq / vcvttps2uqq, AVX512DQ).
6608 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6609 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6610 (any_fix:<sselongvecmode>
6611 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6612 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6613 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6614 [(set_attr "type" "ssecvt")
6615 (set_attr "prefix" "evex")
6616 (set_attr "mode" "<sseintvecmode3>")])
6617
;; Truncating conversion of the low two SF elements of a V4SF operand
;; to V2DI (vcvttps2qq / vcvttps2uqq; %q1 prints the 64-bit memory
;; form).
6618 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6619 [(set (match_operand:V2DI 0 "register_operand" "=v")
6620 (any_fix:V2DI
6621 (vec_select:V2SF
6622 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6623 (parallel [(const_int 0) (const_int 1)]))))]
6624 "TARGET_AVX512DQ && TARGET_AVX512VL"
6625 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6626 [(set_attr "type" "ssecvt")
6627 (set_attr "prefix" "evex")
6628 (set_attr "mode" "TI")])
6629
;; Middle-end entry point for truncating V2SF -> V2DI: forces the
;; source into a register, views it as V4SF and defers to the
;; avx512dq pattern above.
6630 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
6631 [(set (match_operand:V2DI 0 "register_operand")
6632 (any_fix:V2DI
6633 (match_operand:V2SF 1 "register_operand")))]
6634 "TARGET_AVX512DQ && TARGET_AVX512VL"
6635 {
6636 operands[1] = force_reg (V2SFmode, operands[1]);
6637 operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
6638 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
6639 (operands[0], operands[1]));
6640 DONE;
6641 })
6642
;; Helper mode attrs for the vec_unpack_*fix_trunc expanders below:
;; result DI-vector mode (upper/lower case) and the vec_extract
;; pattern name used to split the SF source in half.
6643 (define_mode_attr vunpckfixt_mode
6644 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6645 (define_mode_attr vunpckfixt_model
6646 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6647 (define_mode_attr vunpckfixt_extract_mode
6648 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
6649
;; Truncating conversion of the LOW half of an SF vector to a DI
;; vector.  For V4SF the dedicated v2sfv2di pattern reads the low half
;; directly; wider modes first extract the low half into a temp.
6650 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6651 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6652 (any_fix:<vunpckfixt_mode>
6653 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6654 "TARGET_AVX512DQ"
6655 {
6656 rtx tem = operands[1];
6657 rtx (*gen) (rtx, rtx);
6658
6659 if (<MODE>mode != V4SFmode)
6660 {
6661 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6662 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6663 operands[1]));
6664 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6665 }
6666 else
6667 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6668
6669 emit_insn (gen (operands[0], tem));
6670 DONE;
6671 })
6672
;; As above but for the HIGH half.  For V4SF the high half is first
;; swapped into the low positions with vpermilps (imm 0x4e swaps the
;; two 64-bit halves) so the low-half pattern can be reused.
6673 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6674 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6675 (any_fix:<vunpckfixt_mode>
6676 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6677 "TARGET_AVX512DQ"
6678 {
6679 rtx tem;
6680 rtx (*gen) (rtx, rtx);
6681
6682 if (<MODE>mode != V4SFmode)
6683 {
6684 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6685 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6686 operands[1]));
6687 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6688 }
6689 else
6690 {
6691 tem = gen_reg_rtx (V4SFmode);
6692 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6693 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6694 }
6695
6696 emit_insn (gen (operands[0], tem));
6697 DONE;
6698 })
6699
;; Truncating SF vector -> unsigned SI vector (vcvttps2udq,
;; AVX512VL), masked form available.
6700 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6701 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6702 (unsigned_fix:<sseintvecmode>
6703 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6704 "TARGET_AVX512VL"
6705 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6706 [(set_attr "type" "ssecvt")
6707 (set_attr "prefix" "evex")
6708 (set_attr "mode" "<sseintvecmode2>")])
6709
;; Truncating V4DF -> V4SI widened to V8SI by concatenating a zero
;; upper half (operand 2, created here).
6710 (define_expand "avx_cvttpd2dq256_2"
6711 [(set (match_operand:V8SI 0 "register_operand")
6712 (vec_concat:V8SI
6713 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6714 (match_dup 2)))]
6715 "TARGET_AVX"
6716 "operands[2] = CONST0_RTX (V4SImode);")
6717
;; Truncating V2DF -> V2SI, upper result lanes zeroed (vec_concat
;; with a zero V2SI); {x} selects the 128-bit VEX form.
6718 (define_insn "sse2_cvttpd2dq"
6719 [(set (match_operand:V4SI 0 "register_operand" "=v")
6720 (vec_concat:V4SI
6721 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6722 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6723 "TARGET_SSE2"
6724 {
6725 if (TARGET_AVX)
6726 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6727 else
6728 return "cvttpd2dq\t{%1, %0|%0, %1}";
6729 }
6730 [(set_attr "type" "ssecvt")
6731 (set_attr "amdfam10_decode" "double")
6732 (set_attr "athlon_decode" "vector")
6733 (set_attr "bdver1_decode" "double")
6734 (set_attr "prefix" "maybe_vex")
6735 (set_attr "mode" "TI")])
6736
;; Merge-masked truncating V2DF -> V2SI: unselected lanes come from
;; the low half of operand 2 (%N2 prints {z} for a zero merge source).
6737 (define_insn "sse2_cvttpd2dq_mask"
6738 [(set (match_operand:V4SI 0 "register_operand" "=v")
6739 (vec_concat:V4SI
6740 (vec_merge:V2SI
6741 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6742 (vec_select:V2SI
6743 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6744 (parallel [(const_int 0) (const_int 1)]))
6745 (match_operand:QI 3 "register_operand" "Yk"))
6746 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6747 "TARGET_AVX512VL"
6748 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6749 [(set_attr "type" "ssecvt")
6750 (set_attr "prefix" "evex")
6751 (set_attr "mode" "TI")])
6752
;; Zero-masking variant (explicit {z} modifier).
6753 (define_insn "*sse2_cvttpd2dq_mask_1"
6754 [(set (match_operand:V4SI 0 "register_operand" "=v")
6755 (vec_concat:V4SI
6756 (vec_merge:V2SI
6757 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6758 (const_vector:V2SI [(const_int 0) (const_int 0)])
6759 (match_operand:QI 2 "register_operand" "Yk"))
6760 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6761 "TARGET_AVX512VL"
6762 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6763 [(set_attr "type" "ssecvt")
6764 (set_attr "prefix" "evex")
6765 (set_attr "mode" "TI")])
6766
;; Scalar DF -> SF conversion: the low DF element of operand 2 is
;; narrowed and merged into the low lane of operand 1 (vec_merge with
;; mask 1).  Legacy two-operand and VEX/EVEX three-operand forms,
;; masked/rounded variants via the iterator substitutions.
6767 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
6768 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6769 (vec_merge:V4SF
6770 (vec_duplicate:V4SF
6771 (float_truncate:V2SF
6772 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6773 (match_operand:V4SF 1 "register_operand" "0,0,v")
6774 (const_int 1)))]
6775 "TARGET_SSE2"
6776 "@
6777 cvtsd2ss\t{%2, %0|%0, %2}
6778 cvtsd2ss\t{%2, %0|%0, %q2}
6779 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
6780 [(set_attr "isa" "noavx,noavx,avx")
6781 (set_attr "type" "ssecvt")
6782 (set_attr "athlon_decode" "vector,double,*")
6783 (set_attr "amdfam10_decode" "vector,double,*")
6784 (set_attr "bdver1_decode" "direct,direct,*")
6785 (set_attr "btver2_decode" "double,double,double")
6786 (set_attr "prefix" "orig,orig,<round_prefix>")
6787 (set_attr "mode" "SF")])
6788
;; As above but with a plain scalar DF source operand rather than the
;; low element of a V2DF register (the "vd" form).
6789 (define_insn "*sse2_vd_cvtsd2ss"
6790 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6791 (vec_merge:V4SF
6792 (vec_duplicate:V4SF
6793 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6794 (match_operand:V4SF 1 "register_operand" "0,0,v")
6795 (const_int 1)))]
6796 "TARGET_SSE2"
6797 "@
6798 cvtsd2ss\t{%2, %0|%0, %2}
6799 cvtsd2ss\t{%2, %0|%0, %2}
6800 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6801 [(set_attr "isa" "noavx,noavx,avx")
6802 (set_attr "type" "ssecvt")
6803 (set_attr "athlon_decode" "vector,double,*")
6804 (set_attr "amdfam10_decode" "vector,double,*")
6805 (set_attr "bdver1_decode" "direct,direct,*")
6806 (set_attr "btver2_decode" "double,double,double")
6807 (set_attr "prefix" "orig,orig,vex")
6808 (set_attr "mode" "SF")])
6809
;; cvtss2sd: widen element 0 of V4SF operand 2 to DF and insert it into
;; element 0 of operand 1; element 1 of the V2DF result comes from operand 1.
;; Only SAE (suppress-all-exceptions) rounding control applies, hence the
;; <round_saeonly_*> substs.
6810 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
6811 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6812 (vec_merge:V2DF
6813 (float_extend:V2DF
6814 (vec_select:V2SF
6815 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6816 (parallel [(const_int 0) (const_int 1)])))
6817 (match_operand:V2DF 1 "register_operand" "0,0,v")
6818 (const_int 1)))]
6819 "TARGET_SSE2"
6820 "@
6821 cvtss2sd\t{%2, %0|%0, %2}
6822 cvtss2sd\t{%2, %0|%0, %k2}
6823 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
6824 [(set_attr "isa" "noavx,noavx,avx")
6825 (set_attr "type" "ssecvt")
6826 (set_attr "amdfam10_decode" "vector,double,*")
6827 (set_attr "athlon_decode" "direct,direct,*")
6828 (set_attr "bdver1_decode" "direct,direct,*")
6829 (set_attr "btver2_decode" "double,double,double")
6830 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6831 (set_attr "mode" "DF")])
6832
;; Variant of sse2_cvtss2sd matched when the source is a scalar SFmode
;; operand (vec_duplicate of the extended scalar); no masking/rounding.
6833 (define_insn "*sse2_vd_cvtss2sd"
6834 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6835 (vec_merge:V2DF
6836 (vec_duplicate:V2DF
6837 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6838 (match_operand:V2DF 1 "register_operand" "0,0,v")
6839 (const_int 1)))]
6840 "TARGET_SSE2"
6841 "@
6842 cvtss2sd\t{%2, %0|%0, %2}
6843 cvtss2sd\t{%2, %0|%0, %2}
6844 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6845 [(set_attr "isa" "noavx,noavx,avx")
6846 (set_attr "type" "ssecvt")
6847 (set_attr "amdfam10_decode" "vector,double,*")
6848 (set_attr "athlon_decode" "direct,direct,*")
6849 (set_attr "bdver1_decode" "direct,direct,*")
6850 (set_attr "btver2_decode" "double,double,double")
6851 (set_attr "prefix" "orig,orig,vex")
6852 (set_attr "mode" "DF")])
6853
;; Full-width packed DF -> SF truncating conversions.
;; 512-bit form: V8DF -> V8SF, with optional AVX512 masking and embedded
;; rounding via the <mask_name>/<round_name> substs.
6854 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6855 [(set (match_operand:V8SF 0 "register_operand" "=v")
6856 (float_truncate:V8SF
6857 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6858 "TARGET_AVX512F"
6859 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6860 [(set_attr "type" "ssecvt")
6861 (set_attr "prefix" "evex")
6862 (set_attr "mode" "V8SF")])
6863
;; 256-bit form: V4DF -> V4SF; the {y} suffix disambiguates the memory-source
;; encoding.  Masked variant requires AVX512VL (<mask_avx512vl_condition>).
6864 (define_insn "avx_cvtpd2ps256<mask_name>"
6865 [(set (match_operand:V4SF 0 "register_operand" "=v")
6866 (float_truncate:V4SF
6867 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6868 "TARGET_AVX && <mask_avx512vl_condition>"
6869 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6870 [(set_attr "type" "ssecvt")
6871 (set_attr "prefix" "maybe_evex")
6872 (set_attr "btver2_decode" "vector")
6873 (set_attr "mode" "V4SF")])
6874
;; Expander for cvtpd2ps: V2DF -> low V2SF of a V4SF result, upper half
;; zero-filled (operand 2 is forced to a V2SF zero vector).
6875 (define_expand "sse2_cvtpd2ps"
6876 [(set (match_operand:V4SF 0 "register_operand")
6877 (vec_concat:V4SF
6878 (float_truncate:V2SF
6879 (match_operand:V2DF 1 "vector_operand"))
6880 (match_dup 2)))]
6881 "TARGET_SSE2"
6882 "operands[2] = CONST0_RTX (V2SFmode);")
6883
;; Masked expander: converted lanes are merged with the low half of
;; operand 2 under mask operand 3; upper half of the result is zeroed
;; (operand 4 is forced to a V2SF zero vector).
6884 (define_expand "sse2_cvtpd2ps_mask"
6885 [(set (match_operand:V4SF 0 "register_operand")
6886 (vec_concat:V4SF
6887 (vec_merge:V2SF
6888 (float_truncate:V2SF
6889 (match_operand:V2DF 1 "vector_operand"))
6890 (vec_select:V2SF
6891 (match_operand:V4SF 2 "nonimm_or_0_operand")
6892 (parallel [(const_int 0) (const_int 1)]))
6893 (match_operand:QI 3 "register_operand"))
6894 (match_dup 4)))]
6895 "TARGET_SSE2"
6896 "operands[4] = CONST0_RTX (V2SFmode);")
6897
;; Insn for the sse2_cvtpd2ps expander above.  The {x} suffix selects the
;; 128-bit memory-operand encoding when AVX mnemonics are used.
6898 (define_insn "*sse2_cvtpd2ps"
6899 [(set (match_operand:V4SF 0 "register_operand" "=v")
6900 (vec_concat:V4SF
6901 (float_truncate:V2SF
6902 (match_operand:V2DF 1 "vector_operand" "vBm"))
6903 (match_operand:V2SF 2 "const0_operand" "C")))]
6904 "TARGET_SSE2"
6905 {
6906 if (TARGET_AVX)
6907 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6908 else
6909 return "cvtpd2ps\t{%1, %0|%0, %1}";
6910 }
6911 [(set_attr "type" "ssecvt")
6912 (set_attr "amdfam10_decode" "double")
6913 (set_attr "athlon_decode" "vector")
6914 (set_attr "bdver1_decode" "double")
6915 (set_attr "prefix_data16" "1")
6916 (set_attr "prefix" "maybe_vex")
6917 (set_attr "mode" "V4SF")])
6918
;; Bare V2DF -> V2SF truncation for 64-bit vectors emulated in SSE regs
;; (TARGET_MMX_WITH_SSE); same instruction, result mode is only V2SF.
6919 (define_insn "truncv2dfv2sf2"
6920 [(set (match_operand:V2SF 0 "register_operand" "=v")
6921 (float_truncate:V2SF
6922 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6923 "TARGET_MMX_WITH_SSE"
6924 {
6925 if (TARGET_AVX)
6926 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6927 else
6928 return "cvtpd2ps\t{%1, %0|%0, %1}";
6929 }
6930 [(set_attr "type" "ssecvt")
6931 (set_attr "amdfam10_decode" "double")
6932 (set_attr "athlon_decode" "vector")
6933 (set_attr "bdver1_decode" "double")
6934 (set_attr "prefix_data16" "1")
6935 (set_attr "prefix" "maybe_vex")
6936 (set_attr "mode" "V4SF")])
6937
;; Merge-masked form of cvtpd2ps{x}: masked-off lanes keep the low half of
;; operand 2 ("0C" also allows zero-masking when operand 2 is zero, printed
;; via %N2); upper half of the V4SF result is zeroed.
6938 (define_insn "*sse2_cvtpd2ps_mask"
6939 [(set (match_operand:V4SF 0 "register_operand" "=v")
6940 (vec_concat:V4SF
6941 (vec_merge:V2SF
6942 (float_truncate:V2SF
6943 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6944 (vec_select:V2SF
6945 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6946 (parallel [(const_int 0) (const_int 1)]))
6947 (match_operand:QI 3 "register_operand" "Yk"))
6948 (match_operand:V2SF 4 "const0_operand" "C")))]
6949 "TARGET_AVX512VL"
6950 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6951 [(set_attr "type" "ssecvt")
6952 (set_attr "prefix" "evex")
6953 (set_attr "mode" "V4SF")])
6954
;; Explicit zero-masked form ({z}): masked-off lanes are zeroed.
6955 (define_insn "*sse2_cvtpd2ps_mask_1"
6956 [(set (match_operand:V4SF 0 "register_operand" "=v")
6957 (vec_concat:V4SF
6958 (vec_merge:V2SF
6959 (float_truncate:V2SF
6960 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6961 (match_operand:V2SF 3 "const0_operand" "C")
6962 (match_operand:QI 2 "register_operand" "Yk"))
6963 (match_operand:V2SF 4 "const0_operand" "C")))]
6964 "TARGET_AVX512VL"
6965 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6966 [(set_attr "type" "ssecvt")
6967 (set_attr "prefix" "evex")
6968 (set_attr "mode" "V4SF")])
6969
6970 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element count
;; (upper/lower-case variants for use in mode strings vs. pattern names).
6971 (define_mode_attr sf2dfmode
6972 [(V8DF "V8SF") (V4DF "V4SF")])
6973 (define_mode_attr sf2dfmode_lower
6974 [(V8DF "v8sf") (V4DF "v4sf")])
6975
;; Standard-name expander: full-width SF <- DF narrowing (256/512 bit).
6976 (define_expand "trunc<mode><sf2dfmode_lower>2"
6977 [(set (match_operand:<sf2dfmode> 0 "register_operand")
6978 (float_truncate:<sf2dfmode>
6979 (match_operand:VF2_512_256 1 "vector_operand")))]
6980 "TARGET_AVX")
6981
;; Standard-name expander: full-width DF <- SF widening (256/512 bit).
6982 (define_expand "extend<sf2dfmode_lower><mode>2"
6983 [(set (match_operand:VF2_512_256 0 "register_operand")
6984 (float_extend:VF2_512_256
6985 (match_operand:<sf2dfmode> 1 "vector_operand")))]
6986 "TARGET_AVX")
6987
;; Packed SF -> DF widening conversion, 256/512-bit destinations, with
;; optional AVX512 masking and SAE control via the subst attributes.
6988 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6989 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6990 (float_extend:VF2_512_256
6991 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6992 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6993 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6994 [(set_attr "type" "ssecvt")
6995 (set_attr "prefix" "maybe_vex")
6996 (set_attr "mode" "<MODE>")])
6997
;; Widen the low 4 SF elements of a V8SF to V4DF (%x1 prints the 128-bit
;; low part of operand 1).
6998 (define_insn "*avx_cvtps2pd256_2"
6999 [(set (match_operand:V4DF 0 "register_operand" "=v")
7000 (float_extend:V4DF
7001 (vec_select:V4SF
7002 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7003 (parallel [(const_int 0) (const_int 1)
7004 (const_int 2) (const_int 3)]))))]
7005 "TARGET_AVX"
7006 "vcvtps2pd\t{%x1, %0|%0, %x1}"
7007 [(set_attr "type" "ssecvt")
7008 (set_attr "prefix" "vex")
7009 (set_attr "mode" "V4DF")])
7010
;; Widen the low 8 SF elements of a V16SF to V8DF (%t1 prints the 256-bit
;; low part of operand 1).
7011 (define_insn "vec_unpacks_lo_v16sf"
7012 [(set (match_operand:V8DF 0 "register_operand" "=v")
7013 (float_extend:V8DF
7014 (vec_select:V8SF
7015 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7016 (parallel [(const_int 0) (const_int 1)
7017 (const_int 2) (const_int 3)
7018 (const_int 4) (const_int 5)
7019 (const_int 6) (const_int 7)]))))]
7020 "TARGET_AVX512F"
7021 "vcvtps2pd\t{%t1, %0|%0, %t1}"
7022 [(set_attr "type" "ssecvt")
7023 (set_attr "prefix" "evex")
7024 (set_attr "mode" "V8DF")])
7025
;; vpmov{b,w}2m: extract the sign bits of a byte/word vector into a mask
;; register (UNSPEC since mask extraction has no natural RTL form).
7026 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
7027 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
7028 (unspec:<avx512fmaskmode>
7029 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
7030 UNSPEC_CVTINT2MASK))]
7031 "TARGET_AVX512BW"
7032 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
7033 [(set_attr "prefix" "evex")
7034 (set_attr "mode" "<sseinsnmode>")])
7035
;; vpmov{d,q}2m: same for dword/qword element vectors (AVX512DQ).
7036 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
7037 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
7038 (unspec:<avx512fmaskmode>
7039 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
7040 UNSPEC_CVTINT2MASK))]
7041 "TARGET_AVX512DQ"
7042 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
7043 [(set_attr "prefix" "evex")
7044 (set_attr "mode" "<sseinsnmode>")])
7045
;; Mask -> vector conversion: each result element is -1 where the mask bit
;; is set and 0 where it is clear, expressed as a vec_merge of all-ones
;; and zero constant vectors under the mask operand.
7046 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
7047 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
7048 (vec_merge:VI12_AVX512VL
7049 (match_dup 2)
7050 (match_dup 3)
7051 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
7052 "TARGET_AVX512BW"
7053 {
7054 operands[2] = CONSTM1_RTX (<MODE>mode);
7055 operands[3] = CONST0_RTX (<MODE>mode);
7056 })
7057
;; vpmovm2{b,w}: byte/word element version (AVX512BW).
7058 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
7059 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
7060 (vec_merge:VI12_AVX512VL
7061 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
7062 (match_operand:VI12_AVX512VL 3 "const0_operand")
7063 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
7064 "TARGET_AVX512BW"
7065 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
7066 [(set_attr "prefix" "evex")
7067 (set_attr "mode" "<sseinsnmode>")])
7068
;; Same expansion for dword/qword elements; available from plain AVX512F
;; thanks to the vpternlog fallback in the insn below.
7069 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
7070 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
7071 (vec_merge:VI48_AVX512VL
7072 (match_dup 2)
7073 (match_dup 3)
7074 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
7075 "TARGET_AVX512F"
7076 "{
7077 operands[2] = CONSTM1_RTX (<MODE>mode);
7078 operands[3] = CONST0_RTX (<MODE>mode);
7079 }")
7080
;; Two alternatives: vpmovm2{d,q} when AVX512DQ is available, otherwise a
;; zero-masked vpternlog with immediate 0x81, whose truth table yields
;; all-ones for the identical-operand case, so unmasked lanes become -1
;; and {z}-masked lanes become 0.
7081 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
7082 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
7083 (vec_merge:VI48_AVX512VL
7084 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
7085 (match_operand:VI48_AVX512VL 3 "const0_operand")
7086 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
7087 "TARGET_AVX512F"
7088 "@
7089 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
7090 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
7091 [(set_attr "isa" "avx512dq,*")
7092 (set_attr "length_immediate" "0,1")
7093 (set_attr "prefix" "evex")
7094 (set_attr "mode" "<sseinsnmode>")])
7095
;; Widen the low two SF elements of a V4SF to V2DF (%q1 prints the 64-bit
;; memory reference in Intel syntax); masked variant needs AVX512VL.
7096 (define_insn "sse2_cvtps2pd<mask_name>"
7097 [(set (match_operand:V2DF 0 "register_operand" "=v")
7098 (float_extend:V2DF
7099 (vec_select:V2SF
7100 (match_operand:V4SF 1 "vector_operand" "vm")
7101 (parallel [(const_int 0) (const_int 1)]))))]
7102 "TARGET_SSE2 && <mask_avx512vl_condition>"
7103 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7104 [(set_attr "type" "ssecvt")
7105 (set_attr "amdfam10_decode" "direct")
7106 (set_attr "athlon_decode" "double")
7107 (set_attr "bdver1_decode" "double")
7108 (set_attr "prefix_data16" "0")
7109 (set_attr "prefix" "maybe_vex")
7110 (set_attr "mode" "V2DF")])
7111
;; V2SF -> V2DF widening for 64-bit vectors emulated in SSE registers.
7112 (define_insn "extendv2sfv2df2"
7113 [(set (match_operand:V2DF 0 "register_operand" "=v")
7114 (float_extend:V2DF
7115 (match_operand:V2SF 1 "register_operand" "v")))]
7116 "TARGET_MMX_WITH_SSE"
7117 "%vcvtps2pd\t{%1, %0|%0, %1}"
7118 [(set_attr "type" "ssecvt")
7119 (set_attr "amdfam10_decode" "direct")
7120 (set_attr "athlon_decode" "double")
7121 (set_attr "bdver1_decode" "double")
7122 (set_attr "prefix_data16" "0")
7123 (set_attr "prefix" "maybe_vex")
7124 (set_attr "mode" "V2DF")])
7125
;; vec_unpacks_{hi,lo}: widen the high/low half of an SF vector to DF.
;; v4sf hi: first move elements 2,3 of operand 1 into the low half of a
;; scratch (via the vec_concat/vec_select shuffle), then widen that low half.
7126 (define_expand "vec_unpacks_hi_v4sf"
7127 [(set (match_dup 2)
7128 (vec_select:V4SF
7129 (vec_concat:V8SF
7130 (match_dup 2)
7131 (match_operand:V4SF 1 "vector_operand"))
7132 (parallel [(const_int 6) (const_int 7)
7133 (const_int 2) (const_int 3)])))
7134 (set (match_operand:V2DF 0 "register_operand")
7135 (float_extend:V2DF
7136 (vec_select:V2SF
7137 (match_dup 2)
7138 (parallel [(const_int 0) (const_int 1)]))))]
7139 "TARGET_SSE2"
7140 "operands[2] = gen_reg_rtx (V4SFmode);")
7141
;; v8sf hi: extract elements 4..7 into a V4SF scratch, then widen to V4DF.
7142 (define_expand "vec_unpacks_hi_v8sf"
7143 [(set (match_dup 2)
7144 (vec_select:V4SF
7145 (match_operand:V8SF 1 "register_operand")
7146 (parallel [(const_int 4) (const_int 5)
7147 (const_int 6) (const_int 7)])))
7148 (set (match_operand:V4DF 0 "register_operand")
7149 (float_extend:V4DF
7150 (match_dup 2)))]
7151 "TARGET_AVX"
7152 "operands[2] = gen_reg_rtx (V4SFmode);")
7153
;; v16sf hi: extract elements 8..15 into a V8SF scratch, then widen to V8DF.
7154 (define_expand "vec_unpacks_hi_v16sf"
7155 [(set (match_dup 2)
7156 (vec_select:V8SF
7157 (match_operand:V16SF 1 "register_operand")
7158 (parallel [(const_int 8) (const_int 9)
7159 (const_int 10) (const_int 11)
7160 (const_int 12) (const_int 13)
7161 (const_int 14) (const_int 15)])))
7162 (set (match_operand:V8DF 0 "register_operand")
7163 (float_extend:V8DF
7164 (match_dup 2)))]
7165 "TARGET_AVX512F"
7166 "operands[2] = gen_reg_rtx (V8SFmode);")
7167
;; lo variants match the cvtps2pd insns above directly — no shuffle needed.
7168 (define_expand "vec_unpacks_lo_v4sf"
7169 [(set (match_operand:V2DF 0 "register_operand")
7170 (float_extend:V2DF
7171 (vec_select:V2SF
7172 (match_operand:V4SF 1 "vector_operand")
7173 (parallel [(const_int 0) (const_int 1)]))))]
7174 "TARGET_SSE2")
7175
7176 (define_expand "vec_unpacks_lo_v8sf"
7177 [(set (match_operand:V4DF 0 "register_operand")
7178 (float_extend:V4DF
7179 (vec_select:V4SF
7180 (match_operand:V8SF 1 "nonimmediate_operand")
7181 (parallel [(const_int 0) (const_int 1)
7182 (const_int 2) (const_int 3)]))))]
7183 "TARGET_AVX")
7184
;; Maps an integer vector mode to the FP vector mode produced by widening
;; half of its elements and converting them to float.
7185 (define_mode_attr sseunpackfltmode
7186 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
7187 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
7188
;; The four expanders below implement unpack-and-convert-to-float for
;; narrow integer vectors: widen the high/low half via the integer
;; vec_unpack pattern, then emit a FLOAT conversion of the widened value.
;; For the unsigned variants the unpacked values are nonnegative, so a
;; signed FLOAT of the zero-extended half is exact.
7189 (define_expand "vec_unpacks_float_hi_<mode>"
7190 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7191 (match_operand:VI2_AVX512F 1 "register_operand")]
7192 "TARGET_SSE2"
7193 {
7194 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7195
7196 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
7197 emit_insn (gen_rtx_SET (operands[0],
7198 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
7199 DONE;
7200 })
7201
7202 (define_expand "vec_unpacks_float_lo_<mode>"
7203 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7204 (match_operand:VI2_AVX512F 1 "register_operand")]
7205 "TARGET_SSE2"
7206 {
7207 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7208
7209 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
7210 emit_insn (gen_rtx_SET (operands[0],
7211 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
7212 DONE;
7213 })
7214
7215 (define_expand "vec_unpacku_float_hi_<mode>"
7216 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7217 (match_operand:VI2_AVX512F 1 "register_operand")]
7218 "TARGET_SSE2"
7219 {
7220 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7221
7222 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
7223 emit_insn (gen_rtx_SET (operands[0],
7224 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
7225 DONE;
7226 })
7227
7228 (define_expand "vec_unpacku_float_lo_<mode>"
7229 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7230 (match_operand:VI2_AVX512F 1 "register_operand")]
7231 "TARGET_SSE2"
7232 {
7233 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7234
7235 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
7236 emit_insn (gen_rtx_SET (operands[0],
7237 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
7238 DONE;
7239 })
7240
;; Signed SI -> DF unpack-and-convert.  hi variants first move the high
;; elements into the low half of a scratch, then convert that low half;
;; lo variants convert the low elements directly.
7241 (define_expand "vec_unpacks_float_hi_v4si"
7242 [(set (match_dup 2)
7243 (vec_select:V4SI
7244 (match_operand:V4SI 1 "vector_operand")
7245 (parallel [(const_int 2) (const_int 3)
7246 (const_int 2) (const_int 3)])))
7247 (set (match_operand:V2DF 0 "register_operand")
7248 (float:V2DF
7249 (vec_select:V2SI
7250 (match_dup 2)
7251 (parallel [(const_int 0) (const_int 1)]))))]
7252 "TARGET_SSE2"
7253 "operands[2] = gen_reg_rtx (V4SImode);")
7254
7255 (define_expand "vec_unpacks_float_lo_v4si"
7256 [(set (match_operand:V2DF 0 "register_operand")
7257 (float:V2DF
7258 (vec_select:V2SI
7259 (match_operand:V4SI 1 "vector_operand")
7260 (parallel [(const_int 0) (const_int 1)]))))]
7261 "TARGET_SSE2")
7262
7263 (define_expand "vec_unpacks_float_hi_v8si"
7264 [(set (match_dup 2)
7265 (vec_select:V4SI
7266 (match_operand:V8SI 1 "vector_operand")
7267 (parallel [(const_int 4) (const_int 5)
7268 (const_int 6) (const_int 7)])))
7269 (set (match_operand:V4DF 0 "register_operand")
7270 (float:V4DF
7271 (match_dup 2)))]
7272 "TARGET_AVX"
7273 "operands[2] = gen_reg_rtx (V4SImode);")
7274
7275 (define_expand "vec_unpacks_float_lo_v8si"
7276 [(set (match_operand:V4DF 0 "register_operand")
7277 (float:V4DF
7278 (vec_select:V4SI
7279 (match_operand:V8SI 1 "nonimmediate_operand")
7280 (parallel [(const_int 0) (const_int 1)
7281 (const_int 2) (const_int 3)]))))]
7282 "TARGET_AVX")
7283
7284 (define_expand "vec_unpacks_float_hi_v16si"
7285 [(set (match_dup 2)
7286 (vec_select:V8SI
7287 (match_operand:V16SI 1 "nonimmediate_operand")
7288 (parallel [(const_int 8) (const_int 9)
7289 (const_int 10) (const_int 11)
7290 (const_int 12) (const_int 13)
7291 (const_int 14) (const_int 15)])))
7292 (set (match_operand:V8DF 0 "register_operand")
7293 (float:V8DF
7294 (match_dup 2)))]
7295 "TARGET_AVX512F"
7296 "operands[2] = gen_reg_rtx (V8SImode);")
7297
7298 (define_expand "vec_unpacks_float_lo_v16si"
7299 [(set (match_operand:V8DF 0 "register_operand")
7300 (float:V8DF
7301 (vec_select:V8SI
7302 (match_operand:V16SI 1 "nonimmediate_operand")
7303 (parallel [(const_int 0) (const_int 1)
7304 (const_int 2) (const_int 3)
7305 (const_int 4) (const_int 5)
7306 (const_int 6) (const_int 7)]))))]
7307 "TARGET_AVX512F")
7308
;; Unsigned SI -> DF conversion without a native instruction: convert as
;; signed, then fix up lanes that came out negative (inputs >= 2^31) by
;; adding 2^32 (operand 4, built via real_ldexp from dconst1).  The fixup
;; is a compare-below-zero (operand 3), AND with 2^32, then add.
7309 (define_expand "vec_unpacku_float_hi_v4si"
7310 [(set (match_dup 5)
7311 (vec_select:V4SI
7312 (match_operand:V4SI 1 "vector_operand")
7313 (parallel [(const_int 2) (const_int 3)
7314 (const_int 2) (const_int 3)])))
7315 (set (match_dup 6)
7316 (float:V2DF
7317 (vec_select:V2SI
7318 (match_dup 5)
7319 (parallel [(const_int 0) (const_int 1)]))))
7320 (set (match_dup 7)
7321 (lt:V2DF (match_dup 6) (match_dup 3)))
7322 (set (match_dup 8)
7323 (and:V2DF (match_dup 7) (match_dup 4)))
7324 (set (match_operand:V2DF 0 "register_operand")
7325 (plus:V2DF (match_dup 6) (match_dup 8)))]
7326 "TARGET_SSE2"
7327 {
7328 REAL_VALUE_TYPE TWO32r;
7329 rtx x;
7330 int i;
7331
7332 real_ldexp (&TWO32r, &dconst1, 32);
7333 x = const_double_from_real_value (TWO32r, DFmode);
7334
7335 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7336 operands[4] = force_reg (V2DFmode,
7337 ix86_build_const_vector (V2DFmode, 1, x));
7338
7339 operands[5] = gen_reg_rtx (V4SImode);
7340
7341 for (i = 6; i < 9; i++)
7342 operands[i] = gen_reg_rtx (V2DFmode);
7343 })
7344
;; Same signed-convert-then-fixup idiom for the low two elements; no
;; initial shuffle is needed.
7345 (define_expand "vec_unpacku_float_lo_v4si"
7346 [(set (match_dup 5)
7347 (float:V2DF
7348 (vec_select:V2SI
7349 (match_operand:V4SI 1 "vector_operand")
7350 (parallel [(const_int 0) (const_int 1)]))))
7351 (set (match_dup 6)
7352 (lt:V2DF (match_dup 5) (match_dup 3)))
7353 (set (match_dup 7)
7354 (and:V2DF (match_dup 6) (match_dup 4)))
7355 (set (match_operand:V2DF 0 "register_operand")
7356 (plus:V2DF (match_dup 5) (match_dup 7)))]
7357 "TARGET_SSE2"
7358 {
7359 REAL_VALUE_TYPE TWO32r;
7360 rtx x;
7361 int i;
7362
7363 real_ldexp (&TWO32r, &dconst1, 32);
7364 x = const_double_from_real_value (TWO32r, DFmode);
7365
7366 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7367 operands[4] = force_reg (V2DFmode,
7368 ix86_build_const_vector (V2DFmode, 1, x));
7369
7370 for (i = 5; i < 8; i++)
7371 operands[i] = gen_reg_rtx (V2DFmode);
7372 })
7373
;; 256-bit unsigned hi-half SI -> DF: extract the high V4SI, convert as
;; signed, then add 2^32 (tmp[1]) to lanes that converted negative
;; (compare-LT-zero mask in tmp[3], ANDed in tmp[4]).
7374 (define_expand "vec_unpacku_float_hi_v8si"
7375 [(match_operand:V4DF 0 "register_operand")
7376 (match_operand:V8SI 1 "register_operand")]
7377 "TARGET_AVX"
7378 {
7379 REAL_VALUE_TYPE TWO32r;
7380 rtx x, tmp[6];
7381 int i;
7382
7383 real_ldexp (&TWO32r, &dconst1, 32);
7384 x = const_double_from_real_value (TWO32r, DFmode);
7385
7386 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7387 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7388 tmp[5] = gen_reg_rtx (V4SImode);
7389
7390 for (i = 2; i < 5; i++)
7391 tmp[i] = gen_reg_rtx (V4DFmode);
7392 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
7393 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
7394 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7395 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7396 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
7397 DONE;
7398 })
7399
;; 512-bit hi-half version: the fixup uses a k-mask compare and a single
;; merge-masked add of 2^32 instead of the compare/and/add sequence.
7400 (define_expand "vec_unpacku_float_hi_v16si"
7401 [(match_operand:V8DF 0 "register_operand")
7402 (match_operand:V16SI 1 "register_operand")]
7403 "TARGET_AVX512F"
7404 {
7405 REAL_VALUE_TYPE TWO32r;
7406 rtx k, x, tmp[4];
7407
7408 real_ldexp (&TWO32r, &dconst1, 32);
7409 x = const_double_from_real_value (TWO32r, DFmode);
7410
7411 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7412 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7413 tmp[2] = gen_reg_rtx (V8DFmode);
7414 tmp[3] = gen_reg_rtx (V8SImode);
7415 k = gen_reg_rtx (QImode);
7416
7417 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
7418 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
7419 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7420 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7421 emit_move_insn (operands[0], tmp[2]);
7422 DONE;
7423 })
7424
;; Low-half counterparts of the two expanders above: the low elements are
;; converted directly via the cvtdq2pd patterns, then the same add-2^32
;; fixup is applied to lanes that converted negative.
7425 (define_expand "vec_unpacku_float_lo_v8si"
7426 [(match_operand:V4DF 0 "register_operand")
7427 (match_operand:V8SI 1 "nonimmediate_operand")]
7428 "TARGET_AVX"
7429 {
7430 REAL_VALUE_TYPE TWO32r;
7431 rtx x, tmp[5];
7432 int i;
7433
7434 real_ldexp (&TWO32r, &dconst1, 32);
7435 x = const_double_from_real_value (TWO32r, DFmode);
7436
7437 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7438 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7439
7440 for (i = 2; i < 5; i++)
7441 tmp[i] = gen_reg_rtx (V4DFmode);
7442 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
7443 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7444 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7445 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
7446 DONE;
7447 })
7448
;; 512-bit low-half version, using a k-mask compare and merge-masked add.
7449 (define_expand "vec_unpacku_float_lo_v16si"
7450 [(match_operand:V8DF 0 "register_operand")
7451 (match_operand:V16SI 1 "nonimmediate_operand")]
7452 "TARGET_AVX512F"
7453 {
7454 REAL_VALUE_TYPE TWO32r;
7455 rtx k, x, tmp[3];
7456
7457 real_ldexp (&TWO32r, &dconst1, 32);
7458 x = const_double_from_real_value (TWO32r, DFmode);
7459
7460 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7461 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7462 tmp[2] = gen_reg_rtx (V8DFmode);
7463 k = gen_reg_rtx (QImode);
7464
7465 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7466 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7467 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7468 emit_move_insn (operands[0], tmp[2]);
7469 DONE;
7470 })
7471
;; vec_pack_trunc: narrow two DF vectors and concatenate into one SF
;; vector.  Generic 256/512-bit form: truncate each input into a scratch,
;; then vec_concat the halves.
7472 (define_expand "vec_pack_trunc_<mode>"
7473 [(set (match_dup 3)
7474 (float_truncate:<sf2dfmode>
7475 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7476 (set (match_dup 4)
7477 (float_truncate:<sf2dfmode>
7478 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7479 (set (match_operand:<ssePSmode> 0 "register_operand")
7480 (vec_concat:<ssePSmode>
7481 (match_dup 3)
7482 (match_dup 4)))]
7483 "TARGET_AVX"
7484 {
7485 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7486 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
7487 })
7488
;; V2DF form: on AVX (when 256-bit ops are preferred and speed matters)
;; concatenate the inputs to V4DF and use one vcvtpd2ps256; otherwise do
;; two cvtpd2ps and combine the low halves with movlhps.
7489 (define_expand "vec_pack_trunc_v2df"
7490 [(match_operand:V4SF 0 "register_operand")
7491 (match_operand:V2DF 1 "vector_operand")
7492 (match_operand:V2DF 2 "vector_operand")]
7493 "TARGET_SSE2"
7494 {
7495 rtx tmp0, tmp1;
7496
7497 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7498 {
7499 tmp0 = gen_reg_rtx (V4DFmode);
7500 tmp1 = force_reg (V2DFmode, operands[1]);
7501
7502 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7503 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7504 }
7505 else
7506 {
7507 tmp0 = gen_reg_rtx (V4SFmode);
7508 tmp1 = gen_reg_rtx (V4SFmode);
7509
7510 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7511 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7512 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
7513 }
7514 DONE;
7515 })
7516
;; vec_pack_sfix_trunc: truncating DF -> signed SI conversion of two
;; inputs, results concatenated.  512-bit: two cvttpd2dq + concat.
7517 (define_expand "vec_pack_sfix_trunc_v8df"
7518 [(match_operand:V16SI 0 "register_operand")
7519 (match_operand:V8DF 1 "nonimmediate_operand")
7520 (match_operand:V8DF 2 "nonimmediate_operand")]
7521 "TARGET_AVX512F"
7522 {
7523 rtx r1, r2;
7524
7525 r1 = gen_reg_rtx (V8SImode);
7526 r2 = gen_reg_rtx (V8SImode);
7527
7528 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7529 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7530 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7531 DONE;
7532 })
7533
;; 256-bit version of the same scheme.
7534 (define_expand "vec_pack_sfix_trunc_v4df"
7535 [(match_operand:V8SI 0 "register_operand")
7536 (match_operand:V4DF 1 "nonimmediate_operand")
7537 (match_operand:V4DF 2 "nonimmediate_operand")]
7538 "TARGET_AVX"
7539 {
7540 rtx r1, r2;
7541
7542 r1 = gen_reg_rtx (V4SImode);
7543 r2 = gen_reg_rtx (V4SImode);
7544
7545 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7546 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7547 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7548 DONE;
7549 })
7550
;; V2DF version: prefer one 256-bit cvttpd2dq when profitable; otherwise
;; two 128-bit cvttpd2dq, combining the low DI halves with punpcklqdq.
7551 (define_expand "vec_pack_sfix_trunc_v2df"
7552 [(match_operand:V4SI 0 "register_operand")
7553 (match_operand:V2DF 1 "vector_operand")
7554 (match_operand:V2DF 2 "vector_operand")]
7555 "TARGET_SSE2"
7556 {
7557 rtx tmp0, tmp1, tmp2;
7558
7559 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7560 {
7561 tmp0 = gen_reg_rtx (V4DFmode);
7562 tmp1 = force_reg (V2DFmode, operands[1]);
7563
7564 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7565 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7566 }
7567 else
7568 {
7569 tmp0 = gen_reg_rtx (V4SImode);
7570 tmp1 = gen_reg_rtx (V4SImode);
7571 tmp2 = gen_reg_rtx (V2DImode);
7572
7573 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7574 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7575 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7576 gen_lowpart (V2DImode, tmp0),
7577 gen_lowpart (V2DImode, tmp1)));
7578 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7579 }
7580 DONE;
7581 })
7582
;; Maps a DF vector mode to the SI vector mode holding twice as many
;; elements (the packed-conversion result mode).
7583 (define_mode_attr ssepackfltmode
7584 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
7585
;; Unsigned truncating DF -> SI pack.  V8DF has a native unsigned
;; conversion (vcvttpd2udq); for the narrower modes the inputs are first
;; adjusted into signed range (ix86_expand_adjust_ufix_to_sfix_si, which
;; also returns per-lane correction words in tmp[2]/tmp[3]), converted via
;; the signed pack, and the correction is applied with a final XOR.
7586 (define_expand "vec_pack_ufix_trunc_<mode>"
7587 [(match_operand:<ssepackfltmode> 0 "register_operand")
7588 (match_operand:VF2 1 "register_operand")
7589 (match_operand:VF2 2 "register_operand")]
7590 "TARGET_SSE2"
7591 {
7592 if (<MODE>mode == V8DFmode)
7593 {
7594 rtx r1, r2;
7595
7596 r1 = gen_reg_rtx (V8SImode);
7597 r2 = gen_reg_rtx (V8SImode);
7598
7599 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7600 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7601 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7602 }
7603 else
7604 {
7605 rtx tmp[7];
7606 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7607 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7608 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7609 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7610 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7611 {
7612 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7613 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7614 }
7615 else
7616 {
;; Without AVX2 there is no cross-lane V8SI even/odd extract; do it as a
;; V8SF shuffle and bit-cast back.
7617 tmp[5] = gen_reg_rtx (V8SFmode);
7618 ix86_expand_vec_extract_even_odd (tmp[5],
7619 gen_lowpart (V8SFmode, tmp[2]),
7620 gen_lowpart (V8SFmode, tmp[3]), 0);
7621 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7622 }
7623 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7624 operands[0], 0, OPTAB_DIRECT);
7625 if (tmp[6] != operands[0])
7626 emit_move_insn (operands[0], tmp[6]);
7627 }
7628
7629 DONE;
7630 })
7631
;; vec_pack_sfix: round-to-nearest DF -> signed SI conversion (cvtpd2dq,
;; not the truncating cvttpd2dq) of two inputs, results concatenated.
7632 (define_expand "avx512f_vec_pack_sfix_v8df"
7633 [(match_operand:V16SI 0 "register_operand")
7634 (match_operand:V8DF 1 "nonimmediate_operand")
7635 (match_operand:V8DF 2 "nonimmediate_operand")]
7636 "TARGET_AVX512F"
7637 {
7638 rtx r1, r2;
7639
7640 r1 = gen_reg_rtx (V8SImode);
7641 r2 = gen_reg_rtx (V8SImode);
7642
7643 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7644 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7645 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7646 DONE;
7647 })
7648
;; 256-bit version of the same scheme.
7649 (define_expand "vec_pack_sfix_v4df"
7650 [(match_operand:V8SI 0 "register_operand")
7651 (match_operand:V4DF 1 "nonimmediate_operand")
7652 (match_operand:V4DF 2 "nonimmediate_operand")]
7653 "TARGET_AVX"
7654 {
7655 rtx r1, r2;
7656
7657 r1 = gen_reg_rtx (V4SImode);
7658 r2 = gen_reg_rtx (V4SImode);
7659
7660 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7661 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7662 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7663 DONE;
7664 })
7665
;; V2DF version: one 256-bit cvtpd2dq when profitable, else two 128-bit
;; cvtpd2dq with the low DI halves combined via punpcklqdq.
7666 (define_expand "vec_pack_sfix_v2df"
7667 [(match_operand:V4SI 0 "register_operand")
7668 (match_operand:V2DF 1 "vector_operand")
7669 (match_operand:V2DF 2 "vector_operand")]
7670 "TARGET_SSE2"
7671 {
7672 rtx tmp0, tmp1, tmp2;
7673
7674 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7675 {
7676 tmp0 = gen_reg_rtx (V4DFmode);
7677 tmp1 = force_reg (V2DFmode, operands[1]);
7678
7679 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7680 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7681 }
7682 else
7683 {
7684 tmp0 = gen_reg_rtx (V4SImode);
7685 tmp1 = gen_reg_rtx (V4SImode);
7686 tmp2 = gen_reg_rtx (V2DImode);
7687
7688 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7689 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7690 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7691 gen_lowpart (V2DImode, tmp0),
7692 gen_lowpart (V2DImode, tmp1)));
7693 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7694 }
7695 DONE;
7696 })
7697
7698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7699 ;;
7700 ;; Parallel single-precision floating point element swizzling
7701 ;;
7702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7703
;; sse_movhlps_exp: expander wrapper around sse_movhlps.  Legitimizes the
;; operand combination with ix86_fixup_binary_operands (which may return a
;; fresh destination register) and copies back to operands[0] if it did.
(define_expand "sse_movhlps_exp"
 [(set (match_operand:V4SF 0 "nonimmediate_operand")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "nonimmediate_operand")
 (match_operand:V4SF 2 "nonimmediate_operand"))
 (parallel [(const_int 6)
 (const_int 7)
 (const_int 2)
 (const_int 3)])))]
 "TARGET_SSE"
{
 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));

 /* Fix up the destination if needed.  */
 if (dst != operands[0])
 emit_move_insn (operands[0], dst);

 DONE;
})
7726
;; MOVHLPS: dest = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of
;; the concat).  The memory alternatives express the same selection as a
;; movlps load from the high half of a memory op2 (%H2), or as a movhps
;; store of op2's high half when the destination is memory.
(define_insn "sse_movhlps"
 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
 (parallel [(const_int 6)
 (const_int 7)
 (const_int 2)
 (const_int 3)])))]
 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 "@
 movhlps\t{%2, %0|%0, %2}
 vmovhlps\t{%2, %1, %0|%0, %1, %2}
 movlps\t{%H2, %0|%0, %H2}
 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
 %vmovhps\t{%2, %0|%q0, %2}"
 [(set_attr "isa" "noavx,avx,noavx,avx,*")
 (set_attr "type" "ssemov")
 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7748
;; sse_movlhps_exp: expander wrapper around sse_movlhps.  Legitimizes the
;; operand combination with ix86_fixup_binary_operands and copies the
;; result back to operands[0] if a temporary destination was substituted.
(define_expand "sse_movlhps_exp"
 [(set (match_operand:V4SF 0 "nonimmediate_operand")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "nonimmediate_operand")
 (match_operand:V4SF 2 "nonimmediate_operand"))
 (parallel [(const_int 0)
 (const_int 1)
 (const_int 4)
 (const_int 5)])))]
 "TARGET_SSE"
{
 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));

 /* Fix up the destination if needed.  */
 if (dst != operands[0])
 emit_move_insn (operands[0], dst);

 DONE;
})
7771
;; MOVLHPS: dest = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of
;; the concat).  Memory alternatives use movhps (load op2 into the high
;; half) and movlps to a %H0 memory destination (store op2's low half
;; into the high half of a memory dest).
(define_insn "sse_movlhps"
 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
 (parallel [(const_int 0)
 (const_int 1)
 (const_int 4)
 (const_int 5)])))]
 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
 "@
 movlhps\t{%2, %0|%0, %2}
 vmovlhps\t{%2, %1, %0|%0, %1, %2}
 movhps\t{%2, %0|%0, %q2}
 vmovhps\t{%2, %1, %0|%0, %1, %q2}
 %vmovlps\t{%2, %H0|%H0, %2}"
 [(set_attr "isa" "noavx,avx,noavx,avx,*")
 (set_attr "type" "ssemov")
 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7793
;; 512-bit vunpckhps, optionally masked: interleaves the high float pairs
;; of each 128-bit lane of the two sources (note the lane-local indices
;; below, e.g. 2/18, 3/19 for lane 0).
(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
 [(set (match_operand:V16SF 0 "register_operand" "=v")
 (vec_select:V16SF
 (vec_concat:V32SF
 (match_operand:V16SF 1 "register_operand" "v")
 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
 (parallel [(const_int 2) (const_int 18)
 (const_int 3) (const_int 19)
 (const_int 6) (const_int 22)
 (const_int 7) (const_int 23)
 (const_int 10) (const_int 26)
 (const_int 11) (const_int 27)
 (const_int 14) (const_int 30)
 (const_int 15) (const_int 31)])))]
 "TARGET_AVX512F"
 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "type" "sselog")
 (set_attr "prefix" "evex")
 (set_attr "mode" "V16SF")])
7813
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps (maskable when AVX512VL is available): interleaves
;; the high float pairs within each 128-bit lane.
(define_insn "avx_unpckhps256<mask_name>"
 [(set (match_operand:V8SF 0 "register_operand" "=v")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "register_operand" "v")
 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
 (parallel [(const_int 2) (const_int 10)
 (const_int 3) (const_int 11)
 (const_int 6) (const_int 14)
 (const_int 7) (const_int 15)])))]
 "TARGET_AVX && <mask_avx512vl_condition>"
 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "type" "sselog")
 (set_attr "prefix" "vex")
 (set_attr "mode" "V8SF")])
7830
;; Full (cross-lane) V8SF high interleave.  AVX unpck insns shuffle only
;; within 128-bit lanes, so first build both in-lane interleaves (low in
;; match_dup 3, high in match_dup 4), then take the high lane of each
;; (final vec_select: indices 4-7 and 12-15 of their concat).
(define_expand "vec_interleave_highv8sf"
 [(set (match_dup 3)
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "register_operand")
 (match_operand:V8SF 2 "nonimmediate_operand"))
 (parallel [(const_int 0) (const_int 8)
 (const_int 1) (const_int 9)
 (const_int 4) (const_int 12)
 (const_int 5) (const_int 13)])))
 (set (match_dup 4)
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_dup 1)
 (match_dup 2))
 (parallel [(const_int 2) (const_int 10)
 (const_int 3) (const_int 11)
 (const_int 6) (const_int 14)
 (const_int 7) (const_int 15)])))
 (set (match_operand:V8SF 0 "register_operand")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_dup 3)
 (match_dup 4))
 (parallel [(const_int 4) (const_int 5)
 (const_int 6) (const_int 7)
 (const_int 12) (const_int 13)
 (const_int 14) (const_int 15)])))]
 "TARGET_AVX"
{
 operands[3] = gen_reg_rtx (V8SFmode);
 operands[4] = gen_reg_rtx (V8SFmode);
})
7864
;; 128-bit unpckhps: dest = { op1[2], op2[2], op1[3], op2[3] }
;; (indices 2,6,3,7 of the concat).  Maskable under AVX512VL.
(define_insn "vec_interleave_highv4sf<mask_name>"
 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "register_operand" "0,v")
 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
 (parallel [(const_int 2) (const_int 6)
 (const_int 3) (const_int 7)])))]
 "TARGET_SSE && <mask_avx512vl_condition>"
 "@
 unpckhps\t{%2, %0|%0, %2}
 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "isa" "noavx,avx")
 (set_attr "type" "sselog")
 (set_attr "prefix" "orig,vex")
 (set_attr "mode" "V4SF")])
7881
;; 512-bit vunpcklps, optionally masked: interleaves the low float pairs
;; of each 128-bit lane of the two sources (lane-local indices, e.g.
;; 0/16, 1/17 for lane 0).
(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
 [(set (match_operand:V16SF 0 "register_operand" "=v")
 (vec_select:V16SF
 (vec_concat:V32SF
 (match_operand:V16SF 1 "register_operand" "v")
 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
 (parallel [(const_int 0) (const_int 16)
 (const_int 1) (const_int 17)
 (const_int 4) (const_int 20)
 (const_int 5) (const_int 21)
 (const_int 8) (const_int 24)
 (const_int 9) (const_int 25)
 (const_int 12) (const_int 28)
 (const_int 13) (const_int 29)])))]
 "TARGET_AVX512F"
 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "type" "sselog")
 (set_attr "prefix" "evex")
 (set_attr "mode" "V16SF")])
7901
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps (maskable under AVX512VL): interleaves the low
;; float pairs within each 128-bit lane.
(define_insn "avx_unpcklps256<mask_name>"
 [(set (match_operand:V8SF 0 "register_operand" "=v")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "register_operand" "v")
 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
 (parallel [(const_int 0) (const_int 8)
 (const_int 1) (const_int 9)
 (const_int 4) (const_int 12)
 (const_int 5) (const_int 13)])))]
 "TARGET_AVX && <mask_avx512vl_condition>"
 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "type" "sselog")
 (set_attr "prefix" "vex")
 (set_attr "mode" "V8SF")])
7918
;; Masked 128-bit vunpcklps (AVX512VL): the unpack result is merged with
;; operand 3 under mask operand 4.  The "0C" constraint on operand 3
;; allows either merge-masking (op3 == dest) or zero-masking (op3 == 0;
;; %N3 then emits the {z} modifier).
(define_insn "unpcklps128_mask"
 [(set (match_operand:V4SF 0 "register_operand" "=v")
 (vec_merge:V4SF
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "register_operand" "v")
 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
 (parallel [(const_int 0) (const_int 4)
 (const_int 1) (const_int 5)]))
 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
 (match_operand:QI 4 "register_operand" "Yk")))]
 "TARGET_AVX512VL"
 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
 [(set_attr "type" "sselog")
 (set_attr "prefix" "evex")
 (set_attr "mode" "V4SF")])
7935
;; Full (cross-lane) V8SF low interleave.  Mirror image of
;; vec_interleave_highv8sf: build both in-lane interleaves, then take
;; the low lane of each (final vec_select: indices 0-3 and 8-11).
(define_expand "vec_interleave_lowv8sf"
 [(set (match_dup 3)
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "register_operand")
 (match_operand:V8SF 2 "nonimmediate_operand"))
 (parallel [(const_int 0) (const_int 8)
 (const_int 1) (const_int 9)
 (const_int 4) (const_int 12)
 (const_int 5) (const_int 13)])))
 (set (match_dup 4)
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_dup 1)
 (match_dup 2))
 (parallel [(const_int 2) (const_int 10)
 (const_int 3) (const_int 11)
 (const_int 6) (const_int 14)
 (const_int 7) (const_int 15)])))
 (set (match_operand:V8SF 0 "register_operand")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_dup 3)
 (match_dup 4))
 (parallel [(const_int 0) (const_int 1)
 (const_int 2) (const_int 3)
 (const_int 8) (const_int 9)
 (const_int 10) (const_int 11)])))]
 "TARGET_AVX"
{
 operands[3] = gen_reg_rtx (V8SFmode);
 operands[4] = gen_reg_rtx (V8SFmode);
})
7969
;; 128-bit unpcklps: dest = { op1[0], op2[0], op1[1], op2[1] }
;; (indices 0,4,1,5 of the concat).
(define_insn "vec_interleave_lowv4sf"
 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "register_operand" "0,v")
 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
 (parallel [(const_int 0) (const_int 4)
 (const_int 1) (const_int 5)])))]
 "TARGET_SSE"
 "@
 unpcklps\t{%2, %0|%0, %2}
 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
 [(set_attr "isa" "noavx,avx")
 (set_attr "type" "sselog")
 (set_attr "prefix" "orig,maybe_evex")
 (set_attr "mode" "V4SF")])
7986
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: duplicates each odd-indexed element of op1
;; (result element pairs come from source elements 1,3,5,7; op1 is
;; concatenated with itself purely to match the shared RTL shape).
(define_insn "avx_movshdup256<mask_name>"
 [(set (match_operand:V8SF 0 "register_operand" "=v")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
 (match_dup 1))
 (parallel [(const_int 1) (const_int 1)
 (const_int 3) (const_int 3)
 (const_int 5) (const_int 5)
 (const_int 7) (const_int 7)])))]
 "TARGET_AVX && <mask_avx512vl_condition>"
 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
 [(set_attr "type" "sse")
 (set_attr "prefix" "vex")
 (set_attr "mode" "V8SF")])
8004
;; 128-bit movshdup: dest = { op1[1], op1[1], op1[3], op1[3] } -- index 7
;; of the self-concat is element 3 of op1.
(define_insn "sse3_movshdup<mask_name>"
 [(set (match_operand:V4SF 0 "register_operand" "=v")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "vector_operand" "vBm")
 (match_dup 1))
 (parallel [(const_int 1)
 (const_int 1)
 (const_int 7)
 (const_int 7)])))]
 "TARGET_SSE3 && <mask_avx512vl_condition>"
 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
 [(set_attr "type" "sse")
 (set_attr "prefix_rep" "1")
 (set_attr "prefix" "maybe_vex")
 (set_attr "mode" "V4SF")])
8021
;; 512-bit vmovshdup, optionally masked: duplicates each odd-indexed
;; element of op1 (source elements 1,3,...,15).
(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
 [(set (match_operand:V16SF 0 "register_operand" "=v")
 (vec_select:V16SF
 (vec_concat:V32SF
 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
 (match_dup 1))
 (parallel [(const_int 1) (const_int 1)
 (const_int 3) (const_int 3)
 (const_int 5) (const_int 5)
 (const_int 7) (const_int 7)
 (const_int 9) (const_int 9)
 (const_int 11) (const_int 11)
 (const_int 13) (const_int 13)
 (const_int 15) (const_int 15)])))]
 "TARGET_AVX512F"
 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
 [(set_attr "type" "sse")
 (set_attr "prefix" "evex")
 (set_attr "mode" "V16SF")])
8041
;; 256-bit vmovsldup: duplicates each even-indexed element of op1
;; (source elements 0,2,4,6).
(define_insn "avx_movsldup256<mask_name>"
 [(set (match_operand:V8SF 0 "register_operand" "=v")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
 (match_dup 1))
 (parallel [(const_int 0) (const_int 0)
 (const_int 2) (const_int 2)
 (const_int 4) (const_int 4)
 (const_int 6) (const_int 6)])))]
 "TARGET_AVX && <mask_avx512vl_condition>"
 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
 [(set_attr "type" "sse")
 (set_attr "prefix" "vex")
 (set_attr "mode" "V8SF")])
8057
;; 128-bit movsldup: dest = { op1[0], op1[0], op1[2], op1[2] } -- index 6
;; of the self-concat is element 2 of op1.
(define_insn "sse3_movsldup<mask_name>"
 [(set (match_operand:V4SF 0 "register_operand" "=v")
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "vector_operand" "vBm")
 (match_dup 1))
 (parallel [(const_int 0)
 (const_int 0)
 (const_int 6)
 (const_int 6)])))]
 "TARGET_SSE3 && <mask_avx512vl_condition>"
 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
 [(set_attr "type" "sse")
 (set_attr "prefix_rep" "1")
 (set_attr "prefix" "maybe_vex")
 (set_attr "mode" "V4SF")])
8074
;; 512-bit vmovsldup, optionally masked: duplicates each even-indexed
;; element of op1 (source elements 0,2,...,14).
(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
 [(set (match_operand:V16SF 0 "register_operand" "=v")
 (vec_select:V16SF
 (vec_concat:V32SF
 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
 (match_dup 1))
 (parallel [(const_int 0) (const_int 0)
 (const_int 2) (const_int 2)
 (const_int 4) (const_int 4)
 (const_int 6) (const_int 6)
 (const_int 8) (const_int 8)
 (const_int 10) (const_int 10)
 (const_int 12) (const_int 12)
 (const_int 14) (const_int 14)])))]
 "TARGET_AVX512F"
 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
 [(set_attr "type" "sse")
 (set_attr "prefix" "evex")
 (set_attr "mode" "V16SF")])
8094
;; avx_shufps256: decode the 8-bit shufps immediate into eight explicit
;; element selectors for avx_shufps256_1.  Each 2-bit field of the
;; immediate is applied identically to both 128-bit lanes, hence the
;; +8/+12 (high-lane, second-source) and +4 (low-lane, second-source)
;; offsets on the decoded indices.
(define_expand "avx_shufps256<mask_expand4_name>"
 [(match_operand:V8SF 0 "register_operand")
 (match_operand:V8SF 1 "register_operand")
 (match_operand:V8SF 2 "nonimmediate_operand")
 (match_operand:SI 3 "const_int_operand")]
 "TARGET_AVX"
{
 int mask = INTVAL (operands[3]);
 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
 operands[1],
 operands[2],
 GEN_INT ((mask >> 0) & 3),
 GEN_INT ((mask >> 2) & 3),
 GEN_INT (((mask >> 4) & 3) + 8),
 GEN_INT (((mask >> 6) & 3) + 8),
 GEN_INT (((mask >> 0) & 3) + 4),
 GEN_INT (((mask >> 2) & 3) + 4),
 GEN_INT (((mask >> 4) & 3) + 12),
 GEN_INT (((mask >> 6) & 3) + 12)
 <mask_expand4_args>));
 DONE;
})
8117
;; One bit in mask selects 2 elements.
;; 256-bit vshufps with explicit per-element selectors.  The insn
;; condition requires the high-lane selectors (operands 7-10) to mirror
;; the low-lane ones (operands 3-6, offset by 4), since the hardware
;; applies one imm8 to both lanes; the output routine re-packs the
;; low-lane selectors into that imm8.
(define_insn "avx_shufps256_1<mask_name>"
 [(set (match_operand:V8SF 0 "register_operand" "=v")
 (vec_select:V8SF
 (vec_concat:V16SF
 (match_operand:V8SF 1 "register_operand" "v")
 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
 (parallel [(match_operand 3 "const_0_to_3_operand" )
 (match_operand 4 "const_0_to_3_operand" )
 (match_operand 5 "const_8_to_11_operand" )
 (match_operand 6 "const_8_to_11_operand" )
 (match_operand 7 "const_4_to_7_operand" )
 (match_operand 8 "const_4_to_7_operand" )
 (match_operand 9 "const_12_to_15_operand")
 (match_operand 10 "const_12_to_15_operand")])))]
 "TARGET_AVX
 && <mask_avx512vl_condition>
 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
 /* Rebuild the imm8: two bits per low-lane selector.  */
 int mask;
 mask = INTVAL (operands[3]);
 mask |= INTVAL (operands[4]) << 2;
 mask |= (INTVAL (operands[5]) - 8) << 4;
 mask |= (INTVAL (operands[6]) - 8) << 6;
 operands[3] = GEN_INT (mask);

 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
 [(set_attr "type" "sseshuf")
 (set_attr "length_immediate" "1")
 (set_attr "prefix" "<mask_prefix>")
 (set_attr "mode" "V8SF")])
8153
;; sse_shufps: decode the 8-bit shufps immediate into four explicit
;; element selectors for sse_shufps_v4sf (+4 marks second-source
;; elements in the concat).
(define_expand "sse_shufps<mask_expand4_name>"
 [(match_operand:V4SF 0 "register_operand")
 (match_operand:V4SF 1 "register_operand")
 (match_operand:V4SF 2 "vector_operand")
 (match_operand:SI 3 "const_int_operand")]
 "TARGET_SSE"
{
 int mask = INTVAL (operands[3]);
 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
 operands[1],
 operands[2],
 GEN_INT ((mask >> 0) & 3),
 GEN_INT ((mask >> 2) & 3),
 GEN_INT (((mask >> 4) & 3) + 4),
 GEN_INT (((mask >> 6) & 3) + 4)
 <mask_expand4_args>));
 DONE;
})
8172
;; Masked 128-bit vshufps (AVX512VL): shuffle result merged with
;; operand 7 under mask operand 8 ("0C" allows merge or zero masking).
;; The output routine re-packs the four explicit selectors into imm8.
(define_insn "sse_shufps_v4sf_mask"
 [(set (match_operand:V4SF 0 "register_operand" "=v")
 (vec_merge:V4SF
 (vec_select:V4SF
 (vec_concat:V8SF
 (match_operand:V4SF 1 "register_operand" "v")
 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
 (parallel [(match_operand 3 "const_0_to_3_operand")
 (match_operand 4 "const_0_to_3_operand")
 (match_operand 5 "const_4_to_7_operand")
 (match_operand 6 "const_4_to_7_operand")]))
 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
 (match_operand:QI 8 "register_operand" "Yk")))]
 "TARGET_AVX512VL"
{
 /* Rebuild the imm8 from the explicit selectors.  */
 int mask = 0;
 mask |= INTVAL (operands[3]) << 0;
 mask |= INTVAL (operands[4]) << 2;
 mask |= (INTVAL (operands[5]) - 4) << 4;
 mask |= (INTVAL (operands[6]) - 4) << 6;
 operands[3] = GEN_INT (mask);

 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
}
 [(set_attr "type" "sseshuf")
 (set_attr "length_immediate" "1")
 (set_attr "prefix" "evex")
 (set_attr "mode" "V4SF")])
8201
;; shufps for the 128-bit 4-element modes (VI4F_128 covers V4SF/V4SI):
;; dest = { op1[sel3], op1[sel4], op2[sel5-4], op2[sel6-4] }.  The
;; output routine re-packs the explicit selectors into the imm8.
(define_insn "sse_shufps_<mode>"
 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
 (vec_select:VI4F_128
 (vec_concat:<ssedoublevecmode>
 (match_operand:VI4F_128 1 "register_operand" "0,v")
 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
 (parallel [(match_operand 3 "const_0_to_3_operand")
 (match_operand 4 "const_0_to_3_operand")
 (match_operand 5 "const_4_to_7_operand")
 (match_operand 6 "const_4_to_7_operand")])))]
 "TARGET_SSE"
{
 /* Rebuild the imm8 from the explicit selectors.  */
 int mask = 0;
 mask |= INTVAL (operands[3]) << 0;
 mask |= INTVAL (operands[4]) << 2;
 mask |= (INTVAL (operands[5]) - 4) << 4;
 mask |= (INTVAL (operands[6]) - 4) << 6;
 operands[3] = GEN_INT (mask);

 switch (which_alternative)
 {
 case 0:
 return "shufps\t{%3, %2, %0|%0, %2, %3}";
 case 1:
 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
 default:
 gcc_unreachable ();
 }
}
 [(set_attr "isa" "noavx,avx")
 (set_attr "type" "sseshuf")
 (set_attr "length_immediate" "1")
 (set_attr "prefix" "orig,maybe_evex")
 (set_attr "mode" "V4SF")])
8236
;; Extract the high two floats (elements 2,3) of a V4SF: movhps store to
;; memory, movhlps register-to-register, or a movlps load from the high
;; half (%H1) of a memory source.
(define_insn "sse_storehps"
 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
 (vec_select:V2SF
 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
 (parallel [(const_int 2) (const_int 3)])))]
 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 "@
 %vmovhps\t{%1, %0|%q0, %1}
 %vmovhlps\t{%1, %d0|%d0, %1}
 %vmovlps\t{%H1, %d0|%d0, %H1}"
 [(set_attr "type" "ssemov")
 (set_attr "prefix" "maybe_vex")
 (set_attr "mode" "V2SF,V4SF,V2SF")])
8250
;; sse_loadhps_exp: expander wrapper around sse_loadhps (replace the high
;; half of op1 with op2).  Legitimizes the operands and copies back to
;; operands[0] if a temporary destination was substituted.
(define_expand "sse_loadhps_exp"
 [(set (match_operand:V4SF 0 "nonimmediate_operand")
 (vec_concat:V4SF
 (vec_select:V2SF
 (match_operand:V4SF 1 "nonimmediate_operand")
 (parallel [(const_int 0) (const_int 1)]))
 (match_operand:V2SF 2 "nonimmediate_operand")))]
 "TARGET_SSE"
{
 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));

 /* Fix up the destination if needed.  */
 if (dst != operands[0])
 emit_move_insn (operands[0], dst);

 DONE;
})
8270
;; Replace the high half of op1 with V2SF op2, keeping the low half:
;; movhps (memory op2), movlhps (register op2), or a movlps store of
;; op2 into the high half (%H0) of a memory destination.
(define_insn "sse_loadhps"
 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
 (vec_concat:V4SF
 (vec_select:V2SF
 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
 (parallel [(const_int 0) (const_int 1)]))
 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
 "TARGET_SSE"
 "@
 movhps\t{%2, %0|%0, %q2}
 vmovhps\t{%2, %1, %0|%0, %1, %q2}
 movlhps\t{%2, %0|%0, %2}
 vmovlhps\t{%2, %1, %0|%0, %1, %2}
 %vmovlps\t{%2, %H0|%H0, %2}"
 [(set_attr "isa" "noavx,avx,noavx,avx,*")
 (set_attr "type" "ssemov")
 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
8289
;; Extract the low two floats (elements 0,1) of a V4SF: movlps store,
;; movaps register copy (upper elements are don't-care in V2SF mode),
;; or movlps load from memory.
(define_insn "sse_storelps"
 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
 (vec_select:V2SF
 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
 (parallel [(const_int 0) (const_int 1)])))]
 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 "@
 %vmovlps\t{%1, %0|%q0, %1}
 %vmovaps\t{%1, %0|%0, %1}
 %vmovlps\t{%1, %d0|%d0, %q1}"
 [(set_attr "type" "ssemov")
 (set_attr "prefix" "maybe_vex")
 (set_attr "mode" "V2SF,V4SF,V2SF")])
8303
;; sse_loadlps_exp: expander wrapper around sse_loadlps (replace the low
;; half of op1 with op2).  Legitimizes the operands and copies back to
;; operands[0] if a temporary destination was substituted.
(define_expand "sse_loadlps_exp"
 [(set (match_operand:V4SF 0 "nonimmediate_operand")
 (vec_concat:V4SF
 (match_operand:V2SF 2 "nonimmediate_operand")
 (vec_select:V2SF
 (match_operand:V4SF 1 "nonimmediate_operand")
 (parallel [(const_int 2) (const_int 3)]))))]
 "TARGET_SSE"
{
 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));

 /* Fix up the destination if needed.  */
 if (dst != operands[0])
 emit_move_insn (operands[0], dst);

 DONE;
})
8323
;; Replace the low half of op1 with V2SF op2, keeping the high half:
;; register-to-register via shufps imm 0xe4 (selects 0,1 from op2 and
;; 2,3 from op1), movlps for a memory op2, or a movlps store of op2
;; into the low half of a memory destination.
(define_insn "sse_loadlps"
 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
 (vec_concat:V4SF
 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
 (vec_select:V2SF
 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
 (parallel [(const_int 2) (const_int 3)]))))]
 "TARGET_SSE"
 "@
 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
 movlps\t{%2, %0|%0, %q2}
 vmovlps\t{%2, %1, %0|%0, %1, %q2}
 %vmovlps\t{%2, %0|%q0, %2}"
 [(set_attr "isa" "noavx,avx,noavx,avx,*")
 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
 (set (attr "length_immediate")
 (if_then_else (eq_attr "alternative" "0,1")
 (const_string "1")
 (const_string "*")))
 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
8346
;; MOVSS register form: merge mask (const_int 1) takes element 0 from
;; op2 and elements 1-3 from op1.
(define_insn "sse_movss"
 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
 (vec_merge:V4SF
 (match_operand:V4SF 2 "register_operand" " x,v")
 (match_operand:V4SF 1 "register_operand" " 0,v")
 (const_int 1)))]
 "TARGET_SSE"
 "@
 movss\t{%2, %0|%0, %2}
 vmovss\t{%2, %1, %0|%0, %1, %2}"
 [(set_attr "isa" "noavx,avx")
 (set_attr "type" "ssemov")
 (set_attr "prefix" "orig,maybe_evex")
 (set_attr "mode" "SF")])
8361
;; Broadcast element 0 of a V4SF register to all elements of a 128- or
;; 256-bit single-precision vector (vbroadcastss, register source form
;; requires AVX2).
(define_insn "avx2_vec_dup<mode>"
 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
 (vec_duplicate:VF1_128_256
 (vec_select:SF
 (match_operand:V4SF 1 "register_operand" "v")
 (parallel [(const_int 0)]))))]
 "TARGET_AVX2"
 "vbroadcastss\t{%1, %0|%0, %1}"
 [(set_attr "type" "sselog1")
 (set_attr "prefix" "maybe_evex")
 (set_attr "mode" "<MODE>")])
8373
;; Broadcast element 0 of a V8SF register to all eight elements; %x1
;; names the low xmm part of the ymm source, which is all vbroadcastss
;; reads.
(define_insn "avx2_vec_dupv8sf_1"
 [(set (match_operand:V8SF 0 "register_operand" "=v")
 (vec_duplicate:V8SF
 (vec_select:SF
 (match_operand:V8SF 1 "register_operand" "v")
 (parallel [(const_int 0)]))))]
 "TARGET_AVX2"
 "vbroadcastss\t{%x1, %0|%0, %x1}"
 [(set_attr "type" "sselog1")
 (set_attr "prefix" "maybe_evex")
 (set_attr "mode" "V8SF")])
8385
;; Broadcast element 0 of a 512-bit FP register to all elements;
;; <bcstscalarsuff> selects the ss/sd suffix per element mode and %x1
;; names the low xmm part of the source.
(define_insn "avx512f_vec_dup<mode>_1"
 [(set (match_operand:VF_512 0 "register_operand" "=v")
 (vec_duplicate:VF_512
 (vec_select:<ssescalarmode>
 (match_operand:VF_512 1 "register_operand" "v")
 (parallel [(const_int 0)]))))]
 "TARGET_AVX512F"
 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
 [(set_attr "type" "sselog1")
 (set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
8397
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF operands.  Alternatives cover SSE
;; unpcklps/insertps forms, a plain scalar load when op2 is zero
;; (movss/movd), and MMX register forms (punpckldq/movd); the attribute
;; tables below key isa/type/prefix decisions to those alternatives.
(define_insn "*vec_concatv2sf_sse4_1"
 [(set (match_operand:V2SF 0 "register_operand"
 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
 (vec_concat:V2SF
 (match_operand:SF 1 "nonimmediate_operand"
 " 0, 0,Yv, 0,0, v,m, 0 , m")
 (match_operand:SF 2 "nonimm_or_0_operand"
 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 "@
 unpcklps\t{%2, %0|%0, %2}
 unpcklps\t{%2, %0|%0, %2}
 vunpcklps\t{%2, %1, %0|%0, %1, %2}
 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
 %vmovss\t{%1, %0|%0, %1}
 punpckldq\t{%2, %0|%0, %2}
 movd\t{%1, %0|%0, %1}"
 [(set (attr "isa")
 (cond [(eq_attr "alternative" "0,1,3,4")
 (const_string "noavx")
 (eq_attr "alternative" "2,5")
 (const_string "avx")
 ]
 (const_string "*")))
 (set (attr "type")
 (cond [(eq_attr "alternative" "6")
 (const_string "ssemov")
 (eq_attr "alternative" "7")
 (const_string "mmxcvt")
 (eq_attr "alternative" "8")
 (const_string "mmxmov")
 ]
 (const_string "sselog")))
 (set (attr "mmx_isa")
 (if_then_else (eq_attr "alternative" "7,8")
 (const_string "native")
 (const_string "*")))
 (set (attr "prefix_data16")
 (if_then_else (eq_attr "alternative" "3,4")
 (const_string "1")
 (const_string "*")))
 (set (attr "prefix_extra")
 (if_then_else (eq_attr "alternative" "3,4,5")
 (const_string "1")
 (const_string "*")))
 (set (attr "length_immediate")
 (if_then_else (eq_attr "alternative" "3,4,5")
 (const_string "1")
 (const_string "*")))
 (set (attr "prefix")
 (cond [(eq_attr "alternative" "2,5")
 (const_string "maybe_evex")
 (eq_attr "alternative" "6")
 (const_string "maybe_vex")
 ]
 (const_string "orig")))
 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8459
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; vector_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SF concat: unpcklps (SSE) or punpckldq (MMX); a zero op2
;; degenerates to a plain scalar load (movss/movd).
(define_insn "*vec_concatv2sf_sse"
 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
 (vec_concat:V2SF
 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
 "TARGET_SSE"
 "@
 unpcklps\t{%2, %0|%0, %2}
 movss\t{%1, %0|%0, %1}
 punpckldq\t{%2, %0|%0, %2}
 movd\t{%1, %0|%0, %1}"
 [(set_attr "mmx_isa" "*,*,native,native")
 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
 (set_attr "mode" "V4SF,SF,DI,DI")])
8477
;; Build a V4SF from two V2SF halves: movlhps for a register op2, movhps
;; for a memory op2 (loads it into the high half over op1).
(define_insn "*vec_concatv4sf"
 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
 (vec_concat:V4SF
 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
 "TARGET_SSE"
 "@
 movlhps\t{%2, %0|%0, %2}
 vmovlhps\t{%2, %1, %0|%0, %1, %2}
 movhps\t{%2, %0|%0, %q2}
 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
 [(set_attr "isa" "noavx,avx,noavx,avx")
 (set_attr "type" "ssemov")
 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
8493
;; V2SF value with a zero upper half: a single movq, whose 64-bit load
;; zeroes the destination's upper 64 bits.
(define_insn "*vec_concatv4sf_0"
 [(set (match_operand:V4SF 0 "register_operand" "=v")
 (vec_concat:V4SF
 (match_operand:V2SF 1 "nonimmediate_operand" "vm")
 (match_operand:V2SF 2 "const0_operand" " C")))]
 "TARGET_SSE2"
 "%vmovq\t{%1, %0|%0, %1}"
 [(set_attr "type" "ssemov")
 (set_attr "prefix" "maybe_vex")
 (set_attr "mode" "DF")])
8504
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit 4-element vector (VI4F_128).  When operand
;; 1 is zero (C) a plain scalar move suffices (upper bits cleared by the
;; insn); when it is a live vector, movss/pinsrd merge the new element.
;; The last three alternatives (memory destination) emit "#" and are
;; split after reload (see the define_split below).
(define_insn "vec_set<mode>_0"
 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
 (vec_merge:VI4F_128
 (vec_duplicate:VI4F_128
 (match_operand:<ssescalarmode> 2 "general_operand"
 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
 (const_int 1)))]
 "TARGET_SSE"
 "@
 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
 %vmovd\t{%2, %0|%0, %2}
 movss\t{%2, %0|%0, %2}
 movss\t{%2, %0|%0, %2}
 vmovss\t{%2, %1, %0|%0, %1, %2}
 pinsrd\t{$0, %2, %0|%0, %2, 0}
 pinsrd\t{$0, %2, %0|%0, %2, 0}
 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
 #
 #
 #"
 [(set (attr "isa")
 (cond [(eq_attr "alternative" "0,1,8,9")
 (const_string "sse4_noavx")
 (eq_attr "alternative" "2,7,10")
 (const_string "avx")
 (eq_attr "alternative" "3,4")
 (const_string "sse2")
 (eq_attr "alternative" "5,6")
 (const_string "noavx")
 ]
 (const_string "*")))
 (set (attr "type")
 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
 (const_string "sselog")
 (eq_attr "alternative" "12")
 (const_string "imov")
 (eq_attr "alternative" "13")
 (const_string "fmov")
 ]
 (const_string "ssemov")))
 (set (attr "prefix_extra")
 (if_then_else (eq_attr "alternative" "8,9,10")
 (const_string "1")
 (const_string "*")))
 (set (attr "length_immediate")
 (if_then_else (eq_attr "alternative" "8,9,10")
 (const_string "1")
 (const_string "*")))
 (set (attr "prefix")
 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
 (const_string "orig")
 (eq_attr "alternative" "2")
 (const_string "maybe_evex")
 (eq_attr "alternative" "3,4")
 (const_string "maybe_vex")
 (eq_attr "alternative" "7,10")
 (const_string "vex")
 ]
 (const_string "*")))
 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
 (set (attr "preferred_for_speed")
 (cond [(eq_attr "alternative" "4")
 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
 ]
 (symbol_ref "true")))])
8578
;; A subset is vec_setv4sf.
;; Insert an SF scalar into one element of a V4SF via insertps.  The
;; merge mask (operand 3) must be a single set bit; its log2 is the
;; destination element and goes into bits 4-5 of the insertps imm.
(define_insn "*vec_setv4sf_sse4_1"
 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
 (vec_merge:V4SF
 (vec_duplicate:V4SF
 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
 (match_operand:V4SF 1 "register_operand" "0,0,v")
 (match_operand:SI 3 "const_int_operand")))]
 "TARGET_SSE4_1
 && ((unsigned) exact_log2 (INTVAL (operands[3]))
 < GET_MODE_NUNITS (V4SFmode))"
{
 /* Encode the destination element index into the imm8.  */
 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
 switch (which_alternative)
 {
 case 0:
 case 1:
 return "insertps\t{%3, %2, %0|%0, %2, %3}";
 case 2:
 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
 default:
 gcc_unreachable ();
 }
}
 [(set_attr "isa" "noavx,noavx,avx")
 (set_attr "type" "sselog")
 (set_attr "prefix_data16" "1,1,*")
 (set_attr "prefix_extra" "1")
 (set_attr "length_immediate" "1")
 (set_attr "prefix" "orig,orig,maybe_evex")
 (set_attr "mode" "V4SF")])
8610
;; All of vinsertps, vmovss, vmovd clear also the higher bits.
;; Set element 0 of a 256/512-bit vector whose other elements are zero
;; (operand 1 is const0): the scalar insert's implicit upper-bit zeroing
;; covers the rest, so a 128-bit operation on %x0 suffices.
(define_insn "vec_set<mode>_0"
 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
 (vec_merge:VI4F_256_512
 (vec_duplicate:VI4F_256_512
 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
 (const_int 1)))]
 "TARGET_AVX"
 "@
 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
 vmovd\t{%2, %x0|%x0, %2}"
 [(set (attr "type")
 (if_then_else (eq_attr "alternative" "0")
 (const_string "sselog")
 (const_string "ssemov")))
 (set_attr "prefix" "maybe_evex")
 (set_attr "mode" "SF,<ssescalarmode>,SI")
 (set (attr "preferred_for_speed")
 (cond [(eq_attr "alternative" "2")
 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
 ]
 (symbol_ref "true")))])
8635
;; SSE4.1 INSERTPS with an arbitrary 0..255 immediate (builtin-level UNSPEC
;; form).  When operand 2 is memory, the hardware ignores the source-lane
;; (COUNT_S) field and always reads one SF element; to preserve semantics we
;; fold COUNT_S (immediate bits 7:6) into the memory address and clear it
;; from the immediate.
8636 (define_insn "sse4_1_insertps"
8637 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8638 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8639 (match_operand:V4SF 1 "register_operand" "0,0,v")
8640 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8641 UNSPEC_INSERTPS))]
8642 "TARGET_SSE4_1"
8643 {
8644 if (MEM_P (operands[2]))
8645 {
;; count_s = source lane selector from immediate bits 7:6.
8646 unsigned count_s = INTVAL (operands[3]) >> 6;
8647 if (count_s)
8648 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8649 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8650 }
8651 switch (which_alternative)
8652 {
8653 case 0:
8654 case 1:
8655 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8656 case 2:
8657 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8658 default:
8659 gcc_unreachable ();
8660 }
8661 }
8662 [(set_attr "isa" "noavx,noavx,avx")
8663 (set_attr "type" "sselog")
8664 (set_attr "prefix_data16" "1,1,*")
8665 (set_attr "prefix_extra" "1")
8666 (set_attr "length_immediate" "1")
8667 (set_attr "prefix" "orig,orig,maybe_evex")
8668 (set_attr "mode" "V4SF")])
8669
;; Setting element 0 of a vector that lives in memory (operand 0 is both
;; merge source and destination) is just a scalar store: narrow the memory
;; reference to the scalar mode and emit a plain move.
8670 (define_split
8671 [(set (match_operand:VI4F_128 0 "memory_operand")
8672 (vec_merge:VI4F_128
8673 (vec_duplicate:VI4F_128
8674 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8675 (match_dup 0)
8676 (const_int 1)))]
8677 "TARGET_SSE && reload_completed"
8678 [(set (match_dup 0) (match_dup 1))]
8679 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8680
8681 ;; Standard scalar operation patterns which preserve the rest of the
8682 ;; vector for combiner.
;; Replace element 0 of a V2DF, keeping element 1 from operand 1:
;; reg source -> movsd/vmovsd, mem source -> movlpd/vmovlpd.
8683 (define_insn "vec_setv2df_0"
8684 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
8685 (vec_merge:V2DF
8686 (vec_duplicate:V2DF
8687 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8688 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
8689 (const_int 1)))]
8690 "TARGET_SSE2"
8691 "@
8692 movsd\t{%2, %0|%0, %2}
8693 vmovsd\t{%2, %1, %0|%0, %1, %2}
8694 movlpd\t{%2, %0|%0, %2}
8695 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8696 [(set_attr "isa" "noavx,avx,noavx,avx")
8697 (set_attr "type" "ssemov")
8698 (set_attr "mode" "DF")])
8699
;; vec_set for 128-bit vectors: dispatch on whether the element index is a
;; compile-time constant or a variable.
8700 (define_expand "vec_set<mode>"
8701 [(match_operand:V_128 0 "register_operand")
8702 (match_operand:<ssescalarmode> 1 "register_operand")
8703 (match_operand 2 "vec_setm_sse41_operand")]
8704 "TARGET_SSE"
8705 {
8706 if (CONST_INT_P (operands[2]))
8707 ix86_expand_vector_set (false, operands[0], operands[1],
8708 INTVAL (operands[2]));
8709 else
8710 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
8711 DONE;
8712 })
8713
;; vec_set for 256/512-bit vectors; same constant-vs-variable index dispatch
;; as the 128-bit expander above, with an AVX2-level index predicate.
8714 (define_expand "vec_set<mode>"
8715 [(match_operand:V_256_512 0 "register_operand")
8716 (match_operand:<ssescalarmode> 1 "register_operand")
8717 (match_operand 2 "vec_setm_avx2_operand")]
8718 "TARGET_AVX"
8719 {
8720 if (CONST_INT_P (operands[2]))
8721 ix86_expand_vector_set (false, operands[0], operands[1],
8722 INTVAL (operands[2]));
8723 else
8724 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
8725 DONE;
8726 })
8727
;; Extracting element 0 of a V4SF is a no-op bit-wise: split after reload
;; into a plain SF move of the low part.
8728 (define_insn_and_split "*vec_extractv4sf_0"
8729 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8730 (vec_select:SF
8731 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8732 (parallel [(const_int 0)])))]
8733 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8734 "#"
8735 "&& reload_completed"
8736 [(set (match_dup 0) (match_dup 1))]
8737 "operands[1] = gen_lowpart (SFmode, operands[1]);")
8738
;; Extract lane 0..3 of a V4SF.  Alternatives 0-2 emit EXTRACTPS to a GPR or
;; memory.  For an SSE-register destination (alternatives 3-4) EXTRACTPS is
;; not usable, so split after reload into a shuffle that brings the wanted
;; lane to position 0 (shufps for lanes 1/3, unpckhps for lane 2; lane 0 is
;; handled by *vec_extractv4sf_0).
8739 (define_insn_and_split "*sse4_1_extractps"
8740 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8741 (vec_select:SF
8742 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8743 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8744 "TARGET_SSE4_1"
8745 "@
8746 extractps\t{%2, %1, %0|%0, %1, %2}
8747 extractps\t{%2, %1, %0|%0, %1, %2}
8748 vextractps\t{%2, %1, %0|%0, %1, %2}
8749 #
8750 #"
8751 "&& reload_completed && SSE_REG_P (operands[0])"
8752 [(const_int 0)]
8753 {
8754 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8755 switch (INTVAL (operands[2]))
8756 {
8757 case 1:
8758 case 3:
8759 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8760 operands[2], operands[2],
8761 GEN_INT (INTVAL (operands[2]) + 4),
8762 GEN_INT (INTVAL (operands[2]) + 4)));
8763 break;
8764 case 2:
8765 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8766 break;
8767 default:
8768 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
8769 gcc_unreachable ();
8770 }
8771 DONE;
8772 }
8773 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8774 (set_attr "type" "sselog,sselog,sselog,*,*")
8775 (set_attr "prefix_data16" "1,1,1,*,*")
8776 (set_attr "prefix_extra" "1,1,1,*,*")
8777 (set_attr "length_immediate" "1,1,1,*,*")
8778 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8779 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
8780
;; Extract any lane of a V4SF that lives in memory: split after reload into
;; a scalar load at byte offset lane*4.
8781 (define_insn_and_split "*vec_extractv4sf_mem"
8782 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8783 (vec_select:SF
8784 (match_operand:V4SF 1 "memory_operand" "o,o,o")
8785 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8786 "TARGET_SSE"
8787 "#"
8788 "&& reload_completed"
8789 [(set (match_dup 0) (match_dup 1))]
8790 {
8791 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
8792 })
8793
;; Quarter-width (128-bit from 512-bit) extraction: ISA prefix and insn
;; suffix per mode.  32x4 extracts exist in base AVX512F; 64x2 extracts
;; need AVX512DQ, hence the conditions on the iterator below.
8794 (define_mode_attr extract_type
8795 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8796
8797 (define_mode_attr extract_suf
8798 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8799
8800 (define_mode_iterator AVX512_VEC
8801 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8802
;; Masked extraction of 128-bit quarter number operand[2] from a 512-bit
;; vector.  Expands to the *_1_mask insns with explicit element indices
;; (mask*4.. for 32-bit elements, mask*2.. for 64-bit).  When the
;; destination is memory but differs from the merge source (operand 3),
;; the masked-store form cannot be used, so extract into a fresh register
;; and store afterwards.
8803 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8804 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8805 (match_operand:AVX512_VEC 1 "register_operand")
8806 (match_operand:SI 2 "const_0_to_3_operand")
8807 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8808 (match_operand:QI 4 "register_operand")]
8809 "TARGET_AVX512F"
8810 {
8811 int mask;
8812 mask = INTVAL (operands[2]);
8813 rtx dest = operands[0];
8814
8815 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8816 dest = gen_reg_rtx (<ssequartermode>mode);
8817
8818 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8819 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8820 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8821 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8822 operands[4]));
8823 else
8824 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8825 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8826 operands[4]));
8827 if (dest != operands[0])
8828 emit_move_insn (operands[0], dest);
8829 DONE;
8830 })
8831
;; Masked vextract{f,i}64x2: the condition requires an aligned consecutive
;; element pair (even index, index+1), and the output converts the element
;; index into the 128-bit-chunk immediate by halving it.  A memory
;; destination must equal the merge source (masked store, no zero-masking).
8832 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
8833 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8834 (vec_merge:<ssequartermode>
8835 (vec_select:<ssequartermode>
8836 (match_operand:V8FI 1 "register_operand" "v,v")
8837 (parallel [(match_operand 2 "const_0_to_7_operand")
8838 (match_operand 3 "const_0_to_7_operand")]))
8839 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
8840 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
8841 "TARGET_AVX512DQ
8842 && INTVAL (operands[2]) % 2 == 0
8843 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8844 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
8845 {
8846 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8847 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
8848 }
8849 [(set_attr "type" "sselog1")
8850 (set_attr "prefix_extra" "1")
8851 (set_attr "length_immediate" "1")
8852 (set_attr "prefix" "evex")
8853 (set_attr "mode" "<sseinsnmode>")])
8854
;; Unmasked vextract{f,i}64x2 of an aligned consecutive element pair;
;; immediate = element index / 2 (128-bit chunk number).
8855 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
8856 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8857 (vec_select:<ssequartermode>
8858 (match_operand:V8FI 1 "register_operand" "v")
8859 (parallel [(match_operand 2 "const_0_to_7_operand")
8860 (match_operand 3 "const_0_to_7_operand")])))]
8861 "TARGET_AVX512DQ
8862 && INTVAL (operands[2]) % 2 == 0
8863 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8864 {
8865 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8866 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
8867 }
8868 [(set_attr "type" "sselog1")
8869 (set_attr "prefix_extra" "1")
8870 (set_attr "length_immediate" "1")
8871 (set_attr "prefix" "evex")
8872 (set_attr "mode" "<sseinsnmode>")])
8873
;; Extracting elements 0-1 (the low 128 bits) is a lowpart move.  Without
;; AVX512VL a 128-bit access to zmm16+ cannot be encoded, so when the
;; destination is a register and the source is an ext-rex register, the
;; destination is instead widened to the full vector mode (upper bits are
;; don't-care), keeping the move encodable; otherwise narrow the source.
8874 (define_split
8875 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8876 (vec_select:<ssequartermode>
8877 (match_operand:V8FI 1 "register_operand")
8878 (parallel [(const_int 0) (const_int 1)])))]
8879 "TARGET_AVX512DQ
8880 && reload_completed
8881 && (TARGET_AVX512VL
8882 || REG_P (operands[0])
8883 || !EXT_REX_SSE_REG_P (operands[1]))"
8884 [(set (match_dup 0) (match_dup 1))]
8885 {
8886 if (!TARGET_AVX512VL
8887 && REG_P (operands[0])
8888 && EXT_REX_SSE_REG_P (operands[1]))
8889 operands[0]
8890 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8891 else
8892 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8893 })
8894
;; Masked vextract{f,i}32x4: requires four consecutive elements starting at
;; a multiple of 4; immediate = element index / 4.  Memory destination must
;; equal the merge source (masked store form).
8895 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
8896 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8897 (vec_merge:<ssequartermode>
8898 (vec_select:<ssequartermode>
8899 (match_operand:V16FI 1 "register_operand" "v,v")
8900 (parallel [(match_operand 2 "const_0_to_15_operand")
8901 (match_operand 3 "const_0_to_15_operand")
8902 (match_operand 4 "const_0_to_15_operand")
8903 (match_operand 5 "const_0_to_15_operand")]))
8904 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
8905 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
8906 "TARGET_AVX512F
8907 && INTVAL (operands[2]) % 4 == 0
8908 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8909 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8910 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8911 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
8912 {
8913 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8914 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
8915 }
8916 [(set_attr "type" "sselog1")
8917 (set_attr "prefix_extra" "1")
8918 (set_attr "length_immediate" "1")
8919 (set_attr "prefix" "evex")
8920 (set_attr "mode" "<sseinsnmode>")])
8921
;; Unmasked vextract{f,i}32x4 of four aligned consecutive elements;
;; immediate = element index / 4 (128-bit chunk number).
8922 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
8923 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8924 (vec_select:<ssequartermode>
8925 (match_operand:V16FI 1 "register_operand" "v")
8926 (parallel [(match_operand 2 "const_0_to_15_operand")
8927 (match_operand 3 "const_0_to_15_operand")
8928 (match_operand 4 "const_0_to_15_operand")
8929 (match_operand 5 "const_0_to_15_operand")])))]
8930 "TARGET_AVX512F
8931 && INTVAL (operands[2]) % 4 == 0
8932 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8933 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8934 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8935 {
8936 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8937 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
8938 }
8939 [(set_attr "type" "sselog1")
8940 (set_attr "prefix_extra" "1")
8941 (set_attr "length_immediate" "1")
8942 (set_attr "prefix" "evex")
8943 (set_attr "mode" "<sseinsnmode>")])
8944
;; Low-quarter (elements 0-3) extraction is a lowpart move; same ext-rex
;; register handling as the V8FI split above: widen the destination when
;; the source is zmm16+ and VL is unavailable, else narrow the source.
8945 (define_split
8946 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8947 (vec_select:<ssequartermode>
8948 (match_operand:V16FI 1 "register_operand")
8949 (parallel [(const_int 0) (const_int 1)
8950 (const_int 2) (const_int 3)])))]
8951 "TARGET_AVX512F
8952 && reload_completed
8953 && (TARGET_AVX512VL
8954 || REG_P (operands[0])
8955 || !EXT_REX_SSE_REG_P (operands[1]))"
8956 [(set (match_dup 0) (match_dup 1))]
8957 {
8958 if (!TARGET_AVX512VL
8959 && REG_P (operands[0])
8960 && EXT_REX_SSE_REG_P (operands[1]))
8961 operands[0]
8962 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8963 else
8964 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8965 })
8966
;; Half-width (256-bit from 512-bit) extraction: 64x4 extracts exist in
;; base AVX512F; 32x8 extracts need AVX512DQ (inverse of the quarter-width
;; table above).
8967 (define_mode_attr extract_type_2
8968 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8969
8970 (define_mode_attr extract_suf_2
8971 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8972
8973 (define_mode_iterator AVX512_VEC_2
8974 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8975
;; Masked extraction of the low or high 256-bit half, selected by the
;; immediate operand 2.  A memory destination that differs from the merge
;; source bounces through a temporary register (masked store requires
;; dest == merge source).
8976 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8977 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8978 (match_operand:AVX512_VEC_2 1 "register_operand")
8979 (match_operand:SI 2 "const_0_to_1_operand")
8980 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8981 (match_operand:QI 4 "register_operand")]
8982 "TARGET_AVX512F"
8983 {
8984 rtx (*insn)(rtx, rtx, rtx, rtx);
8985 rtx dest = operands[0];
8986
8987 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8988 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8989
8990 switch (INTVAL (operands[2]))
8991 {
8992 case 0:
8993 insn = gen_vec_extract_lo_<mode>_mask;
8994 break;
8995 case 1:
8996 insn = gen_vec_extract_hi_<mode>_mask;
8997 break;
8998 default:
8999 gcc_unreachable ();
9000 }
9001
9002 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
9003 if (dest != operands[0])
9004 emit_move_insn (operands[0], dest);
9005 DONE;
9006 })
9007
;; Low-half (elements 0-3) extraction from a V8FI vector is a lowpart
;; move after reload, when the narrower access is encodable (VL available,
;; or register destination with a non-ext-rex source).
9008 (define_split
9009 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9010 (vec_select:<ssehalfvecmode>
9011 (match_operand:V8FI 1 "nonimmediate_operand")
9012 (parallel [(const_int 0) (const_int 1)
9013 (const_int 2) (const_int 3)])))]
9014 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9015 && reload_completed
9016 && (TARGET_AVX512VL
9017 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
9018 [(set (match_dup 0) (match_dup 1))]
9019 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
9020
;; Masked extraction of the low 256-bit half of a V8FI vector:
;; vextract{f,i}64x4 with immediate 0.  Memory destination must equal the
;; merge source (masked store form, no zero masking).
9021 (define_insn "vec_extract_lo_<mode>_mask"
9022 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9023 (vec_merge:<ssehalfvecmode>
9024 (vec_select:<ssehalfvecmode>
9025 (match_operand:V8FI 1 "register_operand" "v,v")
9026 (parallel [(const_int 0) (const_int 1)
9027 (const_int 2) (const_int 3)]))
9028 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9029 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9030 "TARGET_AVX512F
9031 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9032 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9033 [(set_attr "type" "sselog1")
9034 (set_attr "prefix_extra" "1")
9035 (set_attr "length_immediate" "1")
9036 (set_attr "memory" "none,store")
9037 (set_attr "prefix" "evex")
9038 (set_attr "mode" "<sseinsnmode>")])
9039
;; Unmasked low-half extraction from a V8FI vector.  A real instruction is
;; emitted only when the pattern cannot be turned into a plain lowpart move
;; (no VL and a register source); otherwise "#" lets the split above handle
;; it.
9040 (define_insn "vec_extract_lo_<mode>"
9041 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
9042 (vec_select:<ssehalfvecmode>
9043 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
9044 (parallel [(const_int 0) (const_int 1)
9045 (const_int 2) (const_int 3)])))]
9046 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9047 {
9048 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
9049 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9050 else
9051 return "#";
9052 }
9053 [(set_attr "type" "sselog1")
9054 (set_attr "prefix_extra" "1")
9055 (set_attr "length_immediate" "1")
9056 (set_attr "memory" "none,store,load")
9057 (set_attr "prefix" "evex")
9058 (set_attr "mode" "<sseinsnmode>")])
9059
;; Masked extraction of the high 256-bit half of a V8FI vector:
;; vextract{f,i}64x4 with immediate 1.
9060 (define_insn "vec_extract_hi_<mode>_mask"
9061 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9062 (vec_merge:<ssehalfvecmode>
9063 (vec_select:<ssehalfvecmode>
9064 (match_operand:V8FI 1 "register_operand" "v,v")
9065 (parallel [(const_int 4) (const_int 5)
9066 (const_int 6) (const_int 7)]))
9067 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9068 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9069 "TARGET_AVX512F
9070 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9071 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9072 [(set_attr "type" "sselog1")
9073 (set_attr "prefix_extra" "1")
9074 (set_attr "length_immediate" "1")
9075 (set_attr "prefix" "evex")
9076 (set_attr "mode" "<sseinsnmode>")])
9077
;; Unmasked extraction of the high 256-bit half of a V8FI vector.
9078 (define_insn "vec_extract_hi_<mode>"
9079 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
9080 (vec_select:<ssehalfvecmode>
9081 (match_operand:V8FI 1 "register_operand" "v")
9082 (parallel [(const_int 4) (const_int 5)
9083 (const_int 6) (const_int 7)])))]
9084 "TARGET_AVX512F"
9085 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9086 [(set_attr "type" "sselog1")
9087 (set_attr "prefix_extra" "1")
9088 (set_attr "length_immediate" "1")
9089 (set_attr "prefix" "evex")
9090 (set_attr "mode" "<sseinsnmode>")])
9091
;; Masked extraction of the high 256-bit half (elements 8-15) of a V16FI
;; vector: vextract{f,i}32x8, which requires AVX512DQ.
9092 (define_insn "vec_extract_hi_<mode>_mask"
9093 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9094 (vec_merge:<ssehalfvecmode>
9095 (vec_select:<ssehalfvecmode>
9096 (match_operand:V16FI 1 "register_operand" "v,v")
9097 (parallel [(const_int 8) (const_int 9)
9098 (const_int 10) (const_int 11)
9099 (const_int 12) (const_int 13)
9100 (const_int 14) (const_int 15)]))
9101 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9102 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9103 "TARGET_AVX512DQ
9104 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9105 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9106 [(set_attr "type" "sselog1")
9107 (set_attr "prefix_extra" "1")
9108 (set_attr "length_immediate" "1")
9109 (set_attr "prefix" "evex")
9110 (set_attr "mode" "<sseinsnmode>")])
9111
;; Unmasked high-half extraction from a V16FI vector.  With AVX512DQ the
;; natural vextract{f,i}32x8 is used; otherwise fall back to vextracti64x4
;; (base AVX512F), which is bit-identical for an unmasked full-half move.
9112 (define_insn "vec_extract_hi_<mode>"
9113 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
9114 (vec_select:<ssehalfvecmode>
9115 (match_operand:V16FI 1 "register_operand" "v,v")
9116 (parallel [(const_int 8) (const_int 9)
9117 (const_int 10) (const_int 11)
9118 (const_int 12) (const_int 13)
9119 (const_int 14) (const_int 15)])))]
9120 "TARGET_AVX512F"
9121 "@
9122 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
9123 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9124 [(set_attr "type" "sselog1")
9125 (set_attr "prefix_extra" "1")
9126 (set_attr "isa" "avx512dq,noavx512dq")
9127 (set_attr "length_immediate" "1")
9128 (set_attr "prefix" "evex")
9129 (set_attr "mode" "<sseinsnmode>")])
9130
;; 256-bit modes for the VL vextractf128 expander below; 64-bit-element
;; modes additionally require AVX512DQ (they map to 64x2 extracts).
9131 (define_mode_iterator VI48F_256_DQ
9132 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
9133
;; AVX512VL masked 128-bit extraction from a 256-bit vector.  The MEM
;; destination handling mirrors the constraints of the underlying insns,
;; as explained by the inline comments; when the masked-store form cannot
;; be used directly, extract into a temporary and store afterwards.
9134 (define_expand "avx512vl_vextractf128<mode>"
9135 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9136 (match_operand:VI48F_256_DQ 1 "register_operand")
9137 (match_operand:SI 2 "const_0_to_1_operand")
9138 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
9139 (match_operand:QI 4 "register_operand")]
9140 "TARGET_AVX512VL"
9141 {
9142 rtx (*insn)(rtx, rtx, rtx, rtx);
9143 rtx dest = operands[0];
9144
9145 if (MEM_P (dest)
9146 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
9147 /* For V8S[IF]mode there are maskm insns with =m and 0
9148 constraints. */
9149 ? !rtx_equal_p (dest, operands[3])
9150 /* For V4D[IF]mode, hi insns don't allow memory, and
9151 lo insns have =m and 0C constraints. */
9152 : (operands[2] != const0_rtx
9153 || (!rtx_equal_p (dest, operands[3])
9154 && GET_CODE (operands[3]) != CONST_VECTOR))))
9155 dest = gen_reg_rtx (<ssehalfvecmode>mode);
9156 switch (INTVAL (operands[2]))
9157 {
9158 case 0:
9159 insn = gen_vec_extract_lo_<mode>_mask;
9160 break;
9161 case 1:
9162 insn = gen_vec_extract_hi_<mode>_mask;
9163 break;
9164 default:
9165 gcc_unreachable ();
9166 }
9167
9168 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
9169 if (dest != operands[0])
9170 emit_move_insn (operands[0], dest);
9171 DONE;
9172 })
9173
;; AVX unmasked 128-bit extraction from a 256-bit vector: pick the lo or hi
;; half pattern based on the immediate.
9174 (define_expand "avx_vextractf128<mode>"
9175 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9176 (match_operand:V_256 1 "register_operand")
9177 (match_operand:SI 2 "const_0_to_1_operand")]
9178 "TARGET_AVX"
9179 {
9180 rtx (*insn)(rtx, rtx);
9181
9182 switch (INTVAL (operands[2]))
9183 {
9184 case 0:
9185 insn = gen_vec_extract_lo_<mode>;
9186 break;
9187 case 1:
9188 insn = gen_vec_extract_hi_<mode>;
9189 break;
9190 default:
9191 gcc_unreachable ();
9192 }
9193
9194 emit_insn (insn (operands[0], operands[1]));
9195 DONE;
9196 })
9197
;; Masked extraction of the low 256-bit half (elements 0-7) of a V16FI
;; vector: vextract{f,i}32x8 with immediate 0 (AVX512DQ).
9198 (define_insn "vec_extract_lo_<mode>_mask"
9199 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9200 (vec_merge:<ssehalfvecmode>
9201 (vec_select:<ssehalfvecmode>
9202 (match_operand:V16FI 1 "register_operand" "v,v")
9203 (parallel [(const_int 0) (const_int 1)
9204 (const_int 2) (const_int 3)
9205 (const_int 4) (const_int 5)
9206 (const_int 6) (const_int 7)]))
9207 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9208 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9209 "TARGET_AVX512DQ
9210 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9211 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9212 [(set_attr "type" "sselog1")
9213 (set_attr "prefix_extra" "1")
9214 (set_attr "length_immediate" "1")
9215 (set_attr "memory" "none,store")
9216 (set_attr "prefix" "evex")
9217 (set_attr "mode" "<sseinsnmode>")])
9218
;; Unmasked low-half extraction from a V16FI vector.  An instruction is
;; emitted only for a memory destination fed from an ext-rex (zmm16+)
;; register without VL, where a plain 256-bit store cannot be encoded;
;; all other cases return "#" and are handled by the split below.
9219 (define_insn "vec_extract_lo_<mode>"
9220 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
9221 (vec_select:<ssehalfvecmode>
9222 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
9223 (parallel [(const_int 0) (const_int 1)
9224 (const_int 2) (const_int 3)
9225 (const_int 4) (const_int 5)
9226 (const_int 6) (const_int 7)])))]
9227 "TARGET_AVX512F
9228 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9229 {
9230 if (!TARGET_AVX512VL
9231 && !REG_P (operands[0])
9232 && EXT_REX_SSE_REG_P (operands[1]))
9233 {
9234 if (TARGET_AVX512DQ)
9235 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
9236 else
9237 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9238 }
9239 else
9240 return "#";
9241 }
9242 [(set_attr "type" "sselog1")
9243 (set_attr "prefix_extra" "1")
9244 (set_attr "length_immediate" "1")
9245 (set_attr "memory" "none,load,store")
9246 (set_attr "prefix" "evex")
9247 (set_attr "mode" "<sseinsnmode>")])
9248
;; Split low-half extraction into a lowpart move; when the destination is
;; a register, the source an ext-rex register, and VL is unavailable, widen
;; the destination to the full vector mode instead of narrowing the source
;; (upper destination bits are don't-care), keeping the move encodable.
9249 (define_split
9250 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9251 (vec_select:<ssehalfvecmode>
9252 (match_operand:V16FI 1 "nonimmediate_operand")
9253 (parallel [(const_int 0) (const_int 1)
9254 (const_int 2) (const_int 3)
9255 (const_int 4) (const_int 5)
9256 (const_int 6) (const_int 7)])))]
9257 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9258 && reload_completed
9259 && (TARGET_AVX512VL
9260 || REG_P (operands[0])
9261 || !EXT_REX_SSE_REG_P (operands[1]))"
9262 [(set (match_dup 0) (match_dup 1))]
9263 {
9264 if (!TARGET_AVX512VL
9265 && REG_P (operands[0])
9266 && EXT_REX_SSE_REG_P (operands[1]))
9267 operands[0]
9268 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
9269 else
9270 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
9271 })
9272
;; Masked extraction of the low 128 bits (element pair 0-1) of a 256-bit
;; vector: EVEX vextract{f,i}64x2 with immediate 0 (AVX512DQ + AVX512VL).
;; A memory destination must equal the merge source (masked store form).
9273 (define_insn "vec_extract_lo_<mode>_mask"
9274 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9275 (vec_merge:<ssehalfvecmode>
9276 (vec_select:<ssehalfvecmode>
9277 (match_operand:VI8F_256 1 "register_operand" "v,v")
9278 (parallel [(const_int 0) (const_int 1)]))
9279 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9280 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9281 "TARGET_AVX512DQ
9282 && TARGET_AVX512VL
9283 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9284 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9285 [(set_attr "type" "sselog1")
9286 (set_attr "prefix_extra" "1")
9287 (set_attr "length_immediate" "1")
9288 (set_attr "memory" "none,store")
9289 (set_attr "prefix" "evex")
;; The source is a 256-bit mode (VI8F_256), so use the per-mode insn mode;
;; "XI" would mark this as a 512-bit instruction.
9290 (set_attr "mode" "<sseinsnmode>")])
9291
;; Unmasked low-128-bit extraction from a 256-bit vector is always a
;; lowpart move; always "#" so the split below handles it after reload.
9292 (define_insn "vec_extract_lo_<mode>"
9293 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9294 (vec_select:<ssehalfvecmode>
9295 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
9296 (parallel [(const_int 0) (const_int 1)])))]
9297 "TARGET_AVX
9298 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9299 "#")
9300
;; Split the low-half extraction above into a plain lowpart move.
9301 (define_split
9302 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9303 (vec_select:<ssehalfvecmode>
9304 (match_operand:VI8F_256 1 "nonimmediate_operand")
9305 (parallel [(const_int 0) (const_int 1)])))]
9306 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9307 && reload_completed"
9308 [(set (match_dup 0) (match_dup 1))]
9309 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
9310
;; Masked extraction of the high 128 bits (element pair 2-3) of a 256-bit
;; vector: vextract{f,i}64x2 with immediate 1 (AVX512DQ + AVX512VL).
9311 (define_insn "vec_extract_hi_<mode>_mask"
9312 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9313 (vec_merge:<ssehalfvecmode>
9314 (vec_select:<ssehalfvecmode>
9315 (match_operand:VI8F_256 1 "register_operand" "v,v")
9316 (parallel [(const_int 2) (const_int 3)]))
9317 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9318 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9319 "TARGET_AVX512DQ
9320 && TARGET_AVX512VL
9321 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9322 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9323 [(set_attr "type" "sselog1")
9324 (set_attr "prefix_extra" "1")
9325 (set_attr "length_immediate" "1")
;; Opmask-using vextract{f,i}64x2 is EVEX-encoded only; "vex" would
;; misclassify it (the lo-half twin of this pattern already says evex).
9326 (set_attr "prefix" "evex")
9327 (set_attr "mode" "<sseinsnmode>")])
9328
;; Unmasked extraction of the high 128 bits of a 256-bit vector.  With
;; AVX512VL the EVEX-encodable vextract{f,i}64x2 (or 32x4 without DQ) is
;; used; otherwise the VEX vextract{f,i}128.
;; NOTE(review): the "prefix" attr reflects only the non-VL VEX fallback;
;; the VL alternatives are EVEX-encoded — confirm whether maybe_evex is
;; intended here.
9329 (define_insn "vec_extract_hi_<mode>"
9330 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
9331 (vec_select:<ssehalfvecmode>
9332 (match_operand:VI8F_256 1 "register_operand" "v")
9333 (parallel [(const_int 2) (const_int 3)])))]
9334 "TARGET_AVX"
9335 {
9336 if (TARGET_AVX512VL)
9337 {
9338 if (TARGET_AVX512DQ)
9339 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
9340 else
9341 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
9342 }
9343 else
9344 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
9345 }
9346 [(set_attr "type" "sselog1")
9347 (set_attr "prefix_extra" "1")
9348 (set_attr "length_immediate" "1")
9349 (set_attr "prefix" "vex")
9350 (set_attr "mode" "<sseinsnmode>")])
9351
;; Low-half (elements 0-3) extraction from a 256-bit 4-byte-element vector
;; is a lowpart move after reload.
9352 (define_split
9353 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9354 (vec_select:<ssehalfvecmode>
9355 (match_operand:VI4F_256 1 "nonimmediate_operand")
9356 (parallel [(const_int 0) (const_int 1)
9357 (const_int 2) (const_int 3)])))]
9358 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9359 && reload_completed"
9360 [(set (match_dup 0) (match_dup 1))]
9361 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
9362
;; Masked extraction of the low 128 bits of a 256-bit 4-byte-element
;; vector: vextract{f,i}32x4 with immediate 0 (AVX512VL).
9363 (define_insn "vec_extract_lo_<mode>_mask"
9364 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9365 (vec_merge:<ssehalfvecmode>
9366 (vec_select:<ssehalfvecmode>
9367 (match_operand:VI4F_256 1 "register_operand" "v,v")
9368 (parallel [(const_int 0) (const_int 1)
9369 (const_int 2) (const_int 3)]))
9370 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9371 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9372 "TARGET_AVX512VL
9373 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9374 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9375 [(set_attr "type" "sselog1")
9376 (set_attr "prefix_extra" "1")
9377 (set_attr "length_immediate" "1")
9378 (set_attr "prefix" "evex")
9379 (set_attr "mode" "<sseinsnmode>")])
9380
;; Unmasked low-half extraction from a 256-bit 4-byte-element vector is a
;; lowpart move; always "#", handled by the split above after reload.
9381 (define_insn "vec_extract_lo_<mode>"
9382 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9383 (vec_select:<ssehalfvecmode>
9384 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
9385 (parallel [(const_int 0) (const_int 1)
9386 (const_int 2) (const_int 3)])))]
9387 "TARGET_AVX
9388 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9389 "#"
9390 [(set_attr "type" "sselog1")
9391 (set_attr "prefix_extra" "1")
9392 (set_attr "length_immediate" "1")
9393 (set_attr "prefix" "evex")
9394 (set_attr "mode" "<sseinsnmode>")])
9395
;; Masked extraction of the high 128 bits (elements 4-7) of a 256-bit
;; 4-byte-element vector: vextract{f,i}32x4 with immediate 1 (AVX512VL).
;; Operand 0 must accept memory: the second alternative is "m" and the
;; insn condition tests MEM_P (operands[0]); with register_operand both
;; were unreachable, so use nonimmediate_operand like the sibling masked
;; extract patterns.
9396 (define_insn "vec_extract_hi_<mode>_mask"
9397 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9398 (vec_merge:<ssehalfvecmode>
9399 (vec_select:<ssehalfvecmode>
9400 (match_operand:VI4F_256 1 "register_operand" "v,v")
9401 (parallel [(const_int 4) (const_int 5)
9402 (const_int 6) (const_int 7)]))
9403 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9404 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9405 "TARGET_AVX512VL
9406 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9407 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9408 [(set_attr "type" "sselog1")
9409 (set_attr "length_immediate" "1")
9410 (set_attr "prefix" "evex")
9411 (set_attr "mode" "<sseinsnmode>")])
9412
;; Unmasked extraction of the high 128 bits of a 256-bit 4-byte-element
;; vector: VEX vextract{f,i}128, or EVEX vextract{f,i}32x4 with AVX512VL
;; (which also allows ext-rex source registers).
9413 (define_insn "vec_extract_hi_<mode>"
9414 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
9415 (vec_select:<ssehalfvecmode>
9416 (match_operand:VI4F_256 1 "register_operand" "x, v")
9417 (parallel [(const_int 4) (const_int 5)
9418 (const_int 6) (const_int 7)])))]
9419 "TARGET_AVX"
9420 "@
9421 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
9422 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9423 [(set_attr "isa" "*, avx512vl")
9424 (set_attr "prefix" "vex, evex")
9425 (set_attr "type" "sselog1")
9426 (set_attr "length_immediate" "1")
9427 (set_attr "mode" "<sseinsnmode>")])
9428
;; Low-half (elements 0-15) extraction from a V32HI.  Normally split into a
;; lowpart move; when that move is not encodable (ext-rex source register,
;; no VL, memory or narrow-reg destination), emit vextracti64x4 instead.
;; When both source and destination are ext-rex registers, the destination
;; is widened to V32HI so the split becomes a full-width register move.
9429 (define_insn_and_split "vec_extract_lo_v32hi"
9430 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
9431 (vec_select:V16HI
9432 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
9433 (parallel [(const_int 0) (const_int 1)
9434 (const_int 2) (const_int 3)
9435 (const_int 4) (const_int 5)
9436 (const_int 6) (const_int 7)
9437 (const_int 8) (const_int 9)
9438 (const_int 10) (const_int 11)
9439 (const_int 12) (const_int 13)
9440 (const_int 14) (const_int 15)])))]
9441 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9442 {
9443 if (TARGET_AVX512VL
9444 || REG_P (operands[0])
9445 || !EXT_REX_SSE_REG_P (operands[1]))
9446 return "#";
9447 else
9448 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9449 }
9450 "&& reload_completed
9451 && (TARGET_AVX512VL
9452 || REG_P (operands[0])
9453 || !EXT_REX_SSE_REG_P (operands[1]))"
9454 [(set (match_dup 0) (match_dup 1))]
9455 {
9456 if (!TARGET_AVX512VL
9457 && REG_P (operands[0])
9458 && EXT_REX_SSE_REG_P (operands[1]))
9459 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
9460 else
9461 operands[1] = gen_lowpart (V16HImode, operands[1]);
9462 }
9463 [(set_attr "type" "sselog1")
9464 (set_attr "prefix_extra" "1")
9465 (set_attr "length_immediate" "1")
9466 (set_attr "memory" "none,load,store")
9467 (set_attr "prefix" "evex")
9468 (set_attr "mode" "XI")])
9469
;; High-half (elements 16-31) extraction from a V32HI: vextracti64x4 with
;; immediate 1 (bit-identical to a 16-bit-element extract of the upper
;; 256 bits).
9470 (define_insn "vec_extract_hi_v32hi"
9471 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9472 (vec_select:V16HI
9473 (match_operand:V32HI 1 "register_operand" "v")
9474 (parallel [(const_int 16) (const_int 17)
9475 (const_int 18) (const_int 19)
9476 (const_int 20) (const_int 21)
9477 (const_int 22) (const_int 23)
9478 (const_int 24) (const_int 25)
9479 (const_int 26) (const_int 27)
9480 (const_int 28) (const_int 29)
9481 (const_int 30) (const_int 31)])))]
9482 "TARGET_AVX512F"
9483 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9484 [(set_attr "type" "sselog1")
9485 (set_attr "prefix_extra" "1")
9486 (set_attr "length_immediate" "1")
9487 (set_attr "prefix" "evex")
9488 (set_attr "mode" "XI")])
9489
;; Extract the low 8 HImode elements (indices 0..7) of a 256-bit vector.
;; The low half is just the lower 128 bits, so after reload this splits
;; into a plain V8HI move of the lowpart; no instruction is emitted here.
9490 (define_insn_and_split "vec_extract_lo_v16hi"
9491 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9492 (vec_select:V8HI
9493 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9494 (parallel [(const_int 0) (const_int 1)
9495 (const_int 2) (const_int 3)
9496 (const_int 4) (const_int 5)
9497 (const_int 6) (const_int 7)])))]
9498 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9499 "#"
9500 "&& reload_completed"
9501 [(set (match_dup 0) (match_dup 1))]
9502 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
9503
;; Extract the high 8 HImode elements (indices 8..15) of a 256-bit vector.
;; Three alternatives: plain AVX2 VEXTRACT*128, AVX512DQ VEXTRACTI32X4 on
;; a 256-bit register, and AVX512F VEXTRACTI32X4 applied to the containing
;; 512-bit register (%g1) when only EVEX zmm access is available.
9504 (define_insn "vec_extract_hi_v16hi"
9505 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9506 (vec_select:V8HI
9507 (match_operand:V16HI 1 "register_operand" "x,v,v")
9508 (parallel [(const_int 8) (const_int 9)
9509 (const_int 10) (const_int 11)
9510 (const_int 12) (const_int 13)
9511 (const_int 14) (const_int 15)])))]
9512 "TARGET_AVX"
9513 "@
9514 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9515 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9516 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9517 [(set_attr "type" "sselog1")
9518 (set_attr "prefix_extra" "1")
9519 (set_attr "length_immediate" "1")
9520 (set_attr "isa" "*,avx512dq,avx512f")
9521 (set_attr "prefix" "vex,evex,evex")
9522 (set_attr "mode" "OI")])
9523
;; Extract the low 32 QImode elements (indices 0..31) of a 512-bit vector.
;; Normally splits after reload into a plain V32QI lowpart move.  The one
;; case that must stay an instruction is storing from an extended SSE
;; register (xmm16+) to memory without AVX512VL: there is no 256-bit move
;; for such registers under plain AVX512F, so emit VEXTRACTI64X4 $0.
9524 (define_insn_and_split "vec_extract_lo_v64qi"
9525 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9526 (vec_select:V32QI
9527 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9528 (parallel [(const_int 0) (const_int 1)
9529 (const_int 2) (const_int 3)
9530 (const_int 4) (const_int 5)
9531 (const_int 6) (const_int 7)
9532 (const_int 8) (const_int 9)
9533 (const_int 10) (const_int 11)
9534 (const_int 12) (const_int 13)
9535 (const_int 14) (const_int 15)
9536 (const_int 16) (const_int 17)
9537 (const_int 18) (const_int 19)
9538 (const_int 20) (const_int 21)
9539 (const_int 22) (const_int 23)
9540 (const_int 24) (const_int 25)
9541 (const_int 26) (const_int 27)
9542 (const_int 28) (const_int 29)
9543 (const_int 30) (const_int 31)])))]
9544 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9545 {
9546 if (TARGET_AVX512VL
9547 || REG_P (operands[0])
9548 || !EXT_REX_SSE_REG_P (operands[1]))
9549 return "#";
9550 else
9551 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9552 }
9553 "&& reload_completed
9554 && (TARGET_AVX512VL
9555 || REG_P (operands[0])
9556 || !EXT_REX_SSE_REG_P (operands[1]))"
9557 [(set (match_dup 0) (match_dup 1))]
9558 {
9559 /* Reg-to-reg from an extended register without AVX512VL: widen the
9560 destination to V64QI so the move is a full zmm move instead.  */
9561 if (!TARGET_AVX512VL
9562 && REG_P (operands[0])
9563 && EXT_REX_SSE_REG_P (operands[1]))
9564 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9565 else
9566 operands[1] = gen_lowpart (V32QImode, operands[1]);
9567 }
9568 [(set_attr "type" "sselog1")
9569 (set_attr "prefix_extra" "1")
9570 (set_attr "length_immediate" "1")
9571 (set_attr "memory" "none,load,store")
9572 (set_attr "prefix" "evex")
9573 (set_attr "mode" "XI")])
9572
;; Extract the high 32 QImode elements (indices 32..63) of a 512-bit
;; vector.  Always a single VEXTRACTI64X4 with immediate 1.
9573 (define_insn "vec_extract_hi_v64qi"
9574 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9575 (vec_select:V32QI
9576 (match_operand:V64QI 1 "register_operand" "v")
9577 (parallel [(const_int 32) (const_int 33)
9578 (const_int 34) (const_int 35)
9579 (const_int 36) (const_int 37)
9580 (const_int 38) (const_int 39)
9581 (const_int 40) (const_int 41)
9582 (const_int 42) (const_int 43)
9583 (const_int 44) (const_int 45)
9584 (const_int 46) (const_int 47)
9585 (const_int 48) (const_int 49)
9586 (const_int 50) (const_int 51)
9587 (const_int 52) (const_int 53)
9588 (const_int 54) (const_int 55)
9589 (const_int 56) (const_int 57)
9590 (const_int 58) (const_int 59)
9591 (const_int 60) (const_int 61)
9592 (const_int 62) (const_int 63)])))]
9593 "TARGET_AVX512F"
9594 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9595 [(set_attr "type" "sselog1")
9596 (set_attr "prefix_extra" "1")
9597 (set_attr "length_immediate" "1")
9598 (set_attr "prefix" "evex")
9599 (set_attr "mode" "XI")])
9600
;; Extract the low 16 QImode elements (indices 0..15) of a 256-bit vector.
;; The low half is the lower 128 bits; split after reload into a plain
;; V16QI lowpart move.
9601 (define_insn_and_split "vec_extract_lo_v32qi"
9602 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9603 (vec_select:V16QI
9604 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9605 (parallel [(const_int 0) (const_int 1)
9606 (const_int 2) (const_int 3)
9607 (const_int 4) (const_int 5)
9608 (const_int 6) (const_int 7)
9609 (const_int 8) (const_int 9)
9610 (const_int 10) (const_int 11)
9611 (const_int 12) (const_int 13)
9612 (const_int 14) (const_int 15)])))]
9613 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9614 "#"
9615 "&& reload_completed"
9616 [(set (match_dup 0) (match_dup 1))]
9617 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
9618
;; Extract the high 16 QImode elements (indices 16..31) of a 256-bit
;; vector.  Alternatives mirror vec_extract_hi_v16hi: AVX2 VEXTRACT*128,
;; AVX512DQ VEXTRACTI32X4, or AVX512F VEXTRACTI32X4 on the containing
;; 512-bit register (%g1).
9619 (define_insn "vec_extract_hi_v32qi"
9620 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9621 (vec_select:V16QI
9622 (match_operand:V32QI 1 "register_operand" "x,v,v")
9623 (parallel [(const_int 16) (const_int 17)
9624 (const_int 18) (const_int 19)
9625 (const_int 20) (const_int 21)
9626 (const_int 22) (const_int 23)
9627 (const_int 24) (const_int 25)
9628 (const_int 26) (const_int 27)
9629 (const_int 28) (const_int 29)
9630 (const_int 30) (const_int 31)])))]
9631 "TARGET_AVX"
9632 "@
9633 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9634 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9635 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9636 [(set_attr "type" "sselog1")
9637 (set_attr "prefix_extra" "1")
9638 (set_attr "length_immediate" "1")
9639 (set_attr "isa" "*,avx512dq,avx512f")
9640 (set_attr "prefix" "vex,evex,evex")
9641 (set_attr "mode" "OI")])
9642
9643 ;; Modes handled by vec_extract patterns.
;; Each entry is gated on the ISA needed for that vector width/element
;; type; 128-bit integer/float modes are unconditional under TARGET_SSE.
9644 (define_mode_iterator VEC_EXTRACT_MODE
9645 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9646 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9647 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9648 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9649 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9650 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9651 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
9652
;; Standard vec_extract pattern: extract the scalar element at constant
;; index operand 2.  All code generation is delegated to
;; ix86_expand_vector_extract.
9653 (define_expand "vec_extract<mode><ssescalarmodelower>"
9654 [(match_operand:<ssescalarmode> 0 "register_operand")
9655 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9656 (match_operand 2 "const_int_operand")]
9657 "TARGET_SSE"
9658 {
9659 ix86_expand_vector_extract (false, operands[0], operands[1],
9660 INTVAL (operands[2]));
9661 DONE;
9662 })
9663
;; Extract the low (operand 2 == 0) or high (operand 2 == 1) half of a
;; 256- or 512-bit vector, dispatching to the matching _lo/_hi pattern.
9664 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9665 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9666 (match_operand:V_256_512 1 "register_operand")
9667 (match_operand 2 "const_0_to_1_operand")]
9668 "TARGET_AVX"
9669 {
9670 if (INTVAL (operands[2]))
9671 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
9672 else
9673 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9674 DONE;
9675 })
9676
9677 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9678 ;;
9679 ;; Parallel double-precision floating point element swizzling
9680 ;;
9681 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9682
;; 512-bit VUNPCKHPD: interleave the odd-indexed doubles of operands 1
;; and 2 within each 128-bit lane (selection 1,9 3,11 5,13 7,15),
;; optionally masked via <mask_name>/<mask_operand3>.
9683 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9684 [(set (match_operand:V8DF 0 "register_operand" "=v")
9685 (vec_select:V8DF
9686 (vec_concat:V16DF
9687 (match_operand:V8DF 1 "register_operand" "v")
9688 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9689 (parallel [(const_int 1) (const_int 9)
9690 (const_int 3) (const_int 11)
9691 (const_int 5) (const_int 13)
9692 (const_int 7) (const_int 15)])))]
9693 "TARGET_AVX512F"
9694 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9695 [(set_attr "type" "sselog")
9696 (set_attr "prefix" "evex")
9697 (set_attr "mode" "V8DF")])
9698
9699 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit VUNPCKHPD: per-lane interleave of the high doubles
;; (selection 1,5 3,7); maskable when AVX512VL is available.
9700 (define_insn "avx_unpckhpd256<mask_name>"
9701 [(set (match_operand:V4DF 0 "register_operand" "=v")
9702 (vec_select:V4DF
9703 (vec_concat:V8DF
9704 (match_operand:V4DF 1 "register_operand" "v")
9705 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9706 (parallel [(const_int 1) (const_int 5)
9707 (const_int 3) (const_int 7)])))]
9708 "TARGET_AVX && <mask_avx512vl_condition>"
9709 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9710 [(set_attr "type" "sselog")
9711 (set_attr "prefix" "vex")
9712 (set_attr "mode" "V4DF")])
9713
;; True cross-lane high interleave for V4DF, built from three in-lane
;; shuffles: an in-lane unpcklpd into temp 3, an in-lane unpckhpd into
;; temp 4, then a permute combining their upper lanes.  Needed because
;; 256-bit unpck insns only shuffle within 128-bit lanes (see above).
9714 (define_expand "vec_interleave_highv4df"
9715 [(set (match_dup 3)
9716 (vec_select:V4DF
9717 (vec_concat:V8DF
9718 (match_operand:V4DF 1 "register_operand")
9719 (match_operand:V4DF 2 "nonimmediate_operand"))
9720 (parallel [(const_int 0) (const_int 4)
9721 (const_int 2) (const_int 6)])))
9722 (set (match_dup 4)
9723 (vec_select:V4DF
9724 (vec_concat:V8DF
9725 (match_dup 1)
9726 (match_dup 2))
9727 (parallel [(const_int 1) (const_int 5)
9728 (const_int 3) (const_int 7)])))
9729 (set (match_operand:V4DF 0 "register_operand")
9730 (vec_select:V4DF
9731 (vec_concat:V8DF
9732 (match_dup 3)
9733 (match_dup 4))
9734 (parallel [(const_int 2) (const_int 3)
9735 (const_int 6) (const_int 7)])))]
9736 "TARGET_AVX"
9737 {
9738 operands[3] = gen_reg_rtx (V4DFmode);
9739 operands[4] = gen_reg_rtx (V4DFmode);
9740 })
9741
9742
;; Masked 128-bit VUNPCKHPD: high doubles of operands 1 and 2, merged
;; with operand 3 (register or zero) under mask register operand 4.
9743 (define_insn "avx512vl_unpckhpd128_mask"
9744 [(set (match_operand:V2DF 0 "register_operand" "=v")
9745 (vec_merge:V2DF
9746 (vec_select:V2DF
9747 (vec_concat:V4DF
9748 (match_operand:V2DF 1 "register_operand" "v")
9749 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9750 (parallel [(const_int 1) (const_int 3)]))
9751 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9752 (match_operand:QI 4 "register_operand" "Yk")))]
9753 "TARGET_AVX512VL"
9754 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9755 [(set_attr "type" "sselog")
9756 (set_attr "prefix" "evex")
9757 (set_attr "mode" "V2DF")])
9758
;; Standard vec_interleave_high pattern for V2DF.  If the operand
;; combination is not one the *vec_interleave_highv2df insn accepts,
;; force operand 2 into a register so the insn predicate holds.
9759 (define_expand "vec_interleave_highv2df"
9760 [(set (match_operand:V2DF 0 "register_operand")
9761 (vec_select:V2DF
9762 (vec_concat:V4DF
9763 (match_operand:V2DF 1 "nonimmediate_operand")
9764 (match_operand:V2DF 2 "nonimmediate_operand"))
9765 (parallel [(const_int 1)
9766 (const_int 3)])))]
9767 "TARGET_SSE2"
9768 {
9769 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9770 operands[2] = force_reg (V2DFmode, operands[2]);
9771 })
9772
;; V2DF high interleave.  Besides the obvious unpckhpd forms, memory
;; operands allow cheaper encodings: movddup of the high half (%H1),
;; movlpd/vmovlpd loading the high half into the low slot, and movhpd
;; storing the high element to memory.
9773 (define_insn "*vec_interleave_highv2df"
9774 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9775 (vec_select:V2DF
9776 (vec_concat:V4DF
9777 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9778 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9779 (parallel [(const_int 1)
9780 (const_int 3)])))]
9781 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9782 "@
9783 unpckhpd\t{%2, %0|%0, %2}
9784 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9785 %vmovddup\t{%H1, %0|%0, %H1}
9786 movlpd\t{%H1, %0|%0, %H1}
9787 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9788 %vmovhpd\t{%1, %0|%q0, %1}"
9789 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9790 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9791 (set (attr "prefix_data16")
9792 (if_then_else (eq_attr "alternative" "3,5")
9793 (const_string "1")
9794 (const_string "*")))
9795 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9796 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9797
;; 512-bit VMOVDDUP: duplicate each even-indexed double of operand 1,
;; expressed as an unpcklpd of the operand with itself.
9798 (define_expand "avx512f_movddup512<mask_name>"
9799 [(set (match_operand:V8DF 0 "register_operand")
9800 (vec_select:V8DF
9801 (vec_concat:V16DF
9802 (match_operand:V8DF 1 "nonimmediate_operand")
9803 (match_dup 1))
9804 (parallel [(const_int 0) (const_int 8)
9805 (const_int 2) (const_int 10)
9806 (const_int 4) (const_int 12)
9807 (const_int 6) (const_int 14)])))]
9808 "TARGET_AVX512F")
9809
;; 512-bit VUNPCKLPD expander: interleave even-indexed doubles within
;; each 128-bit lane (selection 0,8 2,10 4,12 6,14).
9810 (define_expand "avx512f_unpcklpd512<mask_name>"
9811 [(set (match_operand:V8DF 0 "register_operand")
9812 (vec_select:V8DF
9813 (vec_concat:V16DF
9814 (match_operand:V8DF 1 "register_operand")
9815 (match_operand:V8DF 2 "nonimmediate_operand"))
9816 (parallel [(const_int 0) (const_int 8)
9817 (const_int 2) (const_int 10)
9818 (const_int 4) (const_int 12)
9819 (const_int 6) (const_int 14)])))]
9820 "TARGET_AVX512F")
9821
;; 512-bit unpcklpd insn.  When operand 2 equals operand 1 (constraint
;; "1") the pattern is really a movddup, so emit VMOVDDUP instead of
;; VUNPCKLPD in the first alternative.
9822 (define_insn "*avx512f_unpcklpd512<mask_name>"
9823 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9824 (vec_select:V8DF
9825 (vec_concat:V16DF
9826 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9827 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9828 (parallel [(const_int 0) (const_int 8)
9829 (const_int 2) (const_int 10)
9830 (const_int 4) (const_int 12)
9831 (const_int 6) (const_int 14)])))]
9832 "TARGET_AVX512F"
9833 "@
9834 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9835 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9836 [(set_attr "type" "sselog")
9837 (set_attr "prefix" "evex")
9838 (set_attr "mode" "V8DF")])
9839
9840 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit VMOVDDUP: duplicate each even-indexed double, expressed as a
;; self-unpcklpd; maskable under AVX512VL.
9841 (define_expand "avx_movddup256<mask_name>"
9842 [(set (match_operand:V4DF 0 "register_operand")
9843 (vec_select:V4DF
9844 (vec_concat:V8DF
9845 (match_operand:V4DF 1 "nonimmediate_operand")
9846 (match_dup 1))
9847 (parallel [(const_int 0) (const_int 4)
9848 (const_int 2) (const_int 6)])))]
9849 "TARGET_AVX && <mask_avx512vl_condition>")
9850
;; 256-bit VUNPCKLPD expander: per-lane interleave of the even-indexed
;; doubles (selection 0,4 2,6); maskable under AVX512VL.
9851 (define_expand "avx_unpcklpd256<mask_name>"
9852 [(set (match_operand:V4DF 0 "register_operand")
9853 (vec_select:V4DF
9854 (vec_concat:V8DF
9855 (match_operand:V4DF 1 "register_operand")
9856 (match_operand:V4DF 2 "nonimmediate_operand"))
9857 (parallel [(const_int 0) (const_int 4)
9858 (const_int 2) (const_int 6)])))]
9859 "TARGET_AVX && <mask_avx512vl_condition>")
9860
;; 256-bit unpcklpd insn.  Second alternative: operands equal and in
;; memory (constraint "1" with "m"), which is a VMOVDDUP load.
9861 (define_insn "*avx_unpcklpd256<mask_name>"
9862 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9863 (vec_select:V4DF
9864 (vec_concat:V8DF
9865 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9866 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9867 (parallel [(const_int 0) (const_int 4)
9868 (const_int 2) (const_int 6)])))]
9869 "TARGET_AVX && <mask_avx512vl_condition>"
9870 "@
9871 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9872 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9873 [(set_attr "type" "sselog")
9874 (set_attr "prefix" "vex")
9875 (set_attr "mode" "V4DF")])
9876
;; True cross-lane low interleave for V4DF, mirroring
;; vec_interleave_highv4df: in-lane unpcklpd and unpckhpd into temps,
;; then a permute combining their lower lanes (selection 0,1 4,5).
9877 (define_expand "vec_interleave_lowv4df"
9878 [(set (match_dup 3)
9879 (vec_select:V4DF
9880 (vec_concat:V8DF
9881 (match_operand:V4DF 1 "register_operand")
9882 (match_operand:V4DF 2 "nonimmediate_operand"))
9883 (parallel [(const_int 0) (const_int 4)
9884 (const_int 2) (const_int 6)])))
9885 (set (match_dup 4)
9886 (vec_select:V4DF
9887 (vec_concat:V8DF
9888 (match_dup 1)
9889 (match_dup 2))
9890 (parallel [(const_int 1) (const_int 5)
9891 (const_int 3) (const_int 7)])))
9892 (set (match_operand:V4DF 0 "register_operand")
9893 (vec_select:V4DF
9894 (vec_concat:V8DF
9895 (match_dup 3)
9896 (match_dup 4))
9897 (parallel [(const_int 0) (const_int 1)
9898 (const_int 4) (const_int 5)])))]
9899 "TARGET_AVX"
9900 {
9901 operands[3] = gen_reg_rtx (V4DFmode);
9902 operands[4] = gen_reg_rtx (V4DFmode);
9903 })
9904
;; Masked 128-bit VUNPCKLPD: low doubles of operands 1 and 2, merged
;; with operand 3 (register or zero) under mask register operand 4.
9905 (define_insn "avx512vl_unpcklpd128_mask"
9906 [(set (match_operand:V2DF 0 "register_operand" "=v")
9907 (vec_merge:V2DF
9908 (vec_select:V2DF
9909 (vec_concat:V4DF
9910 (match_operand:V2DF 1 "register_operand" "v")
9911 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9912 (parallel [(const_int 0) (const_int 2)]))
9913 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9914 (match_operand:QI 4 "register_operand" "Yk")))]
9915 "TARGET_AVX512VL"
9916 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9917 [(set_attr "type" "sselog")
9918 (set_attr "prefix" "evex")
9919 (set_attr "mode" "V2DF")])
9920
;; Standard vec_interleave_low pattern for V2DF.  Note the asymmetry
;; with the high expander: here operand 1 (not 2) is forced to a
;; register when the insn's operand check fails.
9921 (define_expand "vec_interleave_lowv2df"
9922 [(set (match_operand:V2DF 0 "register_operand")
9923 (vec_select:V2DF
9924 (vec_concat:V4DF
9925 (match_operand:V2DF 1 "nonimmediate_operand")
9926 (match_operand:V2DF 2 "nonimmediate_operand"))
9927 (parallel [(const_int 0)
9928 (const_int 2)])))]
9929 "TARGET_SSE2"
9930 {
9931 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9932 operands[1] = force_reg (V2DFmode, operands[1]);
9933 })
9934
;; V2DF low interleave.  Memory alternatives use cheaper encodings:
;; movddup load when both inputs are the same memory location, movhpd/
;; vmovhpd loading operand 2's low double into the high slot, and
;; movlpd storing into the high half (%H0) of a memory destination.
9935 (define_insn "*vec_interleave_lowv2df"
9936 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9937 (vec_select:V2DF
9938 (vec_concat:V4DF
9939 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9940 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9941 (parallel [(const_int 0)
9942 (const_int 2)])))]
9943 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9944 "@
9945 unpcklpd\t{%2, %0|%0, %2}
9946 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9947 %vmovddup\t{%1, %0|%0, %q1}
9948 movhpd\t{%2, %0|%0, %q2}
9949 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9950 %vmovlpd\t{%2, %H0|%H0, %2}"
9951 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9952 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9953 (set (attr "prefix_data16")
9954 (if_then_else (eq_attr "alternative" "3,5")
9955 (const_string "1")
9956 (const_string "*")))
9957 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9958 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9959
;; Split a "store the low double of a register twice" (self-unpcklpd to
;; memory) into two scalar DFmode stores of the same value.
9960 (define_split
9961 [(set (match_operand:V2DF 0 "memory_operand")
9962 (vec_select:V2DF
9963 (vec_concat:V4DF
9964 (match_operand:V2DF 1 "register_operand")
9965 (match_dup 1))
9966 (parallel [(const_int 0)
9967 (const_int 2)])))]
9968 "TARGET_SSE3 && reload_completed"
9969 [(const_int 0)]
9970 {
9971 rtx low = gen_lowpart (DFmode, operands[1]);
9972
9973 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9974 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9975 DONE;
9976 })
9977
;; Recognize a self-interleave of a memory operand selecting element K
;; and K+2 (K = 0 or 1) and turn it into a vec_duplicate of the single
;; double at byte offset K*8 — i.e. a MOVDDUP load.
9978 (define_split
9979 [(set (match_operand:V2DF 0 "register_operand")
9980 (vec_select:V2DF
9981 (vec_concat:V4DF
9982 (match_operand:V2DF 1 "memory_operand")
9983 (match_dup 1))
9984 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9985 (match_operand:SI 3 "const_int_operand")])))]
9986 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9987 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9988 {
9989 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9990 })
9991
;; Scalar VSCALEFSS/SD: scale the low element of operand 1 by operand 2,
;; upper elements passed through from operand 1 (vec_merge, index 1).
;; Supports scalar masking and embedded rounding via the subst macros.
9992 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9993 [(set (match_operand:VF_128 0 "register_operand" "=v")
9994 (vec_merge:VF_128
9995 (unspec:VF_128
9996 [(match_operand:VF_128 1 "register_operand" "v")
9997 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9998 UNSPEC_SCALEF)
9999 (match_dup 1)
10000 (const_int 1)))]
10001 "TARGET_AVX512F"
10002 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
10003 [(set_attr "prefix" "evex")
10004 (set_attr "mode" "<ssescalarmode>")])
10005
;; Packed VSCALEFPS/PD over all VF_AVX512VL modes, with optional masking
;; and embedded rounding.
10006 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
10007 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10008 (unspec:VF_AVX512VL
10009 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
10010 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
10011 UNSPEC_SCALEF))]
10012 "TARGET_AVX512F"
10013 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
10014 [(set_attr "prefix" "evex")
10015 (set_attr "mode" "<MODE>")])
10016
;; Zero-masked VPTERNLOG expander: forwards to the _maskz_1 pattern with
;; a zero vector as the merge source.
10017 (define_expand "<avx512>_vternlog<mode>_maskz"
10018 [(match_operand:VI48_AVX512VL 0 "register_operand")
10019 (match_operand:VI48_AVX512VL 1 "register_operand")
10020 (match_operand:VI48_AVX512VL 2 "register_operand")
10021 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
10022 (match_operand:SI 4 "const_0_to_255_operand")
10023 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10024 "TARGET_AVX512F"
10025 {
10026 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
10027 operands[0], operands[1], operands[2], operands[3],
10028 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
10029 DONE;
10030 })
10031
;; VPTERNLOGD/Q: three-operand bitwise ternary logic selected by the
;; imm8 truth table in operand 4.  Operand 3 accepts an embedded
;; broadcast ("Br" in the constraint / bcst_vector_operand predicate).
10032 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
10033 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10034 (unspec:VI48_AVX512VL
10035 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
10036 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
10037 (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr")
10038 (match_operand:SI 4 "const_0_to_255_operand")]
10039 UNSPEC_VTERNLOG))]
10040 "TARGET_AVX512F"
10041 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
10042 [(set_attr "type" "sselog")
10043 (set_attr "prefix" "evex")
10044 (set_attr "mode" "<sseinsnmode>")])
10045
;; VPTERNLOG over the generic V iterator (all vector modes), used as the
;; target of the *vpternlog<mode>_{1,2,3} combine splitters below.
;; <ternlogsuffix> picks the element-size suffix; operand 3 may be an
;; embedded broadcast.
10046 (define_insn "*<avx512>_vternlog<mode>_all"
10047 [(set (match_operand:V 0 "register_operand" "=v")
10048 (unspec:V
10049 [(match_operand:V 1 "register_operand" "0")
10050 (match_operand:V 2 "register_operand" "v")
10051 (match_operand:V 3 "bcst_vector_operand" "vmBr")
10052 (match_operand:SI 4 "const_0_to_255_operand")]
10053 UNSPEC_VTERNLOG))]
10054 "TARGET_AVX512F"
10055 "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
10056 [(set_attr "type" "sselog")
10057 (set_attr "prefix" "evex")
10058 (set_attr "mode" "<sseinsnmode>")])
10059
10060 ;; There must be lots of other combinations like
10061 ;;
10062 ;; (any_logic:V
10063 ;; (any_logic:V op1 op2)
10064 ;; (any_logic:V op1 op3))
10065 ;;
10066 ;; (any_logic:V
10067 ;; (any_logic:V
10068 ;; (any_logic:V op1, op2)
10069 ;; op3)
10070 ;; op1)
10071 ;;
10072 ;; and so on.
10073
;; Independent copies of the and/ior/xor code iterator so nested logic
;; expressions can vary each level separately; logic_op maps each code
;; to its C operator for computing ternlog truth-table immediates.
10074 (define_code_iterator any_logic1 [and ior xor])
10075 (define_code_iterator any_logic2 [and ior xor])
10076 (define_code_attr logic_op [(and "&") (ior "|") (xor "^")])
10077
;; Fold (op1 LOGIC1 op2) LOGIC (op3 LOGIC2 op4), where each operand may
;; be optionally negated (reg_or_notreg_operand), into one VPTERNLOG —
;; provided the four inputs contain at most three distinct registers
;; (one of op3/op4 must match op1 or op2).
;;
;; The imm8 is computed from canonical truth tables: in the emitted
;; VPTERNLOG the third source contributes 0xF0, the second 0xCC and the
;; first 0xAA per bit position.  A NOT on an operand complements its
;; table; evaluating the original logic expression on the tables yields
;; the immediate.
10078 (define_insn_and_split "*<avx512>_vpternlog<mode>_1"
10079 [(set (match_operand:V 0 "register_operand")
10080 (any_logic:V
10081 (any_logic1:V
10082 (match_operand:V 1 "reg_or_notreg_operand")
10083 (match_operand:V 2 "reg_or_notreg_operand"))
10084 (any_logic2:V
10085 (match_operand:V 3 "reg_or_notreg_operand")
10086 (match_operand:V 4 "reg_or_notreg_operand"))))]
10087 "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
10088 && ix86_pre_reload_split ()
10089 && (rtx_equal_p (STRIP_UNARY (operands[1]),
10090 STRIP_UNARY (operands[4]))
10091 || rtx_equal_p (STRIP_UNARY (operands[2]),
10092 STRIP_UNARY (operands[4]))
10093 || rtx_equal_p (STRIP_UNARY (operands[1]),
10094 STRIP_UNARY (operands[3]))
10095 || rtx_equal_p (STRIP_UNARY (operands[2]),
10096 STRIP_UNARY (operands[3])))"
10097 "#"
10098 "&& 1"
10099 [(set (match_dup 0)
10100 (unspec:V
10101 [(match_dup 6)
10102 (match_dup 2)
10103 (match_dup 1)
10104 (match_dup 5)]
10105 UNSPEC_VTERNLOG))]
10106 {
10107 /* VPTERNLOGD reg6, reg2, reg1, imm8. */
10108 int reg6 = 0xF0;
10109 int reg2 = 0xCC;
10110 int reg1 = 0xAA;
10111 int reg3 = 0;
10112 int reg4 = 0;
10113 int reg_mask, tmp1, tmp2;
10114 /* Decide which of op3/op4 duplicates op1/op2; the non-duplicate
10115 becomes the third vpternlog source (operands[6]) with table 0xF0,
10116 the duplicate inherits the matched operand's table.  */
10117 if (rtx_equal_p (STRIP_UNARY (operands[1]),
10118 STRIP_UNARY (operands[4])))
10119 {
10120 reg4 = reg1;
10121 reg3 = reg6;
10122 operands[6] = operands[3];
10123 }
10124 else if (rtx_equal_p (STRIP_UNARY (operands[2]),
10125 STRIP_UNARY (operands[4])))
10126 {
10127 reg4 = reg2;
10128 reg3 = reg6;
10129 operands[6] = operands[3];
10130 }
10131 else if (rtx_equal_p (STRIP_UNARY (operands[1]),
10132 STRIP_UNARY (operands[3])))
10133 {
10134 reg4 = reg6;
10135 reg3 = reg1;
10136 operands[6] = operands[4];
10137 }
10138 else
10139 {
10140 reg4 = reg6;
10141 reg3 = reg2;
10142 operands[6] = operands[4];
10143 }
10144
10145 /* A NOT wrapper complements that operand's truth table.  */
10146 reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
10147 reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
10148 reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
10149 reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
10150
10151 /* Evaluate the original expression on the tables to get the imm8.  */
10152 tmp1 = reg1 <any_logic1:logic_op> reg2;
10153 tmp2 = reg3 <any_logic2:logic_op> reg4;
10154 reg_mask = tmp1 <any_logic:logic_op> tmp2;
10155 reg_mask &= 0xFF;
10156
10157 operands[1] = STRIP_UNARY (operands[1]);
10158 operands[2] = STRIP_UNARY (operands[2]);
10159 operands[6] = STRIP_UNARY (operands[6]);
10160 operands[5] = GEN_INT (reg_mask);
10161 })
10157
;; Like *vpternlog_1 but for the left-nested shape
;; ((op1 LOGIC2 op2) LOGIC1 op3) LOGIC op4, again requiring at most
;; three distinct registers.  Same truth-table immediate construction;
;; only the order in which the tables are combined differs.
10158 (define_insn_and_split "*<avx512>_vpternlog<mode>_2"
10159 [(set (match_operand:V 0 "register_operand")
10160 (any_logic:V
10161 (any_logic1:V
10162 (any_logic2:V
10163 (match_operand:V 1 "reg_or_notreg_operand")
10164 (match_operand:V 2 "reg_or_notreg_operand"))
10165 (match_operand:V 3 "reg_or_notreg_operand"))
10166 (match_operand:V 4 "reg_or_notreg_operand")))]
10167 "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
10168 && ix86_pre_reload_split ()
10169 && (rtx_equal_p (STRIP_UNARY (operands[1]),
10170 STRIP_UNARY (operands[4]))
10171 || rtx_equal_p (STRIP_UNARY (operands[2]),
10172 STRIP_UNARY (operands[4]))
10173 || rtx_equal_p (STRIP_UNARY (operands[1]),
10174 STRIP_UNARY (operands[3]))
10175 || rtx_equal_p (STRIP_UNARY (operands[2]),
10176 STRIP_UNARY (operands[3])))"
10177 "#"
10178 "&& 1"
10179 [(set (match_dup 0)
10180 (unspec:V
10181 [(match_dup 6)
10182 (match_dup 2)
10183 (match_dup 1)
10184 (match_dup 5)]
10185 UNSPEC_VTERNLOG))]
10186 {
10187 /* VPTERNLOGD reg6, reg2, reg1, imm8. */
10188 int reg6 = 0xF0;
10189 int reg2 = 0xCC;
10190 int reg1 = 0xAA;
10191 int reg3 = 0;
10192 int reg4 = 0;
10193 int reg_mask, tmp1, tmp2;
10194 /* Pick the non-duplicated operand of op3/op4 as the third source
10195 (operands[6], table 0xF0); the duplicate reuses its match's table.  */
10196 if (rtx_equal_p (STRIP_UNARY (operands[1]),
10197 STRIP_UNARY (operands[4])))
10198 {
10199 reg4 = reg1;
10200 reg3 = reg6;
10201 operands[6] = operands[3];
10202 }
10203 else if (rtx_equal_p (STRIP_UNARY (operands[2]),
10204 STRIP_UNARY (operands[4])))
10205 {
10206 reg4 = reg2;
10207 reg3 = reg6;
10208 operands[6] = operands[3];
10209 }
10210 else if (rtx_equal_p (STRIP_UNARY (operands[1]),
10211 STRIP_UNARY (operands[3])))
10212 {
10213 reg4 = reg6;
10214 reg3 = reg1;
10215 operands[6] = operands[4];
10216 }
10217 else
10218 {
10219 reg4 = reg6;
10220 reg3 = reg2;
10221 operands[6] = operands[4];
10222 }
10223
10224 /* A NOT wrapper complements that operand's truth table.  */
10225 reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
10226 reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
10227 reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
10228 reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
10229
10230 /* Combine left-to-right, matching the nested expression shape.  */
10231 tmp1 = reg1 <any_logic2:logic_op> reg2;
10232 tmp2 = tmp1 <any_logic1:logic_op> reg3;
10233 reg_mask = tmp2 <any_logic:logic_op> reg4;
10234 reg_mask &= 0xFF;
10235
10236 operands[1] = STRIP_UNARY (operands[1]);
10237 operands[2] = STRIP_UNARY (operands[2]);
10238 operands[6] = STRIP_UNARY (operands[6]);
10239 operands[5] = GEN_INT (reg_mask);
10240 })
10237
;; Fold (op1 LOGIC1 op2) LOGIC op3 — three distinct (possibly negated)
;; inputs — into one VPTERNLOG.  No duplicate-operand requirement here,
;; so the condition is only the mode/ISA check and pre-reload split.
10238 (define_insn_and_split "*<avx512>_vpternlog<mode>_3"
10239 [(set (match_operand:V 0 "register_operand")
10240 (any_logic:V
10241 (any_logic1:V
10242 (match_operand:V 1 "reg_or_notreg_operand")
10243 (match_operand:V 2 "reg_or_notreg_operand"))
10244 (match_operand:V 3 "reg_or_notreg_operand")))]
10245 "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
10246 && ix86_pre_reload_split ()"
10247 "#"
10248 "&& 1"
10249 [(set (match_dup 0)
10250 (unspec:V
10251 [(match_dup 3)
10252 (match_dup 2)
10253 (match_dup 1)
10254 (match_dup 4)]
10255 UNSPEC_VTERNLOG))]
10256 {
10257 /* VPTERNLOGD reg3, reg2, reg1, imm8. */
10258 int reg3 = 0xF0;
10259 int reg2 = 0xCC;
10260 int reg1 = 0xAA;
10261 int reg_mask, tmp1;
10262
10263 /* A NOT wrapper complements that operand's truth table.  */
10264 reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
10265 reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
10266 reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
10267
10268 /* Evaluate the expression on the tables to get the imm8.  */
10269 tmp1 = reg1 <any_logic1:logic_op> reg2;
10270 reg_mask = tmp1 <any_logic:logic_op> reg3;
10271 reg_mask &= 0xFF;
10272
10273 operands[1] = STRIP_UNARY (operands[1]);
10274 operands[2] = STRIP_UNARY (operands[2]);
10275 operands[3] = STRIP_UNARY (operands[3]);
10276 operands[4] = GEN_INT (reg_mask);
10277 })
10276
10277
;; Merge-masked VPTERNLOG: result elements where mask operand 5 is clear
;; keep the value of operand 1 (which is also the first logic source).
10278 (define_insn "<avx512>_vternlog<mode>_mask"
10279 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10280 (vec_merge:VI48_AVX512VL
10281 (unspec:VI48_AVX512VL
10282 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
10283 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
10284 (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr")
10285 (match_operand:SI 4 "const_0_to_255_operand")]
10286 UNSPEC_VTERNLOG)
10287 (match_dup 1)
10288 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
10289 "TARGET_AVX512F"
10290 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
10291 [(set_attr "type" "sselog")
10292 (set_attr "prefix" "evex")
10293 (set_attr "mode" "<sseinsnmode>")])
10294
;; Packed VGETEXPPS/PD: extract biased exponents as floats, with
;; optional masking and SAE (suppress-all-exceptions) support.
10295 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
10296 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10297 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
10298 UNSPEC_GETEXP))]
10299 "TARGET_AVX512F"
10300 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
10301 [(set_attr "prefix" "evex")
10302 (set_attr "mode" "<MODE>")])
10303
;; Scalar VGETEXPSS/SD: exponent of the low element of operand 2, upper
;; elements taken from operand 1; scalar masking and SAE supported.
10304 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
10305 [(set (match_operand:VF_128 0 "register_operand" "=v")
10306 (vec_merge:VF_128
10307 (unspec:VF_128
10308 [(match_operand:VF_128 1 "register_operand" "v")
10309 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
10310 UNSPEC_GETEXP)
10311 (match_dup 1)
10312 (const_int 1)))]
10313 "TARGET_AVX512F"
10314 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
10315 [(set_attr "prefix" "evex")
10316 (set_attr "mode" "<ssescalarmode>")])
10317
;; VALIGND/Q: concatenate operands 2:1 and shift right by operand 3
;; elements; optionally masked (operand 4 via <mask_operand4>).
10318 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
10319 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10320 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10321 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
10322 (match_operand:SI 3 "const_0_to_255_operand")]
10323 UNSPEC_ALIGN))]
10324 "TARGET_AVX512F"
10325 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
10326 [(set_attr "prefix" "evex")
10327 (set_attr "mode" "<sseinsnmode>")])
10328
;; Masked 512-bit SHUFPS expander: decode the 8-bit shuffle immediate
;; into the 16 explicit element selectors of avx512f_shufps512_1_mask.
;; Per 128-bit lane, the low two results come from operand 1 and the
;; high two from operand 2 (offset 16 in the concatenated selection),
;; each lane adding 4 to the base indices.
10329 (define_expand "avx512f_shufps512_mask"
10330 [(match_operand:V16SF 0 "register_operand")
10331 (match_operand:V16SF 1 "register_operand")
10332 (match_operand:V16SF 2 "nonimmediate_operand")
10333 (match_operand:SI 3 "const_0_to_255_operand")
10334 (match_operand:V16SF 4 "register_operand")
10335 (match_operand:HI 5 "register_operand")]
10336 "TARGET_AVX512F"
10337 {
10338 int mask = INTVAL (operands[3]);
10339 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
10340 GEN_INT ((mask >> 0) & 3),
10341 GEN_INT ((mask >> 2) & 3),
10342 GEN_INT (((mask >> 4) & 3) + 16),
10343 GEN_INT (((mask >> 6) & 3) + 16),
10344 GEN_INT (((mask >> 0) & 3) + 4),
10345 GEN_INT (((mask >> 2) & 3) + 4),
10346 GEN_INT (((mask >> 4) & 3) + 20),
10347 GEN_INT (((mask >> 6) & 3) + 20),
10348 GEN_INT (((mask >> 0) & 3) + 8),
10349 GEN_INT (((mask >> 2) & 3) + 8),
10350 GEN_INT (((mask >> 4) & 3) + 24),
10351 GEN_INT (((mask >> 6) & 3) + 24),
10352 GEN_INT (((mask >> 0) & 3) + 12),
10353 GEN_INT (((mask >> 2) & 3) + 12),
10354 GEN_INT (((mask >> 4) & 3) + 28),
10355 GEN_INT (((mask >> 6) & 3) + 28),
10356 operands[4], operands[5]));
10357 DONE;
10358 })
10359
10360
;; Zero-masked VFIXUPIMM expander: forwards to the _maskz_1 pattern with
;; a zero merge vector; SAE variants handled by the round_saeonly substs.
10361 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
10362 [(match_operand:VF_AVX512VL 0 "register_operand")
10363 (match_operand:VF_AVX512VL 1 "register_operand")
10364 (match_operand:VF_AVX512VL 2 "register_operand")
10365 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
10366 (match_operand:SI 4 "const_0_to_255_operand")
10367 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10368 "TARGET_AVX512F"
10369 {
10370 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
10371 operands[0], operands[1], operands[2], operands[3],
10372 operands[4], CONST0_RTX (<MODE>mode), operands[5]
10373 <round_saeonly_expand_operand6>));
10374 DONE;
10375 })
10376
;; vfixupimmps/pd on a full vector.  Operand 0 must match operand 1 ("0"
;; constraint) because the hardware instruction reads and writes the
;; destination; operand 4 is the imm8 table selector.
10377 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
10378 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10379 (unspec:VF_AVX512VL
10380 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
10381 (match_operand:VF_AVX512VL 2 "register_operand" "v")
10382 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
10383 (match_operand:SI 4 "const_0_to_255_operand")]
10384 UNSPEC_FIXUPIMM))]
10385 "TARGET_AVX512F"
10386 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
10387 [(set_attr "prefix" "evex")
10388 (set_attr "mode" "<MODE>")])
10389
;; Merge-masked vfixupimm: vec_merge keeps elements of operand 1 where the
;; mask (operand 5, %{%5%} in the template) is clear.
10390 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
10391 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10392 (vec_merge:VF_AVX512VL
10393 (unspec:VF_AVX512VL
10394 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
10395 (match_operand:VF_AVX512VL 2 "register_operand" "v")
10396 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
10397 (match_operand:SI 4 "const_0_to_255_operand")]
10398 UNSPEC_FIXUPIMM)
10399 (match_dup 1)
10400 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
10401 "TARGET_AVX512F"
10402 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
10403 [(set_attr "prefix" "evex")
10404 (set_attr "mode" "<MODE>")])
10405
;; Zero-masked scalar vfixupimm expander; like the vector variant above it
;; passes CONST0_RTX as the merge source to the maskz_1 pattern.
10406 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
10407 [(match_operand:VF_128 0 "register_operand")
10408 (match_operand:VF_128 1 "register_operand")
10409 (match_operand:VF_128 2 "register_operand")
10410 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
10411 (match_operand:SI 4 "const_0_to_255_operand")
10412 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10413 "TARGET_AVX512F"
10414 {
10415 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
10416 operands[0], operands[1], operands[2], operands[3],
10417 operands[4], CONST0_RTX (<MODE>mode), operands[5]
10418 <round_saeonly_expand_operand6>));
10419 DONE;
10420 })
10421
;; Scalar vfixupimmss/sd: the vec_merge with (const_int 1) replaces only
;; element 0 with the UNSPEC result; the remaining elements come from
;; operand 2, matching the hardware's pass-through of upper bits.
10422 (define_insn "avx512f_sfixupimm<mode><maskz_scalar_name><round_saeonly_name>"
10423 [(set (match_operand:VF_128 0 "register_operand" "=v")
10424 (vec_merge:VF_128
10425 (unspec:VF_128
10426 [(match_operand:VF_128 1 "register_operand" "0")
10427 (match_operand:VF_128 2 "register_operand" "v")
10428 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
10429 (match_operand:SI 4 "const_0_to_255_operand")]
10430 UNSPEC_FIXUPIMM)
10431 (match_dup 2)
10432 (const_int 1)))]
10433 "TARGET_AVX512F"
10434 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_maskz_scalar_op5>%3, %2, %0<maskz_scalar_op5>|%0<maskz_scalar_op5>, %2, %<iptr>3<round_saeonly_maskz_scalar_op5>, %4}";
10435 [(set_attr "prefix" "evex")
10436 (set_attr "mode" "<ssescalarmode>")])
10437
;; Merge-masked scalar vfixupimm.  Two nested vec_merges: the inner one
;; places the scalar result into element 0 over operand 2's upper bits;
;; the outer one applies write mask operand 5, merging with operand 1.
10438 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
10439 [(set (match_operand:VF_128 0 "register_operand" "=v")
10440 (vec_merge:VF_128
10441 (vec_merge:VF_128
10442 (unspec:VF_128
10443 [(match_operand:VF_128 1 "register_operand" "0")
10444 (match_operand:VF_128 2 "register_operand" "v")
10445 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
10446 (match_operand:SI 4 "const_0_to_255_operand")]
10447 UNSPEC_FIXUPIMM)
10448 (match_dup 2)
10449 (const_int 1))
10450 (match_dup 1)
10451 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
10452 "TARGET_AVX512F"
10453 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
10454 [(set_attr "prefix" "evex")
10455 (set_attr "mode" "<ssescalarmode>")])
10456
;; Vector vrndscaleps/pd: round each element according to imm8 operand 2,
;; with optional masking (<mask_name>) and SAE (<round_saeonly_name>).
10457 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
10458 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10459 (unspec:VF_AVX512VL
10460 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
10461 (match_operand:SI 2 "const_0_to_255_operand")]
10462 UNSPEC_ROUND))]
10463 "TARGET_AVX512F"
10464 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
10465 [(set_attr "length_immediate" "1")
10466 (set_attr "prefix" "evex")
10467 (set_attr "mode" "<MODE>")])
10468
;; Scalar vrndscaless/sd: element 0 is the rounded value of operand 2,
;; the rest is taken from operand 1 (vec_merge with const_int 1).
10469 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
10470 [(set (match_operand:VF_128 0 "register_operand" "=v")
10471 (vec_merge:VF_128
10472 (unspec:VF_128
10473 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
10474 (match_operand:SI 3 "const_0_to_255_operand")]
10475 UNSPEC_ROUND)
10476 (match_operand:VF_128 1 "register_operand" "v")
10477 (const_int 1)))]
10478 "TARGET_AVX512F"
10479 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
10480 [(set_attr "length_immediate" "1")
10481 (set_attr "prefix" "evex")
10482 (set_attr "mode" "<MODE>")])
10483
;; Anonymous combiner pattern: a scalar UNSPEC_ROUND broadcast via
;; vec_duplicate and merged into element 0 is still just vrndscaless/sd.
10484 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
10485 [(set (match_operand:VF_128 0 "register_operand" "=v")
10486 (vec_merge:VF_128
10487 (vec_duplicate:VF_128
10488 (unspec:<ssescalarmode>
10489 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
10490 (match_operand:SI 3 "const_0_to_255_operand")]
10491 UNSPEC_ROUND))
10492 (match_operand:VF_128 1 "register_operand" "v")
10493 (const_int 1)))]
10494 "TARGET_AVX512F"
10495 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
10496 [(set_attr "length_immediate" "1")
10497 (set_attr "prefix" "evex")
10498 (set_attr "mode" "<MODE>")])
10499
10500 ;; One bit in mask selects 2 elements.
;; vshufps on V16SF expressed as a vec_select over the 32-element concat
;; of operands 1 and 2.  The insn condition requires the same 2-bit
;; selector in all four 128-bit lanes (+4/+8/+12 offsets), so the C body
;; can re-pack operands 3-6 into the single imm8 the instruction takes.
10501 (define_insn "avx512f_shufps512_1<mask_name>"
10502 [(set (match_operand:V16SF 0 "register_operand" "=v")
10503 (vec_select:V16SF
10504 (vec_concat:V32SF
10505 (match_operand:V16SF 1 "register_operand" "v")
10506 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
10507 (parallel [(match_operand 3 "const_0_to_3_operand")
10508 (match_operand 4 "const_0_to_3_operand")
10509 (match_operand 5 "const_16_to_19_operand")
10510 (match_operand 6 "const_16_to_19_operand")
10511 (match_operand 7 "const_4_to_7_operand")
10512 (match_operand 8 "const_4_to_7_operand")
10513 (match_operand 9 "const_20_to_23_operand")
10514 (match_operand 10 "const_20_to_23_operand")
10515 (match_operand 11 "const_8_to_11_operand")
10516 (match_operand 12 "const_8_to_11_operand")
10517 (match_operand 13 "const_24_to_27_operand")
10518 (match_operand 14 "const_24_to_27_operand")
10519 (match_operand 15 "const_12_to_15_operand")
10520 (match_operand 16 "const_12_to_15_operand")
10521 (match_operand 17 "const_28_to_31_operand")
10522 (match_operand 18 "const_28_to_31_operand")])))]
10523 "TARGET_AVX512F
10524 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
10525 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
10526 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
10527 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
10528 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
10529 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
10530 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
10531 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
10532 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
10533 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
10534 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
10535 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
10536 {
10537 int mask;
10538 mask = INTVAL (operands[3]);
10539 mask |= INTVAL (operands[4]) << 2;
10540 mask |= (INTVAL (operands[5]) - 16) << 4;
10541 mask |= (INTVAL (operands[6]) - 16) << 6;
10542 operands[3] = GEN_INT (mask);
10543
10544 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
10545 }
10546 [(set_attr "type" "sselog")
10547 (set_attr "length_immediate" "1")
10548 (set_attr "prefix" "evex")
10549 (set_attr "mode" "V16SF")])
10550
;; Expander for the masked 512-bit vshufpd builtin: each of the 8 imm bits
;; picks the odd/even element of the corresponding source pair (elements
;; >= 8 index into operand 2 via the concat used by shufpd512_1).
10551 (define_expand "avx512f_shufpd512_mask"
10552 [(match_operand:V8DF 0 "register_operand")
10553 (match_operand:V8DF 1 "register_operand")
10554 (match_operand:V8DF 2 "nonimmediate_operand")
10555 (match_operand:SI 3 "const_0_to_255_operand")
10556 (match_operand:V8DF 4 "register_operand")
10557 (match_operand:QI 5 "register_operand")]
10558 "TARGET_AVX512F"
10559 {
10560 int mask = INTVAL (operands[3]);
10561 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
10562 GEN_INT (mask & 1),
10563 GEN_INT (mask & 2 ? 9 : 8),
10564 GEN_INT (mask & 4 ? 3 : 2),
10565 GEN_INT (mask & 8 ? 11 : 10),
10566 GEN_INT (mask & 16 ? 5 : 4),
10567 GEN_INT (mask & 32 ? 13 : 12),
10568 GEN_INT (mask & 64 ? 7 : 6),
10569 GEN_INT (mask & 128 ? 15 : 14),
10570 operands[4], operands[5]));
10571 DONE;
10572 })
10573
;; vshufpd on V8DF as a vec_select over the 16-element concat of the two
;; sources.  The C body reconstructs the imm8 from the eight selector
;; operands (subtracting each operand's range base before shifting).
10574 (define_insn "avx512f_shufpd512_1<mask_name>"
10575 [(set (match_operand:V8DF 0 "register_operand" "=v")
10576 (vec_select:V8DF
10577 (vec_concat:V16DF
10578 (match_operand:V8DF 1 "register_operand" "v")
10579 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
10580 (parallel [(match_operand 3 "const_0_to_1_operand")
10581 (match_operand 4 "const_8_to_9_operand")
10582 (match_operand 5 "const_2_to_3_operand")
10583 (match_operand 6 "const_10_to_11_operand")
10584 (match_operand 7 "const_4_to_5_operand")
10585 (match_operand 8 "const_12_to_13_operand")
10586 (match_operand 9 "const_6_to_7_operand")
10587 (match_operand 10 "const_14_to_15_operand")])))]
10588 "TARGET_AVX512F"
10589 {
10590 int mask;
10591 mask = INTVAL (operands[3]);
10592 mask |= (INTVAL (operands[4]) - 8) << 1;
10593 mask |= (INTVAL (operands[5]) - 2) << 2;
10594 mask |= (INTVAL (operands[6]) - 10) << 3;
10595 mask |= (INTVAL (operands[7]) - 4) << 4;
10596 mask |= (INTVAL (operands[8]) - 12) << 5;
10597 mask |= (INTVAL (operands[9]) - 6) << 6;
10598 mask |= (INTVAL (operands[10]) - 14) << 7;
10599 operands[3] = GEN_INT (mask);
10600
10601 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
10602 }
10603 [(set_attr "type" "sselog")
10604 (set_attr "length_immediate" "1")
10605 (set_attr "prefix" "evex")
10606 (set_attr "mode" "V8DF")])
10607
;; Expander for 256-bit vshufpd: translates the 4-bit immediate into the
;; four selector constants expected by avx_shufpd256_1 (indices >= 4 come
;; from operand 2 via the concat in that pattern).
10608 (define_expand "avx_shufpd256<mask_expand4_name>"
10609 [(match_operand:V4DF 0 "register_operand")
10610 (match_operand:V4DF 1 "register_operand")
10611 (match_operand:V4DF 2 "nonimmediate_operand")
10612 (match_operand:SI 3 "const_int_operand")]
10613 "TARGET_AVX"
10614 {
10615 int mask = INTVAL (operands[3]);
10616 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
10617 operands[1],
10618 operands[2],
10619 GEN_INT (mask & 1),
10620 GEN_INT (mask & 2 ? 5 : 4),
10621 GEN_INT (mask & 4 ? 3 : 2),
10622 GEN_INT (mask & 8 ? 7 : 6)
10623 <mask_expand4_args>));
10624 DONE;
10625 })
10626
;; 256-bit vshufpd insn; rebuilds the imm8 from the four selector
;; operands.  Masking is only available with AVX512VL
;; (<mask_avx512vl_condition>).
10627 (define_insn "avx_shufpd256_1<mask_name>"
10628 [(set (match_operand:V4DF 0 "register_operand" "=v")
10629 (vec_select:V4DF
10630 (vec_concat:V8DF
10631 (match_operand:V4DF 1 "register_operand" "v")
10632 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
10633 (parallel [(match_operand 3 "const_0_to_1_operand")
10634 (match_operand 4 "const_4_to_5_operand")
10635 (match_operand 5 "const_2_to_3_operand")
10636 (match_operand 6 "const_6_to_7_operand")])))]
10637 "TARGET_AVX && <mask_avx512vl_condition>"
10638 {
10639 int mask;
10640 mask = INTVAL (operands[3]);
10641 mask |= (INTVAL (operands[4]) - 4) << 1;
10642 mask |= (INTVAL (operands[5]) - 2) << 2;
10643 mask |= (INTVAL (operands[6]) - 6) << 3;
10644 operands[3] = GEN_INT (mask);
10645
10646 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
10647 }
10648 [(set_attr "type" "sseshuf")
10649 (set_attr "length_immediate" "1")
10650 (set_attr "prefix" "vex")
10651 (set_attr "mode" "V4DF")])
10652
;; Expander for 128-bit shufpd: splits the 2-bit immediate into the two
;; selector constants used by sse2_shufpd_v2df.
10653 (define_expand "sse2_shufpd<mask_expand4_name>"
10654 [(match_operand:V2DF 0 "register_operand")
10655 (match_operand:V2DF 1 "register_operand")
10656 (match_operand:V2DF 2 "vector_operand")
10657 (match_operand:SI 3 "const_int_operand")]
10658 "TARGET_SSE2"
10659 {
10660 int mask = INTVAL (operands[3]);
10661 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
10662 operands[2], GEN_INT (mask & 1),
10663 GEN_INT (mask & 2 ? 3 : 2)
10664 <mask_expand4_args>));
10665 DONE;
10666 })
10667
;; Masked 128-bit vshufpd (AVX512VL).  Operand 5 is the merge source
;; ("0C": matched destination or zero); %N5 in the template prints the
;; {z} zero-masking suffix when operand 5 is the zero constant.
10668 (define_insn "sse2_shufpd_v2df_mask"
10669 [(set (match_operand:V2DF 0 "register_operand" "=v")
10670 (vec_merge:V2DF
10671 (vec_select:V2DF
10672 (vec_concat:V4DF
10673 (match_operand:V2DF 1 "register_operand" "v")
10674 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
10675 (parallel [(match_operand 3 "const_0_to_1_operand")
10676 (match_operand 4 "const_2_to_3_operand")]))
10677 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
10678 (match_operand:QI 6 "register_operand" "Yk")))]
10679 "TARGET_AVX512VL"
10680 {
10681 int mask;
10682 mask = INTVAL (operands[3]);
10683 mask |= (INTVAL (operands[4]) - 2) << 1;
10684 operands[3] = GEN_INT (mask);
10685
10686 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
10687 }
10688 [(set_attr "type" "sseshuf")
10689 (set_attr "length_immediate" "1")
10690 (set_attr "prefix" "evex")
10691 (set_attr "mode" "V2DF")])
10692
10693 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit high-qword interleave: per 128-bit lane, takes the odd qwords
;; of operands 1 and 2 (indices 1,5 and 3,7 of the concat).
10694 (define_insn "avx2_interleave_highv4di<mask_name>"
10695 [(set (match_operand:V4DI 0 "register_operand" "=v")
10696 (vec_select:V4DI
10697 (vec_concat:V8DI
10698 (match_operand:V4DI 1 "register_operand" "v")
10699 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10700 (parallel [(const_int 1)
10701 (const_int 5)
10702 (const_int 3)
10703 (const_int 7)])))]
10704 "TARGET_AVX2 && <mask_avx512vl_condition>"
10705 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10706 [(set_attr "type" "sselog")
10707 (set_attr "prefix" "vex")
10708 (set_attr "mode" "OI")])
10709
;; 512-bit high-qword interleave (vpunpckhqdq), per 128-bit lane.
10710 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10711 [(set (match_operand:V8DI 0 "register_operand" "=v")
10712 (vec_select:V8DI
10713 (vec_concat:V16DI
10714 (match_operand:V8DI 1 "register_operand" "v")
10715 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10716 (parallel [(const_int 1) (const_int 9)
10717 (const_int 3) (const_int 11)
10718 (const_int 5) (const_int 13)
10719 (const_int 7) (const_int 15)])))]
10720 "TARGET_AVX512F"
10721 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10722 [(set_attr "type" "sselog")
10723 (set_attr "prefix" "evex")
10724 (set_attr "mode" "XI")])
10725
;; 128-bit high-qword interleave: alt 0 is non-AVX punpckhqdq (dest must
;; match operand 1), alt 1 the VEX/EVEX 3-operand form with optional
;; AVX512VL masking; <mask_prefix> selects evex when masking is used.
10726 (define_insn "vec_interleave_highv2di<mask_name>"
10727 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10728 (vec_select:V2DI
10729 (vec_concat:V4DI
10730 (match_operand:V2DI 1 "register_operand" "0,v")
10731 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10732 (parallel [(const_int 1)
10733 (const_int 3)])))]
10734 "TARGET_SSE2 && <mask_avx512vl_condition>"
10735 "@
10736 punpckhqdq\t{%2, %0|%0, %2}
10737 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10738 [(set_attr "isa" "noavx,avx")
10739 (set_attr "type" "sselog")
10740 (set_attr "prefix_data16" "1,*")
10741 (set_attr "prefix" "orig,<mask_prefix>")
10742 (set_attr "mode" "TI")])
10743
;; 256-bit low-qword interleave (vpunpcklqdq), per 128-bit lane.
10744 (define_insn "avx2_interleave_lowv4di<mask_name>"
10745 [(set (match_operand:V4DI 0 "register_operand" "=v")
10746 (vec_select:V4DI
10747 (vec_concat:V8DI
10748 (match_operand:V4DI 1 "register_operand" "v")
10749 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10750 (parallel [(const_int 0)
10751 (const_int 4)
10752 (const_int 2)
10753 (const_int 6)])))]
10754 "TARGET_AVX2 && <mask_avx512vl_condition>"
10755 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10756 [(set_attr "type" "sselog")
10757 (set_attr "prefix" "vex")
10758 (set_attr "mode" "OI")])
10759
;; 512-bit low-qword interleave (vpunpcklqdq), per 128-bit lane.
10760 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10761 [(set (match_operand:V8DI 0 "register_operand" "=v")
10762 (vec_select:V8DI
10763 (vec_concat:V16DI
10764 (match_operand:V8DI 1 "register_operand" "v")
10765 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10766 (parallel [(const_int 0) (const_int 8)
10767 (const_int 2) (const_int 10)
10768 (const_int 4) (const_int 12)
10769 (const_int 6) (const_int 14)])))]
10770 "TARGET_AVX512F"
10771 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10772 [(set_attr "type" "sselog")
10773 (set_attr "prefix" "evex")
10774 (set_attr "mode" "XI")])
10775
;; 128-bit low-qword interleave: alt 0 is non-AVX punpcklqdq, alt 1 the
;; VEX/EVEX 3-operand form with optional AVX512VL masking.
;; Fix: use <mask_prefix> for the AVX alternative (as in
;; vec_interleave_highv2di above) so the prefix attribute accounts the
;; masked variant as EVEX instead of unconditionally "vex".
10776 (define_insn "vec_interleave_lowv2di<mask_name>"
10777 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10778 (vec_select:V2DI
10779 (vec_concat:V4DI
10780 (match_operand:V2DI 1 "register_operand" "0,v")
10781 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10782 (parallel [(const_int 0)
10783 (const_int 2)])))]
10784 "TARGET_SSE2 && <mask_avx512vl_condition>"
10785 "@
10786 punpcklqdq\t{%2, %0|%0, %2}
10787 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10788 [(set_attr "isa" "noavx,avx")
10789 (set_attr "type" "sselog")
10790 (set_attr "prefix_data16" "1,*")
10791 (set_attr "prefix" "orig,<mask_prefix>")
10792 (set_attr "mode" "TI")])
10793
;; 128-bit shufpd for both V2DF and V2DI (VI8F_128 iterator); the C body
;; packs the two selectors into an imm8 and picks the legacy or VEX form
;; by alternative.
10794 (define_insn "sse2_shufpd_<mode>"
10795 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10796 (vec_select:VI8F_128
10797 (vec_concat:<ssedoublevecmode>
10798 (match_operand:VI8F_128 1 "register_operand" "0,v")
10799 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10800 (parallel [(match_operand 3 "const_0_to_1_operand")
10801 (match_operand 4 "const_2_to_3_operand")])))]
10802 "TARGET_SSE2"
10803 {
10804 int mask;
10805 mask = INTVAL (operands[3]);
10806 mask |= (INTVAL (operands[4]) - 2) << 1;
10807 operands[3] = GEN_INT (mask);
10808
10809 switch (which_alternative)
10810 {
10811 case 0:
10812 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10813 case 1:
10814 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10815 default:
10816 gcc_unreachable ();
10817 }
10818 }
10819 [(set_attr "isa" "noavx,avx")
10820 (set_attr "type" "sseshuf")
10821 (set_attr "length_immediate" "1")
10822 (set_attr "prefix" "orig,maybe_evex")
10823 (set_attr "mode" "V2DF")])
10824
10825 ;; Avoid combining registers from different units in a single alternative,
10826 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the high DF element of a V2DF.  Alternatives 3-5 (memory
;; source into xmm/x87/integer reg) emit "#" and are resolved by the
;; define_split below after reload.
10827 (define_insn "sse2_storehpd"
10828 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10829 (vec_select:DF
10830 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10831 (parallel [(const_int 1)])))]
10832 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10833 "@
10834 %vmovhpd\t{%1, %0|%0, %1}
10835 unpckhpd\t%0, %0
10836 vunpckhpd\t{%d1, %0|%0, %d1}
10837 #
10838 #
10839 #"
10840 [(set_attr "isa" "*,noavx,avx,*,*,*")
10841 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10842 (set (attr "prefix_data16")
10843 (if_then_else
10844 (and (eq_attr "alternative" "0")
10845 (not (match_test "TARGET_AVX")))
10846 (const_string "1")
10847 (const_string "*")))
10848 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10849 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
10850
;; After reload, extracting element 1 from a V2DF in memory becomes a
;; plain DF load from offset 8.
10851 (define_split
10852 [(set (match_operand:DF 0 "register_operand")
10853 (vec_select:DF
10854 (match_operand:V2DF 1 "memory_operand")
10855 (parallel [(const_int 1)])))]
10856 "TARGET_SSE2 && reload_completed"
10857 [(set (match_dup 0) (match_dup 1))]
10858 "operands[1] = adjust_address (operands[1], DFmode, 8);")
10859
;; SSE1-only (!TARGET_SSE2) extraction of the high half of a V2DF using
;; SF-typed moves; %H1 addresses the upper 8 bytes of a memory operand.
10860 (define_insn "*vec_extractv2df_1_sse"
10861 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10862 (vec_select:DF
10863 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10864 (parallel [(const_int 1)])))]
10865 "!TARGET_SSE2 && TARGET_SSE
10866 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10867 "@
10868 movhps\t{%1, %0|%0, %1}
10869 movhlps\t{%1, %0|%0, %1}
10870 movlps\t{%H1, %0|%0, %H1}"
10871 [(set_attr "type" "ssemov")
10872 (set_attr "mode" "V2SF,V4SF,V2SF")])
10873
10874 ;; Avoid combining registers from different units in a single alternative,
10875 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the low DF element of a V2DF.  Only the store form (alt 0) is
;; emitted directly; the "#" alternatives are handled by the lowpart
;; define_split below after reload.
10876 (define_insn "sse2_storelpd"
10877 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10878 (vec_select:DF
10879 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10880 (parallel [(const_int 0)])))]
10881 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10882 "@
10883 %vmovlpd\t{%1, %0|%0, %1}
10884 #
10885 #
10886 #
10887 #"
10888 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10889 (set (attr "prefix_data16")
10890 (if_then_else (eq_attr "alternative" "0")
10891 (const_string "1")
10892 (const_string "*")))
10893 (set_attr "prefix" "maybe_vex")
10894 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
10895
;; After reload, extracting element 0 is just a lowpart move.
10896 (define_split
10897 [(set (match_operand:DF 0 "register_operand")
10898 (vec_select:DF
10899 (match_operand:V2DF 1 "nonimmediate_operand")
10900 (parallel [(const_int 0)])))]
10901 "TARGET_SSE2 && reload_completed"
10902 [(set (match_dup 0) (match_dup 1))]
10903 "operands[1] = gen_lowpart (DFmode, operands[1]);")
10904
;; SSE1-only extraction of the low half of a V2DF via SF-typed moves;
;; %q1 addresses the low 8 bytes of the memory operand.
10905 (define_insn "*vec_extractv2df_0_sse"
10906 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10907 (vec_select:DF
10908 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10909 (parallel [(const_int 0)])))]
10910 "!TARGET_SSE2 && TARGET_SSE
10911 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10912 "@
10913 movlps\t{%1, %0|%0, %1}
10914 movaps\t{%1, %0|%0, %1}
10915 movlps\t{%1, %0|%0, %q1}"
10916 [(set_attr "type" "ssemov")
10917 (set_attr "mode" "V2SF,V4SF,V2SF")])
10918
;; Expander wrapper for sse2_loadhpd: legitimizes the operands via
;; ix86_fixup_binary_operands (which may substitute a scratch
;; destination) and copies back to operand 0 if one was used.
10919 (define_expand "sse2_loadhpd_exp"
10920 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10921 (vec_concat:V2DF
10922 (vec_select:DF
10923 (match_operand:V2DF 1 "nonimmediate_operand")
10924 (parallel [(const_int 0)]))
10925 (match_operand:DF 2 "nonimmediate_operand")))]
10926 "TARGET_SSE2"
10927 {
10928 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10929
10930 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10931
10932 /* Fix up the destination if needed. */
10933 if (dst != operands[0])
10934 emit_move_insn (operands[0], dst);
10935
10936 DONE;
10937 })
10938
10939 ;; Avoid combining registers from different units in a single alternative,
10940 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high DF element of operand 1 with operand 2, keeping the
;; low element.  Memory-destination alternatives ("o") emit "#" and are
;; resolved by the define_split below (a DF store at offset 8).
10941 (define_insn "sse2_loadhpd"
10942 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10943 "=x,v,x,v ,o,o ,o")
10944 (vec_concat:V2DF
10945 (vec_select:DF
10946 (match_operand:V2DF 1 "nonimmediate_operand"
10947 " 0,v,0,v ,0,0 ,0")
10948 (parallel [(const_int 0)]))
10949 (match_operand:DF 2 "nonimmediate_operand"
10950 " m,m,x,Yv,x,*f,r")))]
10951 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10952 "@
10953 movhpd\t{%2, %0|%0, %2}
10954 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10955 unpcklpd\t{%2, %0|%0, %2}
10956 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10957 #
10958 #
10959 #"
10960 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10961 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10962 (set (attr "prefix_data16")
10963 (if_then_else (eq_attr "alternative" "0")
10964 (const_string "1")
10965 (const_string "*")))
10966 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10967 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
10968
;; Setting the high half of a V2DF in memory is just a DF store at
;; offset 8 (the low half is unchanged).
10969 (define_split
10970 [(set (match_operand:V2DF 0 "memory_operand")
10971 (vec_concat:V2DF
10972 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10973 (match_operand:DF 1 "register_operand")))]
10974 "TARGET_SSE2 && reload_completed"
10975 [(set (match_dup 0) (match_dup 1))]
10976 "operands[0] = adjust_address (operands[0], DFmode, 8);")
10977
;; Expander wrapper for sse2_loadlpd, mirroring sse2_loadhpd_exp:
;; legitimizes operands, emits the insn, copies back if a scratch
;; destination was substituted.
10978 (define_expand "sse2_loadlpd_exp"
10979 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10980 (vec_concat:V2DF
10981 (match_operand:DF 2 "nonimmediate_operand")
10982 (vec_select:DF
10983 (match_operand:V2DF 1 "nonimmediate_operand")
10984 (parallel [(const_int 1)]))))]
10985 "TARGET_SSE2"
10986 {
10987 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10988
10989 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10990
10991 /* Fix up the destination if needed. */
10992 if (dst != operands[0])
10993 emit_move_insn (operands[0], dst);
10994
10995 DONE;
10996 })
10997
10998 ;; Avoid combining registers from different units in a single alternative,
10999 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low DF element of operand 1 with operand 2.  Alt 0
;; (operand 1 == zero, per nonimm_or_0_operand "C") is a zero-extending
;; vmovq; memory-destination alternatives 8-10 emit "#" and are split
;; below into a DF store at offset 0.
11000 (define_insn "sse2_loadlpd"
11001 [(set (match_operand:V2DF 0 "nonimmediate_operand"
11002 "=v,x,v,x,v,x,x,v,m,m ,m")
11003 (vec_concat:V2DF
11004 (match_operand:DF 2 "nonimmediate_operand"
11005 "vm,m,m,x,v,0,0,v,x,*f,r")
11006 (vec_select:DF
11007 (match_operand:V2DF 1 "nonimm_or_0_operand"
11008 " C,0,v,0,v,x,o,o,0,0 ,0")
11009 (parallel [(const_int 1)]))))]
11010 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11011 "@
11012 %vmovq\t{%2, %0|%0, %2}
11013 movlpd\t{%2, %0|%0, %2}
11014 vmovlpd\t{%2, %1, %0|%0, %1, %2}
11015 movsd\t{%2, %0|%0, %2}
11016 vmovsd\t{%2, %1, %0|%0, %1, %2}
11017 shufpd\t{$2, %1, %0|%0, %1, 2}
11018 movhpd\t{%H1, %0|%0, %H1}
11019 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
11020 #
11021 #
11022 #"
11023 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
11024 (set (attr "type")
11025 (cond [(eq_attr "alternative" "5")
11026 (const_string "sselog")
11027 (eq_attr "alternative" "9")
11028 (const_string "fmov")
11029 (eq_attr "alternative" "10")
11030 (const_string "imov")
11031 ]
11032 (const_string "ssemov")))
11033 (set (attr "prefix_data16")
11034 (if_then_else (eq_attr "alternative" "1,6")
11035 (const_string "1")
11036 (const_string "*")))
11037 (set (attr "length_immediate")
11038 (if_then_else (eq_attr "alternative" "5")
11039 (const_string "1")
11040 (const_string "*")))
11041 (set (attr "prefix")
11042 (cond [(eq_attr "alternative" "0")
11043 (const_string "maybe_vex")
11044 (eq_attr "alternative" "1,3,5,6")
11045 (const_string "orig")
11046 (eq_attr "alternative" "2,4,7")
11047 (const_string "maybe_evex")
11048 ]
11049 (const_string "*")))
11050 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
11051
;; Setting the low half of a V2DF in memory is a DF store at offset 0.
11052 (define_split
11053 [(set (match_operand:V2DF 0 "memory_operand")
11054 (vec_concat:V2DF
11055 (match_operand:DF 1 "register_operand")
11056 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
11057 "TARGET_SSE2 && reload_completed"
11058 [(set (match_dup 0) (match_dup 1))]
11059 "operands[0] = adjust_address (operands[0], DFmode, 0);")
11060
;; movsd-style merge: element 0 from operand 2, element 1 from operand 1
;; (vec_merge with mask const_int 1).  Alternatives cover reg/reg,
;; load-low, store-low, the shufpd trick (alt 5), and high-half moves
;; when one source is offsettable memory.
11061 (define_insn "sse2_movsd"
11062 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
11063 (vec_merge:V2DF
11064 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
11065 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
11066 (const_int 1)))]
11067 "TARGET_SSE2"
11068 "@
11069 movsd\t{%2, %0|%0, %2}
11070 vmovsd\t{%2, %1, %0|%0, %1, %2}
11071 movlpd\t{%2, %0|%0, %q2}
11072 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
11073 %vmovlpd\t{%2, %0|%q0, %2}
11074 shufpd\t{$2, %1, %0|%0, %1, 2}
11075 movhps\t{%H1, %0|%0, %H1}
11076 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
11077 %vmovhps\t{%1, %H0|%H0, %1}"
11078 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
11079 (set (attr "type")
11080 (if_then_else
11081 (eq_attr "alternative" "5")
11082 (const_string "sselog")
11083 (const_string "ssemov")))
11084 (set (attr "prefix_data16")
11085 (if_then_else
11086 (and (eq_attr "alternative" "2,4")
11087 (not (match_test "TARGET_AVX")))
11088 (const_string "1")
11089 (const_string "*")))
11090 (set (attr "length_immediate")
11091 (if_then_else (eq_attr "alternative" "5")
11092 (const_string "1")
11093 (const_string "*")))
11094 (set (attr "prefix")
11095 (cond [(eq_attr "alternative" "1,3,7")
11096 (const_string "maybe_evex")
11097 (eq_attr "alternative" "4,8")
11098 (const_string "maybe_vex")
11099 ]
11100 (const_string "orig")))
11101 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
11102
;; Broadcast a DF into both elements of a V2DF: unpcklpd (no-AVX,
;; dest == src), movddup (SSE3+), or EVEX vmovddup with optional
;; AVX512VL masking.
11103 (define_insn "vec_dupv2df<mask_name>"
11104 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
11105 (vec_duplicate:V2DF
11106 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
11107 "TARGET_SSE2 && <mask_avx512vl_condition>"
11108 "@
11109 unpcklpd\t%0, %0
11110 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
11111 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11112 [(set_attr "isa" "noavx,sse3,avx512vl")
11113 (set_attr "type" "sselog1")
11114 (set_attr "prefix" "orig,maybe_vex,evex")
11115 (set_attr "mode" "V2DF,DF,DF")])
11116
;; Build a V2DF from two DF halves.  Alternatives 3-4 use movddup when
;; both halves are the same memory operand ("1" match, requires SSE3 per
;; the insn condition); alt 7 is a zero-extending vmovq (operand 2 zero);
;; alts 8-9 are the SSE1 movlhps/movhps fallbacks.
11117 (define_insn "vec_concatv2df"
11118 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
11119 (vec_concat:V2DF
11120 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
11121 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
11122 "TARGET_SSE
11123 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
11124 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
11125 "@
11126 unpcklpd\t{%2, %0|%0, %2}
11127 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
11128 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
11129 %vmovddup\t{%1, %0|%0, %1}
11130 vmovddup\t{%1, %0|%0, %1}
11131 movhpd\t{%2, %0|%0, %2}
11132 vmovhpd\t{%2, %1, %0|%0, %1, %2}
11133 %vmovq\t{%1, %0|%0, %1}
11134 movlhps\t{%2, %0|%0, %2}
11135 movhps\t{%2, %0|%0, %2}"
11136 [(set (attr "isa")
11137 (cond [(eq_attr "alternative" "0,5")
11138 (const_string "sse2_noavx")
11139 (eq_attr "alternative" "1,6")
11140 (const_string "avx")
11141 (eq_attr "alternative" "2,4")
11142 (const_string "avx512vl")
11143 (eq_attr "alternative" "3")
11144 (const_string "sse3")
11145 (eq_attr "alternative" "7")
11146 (const_string "sse2")
11147 ]
11148 (const_string "noavx")))
11149 (set (attr "type")
11150 (if_then_else
11151 (eq_attr "alternative" "0,1,2,3,4")
11152 (const_string "sselog")
11153 (const_string "ssemov")))
11154 (set (attr "prefix_data16")
11155 (if_then_else (eq_attr "alternative" "5")
11156 (const_string "1")
11157 (const_string "*")))
11158 (set (attr "prefix")
11159 (cond [(eq_attr "alternative" "1,6")
11160 (const_string "vex")
11161 (eq_attr "alternative" "2,4")
11162 (const_string "evex")
11163 (eq_attr "alternative" "3,7")
11164 (const_string "maybe_vex")
11165 ]
11166 (const_string "orig")))
11167 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
11168
11169 ;; vmovq clears also the higher bits.
;; Set element 0 of a zero vector (operand 1 is const0) from a scalar:
;; vmovq writes the low 64 bits and zeroes the rest, so no blend needed.
11170 (define_insn "vec_set<mode>_0"
11171 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
11172 (vec_merge:VF2_512_256
11173 (vec_duplicate:VF2_512_256
11174 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
11175 (match_operand:VF2_512_256 1 "const0_operand" "C")
11176 (const_int 1)))]
11177 "TARGET_AVX"
11178 "vmovq\t{%2, %x0|%x0, %2}"
11179 [(set_attr "type" "ssemov")
11180 (set_attr "prefix" "maybe_evex")
11181 (set_attr "mode" "DF")])
11182
11183 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11184 ;;
11185 ;; Parallel integer down-conversion operations
11186 ;;
11187 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11188
;; Iterator/attribute set for AVX-512 down-conversions (vpmov*): for each
;; narrow destination mode, the wide source mode, its lowercase name, and
;; the vpmov mnemonic suffix.
11189 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
11190 (define_mode_attr pmov_src_mode
11191 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
11192 (define_mode_attr pmov_src_lower
11193 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
11194 (define_mode_attr pmov_suff_1
11195 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
11196
;; Standard-name truncation expander; matched directly by the insn below.
11197 (define_expand "trunc<pmov_src_lower><mode>2"
11198 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
11199 (truncate:PMOV_DST_MODE_1
11200 (match_operand:<pmov_src_mode> 1 "register_operand")))]
11201 "TARGET_AVX512F")
11202
11203 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
11204 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
11205 (any_truncate:PMOV_DST_MODE_1
11206 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
11207 "TARGET_AVX512F"
11208 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
11209 [(set_attr "type" "ssemov")
11210 (set_attr "memory" "none,store")
11211 (set_attr "prefix" "evex")
11212 (set_attr "mode" "<sseinsnmode>")])
11213
;; The next three pre-reload splitters recognize a VPERMVAR whose control
;; (checked by permvar_truncate_operand) followed by a low-half vec_select
;; is equivalent to a plain truncation, and rewrite it as such so the
;; cheaper vpmov form can be emitted.
11214 (define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1"
11215 [(set (match_operand:V16HI 0 "nonimmediate_operand")
11216 (vec_select:V16HI
11217 (unspec:V32HI
11218 [(match_operand:V32HI 1 "register_operand")
11219 (match_operand:V32HI 2 "permvar_truncate_operand")]
11220 UNSPEC_VPERMVAR)
11221 (parallel [(const_int 0) (const_int 1)
11222 (const_int 2) (const_int 3)
11223 (const_int 4) (const_int 5)
11224 (const_int 6) (const_int 7)
11225 (const_int 8) (const_int 9)
11226 (const_int 10) (const_int 11)
11227 (const_int 12) (const_int 13)
11228 (const_int 14) (const_int 15)])))]
11229 "TARGET_AVX512BW && ix86_pre_reload_split ()"
11230 "#"
11231 "&& 1"
11232 [(set (match_dup 0)
11233 (truncate:V16HI (match_dup 1)))]
;; Reinterpret the V32HI input as V16SI so the truncate modes line up.
11234 "operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);")
11235
;; 256-bit variant: V16HI permvar + low-8 select -> truncate V8SI->V8HI.
11236 (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1"
11237 [(set (match_operand:V8HI 0 "nonimmediate_operand")
11238 (vec_select:V8HI
11239 (unspec:V16HI
11240 [(match_operand:V16HI 1 "register_operand")
11241 (match_operand:V16HI 2 "permvar_truncate_operand")]
11242 UNSPEC_VPERMVAR)
11243 (parallel [(const_int 0) (const_int 1)
11244 (const_int 2) (const_int 3)
11245 (const_int 4) (const_int 5)
11246 (const_int 6) (const_int 7)])))]
11247 "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()"
11248 "#"
11249 "&& 1"
11250 [(set (match_dup 0)
11251 (truncate:V8HI (match_dup 1)))]
11252 "operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);")
11253
;; 512-bit SI variant: V16SI permvar + low-8 select -> truncate V8DI->V8SI.
11254 (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1"
11255 [(set (match_operand:V8SI 0 "nonimmediate_operand")
11256 (vec_select:V8SI
11257 (unspec:V16SI
11258 [(match_operand:V16SI 1 "register_operand")
11259 (match_operand:V16SI 2 "permvar_truncate_operand")]
11260 UNSPEC_VPERMVAR)
11261 (parallel [(const_int 0) (const_int 1)
11262 (const_int 2) (const_int 3)
11263 (const_int 4) (const_int 5)
11264 (const_int 6) (const_int 7)])))]
11265 "TARGET_AVX512F && ix86_pre_reload_split ()"
11266 "#"
11267 "&& 1"
11268 [(set (match_dup 0)
11269 (truncate:V8SI (match_dup 1)))]
11270 "operands[1] = lowpart_subreg (V8DImode, operands[1], V16SImode);")
11271
;; Masked down-convert: lanes with a zero mask bit keep operand 2
;; (zero-masking when operand 2 is the "C" all-zeros alternative,
;; merge-masking when it is "0", i.e. matches the output).
11272 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
11273 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
11274 (vec_merge:PMOV_DST_MODE_1
11275 (any_truncate:PMOV_DST_MODE_1
11276 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
11277 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
11278 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
11279 "TARGET_AVX512F"
;; %{%3%} prints the {%k} mask annotation, %N2 appends {z} for zero-masking.
11280 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11281 [(set_attr "type" "ssemov")
11282 (set_attr "memory" "none,store")
11283 (set_attr "prefix" "evex")
11284 (set_attr "mode" "<sseinsnmode>")])
11285
;; Builtin entry point for the masked store form: the destination memory
;; doubles as the merge source (match_dup 0).
11286 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
11287 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
11288 (vec_merge:PMOV_DST_MODE_1
11289 (any_truncate:PMOV_DST_MODE_1
11290 (match_operand:<pmov_src_mode> 1 "register_operand"))
11291 (match_dup 0)
11292 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
11293 "TARGET_AVX512F")
11294
;; V32HI -> V32QI truncation family (vpmov[us]wb, full 512-bit source);
;; needs AVX512BW since it operates on HI elements.
11295 (define_expand "truncv32hiv32qi2"
11296 [(set (match_operand:V32QI 0 "nonimmediate_operand")
11297 (truncate:V32QI
11298 (match_operand:V32HI 1 "register_operand")))]
11299 "TARGET_AVX512BW")
11300
;; Register or direct-to-memory (alternative 1) down-convert.
11301 (define_insn "avx512bw_<code>v32hiv32qi2"
11302 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
11303 (any_truncate:V32QI
11304 (match_operand:V32HI 1 "register_operand" "v,v")))]
11305 "TARGET_AVX512BW"
11306 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
11307 [(set_attr "type" "ssemov")
11308 (set_attr "memory" "none,store")
11309 (set_attr "prefix" "evex")
11310 (set_attr "mode" "XI")])
11311
;; Pre-reload splitter: byte-granular VPERMVAR (hence AVX512VBMI) whose
;; control selects the truncation pattern, followed by a low-32 select,
;; is rewritten as a plain V32HI->V32QI truncate.
11312 (define_insn_and_split "*avx512f_permvar_truncv32hiv32qi_1"
11313 [(set (match_operand:V32QI 0 "nonimmediate_operand")
11314 (vec_select:V32QI
11315 (unspec:V64QI
11316 [(match_operand:V64QI 1 "register_operand")
11317 (match_operand:V64QI 2 "permvar_truncate_operand")]
11318 UNSPEC_VPERMVAR)
11319 (parallel [(const_int 0) (const_int 1)
11320 (const_int 2) (const_int 3)
11321 (const_int 4) (const_int 5)
11322 (const_int 6) (const_int 7)
11323 (const_int 8) (const_int 9)
11324 (const_int 10) (const_int 11)
11325 (const_int 12) (const_int 13)
11326 (const_int 14) (const_int 15)
11327 (const_int 16) (const_int 17)
11328 (const_int 18) (const_int 19)
11329 (const_int 20) (const_int 21)
11330 (const_int 22) (const_int 23)
11331 (const_int 24) (const_int 25)
11332 (const_int 26) (const_int 27)
11333 (const_int 28) (const_int 29)
11334 (const_int 30) (const_int 31)])))]
11335 "TARGET_AVX512VBMI && ix86_pre_reload_split ()"
11336 "#"
11337 "&& 1"
11338 [(set (match_dup 0)
11339 (truncate:V32QI (match_dup 1)))]
11340 "operands[1] = lowpart_subreg (V32HImode, operands[1], V64QImode);")
11341
;; Masked variant; SImode mask = one bit per byte lane (32 lanes).
11342 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
11343 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
11344 (vec_merge:V32QI
11345 (any_truncate:V32QI
11346 (match_operand:V32HI 1 "register_operand" "v,v"))
11347 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
11348 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
11349 "TARGET_AVX512BW"
11350 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11351 [(set_attr "type" "ssemov")
11352 (set_attr "memory" "none,store")
11353 (set_attr "prefix" "evex")
11354 (set_attr "mode" "XI")])
11355
;; Masked-store expander; merge source is the destination itself.
;; NOTE(review): operand 0 uses nonimmediate_operand here while the
;; analogous PMOV_DST_MODE_1 mask_store uses memory_operand — confirm
;; whether the looser predicate is intentional.
11356 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
11357 [(set (match_operand:V32QI 0 "nonimmediate_operand")
11358 (vec_merge:V32QI
11359 (any_truncate:V32QI
11360 (match_operand:V32HI 1 "register_operand"))
11361 (match_dup 0)
11362 (match_operand:SI 2 "register_operand")))]
11363 "TARGET_AVX512BW")
11364
;; 128-bit destinations of a half-width (2:1) truncation under AVX512VL;
;; the V16QI case needs AVX512BW because its source elements are HI.
11365 (define_mode_iterator PMOV_DST_MODE_2
11366 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
;; vpmov mnemonic suffix for the 2:1 truncations.
11367 (define_mode_attr pmov_suff_2
11368 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
11369
;; Named expander: truncate from the double-width mode.
11370 (define_expand "trunc<ssedoublemodelower><mode>2"
11371 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
11372 (truncate:PMOV_DST_MODE_2
11373 (match_operand:<ssedoublemode> 1 "register_operand")))]
11374 "TARGET_AVX512VL")
11375
;; 2:1 down-convert to register or directly to memory.
11376 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
11377 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
11378 (any_truncate:PMOV_DST_MODE_2
11379 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
11380 "TARGET_AVX512VL"
11381 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
11382 [(set_attr "type" "ssemov")
11383 (set_attr "memory" "none,store")
11384 (set_attr "prefix" "evex")
11385 (set_attr "mode" "<sseinsnmode>")])
11386
;; Pre-reload splitters for the 256->128-bit cases: a VPERMVAR whose
;; control matches permvar_truncate_operand followed by a low-half
;; vec_select is a plain truncation in disguise.
;; Byte-granular permvar requires AVX512VBMI in addition to VL.
11387 (define_insn_and_split "*avx512f_permvar_truncv16hiv16qi_1"
11388 [(set (match_operand:V16QI 0 "nonimmediate_operand")
11389 (vec_select:V16QI
11390 (unspec:V32QI
11391 [(match_operand:V32QI 1 "register_operand")
11392 (match_operand:V32QI 2 "permvar_truncate_operand")]
11393 UNSPEC_VPERMVAR)
11394 (parallel [(const_int 0) (const_int 1)
11395 (const_int 2) (const_int 3)
11396 (const_int 4) (const_int 5)
11397 (const_int 6) (const_int 7)
11398 (const_int 8) (const_int 9)
11399 (const_int 10) (const_int 11)
11400 (const_int 12) (const_int 13)
11401 (const_int 14) (const_int 15)])))]
11402 "TARGET_AVX512VL && TARGET_AVX512VBMI
11403 && ix86_pre_reload_split ()"
11404 "#"
11405 "&& 1"
11406 [(set (match_dup 0)
11407 (truncate:V16QI (match_dup 1)))]
11408 "operands[1] = lowpart_subreg (V16HImode, operands[1], V32QImode);")
11409
;; DWORD-granular variant: V8SI permvar + low-4 select -> V4DI->V4SI.
11410 (define_insn_and_split "*avx512f_permvar_truncv4div4si_1"
11411 [(set (match_operand:V4SI 0 "nonimmediate_operand")
11412 (vec_select:V4SI
11413 (unspec:V8SI
11414 [(match_operand:V8SI 1 "register_operand")
11415 (match_operand:V8SI 2 "permvar_truncate_operand")]
11416 UNSPEC_VPERMVAR)
11417 (parallel [(const_int 0) (const_int 1)
11418 (const_int 2) (const_int 3)])))]
11419 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11420 "#"
11421 "&& 1"
11422 [(set (match_dup 0)
11423 (truncate:V4SI (match_dup 1)))]
11424 "operands[1] = lowpart_subreg (V4DImode, operands[1], V8SImode);")
11425
;; Masked 2:1 down-convert (zero-mask via "C", merge-mask via "0").
11426 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
11427 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
11428 (vec_merge:PMOV_DST_MODE_2
11429 (any_truncate:PMOV_DST_MODE_2
11430 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
11431 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
11432 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
11433 "TARGET_AVX512VL"
11434 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11435 [(set_attr "type" "ssemov")
11436 (set_attr "memory" "none,store")
11437 (set_attr "prefix" "evex")
11438 (set_attr "mode" "<sseinsnmode>")])
11439
;; Masked-store expander; destination memory is also the merge source.
11440 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
11441 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
11442 (vec_merge:PMOV_DST_MODE_2
11443 (any_truncate:PMOV_DST_MODE_2
11444 (match_operand:<ssedoublemode> 1 "register_operand"))
11445 (match_dup 0)
11446 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
11447 "TARGET_AVX512VL")
11448
;; Sources that truncate down to QI sub-vectors narrower than 128 bits.
;; The hardware instruction always writes a full XMM register, so the
;; narrow result is modeled as a vec_concat with a zeroed remainder whose
;; mode (pmov_dst_zeroed_3) pads the destination back up to 16 bytes.
11449 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
11450 (define_mode_attr pmov_dst_3_lower
11451 [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
11452 (define_mode_attr pmov_dst_3
11453 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
;; Zero padding: dst elements + zeroed elements = 16 bytes total.
11454 (define_mode_attr pmov_dst_zeroed_3
11455 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
11456 (define_mode_attr pmov_suff_3
11457 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
11458
;; Named expander: view the narrow destination as the low part of a
;; V16QI register and emit the zero-padded insn below.
11459 (define_expand "trunc<mode><pmov_dst_3_lower>2"
11460 [(set (match_operand:<pmov_dst_3> 0 "register_operand")
11461 (truncate:<pmov_dst_3>
11462 (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
11463 "TARGET_AVX512VL"
11464 {
11465 operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
11466 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
11467 operands[1],
11468 CONST0_RTX (<pmov_dst_zeroed_3>mode)));
11469 DONE;
11470 })
11471
;; Truncate to the low QI lanes of an XMM register, upper lanes zeroed.
11472 (define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
11473 [(set (match_operand:V16QI 0 "register_operand" "=v")
11474 (vec_concat:V16QI
11475 (any_truncate:<pmov_dst_3>
11476 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
11477 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
11478 "TARGET_AVX512VL"
11479 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11480 [(set_attr "type" "ssemov")
11481 (set_attr "prefix" "evex")
11482 (set_attr "mode" "TI")])
11483
;; Pre-reload splitter: PSHUFB with a control matched by
;; pshufb_truncv8hiv8qi_operand, read back through element 0 of a V2DI
;; view, is really a V8HI->V8QI truncation packed into a DI.  Rewrite it
;; through gen_truncv8hiv8qi2 and a DI lowpart move.
11484 (define_insn_and_split "*avx512f_pshufb_truncv8hiv8qi_1"
11485 [(set (match_operand:DI 0 "register_operand")
11486 (vec_select:DI
11487 (subreg:V2DI
11488 (unspec:V16QI
11489 [(match_operand:V16QI 1 "register_operand")
11490 (match_operand:V16QI 2 "pshufb_truncv8hiv8qi_operand")]
11491 UNSPEC_PSHUFB) 0)
11492 (parallel [(const_int 0)])))]
11493 "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()"
11494 "#"
11495 "&& 1"
11496 [(const_int 0)]
11497 {
11498 rtx op1 = gen_reg_rtx (V8QImode);
11499 operands[1] = lowpart_subreg (V8HImode, operands[1], V16QImode);
11500 emit_insn (gen_truncv8hiv8qi2 (op1, operands[1]));
11501 emit_move_insn (operands[0], lowpart_subreg (DImode, op1, V8QImode));
11502 DONE;
11503 })
11504
;; V2DI -> V2QI truncating store: the insn writes exactly two bytes.
11505 (define_insn "*avx512vl_<code>v2div2qi2_store_1"
11506 [(set (match_operand:V2QI 0 "memory_operand" "=m")
11507 (any_truncate:V2QI
11508 (match_operand:V2DI 1 "register_operand" "v")))]
11509 "TARGET_AVX512VL"
11510 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11511 [(set_attr "type" "ssemov")
11512 (set_attr "memory" "store")
11513 (set_attr "prefix" "evex")
11514 (set_attr "mode" "TI")])
11515
;; Same store seen through an HI-mode paradoxical view: retarget the
;; memory reference to V2QImode so the pattern above matches.
11516 (define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
11517 [(set (match_operand:HI 0 "memory_operand")
11518 (subreg:HI
11519 (any_truncate:V2QI
11520 (match_operand:V2DI 1 "register_operand")) 0))]
11521 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11522 "#"
11523 "&& 1"
11524 [(set (match_dup 0)
11525 (any_truncate:V2QI (match_dup 1)))]
11526 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
11527
;; Masked V2DI->V2QI: the two live bytes merge with the low two bytes of
;; operand 2 under mask bits, and the remaining 14 bytes of the XMM
;; destination are explicitly zero.
11528 (define_insn "avx512vl_<code>v2div2qi2_mask"
11529 [(set (match_operand:V16QI 0 "register_operand" "=v")
11530 (vec_concat:V16QI
11531 (vec_merge:V2QI
11532 (any_truncate:V2QI
11533 (match_operand:V2DI 1 "register_operand" "v"))
11534 (vec_select:V2QI
11535 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11536 (parallel [(const_int 0) (const_int 1)]))
11537 (match_operand:QI 3 "register_operand" "Yk"))
11538 (const_vector:V14QI [(const_int 0) (const_int 0)
11539 (const_int 0) (const_int 0)
11540 (const_int 0) (const_int 0)
11541 (const_int 0) (const_int 0)
11542 (const_int 0) (const_int 0)
11543 (const_int 0) (const_int 0)
11544 (const_int 0) (const_int 0)])))]
11545 "TARGET_AVX512VL"
11546 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11547 [(set_attr "type" "ssemov")
11548 (set_attr "prefix" "evex")
11549 (set_attr "mode" "TI")])
11550
;; Zero-masking special case: masked-off lanes come from an explicit
;; zero vector, so {z} is printed unconditionally.
11551 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
11552 [(set (match_operand:V16QI 0 "register_operand" "=v")
11553 (vec_concat:V16QI
11554 (vec_merge:V2QI
11555 (any_truncate:V2QI
11556 (match_operand:V2DI 1 "register_operand" "v"))
11557 (const_vector:V2QI [(const_int 0) (const_int 0)])
11558 (match_operand:QI 2 "register_operand" "Yk"))
11559 (const_vector:V14QI [(const_int 0) (const_int 0)
11560 (const_int 0) (const_int 0)
11561 (const_int 0) (const_int 0)
11562 (const_int 0) (const_int 0)
11563 (const_int 0) (const_int 0)
11564 (const_int 0) (const_int 0)
11565 (const_int 0) (const_int 0)])))]
11566 "TARGET_AVX512VL"
11567 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11568 [(set_attr "type" "ssemov")
11569 (set_attr "prefix" "evex")
11570 (set_attr "mode" "TI")])
11571
;; Masked truncating store: masked-off bytes keep the memory contents.
11572 (define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
11573 [(set (match_operand:V2QI 0 "memory_operand" "=m")
11574 (vec_merge:V2QI
11575 (any_truncate:V2QI
11576 (match_operand:V2DI 1 "register_operand" "v"))
11577 (match_dup 0)
11578 (match_operand:QI 2 "register_operand" "Yk")))]
11579 "TARGET_AVX512VL"
11580 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11581 [(set_attr "type" "ssemov")
11582 (set_attr "memory" "store")
11583 (set_attr "prefix" "evex")
11584 (set_attr "mode" "TI")])
11585
;; Splitter: the same masked store expressed through an HI subreg and a
;; widened reload of the destination; rewrite the memory ref to V2QImode
;; so the direct masked-store pattern above applies.
11586 (define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
11587 [(set (match_operand:HI 0 "memory_operand")
11588 (subreg:HI
11589 (vec_merge:V2QI
11590 (any_truncate:V2QI
11591 (match_operand:V2DI 1 "register_operand"))
11592 (vec_select:V2QI
11593 (subreg:V4QI
11594 (vec_concat:V2HI
11595 (match_dup 0)
11596 (const_int 0)) 0)
11597 (parallel [(const_int 0) (const_int 1)]))
11598 (match_operand:QI 2 "register_operand")) 0))]
11599 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11600 "#"
11601 "&& 1"
11602 [(set (match_dup 0)
11603 (vec_merge:V2QI
11604 (any_truncate:V2QI (match_dup 1))
11605 (match_dup 0)
11606 (match_dup 2)))]
11607 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
11608
;; V4DI/V8SI (VI4_128_8_256) -> V4QI truncating store: writes four bytes.
11609 (define_insn "*avx512vl_<code><mode>v4qi2_store_1"
11610 [(set (match_operand:V4QI 0 "memory_operand" "=m")
11611 (any_truncate:V4QI
11612 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11613 "TARGET_AVX512VL"
11614 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11615 [(set_attr "type" "ssemov")
11616 (set_attr "memory" "store")
11617 (set_attr "prefix" "evex")
11618 (set_attr "mode" "TI")])
11619
;; Same store viewed through an SI subreg; retarget to V4QImode memory.
11620 (define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
11621 [(set (match_operand:SI 0 "memory_operand")
11622 (subreg:SI
11623 (any_truncate:V4QI
11624 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11625 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11626 "#"
11627 "&& 1"
11628 [(set (match_dup 0)
11629 (any_truncate:V4QI (match_dup 1)))]
11630 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
11631
;; Masked truncate to the low four bytes of an XMM destination; the
;; upper 12 bytes are explicitly zero.
11632 (define_insn "avx512vl_<code><mode>v4qi2_mask"
11633 [(set (match_operand:V16QI 0 "register_operand" "=v")
11634 (vec_concat:V16QI
11635 (vec_merge:V4QI
11636 (any_truncate:V4QI
11637 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11638 (vec_select:V4QI
11639 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11640 (parallel [(const_int 0) (const_int 1)
11641 (const_int 2) (const_int 3)]))
11642 (match_operand:QI 3 "register_operand" "Yk"))
11643 (const_vector:V12QI [(const_int 0) (const_int 0)
11644 (const_int 0) (const_int 0)
11645 (const_int 0) (const_int 0)
11646 (const_int 0) (const_int 0)
11647 (const_int 0) (const_int 0)
11648 (const_int 0) (const_int 0)])))]
11649 "TARGET_AVX512VL"
11650 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11651 [(set_attr "type" "ssemov")
11652 (set_attr "prefix" "evex")
11653 (set_attr "mode" "TI")])
11654
;; Zero-masking special case ({z} printed unconditionally).
11655 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
11656 [(set (match_operand:V16QI 0 "register_operand" "=v")
11657 (vec_concat:V16QI
11658 (vec_merge:V4QI
11659 (any_truncate:V4QI
11660 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11661 (const_vector:V4QI [(const_int 0) (const_int 0)
11662 (const_int 0) (const_int 0)])
11663 (match_operand:QI 2 "register_operand" "Yk"))
11664 (const_vector:V12QI [(const_int 0) (const_int 0)
11665 (const_int 0) (const_int 0)
11666 (const_int 0) (const_int 0)
11667 (const_int 0) (const_int 0)
11668 (const_int 0) (const_int 0)
11669 (const_int 0) (const_int 0)])))]
11670 "TARGET_AVX512VL"
11671 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11672 [(set_attr "type" "ssemov")
11673 (set_attr "prefix" "evex")
11674 (set_attr "mode" "TI")])
11675
;; Masked four-byte truncating store; masked-off bytes keep memory.
11676 (define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
11677 [(set (match_operand:V4QI 0 "memory_operand" "=m")
11678 (vec_merge:V4QI
11679 (any_truncate:V4QI
11680 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11681 (match_dup 0)
11682 (match_operand:QI 2 "register_operand" "Yk")))]
11683 "TARGET_AVX512VL"
11684 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11685 [(set_attr "type" "ssemov")
11686 (set_attr "memory" "store")
11687 (set_attr "prefix" "evex")
11688 (set_attr "mode" "TI")])
11689
;; Splitter: masked store seen through an SI subreg with the destination
;; reloaded via vec_concat; rewrite memory to V4QImode and use the
;; direct pattern above.
11690 (define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
11691 [(set (match_operand:SI 0 "memory_operand")
11692 (subreg:SI
11693 (vec_merge:V4QI
11694 (any_truncate:V4QI
11695 (match_operand:VI4_128_8_256 1 "register_operand"))
11696 (vec_select:V4QI
11697 (subreg:V8QI
11698 (vec_concat:V2SI
11699 (match_dup 0)
11700 (const_int 0)) 0)
11701 (parallel [(const_int 0) (const_int 1)
11702 (const_int 2) (const_int 3)]))
11703 (match_operand:QI 2 "register_operand")) 0))]
11704 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11705 "#"
11706 "&& 1"
11707 [(set (match_dup 0)
11708 (vec_merge:V4QI
11709 (any_truncate:V4QI (match_dup 1))
11710 (match_dup 0)
11711 (match_dup 2)))]
11712 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
11713
;; Sources for V8QI truncation; the V8HI case needs AVX512BW (HI elts).
11714 (define_mode_iterator VI2_128_BW_4_256
11715 [(V8HI "TARGET_AVX512BW") V8SI])
11716
;; Eight-byte truncating store.
11717 (define_insn "*avx512vl_<code><mode>v8qi2_store_1"
11718 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11719 (any_truncate:V8QI
11720 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
11721 "TARGET_AVX512VL"
11722 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11723 [(set_attr "type" "ssemov")
11724 (set_attr "memory" "store")
11725 (set_attr "prefix" "evex")
11726 (set_attr "mode" "TI")])
11727
;; Same store through a DI subreg; retarget memory to V8QImode.
11728 (define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
11729 [(set (match_operand:DI 0 "memory_operand" "=m")
11730 (subreg:DI
11731 (any_truncate:V8QI
11732 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))]
11733 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11734 "#"
11735 "&& 1"
11736 [(set (match_dup 0)
11737 (any_truncate:V8QI (match_dup 1)))]
11738 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11739
;; Masked truncate to the low eight bytes of an XMM destination; the
;; upper eight bytes are explicitly zero.
11740 (define_insn "avx512vl_<code><mode>v8qi2_mask"
11741 [(set (match_operand:V16QI 0 "register_operand" "=v")
11742 (vec_concat:V16QI
11743 (vec_merge:V8QI
11744 (any_truncate:V8QI
11745 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11746 (vec_select:V8QI
11747 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11748 (parallel [(const_int 0) (const_int 1)
11749 (const_int 2) (const_int 3)
11750 (const_int 4) (const_int 5)
11751 (const_int 6) (const_int 7)]))
11752 (match_operand:QI 3 "register_operand" "Yk"))
11753 (const_vector:V8QI [(const_int 0) (const_int 0)
11754 (const_int 0) (const_int 0)
11755 (const_int 0) (const_int 0)
11756 (const_int 0) (const_int 0)])))]
11757 "TARGET_AVX512VL"
11758 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11759 [(set_attr "type" "ssemov")
11760 (set_attr "prefix" "evex")
11761 (set_attr "mode" "TI")])
11762
;; Zero-masking special case ({z} printed unconditionally).
11763 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
11764 [(set (match_operand:V16QI 0 "register_operand" "=v")
11765 (vec_concat:V16QI
11766 (vec_merge:V8QI
11767 (any_truncate:V8QI
11768 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11769 (const_vector:V8QI [(const_int 0) (const_int 0)
11770 (const_int 0) (const_int 0)
11771 (const_int 0) (const_int 0)
11772 (const_int 0) (const_int 0)])
11773 (match_operand:QI 2 "register_operand" "Yk"))
11774 (const_vector:V8QI [(const_int 0) (const_int 0)
11775 (const_int 0) (const_int 0)
11776 (const_int 0) (const_int 0)
11777 (const_int 0) (const_int 0)])))]
11778 "TARGET_AVX512VL"
11779 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11780 [(set_attr "type" "ssemov")
11781 (set_attr "prefix" "evex")
11782 (set_attr "mode" "TI")])
11783
;; Masked eight-byte truncating store; masked-off bytes keep memory.
11784 (define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
11785 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11786 (vec_merge:V8QI
11787 (any_truncate:V8QI
11788 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11789 (match_dup 0)
11790 (match_operand:QI 2 "register_operand" "Yk")))]
11791 "TARGET_AVX512VL"
11792 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11793 [(set_attr "type" "ssemov")
11794 (set_attr "memory" "store")
11795 (set_attr "prefix" "evex")
11796 (set_attr "mode" "TI")])
11797
;; Splitter: DI-subreg form of the masked store with the destination
;; reloaded via vec_concat; rewrite memory to V8QImode and use the
;; direct pattern above.
11798 (define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
11799 [(set (match_operand:DI 0 "memory_operand")
11800 (subreg:DI
11801 (vec_merge:V8QI
11802 (any_truncate:V8QI
11803 (match_operand:VI2_128_BW_4_256 1 "register_operand"))
11804 (vec_select:V8QI
11805 (subreg:V16QI
11806 (vec_concat:V2DI
11807 (match_dup 0)
11808 (const_int 0)) 0)
11809 (parallel [(const_int 0) (const_int 1)
11810 (const_int 2) (const_int 3)
11811 (const_int 4) (const_int 5)
11812 (const_int 6) (const_int 7)]))
11813 (match_operand:QI 2 "register_operand")) 0))]
11814 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11815 "#"
11816 "&& 1"
11817 [(set (match_dup 0)
11818 (vec_merge:V8QI
11819 (any_truncate:V8QI (match_dup 1))
11820 (match_dup 0)
11821 (match_dup 2)))]
11822 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11823
;; Sources truncating to HI sub-vectors narrower than 128 bits; as with
;; the QI family, the result is padded with zeros to a full V8HI.
11824 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
11825 (define_mode_attr pmov_dst_4
11826 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
11827 (define_mode_attr pmov_dst_4_lower
11828 [(V4DI "v4hi") (V2DI "v2hi") (V4SI "v4hi")])
;; Zero padding: dst elements + zeroed elements = 8 HI total.
11829 (define_mode_attr pmov_dst_zeroed_4
11830 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
11831 (define_mode_attr pmov_suff_4
11832 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
11833
;; Named expander: view the narrow destination as the low part of a
;; V8HI register and emit the zero-padded insn below.
11834 (define_expand "trunc<mode><pmov_dst_4_lower>2"
11835 [(set (match_operand:<pmov_dst_4> 0 "register_operand")
11836 (truncate:<pmov_dst_4>
11837 (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
11838 "TARGET_AVX512VL"
11839 {
11840 operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
11841 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
11842 operands[1],
11843 CONST0_RTX (<pmov_dst_zeroed_4>mode)));
11844 DONE;
11845
11846 })
11847
;; Truncate to the low HI lanes of an XMM register, upper lanes zeroed.
11848 (define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
11849 [(set (match_operand:V8HI 0 "register_operand" "=v")
11850 (vec_concat:V8HI
11851 (any_truncate:<pmov_dst_4>
11852 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
11853 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
11854 "TARGET_AVX512VL"
11855 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11856 [(set_attr "type" "ssemov")
11857 (set_attr "prefix" "evex")
11858 (set_attr "mode" "TI")])
11859
;; Pre-reload splitter: PSHUFB with a control matched by
;; pshufb_truncv4siv4hi_operand, read back through element 0 of a V2DI
;; view, is really a V4SI->V4HI truncation packed into a DI.  Rewrite it
;; through gen_truncv4siv4hi2 and a DI lowpart move.
11860 (define_insn_and_split "*avx512f_pshufb_truncv4siv4hi_1"
11861 [(set (match_operand:DI 0 "register_operand")
11862 (vec_select:DI
11863 (subreg:V2DI
11864 (unspec:V16QI
11865 [(match_operand:V16QI 1 "register_operand")
11866 (match_operand:V16QI 2 "pshufb_truncv4siv4hi_operand")]
11867 UNSPEC_PSHUFB) 0)
11868 (parallel [(const_int 0)])))]
11869 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11870 "#"
11871 "&& 1"
11872 [(const_int 0)]
11873 {
11874 rtx op1 = gen_reg_rtx (V4HImode);
11875 operands[1] = lowpart_subreg (V4SImode, operands[1], V16QImode);
11876 emit_insn (gen_truncv4siv4hi2 (op1, operands[1]));
11877 emit_move_insn (operands[0], lowpart_subreg (DImode, op1, V4HImode));
11878 DONE;
11879 })
11880
;; V4DI/V8SI (VI4_128_8_256) -> V4HI truncating store: writes 8 bytes.
11881 (define_insn "*avx512vl_<code><mode>v4hi2_store_1"
11882 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11883 (any_truncate:V4HI
11884 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11885 "TARGET_AVX512VL"
11886 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11887 [(set_attr "type" "ssemov")
11888 (set_attr "memory" "store")
11889 (set_attr "prefix" "evex")
11890 (set_attr "mode" "TI")])
11891
;; Same store through a DI subreg; retarget memory to V4HImode.
11892 (define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
11893 [(set (match_operand:DI 0 "memory_operand")
11894 (subreg:DI
11895 (any_truncate:V4HI
11896 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11897 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11898 "#"
11899 "&& 1"
11900 [(set (match_dup 0)
11901 (any_truncate:V4HI (match_dup 1)))]
11902 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11903
;; Masked truncate to the low four HI lanes of an XMM destination; the
;; upper four lanes are explicitly zero.
11904 (define_insn "avx512vl_<code><mode>v4hi2_mask"
11905 [(set (match_operand:V8HI 0 "register_operand" "=v")
11906 (vec_concat:V8HI
11907 (vec_merge:V4HI
11908 (any_truncate:V4HI
11909 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11910 (vec_select:V4HI
11911 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11912 (parallel [(const_int 0) (const_int 1)
11913 (const_int 2) (const_int 3)]))
11914 (match_operand:QI 3 "register_operand" "Yk"))
11915 (const_vector:V4HI [(const_int 0) (const_int 0)
11916 (const_int 0) (const_int 0)])))]
11917 "TARGET_AVX512VL"
11918 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11919 [(set_attr "type" "ssemov")
11920 (set_attr "prefix" "evex")
11921 (set_attr "mode" "TI")])
11922
;; Zero-masking special case ({z} printed unconditionally).
11923 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
11924 [(set (match_operand:V8HI 0 "register_operand" "=v")
11925 (vec_concat:V8HI
11926 (vec_merge:V4HI
11927 (any_truncate:V4HI
11928 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11929 (const_vector:V4HI [(const_int 0) (const_int 0)
11930 (const_int 0) (const_int 0)])
11931 (match_operand:QI 2 "register_operand" "Yk"))
11932 (const_vector:V4HI [(const_int 0) (const_int 0)
11933 (const_int 0) (const_int 0)])))]
11934 "TARGET_AVX512VL"
11935 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11936 [(set_attr "type" "ssemov")
11937 (set_attr "prefix" "evex")
11938 (set_attr "mode" "TI")])
11939
;; Masked V4HI truncating store.  The C output block picks the operand
;; modifier by source element size: %t1 (256-bit name) for 4-byte
;; elements, %g1 (512-bit name) otherwise.
11940 (define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
11941 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11942 (vec_merge:V4HI
11943 (any_truncate:V4HI
11944 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11945 (match_dup 0)
11946 (match_operand:QI 2 "register_operand" "Yk")))]
11947 "TARGET_AVX512VL"
11948 {
11949 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
11950 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
11951 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
11952 }
11953 [(set_attr "type" "ssemov")
11954 (set_attr "memory" "store")
11955 (set_attr "prefix" "evex")
11956 (set_attr "mode" "TI")])
11957
;; Splitter: DI-subreg form of the masked store with the destination
;; reloaded via vec_concat; rewrite memory to V4HImode and use the
;; direct pattern above.
11958 (define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
11959 [(set (match_operand:DI 0 "memory_operand")
11960 (subreg:DI
11961 (vec_merge:V4HI
11962 (any_truncate:V4HI
11963 (match_operand:VI4_128_8_256 1 "register_operand"))
11964 (vec_select:V4HI
11965 (subreg:V8HI
11966 (vec_concat:V2DI
11967 (match_dup 0)
11968 (const_int 0)) 0)
11969 (parallel [(const_int 0) (const_int 1)
11970 (const_int 2) (const_int 3)]))
11971 (match_operand:QI 2 "register_operand")) 0))]
11972 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11973 "#"
11974 "&& 1"
11975 [(set (match_dup 0)
11976 (vec_merge:V4HI
11977 (any_truncate:V4HI (match_dup 1))
11978 (match_dup 0)
11979 (match_dup 2)))]
11980 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
11981
11982
;; V2DI -> V2HI truncating store: writes four bytes (vpmov[us]qw).
11983 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
11984 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11985 (any_truncate:V2HI
11986 (match_operand:V2DI 1 "register_operand" "v")))]
11987 "TARGET_AVX512VL"
11988 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11989 [(set_attr "type" "ssemov")
11990 (set_attr "memory" "store")
11991 (set_attr "prefix" "evex")
11992 (set_attr "mode" "TI")])
11993
;; Same store through an SI subreg; retarget memory to V2HImode.
11994 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
11995 [(set (match_operand:SI 0 "memory_operand")
11996 (subreg:SI
11997 (any_truncate:V2HI
11998 (match_operand:V2DI 1 "register_operand")) 0))]
11999 "TARGET_AVX512VL && ix86_pre_reload_split ()"
12000 "#"
12001 "&& 1"
12002 [(set (match_dup 0)
12003 (any_truncate:V2HI (match_dup 1)))]
12004 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
12005
;; Masked V2DI->V2HI: the two live HI lanes merge with the low two lanes
;; of operand 2 under the mask; the remaining six lanes are zero.
12006 (define_insn "avx512vl_<code>v2div2hi2_mask"
12007 [(set (match_operand:V8HI 0 "register_operand" "=v")
12008 (vec_concat:V8HI
12009 (vec_merge:V2HI
12010 (any_truncate:V2HI
12011 (match_operand:V2DI 1 "register_operand" "v"))
12012 (vec_select:V2HI
12013 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
12014 (parallel [(const_int 0) (const_int 1)]))
12015 (match_operand:QI 3 "register_operand" "Yk"))
12016 (const_vector:V6HI [(const_int 0) (const_int 0)
12017 (const_int 0) (const_int 0)
12018 (const_int 0) (const_int 0)])))]
12019 "TARGET_AVX512VL"
12020 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
12021 [(set_attr "type" "ssemov")
12022 (set_attr "prefix" "evex")
12023 (set_attr "mode" "TI")])
12024
;; Zero-masking special case ({z} printed unconditionally).
12025 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
12026 [(set (match_operand:V8HI 0 "register_operand" "=v")
12027 (vec_concat:V8HI
12028 (vec_merge:V2HI
12029 (any_truncate:V2HI
12030 (match_operand:V2DI 1 "register_operand" "v"))
12031 (const_vector:V2HI [(const_int 0) (const_int 0)])
12032 (match_operand:QI 2 "register_operand" "Yk"))
12033 (const_vector:V6HI [(const_int 0) (const_int 0)
12034 (const_int 0) (const_int 0)
12035 (const_int 0) (const_int 0)])))]
12036 "TARGET_AVX512VL"
12037 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
12038 [(set_attr "type" "ssemov")
12039 (set_attr "prefix" "evex")
12040 (set_attr "mode" "TI")])
12041
;; Masked V2DI->V2HI truncating store: only elements selected by mask
;; operand 2 are written to memory.
;; Fix: the Intel-dialect side of the template printed the source as %g1
;; (the zmm form of operand 1) while the AT&T side printed %1 (xmm); both
;; dialects must print the same operand form, and the sibling masked-store
;; patterns (v2div2si2/v8div16qi2) use plain %1 on both sides.
12042 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
12043 [(set (match_operand:V2HI 0 "memory_operand" "=m")
12044 (vec_merge:V2HI
12045 (any_truncate:V2HI
12046 (match_operand:V2DI 1 "register_operand" "v"))
12047 (match_dup 0)
12048 (match_operand:QI 2 "register_operand" "Yk")))]
12049 "TARGET_AVX512VL"
12050 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
12051 [(set_attr "type" "ssemov")
12052 (set_attr "memory" "store")
12053 (set_attr "prefix" "evex")
12054 (set_attr "mode" "TI")])
12055
;; Pre-reload split: recognize the SImode-subreg form of a masked
;; truncating store (merge source reconstructed from the destination
;; memory) and rewrite it as a plain V2HImode masked vector store.
12056 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
12057 [(set (match_operand:SI 0 "memory_operand")
12058 (subreg:SI
12059 (vec_merge:V2HI
12060 (any_truncate:V2HI
12061 (match_operand:V2DI 1 "register_operand"))
12062 (vec_select:V2HI
12063 (subreg:V4HI
12064 (vec_concat:V2SI
12065 (match_dup 0)
12066 (const_int 0)) 0)
12067 (parallel [(const_int 0) (const_int 1)]))
12068 (match_operand:QI 2 "register_operand")) 0))]
12069 "TARGET_AVX512VL && ix86_pre_reload_split ()"
12070 "#"
12071 "&& 1"
12072 [(set (match_dup 0)
12073 (vec_merge:V2HI
12074 (any_truncate:V2HI (match_dup 1))
12075 (match_dup 0)
12076 (match_dup 2)))]
12077 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
12078
;; V2DI -> V2SI truncation.  The result lives in the low half of a V4SI
;; register; the expander rewrites operand 0 as a V4SI subreg and emits
;; the AVX512VL insn below with an explicit zero upper half.
12079 (define_expand "truncv2div2si2"
12080 [(set (match_operand:V2SI 0 "register_operand")
12081 (truncate:V2SI
12082 (match_operand:V2DI 1 "register_operand")))]
12083 "TARGET_AVX512VL"
12084 {
12085 operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
12086 emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
12087 operands[1],
12088 CONST0_RTX (V2SImode)));
12089 DONE;
12090 })
12091
;; vpmov{s,us,}qd: truncate two DI elements to SI, zeroing the upper half
;; of the V4SI destination (operand 2 is matched as const0).
12092 (define_insn "avx512vl_<code>v2div2si2"
12093 [(set (match_operand:V4SI 0 "register_operand" "=v")
12094 (vec_concat:V4SI
12095 (any_truncate:V2SI
12096 (match_operand:V2DI 1 "register_operand" "v"))
12097 (match_operand:V2SI 2 "const0_operand")))]
12098 "TARGET_AVX512VL"
12099 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
12100 [(set_attr "type" "ssemov")
12101 (set_attr "prefix" "evex")
12102 (set_attr "mode" "TI")])
12103
;; Recognize a pshufd-style {0,2,...} shuffle followed by extraction of
;; the low DI element and implement it as a V2DI->V2SI truncation
;; (vpmovqd) instead.
12104 (define_insn_and_split "*avx512f_pshufd_truncv2div2si_1"
12105 [(set (match_operand:DI 0 "register_operand")
12106 (vec_select:DI
12107 (subreg:V2DI
12108 (vec_select:V4SI
12109 (match_operand:V4SI 1 "register_operand")
12110 (parallel [(const_int 0) (const_int 2)
12111 (const_int 2) (const_int 3)])) 0)
12112 (parallel [(const_int 0)])))]
12113 "TARGET_AVX512VL && ix86_pre_reload_split ()"
12114 "#"
12115 "&& 1"
12116 [(const_int 0)]
12117 {
12118 rtx op1 = gen_reg_rtx (V2SImode);
12119 operands[1] = lowpart_subreg (V2DImode, operands[1], V4SImode);
12120 emit_insn (gen_truncv2div2si2 (op1, operands[1]));
12121 emit_move_insn (operands[0], lowpart_subreg (DImode, op1, V2SImode));
12122 DONE;
12123 })
12124
;; Direct V2DI->V2SI truncating store to memory.
12125 (define_insn "*avx512vl_<code>v2div2si2_store_1"
12126 [(set (match_operand:V2SI 0 "memory_operand" "=m")
12127 (any_truncate:V2SI
12128 (match_operand:V2DI 1 "register_operand" "v")))]
12129 "TARGET_AVX512VL"
12130 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
12131 [(set_attr "type" "ssemov")
12132 (set_attr "memory" "store")
12133 (set_attr "prefix" "evex")
12134 (set_attr "mode" "TI")])
12135
;; Pre-reload split: DImode store of a truncation subreg -> V2SI vector
;; store, so the store form above matches.
12136 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
12137 [(set (match_operand:DI 0 "memory_operand")
12138 (subreg:DI
12139 (any_truncate:V2SI
12140 (match_operand:V2DI 1 "register_operand")) 0))]
12141 "TARGET_AVX512VL && ix86_pre_reload_split ()"
12142 "#"
12143 "&& 1"
12144 [(set (match_dup 0)
12145 (any_truncate:V2SI (match_dup 1)))]
12146 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
12147
;; Merge-masked V2DI->V2SI truncation; upper V2SI half of the V4SI
;; destination is zeroed.  %N2 emits {z} when operand 2 is const0.
12148 (define_insn "avx512vl_<code>v2div2si2_mask"
12149 [(set (match_operand:V4SI 0 "register_operand" "=v")
12150 (vec_concat:V4SI
12151 (vec_merge:V2SI
12152 (any_truncate:V2SI
12153 (match_operand:V2DI 1 "register_operand" "v"))
12154 (vec_select:V2SI
12155 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
12156 (parallel [(const_int 0) (const_int 1)]))
12157 (match_operand:QI 3 "register_operand" "Yk"))
12158 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
12159 "TARGET_AVX512VL"
12160 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
12161 [(set_attr "type" "ssemov")
12162 (set_attr "prefix" "evex")
12163 (set_attr "mode" "TI")])
12164
;; Zero-masking variant of the above.
12165 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
12166 [(set (match_operand:V4SI 0 "register_operand" "=v")
12167 (vec_concat:V4SI
12168 (vec_merge:V2SI
12169 (any_truncate:V2SI
12170 (match_operand:V2DI 1 "register_operand" "v"))
12171 (const_vector:V2SI [(const_int 0) (const_int 0)])
12172 (match_operand:QI 2 "register_operand" "Yk"))
12173 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
12174 "TARGET_AVX512VL"
12175 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
12176 [(set_attr "type" "ssemov")
12177 (set_attr "prefix" "evex")
12178 (set_attr "mode" "TI")])
12179
;; Masked truncating store: only mask-selected SI elements are written.
12180 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
12181 [(set (match_operand:V2SI 0 "memory_operand" "=m")
12182 (vec_merge:V2SI
12183 (any_truncate:V2SI
12184 (match_operand:V2DI 1 "register_operand" "v"))
12185 (match_dup 0)
12186 (match_operand:QI 2 "register_operand" "Yk")))]
12187 "TARGET_AVX512VL"
12188 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
12189 [(set_attr "type" "ssemov")
12190 (set_attr "memory" "store")
12191 (set_attr "prefix" "evex")
12192 (set_attr "mode" "TI")])
12193
;; Pre-reload split: DImode-subreg form of the masked truncating store,
;; with the merge source reconstructed from the destination memory,
;; rewritten into the plain V2SI masked store above.
12194 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
12195 [(set (match_operand:DI 0 "memory_operand")
12196 (subreg:DI
12197 (vec_merge:V2SI
12198 (any_truncate:V2SI
12199 (match_operand:V2DI 1 "register_operand"))
12200 (vec_select:V2SI
12201 (subreg:V4SI
12202 (vec_concat:V2DI
12203 (match_dup 0)
12204 (const_int 0)) 0)
12205 (parallel [(const_int 0) (const_int 1)]))
12206 (match_operand:QI 2 "register_operand")) 0))]
12207 "TARGET_AVX512VL && ix86_pre_reload_split ()"
12208 "#"
12209 "&& 1"
12210 [(set (match_dup 0)
12211 (vec_merge:V2SI
12212 (any_truncate:V2SI (match_dup 1))
12213 (match_dup 0)
12214 (match_dup 2)))]
12215 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
12216
;; V8DI -> V8QI truncation (AVX512F, 512-bit source).  The result is the
;; low half of a V16QI register; the expander rewrites operand 0 and
;; emits the insn below, which zeroes the upper 8 QI elements.
12217 (define_expand "truncv8div8qi2"
12218 [(set (match_operand:V8QI 0 "register_operand")
12219 (truncate:V8QI
12220 (match_operand:V8DI 1 "register_operand")))]
12221 "TARGET_AVX512F"
12222 {
12223 operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
12224 emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
12225 DONE;
12226 })
12227
;; vpmov{s,us,}qb: truncate eight DI elements to QI, zero upper half.
12228 (define_insn "avx512f_<code>v8div16qi2"
12229 [(set (match_operand:V16QI 0 "register_operand" "=v")
12230 (vec_concat:V16QI
12231 (any_truncate:V8QI
12232 (match_operand:V8DI 1 "register_operand" "v"))
12233 (const_vector:V8QI [(const_int 0) (const_int 0)
12234 (const_int 0) (const_int 0)
12235 (const_int 0) (const_int 0)
12236 (const_int 0) (const_int 0)])))]
12237 "TARGET_AVX512F"
12238 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
12239 [(set_attr "type" "ssemov")
12240 (set_attr "prefix" "evex")
12241 (set_attr "mode" "TI")])
12242
;; Direct truncating store of the eight QI results.
12243 (define_insn "*avx512f_<code>v8div16qi2_store_1"
12244 [(set (match_operand:V8QI 0 "memory_operand" "=m")
12245 (any_truncate:V8QI
12246 (match_operand:V8DI 1 "register_operand" "v")))]
12247 "TARGET_AVX512F"
12248 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
12249 [(set_attr "type" "ssemov")
12250 (set_attr "memory" "store")
12251 (set_attr "prefix" "evex")
12252 (set_attr "mode" "TI")])
12253
;; Pre-reload split: DImode-subreg store form -> V8QI vector store.
12254 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
12255 [(set (match_operand:DI 0 "memory_operand")
12256 (subreg:DI
12257 (any_truncate:V8QI
12258 (match_operand:V8DI 1 "register_operand")) 0))]
12259 "TARGET_AVX512F && ix86_pre_reload_split ()"
12260 "#"
12261 "&& 1"
12262 [(set (match_dup 0)
12263 (any_truncate:V8QI (match_dup 1)))]
12264 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
12265
;; Merge-masked V8DI->V8QI truncation; upper half of V16QI zeroed.
12266 (define_insn "avx512f_<code>v8div16qi2_mask"
12267 [(set (match_operand:V16QI 0 "register_operand" "=v")
12268 (vec_concat:V16QI
12269 (vec_merge:V8QI
12270 (any_truncate:V8QI
12271 (match_operand:V8DI 1 "register_operand" "v"))
12272 (vec_select:V8QI
12273 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
12274 (parallel [(const_int 0) (const_int 1)
12275 (const_int 2) (const_int 3)
12276 (const_int 4) (const_int 5)
12277 (const_int 6) (const_int 7)]))
12278 (match_operand:QI 3 "register_operand" "Yk"))
12279 (const_vector:V8QI [(const_int 0) (const_int 0)
12280 (const_int 0) (const_int 0)
12281 (const_int 0) (const_int 0)
12282 (const_int 0) (const_int 0)])))]
12283 "TARGET_AVX512F"
12284 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
12285 [(set_attr "type" "ssemov")
12286 (set_attr "prefix" "evex")
12287 (set_attr "mode" "TI")])
12288
;; Zero-masking variant of the above.
12289 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
12290 [(set (match_operand:V16QI 0 "register_operand" "=v")
12291 (vec_concat:V16QI
12292 (vec_merge:V8QI
12293 (any_truncate:V8QI
12294 (match_operand:V8DI 1 "register_operand" "v"))
12295 (const_vector:V8QI [(const_int 0) (const_int 0)
12296 (const_int 0) (const_int 0)
12297 (const_int 0) (const_int 0)
12298 (const_int 0) (const_int 0)])
12299 (match_operand:QI 2 "register_operand" "Yk"))
12300 (const_vector:V8QI [(const_int 0) (const_int 0)
12301 (const_int 0) (const_int 0)
12302 (const_int 0) (const_int 0)
12303 (const_int 0) (const_int 0)])))]
12304 "TARGET_AVX512F"
12305 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
12306 [(set_attr "type" "ssemov")
12307 (set_attr "prefix" "evex")
12308 (set_attr "mode" "TI")])
12309
;; Masked truncating store: only mask-selected QI elements are written.
12310 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
12311 [(set (match_operand:V8QI 0 "memory_operand" "=m")
12312 (vec_merge:V8QI
12313 (any_truncate:V8QI
12314 (match_operand:V8DI 1 "register_operand" "v"))
12315 (match_dup 0)
12316 (match_operand:QI 2 "register_operand" "Yk")))]
12317 "TARGET_AVX512F"
12318 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
12319 [(set_attr "type" "ssemov")
12320 (set_attr "memory" "store")
12321 (set_attr "prefix" "evex")
12322 (set_attr "mode" "TI")])
12323
;; Pre-reload split: DImode-subreg form of the masked truncating store
;; rewritten into the plain V8QI masked store above.
12324 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
12325 [(set (match_operand:DI 0 "memory_operand")
12326 (subreg:DI
12327 (vec_merge:V8QI
12328 (any_truncate:V8QI
12329 (match_operand:V8DI 1 "register_operand"))
12330 (vec_select:V8QI
12331 (subreg:V16QI
12332 (vec_concat:V2DI
12333 (match_dup 0)
12334 (const_int 0)) 0)
12335 (parallel [(const_int 0) (const_int 1)
12336 (const_int 2) (const_int 3)
12337 (const_int 4) (const_int 5)
12338 (const_int 6) (const_int 7)]))
12339 (match_operand:QI 2 "register_operand")) 0))]
12340 "TARGET_AVX512F && ix86_pre_reload_split ()"
12341 "#"
12342 "&& 1"
12343 [(set (match_dup 0)
12344 (vec_merge:V8QI
12345 (any_truncate:V8QI (match_dup 1))
12346 (match_dup 0)
12347 (match_dup 2)))]
12348 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
12349
12350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12351 ;;
12352 ;; Parallel integral arithmetic
12353 ;;
12354 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12355
;; Vector negation expanded as (0 - x); the zero vector is forced into a
;; register as operand 2.
12356 (define_expand "neg<mode>2"
12357 [(set (match_operand:VI_AVX2 0 "register_operand")
12358 (minus:VI_AVX2
12359 (match_dup 2)
12360 (match_operand:VI_AVX2 1 "vector_operand")))]
12361 "TARGET_SSE2"
12362 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
12363
;; Generic vector add/sub expander; legitimizes operands (at most one
;; memory operand, commutative-operand canonicalization).
12364 (define_expand "<insn><mode>3"
12365 [(set (match_operand:VI_AVX2 0 "register_operand")
12366 (plusminus:VI_AVX2
12367 (match_operand:VI_AVX2 1 "vector_operand")
12368 (match_operand:VI_AVX2 2 "vector_operand")))]
12369 "TARGET_SSE2"
12370 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12371
;; Conditional (mask-predicated) add/sub for the vectorizer's COND_ADD /
;; COND_SUB internal fns; forwarded to the _mask expanders below with the
;; operand order they expect.
12372 (define_expand "cond_<insn><mode>"
12373 [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
12374 (vec_merge:VI1248_AVX512VLBW
12375 (plusminus:VI1248_AVX512VLBW
12376 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand")
12377 (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand"))
12378 (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand")
12379 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
12380 "TARGET_AVX512F"
12381 {
12382 emit_insn (gen_<insn><mode>3_mask (operands[0],
12383 operands[2],
12384 operands[3],
12385 operands[4],
12386 operands[1]))
;; NOTE: closing paren/semicolon for the emit_insn call is on the line
;; above as in the upstream file; see next line for DONE.
12387 DONE;
12388 })
12389
;; Masked add/sub expander, SI/DI element sizes (AVX512F/AVX512VL).
12390 (define_expand "<insn><mode>3_mask"
12391 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12392 (vec_merge:VI48_AVX512VL
12393 (plusminus:VI48_AVX512VL
12394 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12395 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12396 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12397 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12398 "TARGET_AVX512F"
12399 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12400
;; Masked add/sub expander, QI/HI element sizes (requires AVX512BW).
12401 (define_expand "<insn><mode>3_mask"
12402 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
12403 (vec_merge:VI12_AVX512VL
12404 (plusminus:VI12_AVX512VL
12405 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12406 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
12407 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
12408 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12409 "TARGET_AVX512BW"
12410 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12411
;; padd/psub insn.  Alternative 0: legacy SSE two-operand form;
;; alternative 1: VEX/EVEX three-operand form, allowing embedded
;; broadcast (Br) via bcst_vector_operand.
12412 (define_insn "*<insn><mode>3"
12413 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
12414 (plusminus:VI_AVX2
12415 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
12416 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
12417 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12418 "@
12419 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
12420 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12421 [(set_attr "isa" "noavx,avx")
12422 (set_attr "type" "sseiadd")
12423 (set_attr "prefix_data16" "1,*")
12424 (set_attr "prefix" "orig,maybe_evex")
12425 (set_attr "mode" "<sseinsnmode>")])
12426
;; Masked padd/psub, SI/DI elements.  %N3 prints {z} when the merge
;; source (operand 3) is const0.
12427 (define_insn "*<insn><mode>3_mask"
12428 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12429 (vec_merge:VI48_AVX512VL
12430 (plusminus:VI48_AVX512VL
12431 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
12432 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12433 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12434 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12435 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12436 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12437 [(set_attr "type" "sseiadd")
12438 (set_attr "prefix" "evex")
12439 (set_attr "mode" "<sseinsnmode>")])
12440
;; Masked padd/psub, QI/HI elements (AVX512BW).
12441 (define_insn "*<insn><mode>3_mask"
12442 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12443 (vec_merge:VI12_AVX512VL
12444 (plusminus:VI12_AVX512VL
12445 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
12446 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
12447 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
12448 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12449 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12450 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12451 [(set_attr "type" "sseiadd")
12452 (set_attr "prefix" "evex")
12453 (set_attr "mode" "<sseinsnmode>")])
12454
;; Saturating add/sub (padds/paddus/psubs/psubus), optionally masked via
;; the <mask_name> define_subst.
12455 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
12456 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
12457 (sat_plusminus:VI12_AVX2_AVX512BW
12458 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
12459 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
12460 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12461 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12462
;; Matching insn: legacy SSE two-operand and VEX/EVEX three-operand
;; alternatives.
12463 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
12464 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
12465 (sat_plusminus:VI12_AVX2_AVX512BW
12466 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>")
12467 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))]
12468 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
12469 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12470 "@
12471 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
12472 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12473 [(set_attr "isa" "noavx,avx")
12474 (set_attr "type" "sseiadd")
12475 (set_attr "prefix_data16" "1,*")
12476 (set_attr "prefix" "orig,maybe_evex")
12477 (set_attr "mode" "TI")])
12478
;; PR96906: (x -us y) == 0  is equivalent to  umin (x, y) == x, which
;; avoids materializing the subtraction result.  V8HImode needs pminuw,
;; hence the SSE4.1 requirement for that mode.
12479 ;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
12480 (define_split
12481 [(set (match_operand:VI12_AVX2 0 "register_operand")
12482 (eq:VI12_AVX2
12483 (us_minus:VI12_AVX2
12484 (match_operand:VI12_AVX2 1 "vector_operand")
12485 (match_operand:VI12_AVX2 2 "vector_operand"))
12486 (match_operand:VI12_AVX2 3 "const0_operand")))]
12487 "TARGET_SSE2
12488 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
12489 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
12490 [(set (match_dup 4)
12491 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
12492 (set (match_dup 0)
12493 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
12494 "operands[4] = gen_reg_rtx (<MODE>mode);")
12495
;; V8QI multiply: no byte-multiply instruction exists, so it is emulated
;; through HImode ops by ix86_expand_vecop_qihi.
12496 (define_expand "mulv8qi3"
12497 [(set (match_operand:V8QI 0 "register_operand")
12498 (mult:V8QI (match_operand:V8QI 1 "register_operand")
12499 (match_operand:V8QI 2 "register_operand")))]
12500 "TARGET_AVX512VL && TARGET_AVX512BW && TARGET_64BIT"
12501 {
12502 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
12503 DONE;
12504 })
12505
;; QImode vector multiply for the full-width QI vector modes, likewise
;; emulated via HImode.
12506 (define_expand "mul<mode>3"
12507 [(set (match_operand:VI1_AVX512 0 "register_operand")
12508 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
12509 (match_operand:VI1_AVX512 2 "register_operand")))]
12510 "TARGET_SSE2"
12511 {
12512 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
12513 DONE;
12514 })
12515
;; Conditional (mask-predicated) HImode multiply for the vectorizer;
;; forwarded to mul<mode>3_mask.
12516 (define_expand "cond_mul<mode>"
12517 [(set (match_operand:VI2_AVX512VL 0 "register_operand")
12518 (vec_merge:VI2_AVX512VL
12519 (mult:VI2_AVX512VL
12520 (match_operand:VI2_AVX512VL 2 "vector_operand")
12521 (match_operand:VI2_AVX512VL 3 "vector_operand"))
12522 (match_operand:VI2_AVX512VL 4 "nonimm_or_0_operand")
12523 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
12524 "TARGET_AVX512BW"
12525 {
12526 emit_insn (gen_mul<mode>3_mask (operands[0],
12527 operands[2],
12528 operands[3],
12529 operands[4],
12530 operands[1]));
12531 DONE;
12532 })
12533
;; HImode vector multiply (pmullw), optionally masked.
12534 (define_expand "mul<mode>3<mask_name>"
12535 [(set (match_operand:VI2_AVX2 0 "register_operand")
12536 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
12537 (match_operand:VI2_AVX2 2 "vector_operand")))]
12538 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12539 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
12540
;; pmullw: low 16 bits of the 16x16 products.
12541 (define_insn "*mul<mode>3<mask_name>"
12542 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
12543 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
12544 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
12545 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
12546 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12547 "@
12548 pmullw\t{%2, %0|%0, %2}
12549 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12550 [(set_attr "isa" "noavx,avx")
12551 (set_attr "type" "sseimul")
12552 (set_attr "prefix_data16" "1,*")
12553 (set_attr "prefix" "orig,vex")
12554 (set_attr "mode" "<sseinsnmode>")])
12555
;; High-part HImode multiply: the RTL widens to double-width elements,
;; multiplies, shifts right by 16 and truncates — matching
;; pmulhw (sign_extend) / pmulhuw (zero_extend).
12556 (define_expand "<s>mul<mode>3_highpart<mask_name>"
12557 [(set (match_operand:VI2_AVX2 0 "register_operand")
12558 (truncate:VI2_AVX2
12559 (lshiftrt:<ssedoublemode>
12560 (mult:<ssedoublemode>
12561 (any_extend:<ssedoublemode>
12562 (match_operand:VI2_AVX2 1 "vector_operand"))
12563 (any_extend:<ssedoublemode>
12564 (match_operand:VI2_AVX2 2 "vector_operand")))
12565 (const_int 16))))]
12566 "TARGET_SSE2
12567 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12568 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
12569
;; pmulh<u>w insn matching the expander above.
12570 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
12571 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
12572 (truncate:VI2_AVX2
12573 (lshiftrt:<ssedoublemode>
12574 (mult:<ssedoublemode>
12575 (any_extend:<ssedoublemode>
12576 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
12577 (any_extend:<ssedoublemode>
12578 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
12579 (const_int 16))))]
12580 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
12581 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12582 "@
12583 pmulh<u>w\t{%2, %0|%0, %2}
12584 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12585 [(set_attr "isa" "noavx,avx")
12586 (set_attr "type" "sseimul")
12587 (set_attr "prefix_data16" "1,*")
12588 (set_attr "prefix" "orig,vex")
12589 (set_attr "mode" "<sseinsnmode>")])
12590
;; Widening unsigned multiply of the even SI elements (vpmuludq):
;; selects elements 0,2,4,... of both inputs, zero-extends to DI and
;; multiplies.  512-bit form.
12591 (define_expand "vec_widen_umult_even_v16si<mask_name>"
12592 [(set (match_operand:V8DI 0 "register_operand")
12593 (mult:V8DI
12594 (zero_extend:V8DI
12595 (vec_select:V8SI
12596 (match_operand:V16SI 1 "nonimmediate_operand")
12597 (parallel [(const_int 0) (const_int 2)
12598 (const_int 4) (const_int 6)
12599 (const_int 8) (const_int 10)
12600 (const_int 12) (const_int 14)])))
12601 (zero_extend:V8DI
12602 (vec_select:V8SI
12603 (match_operand:V16SI 2 "nonimmediate_operand")
12604 (parallel [(const_int 0) (const_int 2)
12605 (const_int 4) (const_int 6)
12606 (const_int 8) (const_int 10)
12607 (const_int 12) (const_int 14)])))))]
12608 "TARGET_AVX512F"
12609 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
12610
;; 512-bit vpmuludq insn.
12611 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
12612 [(set (match_operand:V8DI 0 "register_operand" "=v")
12613 (mult:V8DI
12614 (zero_extend:V8DI
12615 (vec_select:V8SI
12616 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
12617 (parallel [(const_int 0) (const_int 2)
12618 (const_int 4) (const_int 6)
12619 (const_int 8) (const_int 10)
12620 (const_int 12) (const_int 14)])))
12621 (zero_extend:V8DI
12622 (vec_select:V8SI
12623 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
12624 (parallel [(const_int 0) (const_int 2)
12625 (const_int 4) (const_int 6)
12626 (const_int 8) (const_int 10)
12627 (const_int 12) (const_int 14)])))))]
12628 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12629 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12630 [(set_attr "type" "sseimul")
12631 (set_attr "prefix_extra" "1")
12632 (set_attr "prefix" "evex")
12633 (set_attr "mode" "XI")])
12634
;; 256-bit form; masking requires AVX512VL.
12635 (define_expand "vec_widen_umult_even_v8si<mask_name>"
12636 [(set (match_operand:V4DI 0 "register_operand")
12637 (mult:V4DI
12638 (zero_extend:V4DI
12639 (vec_select:V4SI
12640 (match_operand:V8SI 1 "nonimmediate_operand")
12641 (parallel [(const_int 0) (const_int 2)
12642 (const_int 4) (const_int 6)])))
12643 (zero_extend:V4DI
12644 (vec_select:V4SI
12645 (match_operand:V8SI 2 "nonimmediate_operand")
12646 (parallel [(const_int 0) (const_int 2)
12647 (const_int 4) (const_int 6)])))))]
12648 "TARGET_AVX2 && <mask_avx512vl_condition>"
12649 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
12650
;; 256-bit vpmuludq insn.
12651 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
12652 [(set (match_operand:V4DI 0 "register_operand" "=v")
12653 (mult:V4DI
12654 (zero_extend:V4DI
12655 (vec_select:V4SI
12656 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
12657 (parallel [(const_int 0) (const_int 2)
12658 (const_int 4) (const_int 6)])))
12659 (zero_extend:V4DI
12660 (vec_select:V4SI
12661 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
12662 (parallel [(const_int 0) (const_int 2)
12663 (const_int 4) (const_int 6)])))))]
12664 "TARGET_AVX2 && <mask_avx512vl_condition>
12665 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12666 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12667 [(set_attr "type" "sseimul")
12668 (set_attr "prefix" "maybe_evex")
12669 (set_attr "mode" "OI")])
12670
;; 128-bit widening unsigned multiply of even SI elements (pmuludq).
12671 (define_expand "vec_widen_umult_even_v4si<mask_name>"
12672 [(set (match_operand:V2DI 0 "register_operand")
12673 (mult:V2DI
12674 (zero_extend:V2DI
12675 (vec_select:V2SI
12676 (match_operand:V4SI 1 "vector_operand")
12677 (parallel [(const_int 0) (const_int 2)])))
12678 (zero_extend:V2DI
12679 (vec_select:V2SI
12680 (match_operand:V4SI 2 "vector_operand")
12681 (parallel [(const_int 0) (const_int 2)])))))]
12682 "TARGET_SSE2 && <mask_avx512vl_condition>"
12683 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
12684
;; pmuludq/vpmuludq insn; legacy SSE and VEX/EVEX alternatives.
12685 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
12686 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
12687 (mult:V2DI
12688 (zero_extend:V2DI
12689 (vec_select:V2SI
12690 (match_operand:V4SI 1 "vector_operand" "%0,v")
12691 (parallel [(const_int 0) (const_int 2)])))
12692 (zero_extend:V2DI
12693 (vec_select:V2SI
12694 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
12695 (parallel [(const_int 0) (const_int 2)])))))]
12696 "TARGET_SSE2 && <mask_avx512vl_condition>
12697 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12698 "@
12699 pmuludq\t{%2, %0|%0, %2}
12700 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12701 [(set_attr "isa" "noavx,avx")
12702 (set_attr "type" "sseimul")
12703 (set_attr "prefix_data16" "1,*")
12704 (set_attr "prefix" "orig,maybe_evex")
12705 (set_attr "mode" "TI")])
12706
;; Widening signed multiply of the even SI elements (vpmuldq),
;; 512-bit form.
12707 (define_expand "vec_widen_smult_even_v16si<mask_name>"
12708 [(set (match_operand:V8DI 0 "register_operand")
12709 (mult:V8DI
12710 (sign_extend:V8DI
12711 (vec_select:V8SI
12712 (match_operand:V16SI 1 "nonimmediate_operand")
12713 (parallel [(const_int 0) (const_int 2)
12714 (const_int 4) (const_int 6)
12715 (const_int 8) (const_int 10)
12716 (const_int 12) (const_int 14)])))
12717 (sign_extend:V8DI
12718 (vec_select:V8SI
12719 (match_operand:V16SI 2 "nonimmediate_operand")
12720 (parallel [(const_int 0) (const_int 2)
12721 (const_int 4) (const_int 6)
12722 (const_int 8) (const_int 10)
12723 (const_int 12) (const_int 14)])))))]
12724 "TARGET_AVX512F"
12725 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
12726
;; 512-bit vpmuldq insn.
12727 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
12728 [(set (match_operand:V8DI 0 "register_operand" "=v")
12729 (mult:V8DI
12730 (sign_extend:V8DI
12731 (vec_select:V8SI
12732 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
12733 (parallel [(const_int 0) (const_int 2)
12734 (const_int 4) (const_int 6)
12735 (const_int 8) (const_int 10)
12736 (const_int 12) (const_int 14)])))
12737 (sign_extend:V8DI
12738 (vec_select:V8SI
12739 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
12740 (parallel [(const_int 0) (const_int 2)
12741 (const_int 4) (const_int 6)
12742 (const_int 8) (const_int 10)
12743 (const_int 12) (const_int 14)])))))]
12744 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12745 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12746 [(set_attr "type" "sseimul")
12747 (set_attr "prefix_extra" "1")
12748 (set_attr "prefix" "evex")
12749 (set_attr "mode" "XI")])
12750
;; 256-bit signed form; masking requires AVX512VL.
12751 (define_expand "vec_widen_smult_even_v8si<mask_name>"
12752 [(set (match_operand:V4DI 0 "register_operand")
12753 (mult:V4DI
12754 (sign_extend:V4DI
12755 (vec_select:V4SI
12756 (match_operand:V8SI 1 "nonimmediate_operand")
12757 (parallel [(const_int 0) (const_int 2)
12758 (const_int 4) (const_int 6)])))
12759 (sign_extend:V4DI
12760 (vec_select:V4SI
12761 (match_operand:V8SI 2 "nonimmediate_operand")
12762 (parallel [(const_int 0) (const_int 2)
12763 (const_int 4) (const_int 6)])))))]
12764 "TARGET_AVX2 && <mask_avx512vl_condition>"
12765 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
12766
;; 256-bit vpmuldq insn.
;; Fix: the insn condition was missing <mask_avx512vl_condition>, which
;; both its own expander (vec_widen_smult_even_v8si<mask_name>) and the
;; parallel unsigned pattern (*vec_widen_umult_even_v8si<mask_name>)
;; include — without it the masked (EVEX) variant produced by the
;; <mask_name> subst could match without AVX512VL being enabled.
12767 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
12768 [(set (match_operand:V4DI 0 "register_operand" "=v")
12769 (mult:V4DI
12770 (sign_extend:V4DI
12771 (vec_select:V4SI
12772 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
12773 (parallel [(const_int 0) (const_int 2)
12774 (const_int 4) (const_int 6)])))
12775 (sign_extend:V4DI
12776 (vec_select:V4SI
12777 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
12778 (parallel [(const_int 0) (const_int 2)
12779 (const_int 4) (const_int 6)])))))]
12780 "TARGET_AVX2 && <mask_avx512vl_condition>
 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12781 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12782 [(set_attr "type" "sseimul")
12783 (set_attr "prefix_extra" "1")
12784 (set_attr "prefix" "vex")
12785 (set_attr "mode" "OI")])
12786
;; 128-bit widening signed multiply of even SI elements (pmuldq,
;; SSE4.1); masked variant requires AVX512VL.
12787 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
12788 [(set (match_operand:V2DI 0 "register_operand")
12789 (mult:V2DI
12790 (sign_extend:V2DI
12791 (vec_select:V2SI
12792 (match_operand:V4SI 1 "vector_operand")
12793 (parallel [(const_int 0) (const_int 2)])))
12794 (sign_extend:V2DI
12795 (vec_select:V2SI
12796 (match_operand:V4SI 2 "vector_operand")
12797 (parallel [(const_int 0) (const_int 2)])))))]
12798 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
12799 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
12800
;; pmuldq insn; Yr alternative prefers non-REX registers for the
;; shorter legacy encoding.
12801 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
12802 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
12803 (mult:V2DI
12804 (sign_extend:V2DI
12805 (vec_select:V2SI
12806 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
12807 (parallel [(const_int 0) (const_int 2)])))
12808 (sign_extend:V2DI
12809 (vec_select:V2SI
12810 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
12811 (parallel [(const_int 0) (const_int 2)])))))]
12812 "TARGET_SSE4_1 && <mask_avx512vl_condition>
12813 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12814 "@
12815 pmuldq\t{%2, %0|%0, %2}
12816 pmuldq\t{%2, %0|%0, %2}
12817 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12818 [(set_attr "isa" "noavx,noavx,avx")
12819 (set_attr "type" "sseimul")
12820 (set_attr "prefix_data16" "1,1,*")
12821 (set_attr "prefix_extra" "1")
12822 (set_attr "prefix" "orig,orig,vex")
12823 (set_attr "mode" "TI")])
12824
;; vpmaddwd (AVX512BW): multiply adjacent signed HI pairs and add the
;; products into SI elements; modeled as an unspec.
;; Fix: dropped the stray ';' that followed the output-template string —
;; it merely opened an empty end-of-line comment, but no other
;; define_insn in this file has one.
12825 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
12826 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
12827 (unspec:<sseunpackmode>
12828 [(match_operand:VI2_AVX2 1 "register_operand" "v")
12829 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
12830 UNSPEC_PMADDWD512))]
12831 "TARGET_AVX512BW && <mask_mode512bit_condition>"
12832 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12833 [(set_attr "type" "sseiadd")
12834 (set_attr "prefix" "evex")
12835 (set_attr "mode" "XI")])
12836
;; vpmaddwd (AVX2): modeled explicitly in RTL — even-indexed HI pairs
;; and odd-indexed HI pairs are each sign-extended, multiplied, and the
;; two products summed into V8SI.
12837 (define_expand "avx2_pmaddwd"
12838 [(set (match_operand:V8SI 0 "register_operand")
12839 (plus:V8SI
12840 (mult:V8SI
12841 (sign_extend:V8SI
12842 (vec_select:V8HI
12843 (match_operand:V16HI 1 "nonimmediate_operand")
12844 (parallel [(const_int 0) (const_int 2)
12845 (const_int 4) (const_int 6)
12846 (const_int 8) (const_int 10)
12847 (const_int 12) (const_int 14)])))
12848 (sign_extend:V8SI
12849 (vec_select:V8HI
12850 (match_operand:V16HI 2 "nonimmediate_operand")
12851 (parallel [(const_int 0) (const_int 2)
12852 (const_int 4) (const_int 6)
12853 (const_int 8) (const_int 10)
12854 (const_int 12) (const_int 14)]))))
12855 (mult:V8SI
12856 (sign_extend:V8SI
12857 (vec_select:V8HI (match_dup 1)
12858 (parallel [(const_int 1) (const_int 3)
12859 (const_int 5) (const_int 7)
12860 (const_int 9) (const_int 11)
12861 (const_int 13) (const_int 15)])))
12862 (sign_extend:V8SI
12863 (vec_select:V8HI (match_dup 2)
12864 (parallel [(const_int 1) (const_int 3)
12865 (const_int 5) (const_int 7)
12866 (const_int 9) (const_int 11)
12867 (const_int 13) (const_int 15)]))))))]
12868 "TARGET_AVX2"
12869 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
12870
;; Matching vpmaddwd insn (three-operand VEX form).
12871 (define_insn "*avx2_pmaddwd"
12872 [(set (match_operand:V8SI 0 "register_operand" "=Yw")
12873 (plus:V8SI
12874 (mult:V8SI
12875 (sign_extend:V8SI
12876 (vec_select:V8HI
12877 (match_operand:V16HI 1 "nonimmediate_operand" "%Yw")
12878 (parallel [(const_int 0) (const_int 2)
12879 (const_int 4) (const_int 6)
12880 (const_int 8) (const_int 10)
12881 (const_int 12) (const_int 14)])))
12882 (sign_extend:V8SI
12883 (vec_select:V8HI
12884 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")
12885 (parallel [(const_int 0) (const_int 2)
12886 (const_int 4) (const_int 6)
12887 (const_int 8) (const_int 10)
12888 (const_int 12) (const_int 14)]))))
12889 (mult:V8SI
12890 (sign_extend:V8SI
12891 (vec_select:V8HI (match_dup 1)
12892 (parallel [(const_int 1) (const_int 3)
12893 (const_int 5) (const_int 7)
12894 (const_int 9) (const_int 11)
12895 (const_int 13) (const_int 15)])))
12896 (sign_extend:V8SI
12897 (vec_select:V8HI (match_dup 2)
12898 (parallel [(const_int 1) (const_int 3)
12899 (const_int 5) (const_int 7)
12900 (const_int 9) (const_int 11)
12901 (const_int 13) (const_int 15)]))))))]
12902 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12903 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12904 [(set_attr "type" "sseiadd")
12905 (set_attr "prefix" "vex")
12906 (set_attr "mode" "OI")])
12907
;; 128-bit PMADDWD: same even*even + odd*odd structure as the AVX2
;; pattern above, on V8HI -> V4SI.
12908 (define_expand "sse2_pmaddwd"
12909 [(set (match_operand:V4SI 0 "register_operand")
12910 (plus:V4SI
12911 (mult:V4SI
12912 (sign_extend:V4SI
12913 (vec_select:V4HI
12914 (match_operand:V8HI 1 "vector_operand")
12915 (parallel [(const_int 0) (const_int 2)
12916 (const_int 4) (const_int 6)])))
12917 (sign_extend:V4SI
12918 (vec_select:V4HI
12919 (match_operand:V8HI 2 "vector_operand")
12920 (parallel [(const_int 0) (const_int 2)
12921 (const_int 4) (const_int 6)]))))
12922 (mult:V4SI
12923 (sign_extend:V4SI
12924 (vec_select:V4HI (match_dup 1)
12925 (parallel [(const_int 1) (const_int 3)
12926 (const_int 5) (const_int 7)])))
12927 (sign_extend:V4SI
12928 (vec_select:V4HI (match_dup 2)
12929 (parallel [(const_int 1) (const_int 3)
12930 (const_int 5) (const_int 7)]))))))]
12931 "TARGET_SSE2"
12932 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
12933
;; Matching insn: alternative 0 is the two-operand legacy SSE form
;; (dest must match operand 1), alternative 1 the three-operand VEX form.
12934 (define_insn "*sse2_pmaddwd"
12935 [(set (match_operand:V4SI 0 "register_operand" "=x,Yw")
12936 (plus:V4SI
12937 (mult:V4SI
12938 (sign_extend:V4SI
12939 (vec_select:V4HI
12940 (match_operand:V8HI 1 "vector_operand" "%0,Yw")
12941 (parallel [(const_int 0) (const_int 2)
12942 (const_int 4) (const_int 6)])))
12943 (sign_extend:V4SI
12944 (vec_select:V4HI
12945 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")
12946 (parallel [(const_int 0) (const_int 2)
12947 (const_int 4) (const_int 6)]))))
12948 (mult:V4SI
12949 (sign_extend:V4SI
12950 (vec_select:V4HI (match_dup 1)
12951 (parallel [(const_int 1) (const_int 3)
12952 (const_int 5) (const_int 7)])))
12953 (sign_extend:V4SI
12954 (vec_select:V4HI (match_dup 2)
12955 (parallel [(const_int 1) (const_int 3)
12956 (const_int 5) (const_int 7)]))))))]
12957 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12958 "@
12959 pmaddwd\t{%2, %0|%0, %2}
12960 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12961 [(set_attr "isa" "noavx,avx")
12962 (set_attr "type" "sseiadd")
12963 (set_attr "atom_unit" "simul")
12964 (set_attr "prefix_data16" "1,*")
12965 (set_attr "prefix" "orig,vex")
12966 (set_attr "mode" "TI")])
12967
;; Conditional (masked) 64-bit element multiply: merge the VPMULLQ result
;; with operand 4 under mask operand 1, via the _mask variant of the insn
;; below.  Requires AVX512DQ (VPMULLQ is a DQ instruction).
12968 (define_expand "cond_mul<mode>"
12969 [(set (match_operand:VI8_AVX512VL 0 "register_operand")
12970 (vec_merge:VI8_AVX512VL
12971 (mult:VI8_AVX512VL
12972 (match_operand:VI8_AVX512VL 2 "vector_operand")
12973 (match_operand:VI8_AVX512VL 3 "vector_operand"))
12974 (match_operand:VI8_AVX512VL 4 "nonimm_or_0_operand")
12975 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
12976 "TARGET_AVX512DQ"
12977 {
12978 emit_insn (gen_avx512dq_mul<mode>3_mask (operands[0],
12979 operands[2],
12980 operands[3],
12981 operands[4],
12982 operands[1]));
12983 DONE;
12984 })
12985
;; VPMULLQ.  "bcst_vector_operand" with constraint "vmBr" additionally
;; accepts an AVX512 embedded-broadcast memory operand; "%v" makes the
;; multiply commutative for matching purposes.
12986 (define_insn "avx512dq_mul<mode>3<mask_name>"
12987 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
12988 (mult:VI8_AVX512VL
12989 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
12990 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
12991 "TARGET_AVX512DQ && <mask_mode512bit_condition>
12992 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
12993 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12994 [(set_attr "type" "sseimul")
12995 (set_attr "prefix" "evex")
12996 (set_attr "mode" "<sseinsnmode>")])
12997
;; Conditional (masked) 32-bit element multiply, lowered to the masked
;; form of mul<mode>3 below.
12998 (define_expand "cond_mul<mode>"
12999 [(set (match_operand:VI4_AVX512VL 0 "register_operand")
13000 (vec_merge:VI4_AVX512VL
13001 (mult:VI4_AVX512VL
13002 (match_operand:VI4_AVX512VL 2 "vector_operand")
13003 (match_operand:VI4_AVX512VL 3 "vector_operand"))
13004 (match_operand:VI4_AVX512VL 4 "nonimm_or_0_operand")
13005 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
13006 "TARGET_AVX512F"
13007 {
13008 emit_insn (gen_mul<mode>3_mask (operands[0],
13009 operands[2],
13010 operands[3],
13011 operands[4],
13012 operands[1]));
13013 DONE;
13014 })
13015
;; 32-bit element multiply.  With SSE4.1+ the PMULLD insn below matches
;; directly; plain SSE2 has no 32-bit vector multiply, so synthesize it
;; in ix86_expand_sse2_mulv4si3.
13016 (define_expand "mul<mode>3<mask_name>"
13017 [(set (match_operand:VI4_AVX512F 0 "register_operand")
13018 (mult:VI4_AVX512F
13019 (match_operand:VI4_AVX512F 1 "general_vector_operand")
13020 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
13021 "TARGET_SSE2 && <mask_mode512bit_condition>"
13022 {
13023 if (TARGET_SSE4_1)
13024 {
13025 if (!vector_operand (operands[1], <MODE>mode))
13026 operands[1] = force_reg (<MODE>mode, operands[1]);
13027 if (!vector_operand (operands[2], <MODE>mode))
13028 operands[2] = force_reg (<MODE>mode, operands[2]);
13029 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
13030 }
13031 else
13032 {
13033 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
13034 DONE;
13035 }
13036 })
13037
;; PMULLD / VPMULLD.  Alternatives: legacy SSE4.1 two-operand forms
;; (Yr avoids a partial-port penalty encoding preference), and the
;; VEX/EVEX three-operand form; "vmBr" allows embedded broadcast.
13038 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
13039 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
13040 (mult:VI4_AVX512F
13041 (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
13042 (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
13043 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
13044 && <mask_mode512bit_condition>"
13045 "@
13046 pmulld\t{%2, %0|%0, %2}
13047 pmulld\t{%2, %0|%0, %2}
13048 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13049 [(set_attr "isa" "noavx,noavx,avx")
13050 (set_attr "type" "sseimul")
13051 (set_attr "prefix_extra" "1")
13052 (set_attr "prefix" "<bcst_mask_prefix4>")
13053 (set_attr "btver2_decode" "vector,vector,vector")
13054 (set_attr "mode" "<sseinsnmode>")])
13055
;; 64-bit element multiply: always synthesized (from widening multiplies
;; etc.) by ix86_expand_sse2_mulvxdi3; there is no direct pattern here.
13056 (define_expand "mul<mode>3"
13057 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
13058 (mult:VI8_AVX2_AVX512F
13059 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
13060 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
13061 "TARGET_SSE2"
13062 {
13063 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
13064 DONE;
13065 })
13066
;; Widening-multiply standard names.  <u_bool> selects signed/unsigned
;; from the any_extend iterator; all the real work happens in the
;; i386.c helper functions.
13067 (define_expand "vec_widen_<s>mult_hi_<mode>"
13068 [(match_operand:<sseunpackmode> 0 "register_operand")
13069 (any_extend:<sseunpackmode>
13070 (match_operand:VI124_AVX2 1 "register_operand"))
13071 (match_operand:VI124_AVX2 2 "register_operand")]
13072 "TARGET_SSE2"
13073 {
13074 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
13075 <u_bool>, true);
13076 DONE;
13077 })
13078
13079 (define_expand "vec_widen_<s>mult_lo_<mode>"
13080 [(match_operand:<sseunpackmode> 0 "register_operand")
13081 (any_extend:<sseunpackmode>
13082 (match_operand:VI124_AVX2 1 "register_operand"))
13083 (match_operand:VI124_AVX2 2 "register_operand")]
13084 "TARGET_SSE2"
13085 {
13086 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
13087 <u_bool>, false);
13088 DONE;
13089 })
13090
13091 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
13092 ;; named patterns, but signed V4SI needs special help for plain SSE2.
13093 (define_expand "vec_widen_smult_even_v4si"
13094 [(match_operand:V2DI 0 "register_operand")
13095 (match_operand:V4SI 1 "vector_operand")
13096 (match_operand:V4SI 2 "vector_operand")]
13097 "TARGET_SSE2"
13098 {
13099 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
13100 false, false);
13101 DONE;
13102 })
13103
13104 (define_expand "vec_widen_<s>mult_odd_<mode>"
13105 [(match_operand:<sseunpackmode> 0 "register_operand")
13106 (any_extend:<sseunpackmode>
13107 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
13108 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
13109 "TARGET_SSE2"
13110 {
13111 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
13112 <u_bool>, true);
13113 DONE;
13114 })
13115
;; Suffix used to pick the right pmaddwd generator per mode (512-bit
;; needs the explicit "512v32hi" name; 128/256-bit use the plain one).
13116 (define_mode_attr SDOT_PMADD_SUF
13117 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
13118
;; Signed dot-product: pmaddwd (pairwise multiply-add) followed by an
;; ordinary vector add of the accumulator operand 3.
13119 (define_expand "sdot_prod<mode>"
13120 [(match_operand:<sseunpackmode> 0 "register_operand")
13121 (match_operand:VI2_AVX2 1 "register_operand")
13122 (match_operand:VI2_AVX2 2 "register_operand")
13123 (match_operand:<sseunpackmode> 3 "register_operand")]
13124 "TARGET_SSE2"
13125 {
13126 rtx t = gen_reg_rtx (<sseunpackmode>mode);
13127 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
13128 emit_insn (gen_rtx_SET (operands[0],
13129 gen_rtx_PLUS (<sseunpackmode>mode,
13130 operands[3], t)));
13131 DONE;
13132 })
13133
13134 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
13135 ;; back together when madd is available.
;; XOP-only V4SI dot product using the fused multiply-accumulate
;; pmacsdqh/pmacsdql pair (high halves first, then low halves).
13136 (define_expand "sdot_prodv4si"
13137 [(match_operand:V2DI 0 "register_operand")
13138 (match_operand:V4SI 1 "register_operand")
13139 (match_operand:V4SI 2 "register_operand")
13140 (match_operand:V2DI 3 "register_operand")]
13141 "TARGET_XOP"
13142 {
13143 rtx t = gen_reg_rtx (V2DImode);
13144 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
13145 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
13146 DONE;
13147 })
13148
;; Unsigned rounding average: (a + b + 1) >> 1 computed in the double-wide
;; mode then truncated -- this is exactly PAVGB/PAVGW semantics; operand 3
;; is the all-ones rounding constant.
13149 (define_expand "uavg<mode>3_ceil"
13150 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
13151 (truncate:VI12_AVX2_AVX512BW
13152 (lshiftrt:<ssedoublemode>
13153 (plus:<ssedoublemode>
13154 (plus:<ssedoublemode>
13155 (zero_extend:<ssedoublemode>
13156 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
13157 (zero_extend:<ssedoublemode>
13158 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
13159 (match_dup 3))
13160 (const_int 1))))]
13161 "TARGET_SSE2"
13162 {
13163 operands[3] = CONST1_RTX(<ssedoublemode>mode);
13164 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13165 })
13166
;; Unsigned sum-of-absolute-differences at 128/256/512 bits: PSADBW
;; produces per-qword sums, which are then converted to the SI vector
;; mode and added to the accumulator operand 3.
13167 (define_expand "usadv16qi"
13168 [(match_operand:V4SI 0 "register_operand")
13169 (match_operand:V16QI 1 "register_operand")
13170 (match_operand:V16QI 2 "vector_operand")
13171 (match_operand:V4SI 3 "vector_operand")]
13172 "TARGET_SSE2"
13173 {
13174 rtx t1 = gen_reg_rtx (V2DImode);
13175 rtx t2 = gen_reg_rtx (V4SImode);
13176 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
13177 convert_move (t2, t1, 0);
13178 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
13179 DONE;
13180 })
13181
13182 (define_expand "usadv32qi"
13183 [(match_operand:V8SI 0 "register_operand")
13184 (match_operand:V32QI 1 "register_operand")
13185 (match_operand:V32QI 2 "nonimmediate_operand")
13186 (match_operand:V8SI 3 "nonimmediate_operand")]
13187 "TARGET_AVX2"
13188 {
13189 rtx t1 = gen_reg_rtx (V4DImode);
13190 rtx t2 = gen_reg_rtx (V8SImode);
13191 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
13192 convert_move (t2, t1, 0);
13193 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
13194 DONE;
13195 })
13196
13197 (define_expand "usadv64qi"
13198 [(match_operand:V16SI 0 "register_operand")
13199 (match_operand:V64QI 1 "register_operand")
13200 (match_operand:V64QI 2 "nonimmediate_operand")
13201 (match_operand:V16SI 3 "nonimmediate_operand")]
13202 "TARGET_AVX512BW"
13203 {
13204 rtx t1 = gen_reg_rtx (V8DImode);
13205 rtx t2 = gen_reg_rtx (V16SImode);
13206 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
13207 convert_move (t2, t1, 0);
13208 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
13209 DONE;
13210 })
13211
;; Arithmetic right shifts by a scalar (DI) count.  Alternative 0 takes
;; the count in an xmm register, alternative 1 an immediate ("N").
;; The three define_insns cover AVX512VL-only modes, legacy SSE2/AVX
;; modes, and AVX512F/AVX512BW modes respectively.
13212 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
13213 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
13214 (ashiftrt:VI248_AVX512BW_1
13215 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
13216 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
13217 "TARGET_AVX512VL"
13218 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13219 [(set_attr "type" "sseishft")
13220 (set (attr "length_immediate")
13221 (if_then_else (match_operand 2 "const_int_operand")
13222 (const_string "1")
13223 (const_string "0")))
13224 (set_attr "mode" "<sseinsnmode>")])
13225
13226 (define_insn "ashr<mode>3"
13227 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>")
13228 (ashiftrt:VI24_AVX2
13229 (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>")
13230 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
13231 "TARGET_SSE2"
13232 "@
13233 psra<ssemodesuffix>\t{%2, %0|%0, %2}
13234 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13235 [(set_attr "isa" "noavx,avx")
13236 (set_attr "type" "sseishft")
13237 (set (attr "length_immediate")
13238 (if_then_else (match_operand 2 "const_int_operand")
13239 (const_string "1")
13240 (const_string "0")))
13241 (set_attr "prefix_data16" "1,*")
13242 (set_attr "prefix" "orig,vex")
13243 (set_attr "mode" "<sseinsnmode>")])
13244
13245 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
13246 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
13247 (ashiftrt:VI248_AVX512BW_AVX512VL
13248 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
13249 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
13250 "TARGET_AVX512F"
13251 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13252 [(set_attr "type" "sseishft")
13253 (set (attr "length_immediate")
13254 (if_then_else (match_operand 2 "const_int_operand")
13255 (const_string "1")
13256 (const_string "0")))
13257 (set_attr "mode" "<sseinsnmode>")])
13258
;; Standard-name expander; the insns above do the matching.
13259 (define_expand "ashr<mode>3"
13260 [(set (match_operand:VI248_AVX512BW 0 "register_operand")
13261 (ashiftrt:VI248_AVX512BW
13262 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
13263 (match_operand:DI 2 "nonmemory_operand")))]
13264 "TARGET_AVX512F")
13265
;; V4DI arithmetic right shift.  AVX512VL has VPSRAQ directly; plain
;; AVX2 does not, so the C body below synthesizes it from 32-bit
;; arithmetic shifts, logical shifts, compares and permutes.
13266 (define_expand "ashrv4di3"
13267 [(set (match_operand:V4DI 0 "register_operand")
13268 (ashiftrt:V4DI
13269 (match_operand:V4DI 1 "nonimmediate_operand")
13270 (match_operand:DI 2 "nonmemory_operand")))]
13271 "TARGET_AVX2"
13272 {
13273 if (!TARGET_AVX512VL)
13274 {
/* Count >= 63: every element becomes 0 or -1, i.e. (0 > src).  */
13275 if (CONST_INT_P (operands[2]) && UINTVAL (operands[2]) >= 63)
13276 {
13277 rtx zero = force_reg (V4DImode, CONST0_RTX (V4DImode));
13278 emit_insn (gen_avx2_gtv4di3 (operands[0], zero, operands[1]));
13279 DONE;
13280 }
/* Shift by zero is a plain move.  */
13281 if (operands[2] == const0_rtx)
13282 {
13283 emit_move_insn (operands[0], operands[1]);
13284 DONE;
13285 }
13286 operands[1] = force_reg (V4DImode, operands[1]);
/* Constant count: build the result from V8SI arithmetic shifts of the
   low/high 32-bit halves, then interleave the pieces with a constant
   permutation (sel[]).  Three cases: count > 32, == 32, < 32.  */
13287 if (CONST_INT_P (operands[2]))
13288 {
13289 vec_perm_builder sel (8, 8, 1);
13290 sel.quick_grow (8);
13291 rtx arg0, arg1;
13292 rtx op1 = lowpart_subreg (V8SImode, operands[1], V4DImode);
13293 rtx target = gen_reg_rtx (V8SImode);
13294 if (INTVAL (operands[2]) > 32)
13295 {
13296 arg0 = gen_reg_rtx (V8SImode);
13297 arg1 = gen_reg_rtx (V8SImode);
13298 emit_insn (gen_ashrv8si3 (arg1, op1, GEN_INT (31)));
13299 emit_insn (gen_ashrv8si3 (arg0, op1,
13300 GEN_INT (INTVAL (operands[2]) - 32)));
13301 sel[0] = 1;
13302 sel[1] = 9;
13303 sel[2] = 3;
13304 sel[3] = 11;
13305 sel[4] = 5;
13306 sel[5] = 13;
13307 sel[6] = 7;
13308 sel[7] = 15;
13309 }
13310 else if (INTVAL (operands[2]) == 32)
13311 {
13312 arg0 = op1;
13313 arg1 = gen_reg_rtx (V8SImode);
13314 emit_insn (gen_ashrv8si3 (arg1, op1, GEN_INT (31)));
13315 sel[0] = 1;
13316 sel[1] = 9;
13317 sel[2] = 3;
13318 sel[3] = 11;
13319 sel[4] = 5;
13320 sel[5] = 13;
13321 sel[6] = 7;
13322 sel[7] = 15;
13323 }
13324 else
13325 {
13326 arg0 = gen_reg_rtx (V4DImode);
13327 arg1 = gen_reg_rtx (V8SImode);
13328 emit_insn (gen_lshrv4di3 (arg0, operands[1], operands[2]));
13329 emit_insn (gen_ashrv8si3 (arg1, op1, operands[2]));
13330 arg0 = lowpart_subreg (V8SImode, arg0, V4DImode);
13331 sel[0] = 0;
13332 sel[1] = 9;
13333 sel[2] = 2;
13334 sel[3] = 11;
13335 sel[4] = 4;
13336 sel[5] = 13;
13337 sel[6] = 6;
13338 sel[7] = 15;
13339 }
13340 vec_perm_indices indices (sel, 2, 8);
13341 bool ok = targetm.vectorize.vec_perm_const (V8SImode, target,
13342 arg0, arg1, indices);
13343 gcc_assert (ok);
13344 emit_move_insn (operands[0],
13345 lowpart_subreg (V4DImode, target, V8SImode));
13346 DONE;
13347 }
13348
/* Variable count: ashr(x, n) = lshr(x, n) | (sign_mask << (64 - n)),
   where sign_mask = (0 > x) is 0 or all-ones per element.  */
13349 rtx zero = force_reg (V4DImode, CONST0_RTX (V4DImode));
13350 rtx zero_or_all_ones = gen_reg_rtx (V4DImode);
13351 emit_insn (gen_avx2_gtv4di3 (zero_or_all_ones, zero, operands[1]));
13352 rtx lshr_res = gen_reg_rtx (V4DImode);
13353 emit_insn (gen_lshrv4di3 (lshr_res, operands[1], operands[2]));
13354 rtx ashl_res = gen_reg_rtx (V4DImode);
13355 rtx amount;
/* Compute 64 - count; without TARGET_64BIT the DImode subtraction is
   done in SImode and widened through a V4SI element insert.  */
13356 if (TARGET_64BIT)
13357 {
13358 amount = gen_reg_rtx (DImode);
13359 emit_insn (gen_subdi3 (amount, force_reg (DImode, GEN_INT (64)),
13360 operands[2]));
13361 }
13362 else
13363 {
13364 rtx temp = gen_reg_rtx (SImode);
13365 emit_insn (gen_subsi3 (temp, force_reg (SImode, GEN_INT (64)),
13366 lowpart_subreg (SImode, operands[2],
13367 DImode)));
13368 amount = gen_reg_rtx (V4SImode);
13369 emit_insn (gen_vec_setv4si_0 (amount, CONST0_RTX (V4SImode),
13370 temp));
13371 }
13372 amount = lowpart_subreg (DImode, amount, GET_MODE (amount));
13373 emit_insn (gen_ashlv4di3 (ashl_res, zero_or_all_ones, amount));
13374 emit_insn (gen_iorv4di3 (operands[0], lshr_res, ashl_res));
13375 DONE;
13376 }
13377 })
13378
;; Logical shifts (any_lshift = ashift / lshiftrt) by a scalar DI count.
;; Same structure as the arithmetic-shift patterns above: AVX512VL-only
;; modes, legacy SSE2/AVX modes, then AVX512F modes.
13379 (define_insn "<mask_codefor><insn><mode>3<mask_name>"
13380 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
13381 (any_lshift:VI248_AVX512BW_2
13382 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
13383 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
13384 "TARGET_AVX512VL"
13385 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13386 [(set_attr "type" "sseishft")
13387 (set (attr "length_immediate")
13388 (if_then_else (match_operand 2 "const_int_operand")
13389 (const_string "1")
13390 (const_string "0")))
13391 (set_attr "mode" "<sseinsnmode>")])
13392
13393 (define_insn "<insn><mode>3"
13394 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>")
13395 (any_lshift:VI248_AVX2
13396 (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>")
13397 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
13398 "TARGET_SSE2"
13399 "@
13400 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
13401 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13402 [(set_attr "isa" "noavx,avx")
13403 (set_attr "type" "sseishft")
13404 (set (attr "length_immediate")
13405 (if_then_else (match_operand 2 "const_int_operand")
13406 (const_string "1")
13407 (const_string "0")))
13408 (set_attr "prefix_data16" "1,*")
13409 (set_attr "prefix" "orig,vex")
13410 (set_attr "mode" "<sseinsnmode>")])
13411
;; NOTE(review): here a memory operand 1 is only allowed with an
;; immediate count (alternatives "v,m" with "vN,N"), unlike the VL
;; pattern above -- presumably deliberate; confirm before unifying.
13412 (define_insn "<insn><mode>3<mask_name>"
13413 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
13414 (any_lshift:VI248_AVX512BW
13415 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
13416 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
13417 "TARGET_AVX512F"
13418 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13419 [(set_attr "type" "sseishft")
13420 (set (attr "length_immediate")
13421 (if_then_else (match_operand 2 "const_int_operand")
13422 (const_string "1")
13423 (const_string "0")))
13424 (set_attr "mode" "<sseinsnmode>")])
13425
13426
;; Whole-vector shifts by a multiple-of-8-bit count, implemented as a
;; V1TI shift on a lowpart view of the 128-bit operand (PSLLDQ/PSRLDQ).
13427 (define_expand "vec_shl_<mode>"
13428 [(set (match_dup 3)
13429 (ashift:V1TI
13430 (match_operand:V_128 1 "register_operand")
13431 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
13432 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
13433 "TARGET_SSE2"
13434 {
13435 operands[1] = gen_lowpart (V1TImode, operands[1]);
13436 operands[3] = gen_reg_rtx (V1TImode);
13437 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
13438 })
13439
13440 (define_expand "vec_shr_<mode>"
13441 [(set (match_dup 3)
13442 (lshiftrt:V1TI
13443 (match_operand:V_128 1 "register_operand")
13444 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
13445 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
13446 "TARGET_SSE2"
13447 {
13448 operands[1] = gen_lowpart (V1TImode, operands[1]);
13449 operands[3] = gen_reg_rtx (V1TImode);
13450 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
13451 })
13452
;; Whole-register byte shifts (PSLLDQ/PSRLDQ).  The operand is a bit
;; count that is a multiple of 8; the C fragment divides by 8 to get the
;; byte immediate the instruction actually takes.
13453 (define_insn "avx512bw_<insn><mode>3"
13454 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
13455 (any_lshift:VIMAX_AVX512VL
13456 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
13457 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
13458 "TARGET_AVX512BW"
13459 {
13460 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
13461 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
13462 }
13463 [(set_attr "type" "sseishft")
13464 (set_attr "length_immediate" "1")
13465 (set_attr "prefix" "maybe_evex")
13466 (set_attr "mode" "<sseinsnmode>")])
13467
;; SSE2/AVX2 variant: legacy two-operand form vs. VEX three-operand form.
13468 (define_insn "<sse2_avx2>_<insn><mode>3"
13469 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw")
13470 (any_lshift:VIMAX_AVX2
13471 (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw")
13472 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
13473 "TARGET_SSE2"
13474 {
13475 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
13476
13477 switch (which_alternative)
13478 {
13479 case 0:
13480 return "p<vshift>dq\t{%2, %0|%0, %2}";
13481 case 1:
13482 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
13483 default:
13484 gcc_unreachable ();
13485 }
13486 }
13487 [(set_attr "isa" "noavx,avx")
13488 (set_attr "type" "sseishft")
13489 (set_attr "length_immediate" "1")
13490 (set_attr "atom_unit" "sishuf")
13491 (set_attr "prefix_data16" "1,*")
13492 (set_attr "prefix" "orig,vex")
13493 (set_attr "mode" "<sseinsnmode>")])
13494
;; AVX512 element rotates: per-element variable counts (VPROLV/VPRORV)
;; and a shared immediate count (VPROL/VPROR).
13495 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
13496 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13497 (any_rotate:VI48_AVX512VL
13498 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
13499 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
13500 "TARGET_AVX512F"
13501 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13502 [(set_attr "prefix" "evex")
13503 (set_attr "mode" "<sseinsnmode>")])
13504
13505 (define_insn "<avx512>_<rotate><mode><mask_name>"
13506 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13507 (any_rotate:VI48_AVX512VL
13508 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
13509 (match_operand:SI 2 "const_0_to_255_operand")))]
13510 "TARGET_AVX512F"
13511 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13512 [(set_attr "prefix" "evex")
13513 (set_attr "mode" "<sseinsnmode>")])
13514
;; 256-bit (and wider) integer min/max, signed and unsigned (maxmin
;; iterator), directly supported on AVX2+.
13515 (define_expand "<code><mode>3"
13516 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
13517 (maxmin:VI124_256_AVX512F_AVX512BW
13518 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
13519 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
13520 "TARGET_AVX2"
13521 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
13522
13523 (define_insn "*avx2_<code><mode>3"
13524 [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
13525 (maxmin:VI124_256
13526 (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
13527 (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
13528 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13529 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13530 [(set_attr "type" "sseiadd")
13531 (set_attr "prefix_extra" "1")
13532 (set_attr "prefix" "vex")
13533 (set_attr "mode" "OI")])
13534
;; Conditional (masked) min/max: lowered to the _mask form of the
;; unconditional pattern.
13535 (define_expand "cond_<code><mode>"
13536 [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
13537 (vec_merge:VI1248_AVX512VLBW
13538 (maxmin:VI1248_AVX512VLBW
13539 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand")
13540 (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand"))
13541 (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand")
13542 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
13543 "TARGET_AVX512F"
13544 {
13545 emit_insn (gen_<code><mode>3_mask (operands[0],
13546 operands[2],
13547 operands[3],
13548 operands[4],
13549 operands[1]))
13550 DONE;
13551 })
13552
;; Explicit masked expander (operand 4 is the mask here, matching the
;; *_mask generator's argument order).
13553 (define_expand "<code><mode>3_mask"
13554 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13555 (vec_merge:VI48_AVX512VL
13556 (maxmin:VI48_AVX512VL
13557 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
13558 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13559 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13560 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13561 "TARGET_AVX512F"
13562 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
13563
13564 (define_insn "*avx512f_<code><mode>3<mask_name>"
13565 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13566 (maxmin:VI48_AVX512VL
13567 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
13568 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
13569 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13570 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13571 [(set_attr "type" "sseiadd")
13572 (set_attr "prefix_extra" "1")
13573 (set_attr "prefix" "maybe_evex")
13574 (set_attr "mode" "<sseinsnmode>")])
13575
;; Byte/word element min/max need AVX512BW.
13576 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13577 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13578 (maxmin:VI12_AVX512VL
13579 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
13580 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
13581 "TARGET_AVX512BW"
13582 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13583 [(set_attr "type" "sseiadd")
13584 (set_attr "prefix" "evex")
13585 (set_attr "mode" "<sseinsnmode>")])
13586
;; 64-bit element min/max.  AVX512 (VL or 512-bit) matches a direct
;; insn elsewhere; otherwise expand via a vector compare + vcond
;; (min(a,b) = b >/gtu a ? ... handled by swapping operands).
13587 (define_expand "<code><mode>3"
13588 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
13589 (maxmin:VI8_AVX2_AVX512F
13590 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
13591 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
13592 "TARGET_SSE4_2"
13593 {
13594 if (TARGET_AVX512F
13595 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
13596 ;
13597 else
13598 {
13599 enum rtx_code code;
13600 rtx xops[6];
13601 bool ok;
13602
13603
13604 xops[0] = operands[0];
13605
13606 if (<CODE> == SMAX || <CODE> == UMAX)
13607 {
13608 xops[1] = operands[1];
13609 xops[2] = operands[2];
13610 }
13611 else
13612 {
13613 xops[1] = operands[2];
13614 xops[2] = operands[1];
13615 }
13616
13617 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
13618
13619 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
13620 xops[4] = operands[1];
13621 xops[5] = operands[2];
13622
13623 ok = ix86_expand_int_vcond (xops);
13624 gcc_assert (ok);
13625 DONE;
13626 }
13627 })
13628
;; Signed 128-bit min/max.  SSE4.1 (or V8HI, which SSE2 handles
;; natively with PMAXSW/PMINSW) matches the insns below; otherwise
;; synthesize with a signed compare + vcond, swapping operands for min.
13629 (define_expand "<code><mode>3"
13630 [(set (match_operand:VI124_128 0 "register_operand")
13631 (smaxmin:VI124_128
13632 (match_operand:VI124_128 1 "vector_operand")
13633 (match_operand:VI124_128 2 "vector_operand")))]
13634 "TARGET_SSE2"
13635 {
13636 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
13637 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
13638 else
13639 {
13640 rtx xops[6];
13641 bool ok;
13642
13643 xops[0] = operands[0];
13644 operands[1] = force_reg (<MODE>mode, operands[1]);
13645 operands[2] = force_reg (<MODE>mode, operands[2]);
13646
13647 if (<CODE> == SMAX)
13648 {
13649 xops[1] = operands[1];
13650 xops[2] = operands[2];
13651 }
13652 else
13653 {
13654 xops[1] = operands[2];
13655 xops[2] = operands[1];
13656 }
13657
13658 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
13659 xops[4] = operands[1];
13660 xops[5] = operands[2];
13661
13662 ok = ix86_expand_int_vcond (xops);
13663 gcc_assert (ok);
13664 DONE;
13665 }
13666 })
13667
;; SSE4.1 PMAXS*/PMINS* for byte/dword elements.
13668 (define_insn "*sse4_1_<code><mode>3<mask_name>"
13669 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
13670 (smaxmin:VI14_128
13671 (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
13672 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
13673 "TARGET_SSE4_1
13674 && <mask_mode512bit_condition>
13675 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13676 "@
13677 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13678 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13679 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13680 [(set_attr "isa" "noavx,noavx,avx")
13681 (set_attr "type" "sseiadd")
13682 (set_attr "prefix_extra" "1,1,*")
13683 (set_attr "prefix" "orig,orig,vex")
13684 (set_attr "mode" "TI")])
13685
;; SSE2 PMAXSW/PMINSW (the one signed min/max SSE2 has natively).
13686 (define_insn "*<code>v8hi3"
13687 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
13688 (smaxmin:V8HI
13689 (match_operand:V8HI 1 "vector_operand" "%0,Yw")
13690 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))]
13691 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13692 "@
13693 p<maxmin_int>w\t{%2, %0|%0, %2}
13694 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
13695 [(set_attr "isa" "noavx,avx")
13696 (set_attr "type" "sseiadd")
13697 (set_attr "prefix_data16" "1,*")
13698 (set_attr "prefix_extra" "*,1")
13699 (set_attr "prefix" "orig,vex")
13700 (set_attr "mode" "TI")])
13701
13702 (define_expand "<code><mode>3"
13703 [(set (match_operand:VI124_128 0 "register_operand")
13704 (umaxmin:VI124_128
13705 (match_operand:VI124_128 1 "vector_operand")
13706 (match_operand:VI124_128 2 "vector_operand")))]
13707 "TARGET_SSE2"
13708 {
13709 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
13710 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
13711 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
13712 {
13713 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
13714 operands[1] = force_reg (<MODE>mode, operands[1]);
13715 if (rtx_equal_p (op3, op2))
13716 op3 = gen_reg_rtx (V8HImode);
13717 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
13718 emit_insn (gen_addv8hi3 (op0, op3, op2));
13719 DONE;
13720 }
13721 else
13722 {
13723 rtx xops[6];
13724 bool ok;
13725
13726 operands[1] = force_reg (<MODE>mode, operands[1]);
13727 operands[2] = force_reg (<MODE>mode, operands[2]);
13728
13729 xops[0] = operands[0];
13730
13731 if (<CODE> == UMAX)
13732 {
13733 xops[1] = operands[1];
13734 xops[2] = operands[2];
13735 }
13736 else
13737 {
13738 xops[1] = operands[2];
13739 xops[2] = operands[1];
13740 }
13741
13742 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
13743 xops[4] = operands[1];
13744 xops[5] = operands[2];
13745
13746 ok = ix86_expand_int_vcond (xops);
13747 gcc_assert (ok);
13748 DONE;
13749 }
13750 })
13751
;; SSE4.1 unsigned max/min for 16/32-bit element vectors (pmaxu[wd]/pminu[wd]).
;; Alternatives 0/1 are the destructive non-AVX forms (operand 1 tied to the
;; destination); alternative 2 is the 3-operand VEX/EVEX form, which also
;; carries the optional AVX512 merge-masking via <mask_operand3>.
13752 (define_insn "*sse4_1_<code><mode>3<mask_name>"
13753 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
13754 (umaxmin:VI24_128
13755 (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
13756 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
13757 "TARGET_SSE4_1
13758 && <mask_mode512bit_condition>
13759 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13760 "@
13761 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13762 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13763 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13764 [(set_attr "isa" "noavx,noavx,avx")
13765 (set_attr "type" "sseiadd")
13766 (set_attr "prefix_extra" "1,1,*")
13767 (set_attr "prefix" "orig,orig,vex")
13768 (set_attr "mode" "TI")])
13769
;; Unsigned byte max/min (pmaxub/pminub), available from plain SSE2.
;; Alternative 0 is the destructive legacy-SSE form; alternative 1 is the
;; 3-operand AVX form.
13770 (define_insn "*<code>v16qi3"
13771 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
13772 (umaxmin:V16QI
13773 (match_operand:V16QI 1 "vector_operand" "%0,Yw")
13774 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))]
13775 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13776 "@
13777 p<maxmin_int>b\t{%2, %0|%0, %2}
13778 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
13779 [(set_attr "isa" "noavx,avx")
13780 (set_attr "type" "sseiadd")
13781 (set_attr "prefix_data16" "1,*")
13782 (set_attr "prefix_extra" "*,1")
13783 (set_attr "prefix" "orig,vex")
13784 (set_attr "mode" "TI")])
13785
13786 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13787 ;;
13788 ;; Parallel integral comparisons
13789 ;;
13790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13791
;; AVX2 256-bit integer equality compare producing an all-ones/all-zeros
;; vector result (vpcmpeqb/w/d/q).  Commutative, so only one operand may
;; be a memory reference.
13792 (define_insn "*avx2_eq<mode>3"
13793 [(set (match_operand:VI_256 0 "register_operand" "=x")
13794 (eq:VI_256
13795 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
13796 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
13797 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13798 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13799 [(set_attr "type" "ssecmp")
13800 (set_attr "prefix_extra" "1")
13801 (set_attr "prefix" "vex")
13802 (set_attr "mode" "OI")])
13803
;; Simplify a masked EQ (UNSPEC_MASKED_EQ) whose mask result merely selects
;; between all-ones and all-zeros back into a plain vector EQ, so the cheaper
;; AVX/AVX2 vpcmpeq form can be used instead of the kmask compare.
13804 (define_insn_and_split "*avx2_eq<mode>3"
13805 [(set (match_operand:VI_128_256 0 "register_operand")
13806 (vec_merge:VI_128_256
13807 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13808 (match_operand:VI_128_256 2 "const0_operand")
13809 (unspec:<avx512fmaskmode>
13810 [(match_operand:VI_128_256 3 "nonimmediate_operand")
13811 (match_operand:VI_128_256 4 "nonimmediate_operand")]
13812 UNSPEC_MASKED_EQ)))]
13813 "TARGET_AVX512VL && ix86_pre_reload_split ()
13814 && !(MEM_P (operands[3]) && MEM_P (operands[4]))"
13815 "#"
13816 "&& 1"
13817 [(set (match_dup 0)
13818 (eq:VI_128_256
13819 (match_dup 3)
13820 (match_dup 4)))])
13821
;; Simplify UNSPEC_PCMP (vpcmp with an immediate predicate) merged into an
;; all-ones/all-zeros vector back to a plain EQ or GT RTX.  Handled
;; predicates: 0 = EQ (commutative), 6 = NLE aka GT, 1 = LT (implemented by
;; swapping the operands of GT).
13822 (define_insn_and_split "*avx2_pcmp<mode>3_1"
13823 [(set (match_operand:VI_128_256 0 "register_operand")
13824 (vec_merge:VI_128_256
13825 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13826 (match_operand:VI_128_256 2 "const0_operand")
13827 (unspec:<avx512fmaskmode>
13828 [(match_operand:VI_128_256 3 "nonimmediate_operand")
13829 (match_operand:VI_128_256 4 "nonimmediate_operand")
13830 (match_operand:SI 5 "const_0_to_7_operand")]
13831 UNSPEC_PCMP)))]
13832 "TARGET_AVX512VL && ix86_pre_reload_split ()
13833 /* EQ is commutative.  */
13834 && ((INTVAL (operands[5]) == 0
13835 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
13836 /* NLE aka GT, 3 must be register.  */
13837 || (INTVAL (operands[5]) == 6
13838 && !MEM_P (operands[3]))
13839 /* LT, 4 must be register and we swap operands.  */
13840 || (INTVAL (operands[5]) == 1
13841 && !MEM_P (operands[4])))"
13842 "#"
13843 "&& 1"
13844 [(const_int 0)]
13845 {
;; LT x,y == GT y,x: swap before emitting the GT form.
13846 if (INTVAL (operands[5]) == 1)
13847 std::swap (operands[3], operands[4]);
13848 enum rtx_code code = INTVAL (operands[5]) ? GT : EQ;
13849 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
13850 operands[3], operands[4]));
13851 DONE;
13852 })
13853
;; Same as *avx2_pcmp<mode>3_1, but for the negated mask (not UNSPEC_PCMP):
;; 4 = NE, whose negation is EQ; 2 = LE -> negation GT with swapped semantics
;; handled by emitting GT directly; 5 = NLT aka GE -> negation LT, done by
;; swapping operands of GT.
13854 (define_insn_and_split "*avx2_pcmp<mode>3_2"
13855 [(set (match_operand:VI_128_256 0 "register_operand")
13856 (vec_merge:VI_128_256
13857 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13858 (match_operand:VI_128_256 2 "const0_operand")
13859 (not:<avx512fmaskmode>
13860 (unspec:<avx512fmaskmode>
13861 [(match_operand:VI_128_256 3 "nonimmediate_operand")
13862 (match_operand:VI_128_256 4 "nonimmediate_operand")
13863 (match_operand:SI 5 "const_0_to_7_operand")]
13864 UNSPEC_PCMP))))]
13865 "TARGET_AVX512VL && ix86_pre_reload_split ()
13866 /* NE is commutative.  */
13867 && ((INTVAL (operands[5]) == 4
13868 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
13869 /* LE, 3 must be register.  */
13870 || (INTVAL (operands[5]) == 2
13871 && !MEM_P (operands[3]))
13872 /* NLT aka GE, 4 must be register and we swap operands.  */
13873 || (INTVAL (operands[5]) == 5
13874 && !MEM_P (operands[4])))"
13875 "#"
13876 "&& 1"
13877 [(const_int 0)]
13878 {
13879 if (INTVAL (operands[5]) == 5)
13880 std::swap (operands[3], operands[4]);
;; !NE -> EQ; !LE and !GE both reduce to a (possibly swapped) GT.
13881 enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
13882 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
13883 operands[3], operands[4]));
13884 DONE;
13885 })
13886
;; A vec_merge whose mask is "operand 3 < 0" (or ">= 0", predicate imm 1/5,
;; against a zero vector) on byte elements can be done with vpblendvb, which
;; selects on the sign bit of the mask operand: rewrite to UNSPEC_BLENDV.
;; For GE the two data operands are swapped to invert the selection.
13887 (define_insn_and_split "*avx2_pcmp<mode>3_3"
13888 [(set (match_operand:VI1_AVX2 0 "register_operand")
13889 (vec_merge:VI1_AVX2
13890 (match_operand:VI1_AVX2 1 "vector_operand")
13891 (match_operand:VI1_AVX2 2 "vector_operand")
13892 (unspec:<avx512fmaskmode>
13893 [(match_operand:VI1_AVX2 3 "register_operand")
13894 (match_operand:VI1_AVX2 4 "const0_operand")
13895 (match_operand:SI 5 "const_0_to_7_operand")]
13896 UNSPEC_PCMP)))]
13897 "TARGET_AVX512VL && ix86_pre_reload_split ()
13898 /* LT or GE 0 */
13899 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
13900 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
13901 "#"
13902 "&& 1"
13903 [(set (match_dup 0)
13904 (unspec:VI1_AVX2
13905 [(match_dup 2)
13906 (match_dup 1)
13907 (lt:VI1_AVX2
13908 (match_dup 3)
13909 (match_dup 4))]
13910 UNSPEC_BLENDV))]
13911 {
13912 if (INTVAL (operands[5]) == 5)
13913 std::swap (operands[1], operands[2]);
13914 })
13915
;; Like *avx2_pcmp<mode>3_3, but the compared value is a same-size subreg of
;; the bitwise NOT of a register: (not x) < 0 is x >= 0 and vice versa, so
;; the data operands swap in the opposite direction and the NOT is dropped
;; by taking the lowpart of operand 3 directly.
13916 (define_insn_and_split "*avx2_pcmp<mode>3_4"
13917 [(set (match_operand:VI1_AVX2 0 "register_operand")
13918 (vec_merge:VI1_AVX2
13919 (match_operand:VI1_AVX2 1 "vector_operand")
13920 (match_operand:VI1_AVX2 2 "vector_operand")
13921 (unspec:<avx512fmaskmode>
13922 [(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)
13923 (match_operand:VI1_AVX2 4 "const0_operand")
13924 (match_operand:SI 5 "const_0_to_7_operand")]
13925 UNSPEC_PCMP)))]
13926 "TARGET_AVX512VL && ix86_pre_reload_split ()
13927 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
13928 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
13929 /* LT or GE 0 */
13930 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
13931 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
13932 "#"
13933 "&& 1"
13934 [(set (match_dup 0)
13935 (unspec:VI1_AVX2
13936 [(match_dup 1)
13937 (match_dup 2)
13938 (lt:VI1_AVX2
13939 (match_dup 3)
13940 (match_dup 4))]
13941 UNSPEC_BLENDV))]
13942 {
13943 if (INTVAL (operands[5]) == 1)
13944 std::swap (operands[1], operands[2]);
13945 operands[3] = gen_lowpart (<MODE>mode, operands[3]);
13946 })
13947
;; Same transformation as *avx2_pcmp<mode>3_4, for the case where the NOT
;; appears directly in <MODE>mode (no subreg), so no lowpart conversion is
;; needed when rewriting to the vpblendvb form.
13948 (define_insn_and_split "*avx2_pcmp<mode>3_5"
13949 [(set (match_operand:VI1_AVX2 0 "register_operand")
13950 (vec_merge:VI1_AVX2
13951 (match_operand:VI1_AVX2 1 "vector_operand")
13952 (match_operand:VI1_AVX2 2 "vector_operand")
13953 (unspec:<avx512fmaskmode>
13954 [(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))
13955 (match_operand:VI1_AVX2 4 "const0_operand")
13956 (match_operand:SI 5 "const_0_to_7_operand")]
13957 UNSPEC_PCMP)))]
13958 "TARGET_AVX512VL && ix86_pre_reload_split ()
13959 /* LT or GE 0 */
13960 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
13961 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
13962 "#"
13963 "&& 1"
13964 [(set (match_dup 0)
13965 (unspec:VI1_AVX2
13966 [(match_dup 1)
13967 (match_dup 2)
13968 (lt:VI1_AVX2
13969 (match_dup 3)
13970 (match_dup 4))]
13971 UNSPEC_BLENDV))]
13972 {
13973 if (INTVAL (operands[5]) == 1)
13974 std::swap (operands[1], operands[2]);
13975 })
13976
;; Expander for masked byte/word equality compare (result in a kmask).
;; Needs AVX512BW for the 8/16-bit element sizes; canonicalizes commutative
;; operands so at most one is a MEM.
13977 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
13978 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
13979 (unspec:<avx512fmaskmode>
13980 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
13981 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
13982 UNSPEC_MASKED_EQ))]
13983 "TARGET_AVX512BW"
13984 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
13985
;; As above, for 32/64-bit elements; these only require base AVX512F.
13986 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
13987 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
13988 (unspec:<avx512fmaskmode>
13989 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
13990 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
13991 UNSPEC_MASKED_EQ))]
13992 "TARGET_AVX512F"
13993 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
13994
;; Masked byte/word equality insn.  Alternative 1 matches operand 2 against
;; constant zero ("C") and emits vptestnm %1,%1 instead — testing a register
;; against itself for zero elements is equivalent to comparing with 0.
13995 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
13996 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
13997 (unspec:<avx512fmaskmode>
13998 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
13999 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
14000 UNSPEC_MASKED_EQ))]
14001 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14002 "@
14003 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
14004 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
14005 [(set_attr "type" "ssecmp")
14006 (set_attr "prefix_extra" "1")
14007 (set_attr "prefix" "evex")
14008 (set_attr "mode" "<sseinsnmode>")])
14009
;; Masked dword/qword equality insn; same vptestnm trick for compare against
;; zero as the VI12 variant above, but gated only on AVX512F.
14010 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
14011 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
14012 (unspec:<avx512fmaskmode>
14013 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
14014 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
14015 UNSPEC_MASKED_EQ))]
14016 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14017 "@
14018 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
14019 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
14020 [(set_attr "type" "ssecmp")
14021 (set_attr "prefix_extra" "1")
14022 (set_attr "prefix" "evex")
14023 (set_attr "mode" "<sseinsnmode>")])
14024
;; 64-bit element equality (pcmpeqq) — introduced in SSE4.1, unlike the
;; byte/word/dword compares which exist from SSE2.
14025 (define_insn "*sse4_1_eqv2di3"
14026 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
14027 (eq:V2DI
14028 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
14029 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
14030 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14031 "@
14032 pcmpeqq\t{%2, %0|%0, %2}
14033 pcmpeqq\t{%2, %0|%0, %2}
14034 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
14035 [(set_attr "isa" "noavx,noavx,avx")
14036 (set_attr "type" "ssecmp")
14037 (set_attr "prefix_extra" "1")
14038 (set_attr "prefix" "orig,orig,vex")
14039 (set_attr "mode" "TI")])
14040
;; SSE2 equality for 8/16/32-bit element 128-bit vectors (pcmpeqb/w/d).
14041 (define_insn "*sse2_eq<mode>3"
14042 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
14043 (eq:VI124_128
14044 (match_operand:VI124_128 1 "vector_operand" "%0,x")
14045 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
14046 "TARGET_SSE2
14047 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14048 "@
14049 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
14050 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14051 [(set_attr "isa" "noavx,avx")
14052 (set_attr "type" "ssecmp")
14053 (set_attr "prefix_data16" "1,*")
14054 (set_attr "prefix" "orig,vex")
14055 (set_attr "mode" "TI")])
14056
;; Signed 64-bit greater-than (pcmpgtq) — requires SSE4.2.  GT is not
;; commutative, so operand 1 must be a register in every alternative.
14057 (define_insn "sse4_2_gtv2di3"
14058 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
14059 (gt:V2DI
14060 (match_operand:V2DI 1 "register_operand" "0,0,x")
14061 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
14062 "TARGET_SSE4_2"
14063 "@
14064 pcmpgtq\t{%2, %0|%0, %2}
14065 pcmpgtq\t{%2, %0|%0, %2}
14066 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
14067 [(set_attr "isa" "noavx,noavx,avx")
14068 (set_attr "type" "ssecmp")
14069 (set_attr "prefix_extra" "1")
14070 (set_attr "prefix" "orig,orig,vex")
14071 (set_attr "mode" "TI")])
14072
;; AVX2 256-bit signed greater-than (vpcmpgtb/w/d/q), vector-mask result.
14073 (define_insn "avx2_gt<mode>3"
14074 [(set (match_operand:VI_256 0 "register_operand" "=x")
14075 (gt:VI_256
14076 (match_operand:VI_256 1 "register_operand" "x")
14077 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
14078 "TARGET_AVX2"
14079 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14080 [(set_attr "type" "ssecmp")
14081 (set_attr "prefix_extra" "1")
14082 (set_attr "prefix" "vex")
14083 (set_attr "mode" "OI")])
14084
;; As with EQ above: a masked GT whose kmask only selects between all-ones
;; and all-zeros is re-expressed as a plain vector GT before reload.
14085 (define_insn_and_split "*avx2_gt<mode>3"
14086 [(set (match_operand:VI_128_256 0 "register_operand")
14087 (vec_merge:VI_128_256
14088 (match_operand:VI_128_256 1 "vector_all_ones_operand")
14089 (match_operand:VI_128_256 2 "const0_operand")
14090 (unspec:<avx512fmaskmode>
14091 [(match_operand:VI_128_256 3 "register_operand")
14092 (match_operand:VI_128_256 4 "nonimmediate_operand")]
14093 UNSPEC_MASKED_GT)))]
14094 "TARGET_AVX512VL
14095 && ix86_pre_reload_split ()"
14096 "#"
14097 "&& 1"
14098 [(set (match_dup 0)
14099 (gt:VI_128_256
14100 (match_dup 3)
14101 (match_dup 4)))])
14102
;; AVX512 masked signed greater-than for dword/qword elements; result is a
;; kmask register.
14103 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
14104 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
14105 (unspec:<avx512fmaskmode>
14106 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
14107 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
14108 "TARGET_AVX512F"
14109 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
14110 [(set_attr "type" "ssecmp")
14111 (set_attr "prefix_extra" "1")
14112 (set_attr "prefix" "evex")
14113 (set_attr "mode" "<sseinsnmode>")])
14114
;; Byte/word variant of the masked greater-than; needs AVX512BW.
14115 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
14116 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
14117 (unspec:<avx512fmaskmode>
14118 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
14119 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
14120 "TARGET_AVX512BW"
14121 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
14122 [(set_attr "type" "ssecmp")
14123 (set_attr "prefix_extra" "1")
14124 (set_attr "prefix" "evex")
14125 (set_attr "mode" "<sseinsnmode>")])
14126
;; SSE2 signed greater-than for 8/16/32-bit elements (pcmpgtb/w/d).
14127 (define_insn "*sse2_gt<mode>3"
14128 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
14129 (gt:VI124_128
14130 (match_operand:VI124_128 1 "register_operand" "0,x")
14131 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
14132 "TARGET_SSE2"
14133 "@
14134 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
14135 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14136 [(set_attr "isa" "noavx,avx")
14137 (set_attr "type" "ssecmp")
14138 (set_attr "prefix_data16" "1,*")
14139 (set_attr "prefix" "orig,vex")
14140 (set_attr "mode" "TI")])
14141
;; Signed vector conditional select for 512-bit modes; the element counts of
;; data and comparison modes must match.  Expansion is delegated entirely to
;; ix86_expand_int_vcond.
14142 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
14143 [(set (match_operand:V_512 0 "register_operand")
14144 (if_then_else:V_512
14145 (match_operator 3 ""
14146 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
14147 (match_operand:VI_AVX512BW 5 "general_operand")])
14148 (match_operand:V_512 1)
14149 (match_operand:V_512 2)))]
14150 "TARGET_AVX512F
14151 && (GET_MODE_NUNITS (<V_512:MODE>mode)
14152 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
14153 {
14154 bool ok = ix86_expand_int_vcond (operands);
14155 gcc_assert (ok);
14156 DONE;
14157 })
14158
;; Signed vector conditional select, 256-bit modes (AVX2).
14159 (define_expand "vcond<V_256:mode><VI_256:mode>"
14160 [(set (match_operand:V_256 0 "register_operand")
14161 (if_then_else:V_256
14162 (match_operator 3 ""
14163 [(match_operand:VI_256 4 "nonimmediate_operand")
14164 (match_operand:VI_256 5 "general_operand")])
14165 (match_operand:V_256 1)
14166 (match_operand:V_256 2)))]
14167 "TARGET_AVX2
14168 && (GET_MODE_NUNITS (<V_256:MODE>mode)
14169 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
14170 {
14171 bool ok = ix86_expand_int_vcond (operands);
14172 gcc_assert (ok);
14173 DONE;
14174 })
14175
;; Signed vector conditional select, 128-bit modes with 8/16/32-bit
;; comparison elements (SSE2).
14176 (define_expand "vcond<V_128:mode><VI124_128:mode>"
14177 [(set (match_operand:V_128 0 "register_operand")
14178 (if_then_else:V_128
14179 (match_operator 3 ""
14180 [(match_operand:VI124_128 4 "vector_operand")
14181 (match_operand:VI124_128 5 "general_operand")])
14182 (match_operand:V_128 1)
14183 (match_operand:V_128 2)))]
14184 "TARGET_SSE2
14185 && (GET_MODE_NUNITS (<V_128:MODE>mode)
14186 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
14187 {
14188 bool ok = ix86_expand_int_vcond (operands);
14189 gcc_assert (ok);
14190 DONE;
14191 })
14192
;; Signed conditional select on V2DI comparisons; SSE4.2 because the general
;; signed 64-bit compare needs pcmpgtq.
14193 (define_expand "vcond<VI8F_128:mode>v2di"
14194 [(set (match_operand:VI8F_128 0 "register_operand")
14195 (if_then_else:VI8F_128
14196 (match_operator 3 ""
14197 [(match_operand:V2DI 4 "vector_operand")
14198 (match_operand:V2DI 5 "general_operand")])
14199 (match_operand:VI8F_128 1)
14200 (match_operand:VI8F_128 2)))]
14201 "TARGET_SSE4_2"
14202 {
14203 bool ok = ix86_expand_int_vcond (operands);
14204 gcc_assert (ok);
14205 DONE;
14206 })
14207
;; Unsigned vector conditional select, 512-bit modes.
14208 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
14209 [(set (match_operand:V_512 0 "register_operand")
14210 (if_then_else:V_512
14211 (match_operator 3 ""
14212 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
14213 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
14214 (match_operand:V_512 1 "general_operand")
14215 (match_operand:V_512 2 "general_operand")))]
14216 "TARGET_AVX512F
14217 && (GET_MODE_NUNITS (<V_512:MODE>mode)
14218 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
14219 {
14220 bool ok = ix86_expand_int_vcond (operands);
14221 gcc_assert (ok);
14222 DONE;
14223 })
14224
;; Unsigned vector conditional select, 256-bit modes (AVX2).
14225 (define_expand "vcondu<V_256:mode><VI_256:mode>"
14226 [(set (match_operand:V_256 0 "register_operand")
14227 (if_then_else:V_256
14228 (match_operator 3 ""
14229 [(match_operand:VI_256 4 "nonimmediate_operand")
14230 (match_operand:VI_256 5 "nonimmediate_operand")])
14231 (match_operand:V_256 1 "general_operand")
14232 (match_operand:V_256 2 "general_operand")))]
14233 "TARGET_AVX2
14234 && (GET_MODE_NUNITS (<V_256:MODE>mode)
14235 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
14236 {
14237 bool ok = ix86_expand_int_vcond (operands);
14238 gcc_assert (ok);
14239 DONE;
14240 })
14241
;; Unsigned vector conditional select, 128-bit modes (SSE2).
14242 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
14243 [(set (match_operand:V_128 0 "register_operand")
14244 (if_then_else:V_128
14245 (match_operator 3 ""
14246 [(match_operand:VI124_128 4 "vector_operand")
14247 (match_operand:VI124_128 5 "vector_operand")])
14248 (match_operand:V_128 1 "general_operand")
14249 (match_operand:V_128 2 "general_operand")))]
14250 "TARGET_SSE2
14251 && (GET_MODE_NUNITS (<V_128:MODE>mode)
14252 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
14253 {
14254 bool ok = ix86_expand_int_vcond (operands);
14255 gcc_assert (ok);
14256 DONE;
14257 })
14258
;; Unsigned conditional select on V2DI comparisons (SSE4.2).
14259 (define_expand "vcondu<VI8F_128:mode>v2di"
14260 [(set (match_operand:VI8F_128 0 "register_operand")
14261 (if_then_else:VI8F_128
14262 (match_operator 3 ""
14263 [(match_operand:V2DI 4 "vector_operand")
14264 (match_operand:V2DI 5 "vector_operand")])
14265 (match_operand:VI8F_128 1 "general_operand")
14266 (match_operand:VI8F_128 2 "general_operand")))]
14267 "TARGET_SSE4_2"
14268 {
14269 bool ok = ix86_expand_int_vcond (operands);
14270 gcc_assert (ok);
14271 DONE;
14272 })
14273
;; Equality-only conditional select on V2DI; only needs SSE4.1 (pcmpeqq)
;; rather than SSE4.2, since no ordered 64-bit compare is required.
14274 (define_expand "vcondeq<VI8F_128:mode>v2di"
14275 [(set (match_operand:VI8F_128 0 "register_operand")
14276 (if_then_else:VI8F_128
14277 (match_operator 3 ""
14278 [(match_operand:V2DI 4 "vector_operand")
14279 (match_operand:V2DI 5 "general_operand")])
14280 (match_operand:VI8F_128 1)
14281 (match_operand:VI8F_128 2)))]
14282 "TARGET_SSE4_1"
14283 {
14284 bool ok = ix86_expand_int_vcond (operands);
14285 gcc_assert (ok);
14286 DONE;
14287 })
14288
;; Modes supported by the generic vec_perm expander below, each gated on the
;; ISA level that can permute that width/element size.
14289 (define_mode_iterator VEC_PERM_AVX2
14290 [V16QI V8HI V4SI V2DI V4SF V2DF
14291 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
14292 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
14293 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
14294 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
14295 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
14296 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
14297
;; Two-source variable permute with a runtime selector vector; lowering is
;; done by ix86_expand_vec_perm.
14298 (define_expand "vec_perm<mode>"
14299 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
14300 (match_operand:VEC_PERM_AVX2 1 "register_operand")
14301 (match_operand:VEC_PERM_AVX2 2 "register_operand")
14302 (match_operand:<sseintvecmode> 3 "register_operand")]
14303 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
14304 {
14305 ix86_expand_vec_perm (operands);
14306 DONE;
14307 })
14308
14309 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14310 ;;
14311 ;; Parallel bitwise logical operations
14312 ;;
14313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14314
;; Vector NOT, expressed as XOR with all-ones.  Without AVX512F the -1
;; constant must live in a register; with AVX512F it can stay as a constant
;; operand (the insn below matches it via "BC" and uses vpternlog).
14315 (define_expand "one_cmpl<mode>2"
14316 [(set (match_operand:VI 0 "register_operand")
14317 (xor:VI (match_operand:VI 1 "vector_operand")
14318 (match_dup 2)))]
14319 "TARGET_SSE"
14320 {
14321 if (!TARGET_AVX512F)
14322 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
14323 else
14324 operands[2] = CONSTM1_RTX (<MODE>mode);
14325 })
14326
;; Vector NOT via vpternlog with truth-table immediate 0x55 (= ~A).  Without
;; AVX512VL the operation is performed on the containing 512-bit register
;; (%g operand modifier).  Alternative 1 allows a memory source, but only
;; when it can be encoded directly (512-bit, or 128/256-bit with VL).
14327 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
14328 [(set (match_operand:VI 0 "register_operand" "=v,v")
14329 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
14330 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
14331 "TARGET_AVX512F
14332 && (!<mask_applied>
14333 || <ssescalarmode>mode == SImode
14334 || <ssescalarmode>mode == DImode)"
14335 {
14336 if (TARGET_AVX512VL)
14337 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
14338 else
14339 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
14340 }
14341 [(set_attr "type" "sselog")
14342 (set_attr "prefix" "evex")
14343 (set (attr "mode")
14344 (if_then_else (match_test "TARGET_AVX512VL")
14345 (const_string "<sseinsnmode>")
14346 (const_string "XI")))
14347 (set (attr "enabled")
14348 (if_then_else (eq_attr "alternative" "1")
14349 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
14350 (const_int 1)))])
14351
;; AND-NOT expander ((~op1) & op2), matching the pandn operand order.
14352 (define_expand "<sse2_avx2>_andnot<mode>3"
14353 [(set (match_operand:VI_AVX2 0 "register_operand")
14354 (and:VI_AVX2
14355 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
14356 (match_operand:VI_AVX2 2 "vector_operand")))]
14357 "TARGET_SSE2")
14358
;; Masked AND-NOT expander, dword/qword elements (AVX512F).
14359 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
14360 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
14361 (vec_merge:VI48_AVX512VL
14362 (and:VI48_AVX512VL
14363 (not:VI48_AVX512VL
14364 (match_operand:VI48_AVX512VL 1 "register_operand"))
14365 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
14366 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
14367 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14368 "TARGET_AVX512F")
14369
;; Masked AND-NOT expander, byte/word elements (AVX512BW).
14370 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
14371 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
14372 (vec_merge:VI12_AVX512VL
14373 (and:VI12_AVX512VL
14374 (not:VI12_AVX512VL
14375 (match_operand:VI12_AVX512VL 1 "register_operand"))
14376 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
14377 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
14378 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14379 "TARGET_AVX512BW")
14380
;; AND-NOT insn for all integer vector modes.  The C block assembles the
;; mnemonic at output time: "pandn"/"vpandn[dq]" for integer execution
;; domains, "andnps"/"vandnps" when the chosen attr mode is a float mode
;; (smaller encoding pre-AVX2 or when optimizing for size).  Alternative 2
;; is the EVEX form and accepts embedded broadcast ("Br" in the constraint).
14381 (define_insn "*andnot<mode>3"
14382 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
14383 (and:VI
14384 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
14385 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
14386 "TARGET_SSE"
14387 {
14388 char buf[64];
14389 const char *ops;
14390 const char *tmp;
14391 const char *ssesuffix;
14392
;; Pick mnemonic root and element-size suffix from the attr mode computed
;; below; the gcc_asserts document which ISA each attr mode implies.
14393 switch (get_attr_mode (insn))
14394 {
14395 case MODE_XI:
14396 gcc_assert (TARGET_AVX512F);
14397 /* FALLTHRU */
14398 case MODE_OI:
14399 gcc_assert (TARGET_AVX2);
14400 /* FALLTHRU */
14401 case MODE_TI:
14402 gcc_assert (TARGET_SSE2);
14403 tmp = "pandn";
14404 switch (<MODE>mode)
14405 {
14406 case E_V64QImode:
14407 case E_V32HImode:
14408 /* There is no vpandnb or vpandnw instruction, nor vpandn for
14409 512-bit vectors. Use vpandnq instead. */
14410 ssesuffix = "q";
14411 break;
14412 case E_V16SImode:
14413 case E_V8DImode:
14414 ssesuffix = "<ssemodesuffix>";
14415 break;
14416 case E_V8SImode:
14417 case E_V4DImode:
14418 case E_V4SImode:
14419 case E_V2DImode:
;; The EVEX alternative needs the element-size suffix; VEX/legacy forms
;; use the plain (suffix-less) vpandn/pandn.
14420 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
14421 ? "<ssemodesuffix>" : "");
14422 break;
14423 default:
14424 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
14425 }
14426 break;
14427
14428 case MODE_V16SF:
14429 gcc_assert (TARGET_AVX512F);
14430 /* FALLTHRU */
14431 case MODE_V8SF:
14432 gcc_assert (TARGET_AVX);
14433 /* FALLTHRU */
14434 case MODE_V4SF:
14435 gcc_assert (TARGET_SSE);
14436 tmp = "andn";
14437 ssesuffix = "ps";
14438 break;
14439
14440 default:
14441 gcc_unreachable ();
14442 }
14443
;; Alternative 0 is the destructive 2-operand form; 1 and 2 are the
;; 3-operand VEX/EVEX forms.
14444 switch (which_alternative)
14445 {
14446 case 0:
14447 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
14448 break;
14449 case 1:
14450 case 2:
14451 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
14452 break;
14453 default:
14454 gcc_unreachable ();
14455 }
14456
14457 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
14458 output_asm_insn (buf, operands);
14459 return "";
14460 }
14461 [(set_attr "isa" "noavx,avx,avx")
14462 (set_attr "type" "sselog")
14463 (set (attr "prefix_data16")
14464 (if_then_else
14465 (and (eq_attr "alternative" "0")
14466 (eq_attr "mode" "TI"))
14467 (const_string "1")
14468 (const_string "*")))
14469 (set_attr "prefix" "orig,vex,evex")
14470 (set (attr "mode")
14471 (cond [(match_test "TARGET_AVX2")
14472 (const_string "<sseinsnmode>")
14473 (match_test "TARGET_AVX")
14474 (if_then_else
14475 (match_test "<MODE_SIZE> > 16")
14476 (const_string "V8SF")
14477 (const_string "<sseinsnmode>"))
14478 (ior (not (match_test "TARGET_SSE2"))
14479 (match_test "optimize_function_for_size_p (cfun)"))
14480 (const_string "V4SF")
14481 ]
14482 (const_string "<sseinsnmode>")))])
14483
;; Masked AND-NOT insn: vpandn[dq] with merge masking — zeroing when
;; operand 3 is const0 ("C"), merging into operand 0 when it is tied ("0").
;; %{%4%}%N3 emits the {%k4} mask and optional {z} zeroing suffix.
;; Note: a stray ';' that used to trail the output template (harmless, since
;; ';' starts a comment in .md files, but inconsistent with every sibling
;; pattern) has been removed.
14484 (define_insn "*andnot<mode>3_mask"
14485 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14486 (vec_merge:VI48_AVX512VL
14487 (and:VI48_AVX512VL
14488 (not:VI48_AVX512VL
14489 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
14490 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
14491 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
14492 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14493 "TARGET_AVX512F"
14494 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
14495 [(set_attr "type" "sselog")
14496 (set_attr "prefix" "evex")
14497 (set_attr "mode" "<sseinsnmode>")])
14498
;; Generic expander for vector AND/IOR/XOR; operand legitimization (constants,
;; memory placement) is handled by ix86_expand_vector_logical_operator.
14499 (define_expand "<code><mode>3"
14500 [(set (match_operand:VI 0 "register_operand")
14501 (any_logic:VI
14502 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
14503 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
14504 "TARGET_SSE"
14505 {
14506 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
14507 DONE;
14508 })
14509
;; Conditional (masked) AND/IOR/XOR for the autovectorizer's cond_* optabs;
;; forwards to the _mask insn pattern with the operands reordered to its
;; convention (mask last).
14510 (define_expand "cond_<code><mode>"
14511 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
14512 (vec_merge:VI48_AVX512VL
14513 (any_logic:VI48_AVX512VL
14514 (match_operand:VI48_AVX512VL 2 "vector_operand")
14515 (match_operand:VI48_AVX512VL 3 "vector_operand"))
14516 (match_operand:VI48_AVX512VL 4 "nonimm_or_0_operand")
14517 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
14518 "TARGET_AVX512F"
14519 {
14520 emit_insn (gen_<code><mode>3_mask (operands[0],
14521 operands[2],
14522 operands[3],
14523 operands[4],
14524 operands[1]));
14525 DONE;
14526 })
14527
;; AND/IOR/XOR insn for dword/qword vectors with optional AVX512 masking and
;; embedded broadcast ("Br").  Like *andnot<mode>3, the mnemonic is built at
;; output time, choosing p<logic>/vp<logic>[dq] or the float-domain
;; <logic>ps form depending on the attr mode.
14528 (define_insn "<mask_codefor><code><mode>3<mask_name>"
14529 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
14530 (any_logic:VI48_AVX_AVX512F
14531 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
14532 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
14533 "TARGET_SSE && <mask_mode512bit_condition>
14534 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14535 {
14536 char buf[64];
14537 const char *ops;
14538 const char *tmp;
14539 const char *ssesuffix;
14540
14541 switch (get_attr_mode (insn))
14542 {
14543 case MODE_XI:
14544 gcc_assert (TARGET_AVX512F);
14545 /* FALLTHRU */
14546 case MODE_OI:
14547 gcc_assert (TARGET_AVX2);
14548 /* FALLTHRU */
14549 case MODE_TI:
14550 gcc_assert (TARGET_SSE2);
14551 tmp = "p<logic>";
14552 switch (<MODE>mode)
14553 {
14554 case E_V16SImode:
14555 case E_V8DImode:
14556 ssesuffix = "<ssemodesuffix>";
14557 break;
14558 case E_V8SImode:
14559 case E_V4DImode:
14560 case E_V4SImode:
14561 case E_V2DImode:
;; The suffixed EVEX form is required when masking is applied or when
;; the EVEX alternative was selected.
14562 ssesuffix = (TARGET_AVX512VL
14563 && (<mask_applied> || which_alternative == 2)
14564 ? "<ssemodesuffix>" : "");
14565 break;
14566 default:
14567 gcc_unreachable ();
14568 }
14569 break;
14570
14571 case MODE_V8SF:
14572 gcc_assert (TARGET_AVX);
14573 /* FALLTHRU */
14574 case MODE_V4SF:
14575 gcc_assert (TARGET_SSE);
14576 tmp = "<logic>";
14577 ssesuffix = "ps";
14578 break;
14579
14580 default:
14581 gcc_unreachable ();
14582 }
14583
14584 switch (which_alternative)
14585 {
14586 case 0:
;; With a mask the 2-operand legacy form cannot be used; emit the
;; 3-operand VEX/EVEX form with %0 duplicated as source.
14587 if (<mask_applied>)
14588 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
14589 else
14590 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
14591 break;
14592 case 1:
14593 case 2:
14594 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
14595 break;
14596 default:
14597 gcc_unreachable ();
14598 }
14599
14600 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
14601 output_asm_insn (buf, operands);
14602 return "";
14603 }
14604 [(set_attr "isa" "noavx,avx,avx")
14605 (set_attr "type" "sselog")
14606 (set (attr "prefix_data16")
14607 (if_then_else
14608 (and (eq_attr "alternative" "0")
14609 (eq_attr "mode" "TI"))
14610 (const_string "1")
14611 (const_string "*")))
14612 (set_attr "prefix" "<mask_prefix3>,evex")
14613 (set (attr "mode")
14614 (cond [(match_test "TARGET_AVX2")
14615 (const_string "<sseinsnmode>")
14616 (match_test "TARGET_AVX")
14617 (if_then_else
14618 (match_test "<MODE_SIZE> > 16")
14619 (const_string "V8SF")
14620 (const_string "<sseinsnmode>"))
14621 (ior (not (match_test "TARGET_SSE2"))
14622 (match_test "optimize_function_for_size_p (cfun)"))
14623 (const_string "V4SF")
14624 ]
14625 (const_string "<sseinsnmode>")))])
14626
;; AND/IOR/XOR insn for byte/word vectors (no masking — there are no
;; vp<logic>b/w instructions).  For 512-bit or EVEX-only registers the
;; qword-suffixed form vp<logic>q is used, since bitwise ops are
;; element-size agnostic.
14627 (define_insn "*<code><mode>3"
14628 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
14629 (any_logic:VI12_AVX_AVX512F
14630 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
14631 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
14632 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14633 {
14634 char buf[64];
14635 const char *ops;
14636 const char *tmp;
14637 const char *ssesuffix;
14638
14639 switch (get_attr_mode (insn))
14640 {
14641 case MODE_XI:
14642 gcc_assert (TARGET_AVX512F);
14643 /* FALLTHRU */
14644 case MODE_OI:
14645 gcc_assert (TARGET_AVX2);
14646 /* FALLTHRU */
14647 case MODE_TI:
14648 gcc_assert (TARGET_SSE2);
14649 tmp = "p<logic>";
14650 switch (<MODE>mode)
14651 {
14652 case E_V64QImode:
14653 case E_V32HImode:
;; 512-bit: only the d/q-suffixed EVEX forms exist; use "q".
14654 ssesuffix = "q";
14655 break;
14656 case E_V32QImode:
14657 case E_V16HImode:
14658 case E_V16QImode:
14659 case E_V8HImode:
14660 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
14661 break;
14662 default:
14663 gcc_unreachable ();
14664 }
14665 break;
14666
14667 case MODE_V8SF:
14668 gcc_assert (TARGET_AVX);
14669 /* FALLTHRU */
14670 case MODE_V4SF:
14671 gcc_assert (TARGET_SSE);
14672 tmp = "<logic>";
14673 ssesuffix = "ps";
14674 break;
14675
14676 default:
14677 gcc_unreachable ();
14678 }
14679
14680 switch (which_alternative)
14681 {
14682 case 0:
14683 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
14684 break;
14685 case 1:
14686 case 2:
14687 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
14688 break;
14689 default:
14690 gcc_unreachable ();
14691 }
14692
14693 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
14694 output_asm_insn (buf, operands);
14695 return "";
14696 }
14697 [(set_attr "isa" "noavx,avx,avx")
14698 (set_attr "type" "sselog")
14699 (set (attr "prefix_data16")
14700 (if_then_else
14701 (and (eq_attr "alternative" "0")
14702 (eq_attr "mode" "TI"))
14703 (const_string "1")
14704 (const_string "*")))
14705 (set_attr "prefix" "orig,vex,evex")
14706 (set (attr "mode")
14707 (cond [(match_test "TARGET_AVX2")
14708 (const_string "<sseinsnmode>")
14709 (match_test "TARGET_AVX")
14710 (if_then_else
14711 (match_test "<MODE_SIZE> > 16")
14712 (const_string "V8SF")
14713 (const_string "<sseinsnmode>"))
14714 (ior (not (match_test "TARGET_SSE2"))
14715 (match_test "optimize_function_for_size_p (cfun)"))
14716 (const_string "V4SF")
14717 ]
14718 (const_string "<sseinsnmode>")))])
14719
;; Mask modes wide enough to hold a zero-extended narrower mask result;
;; DI/SI masks require AVX512BW.
14720 (define_mode_iterator AVX512ZEXTMASK
14721 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
14722
;; vptestm: per the instruction's definition, sets each mask bit when the
;; bitwise AND of the corresponding elements is nonzero.
14723 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
14724 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
14725 (unspec:<avx512fmaskmode>
14726 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14727 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14728 UNSPEC_TESTM))]
14729 "TARGET_AVX512F"
14730 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
14731 [(set_attr "prefix" "evex")
14732 (set_attr "mode" "<sseinsnmode>")])
14733
;; vptestnm: the complementary test — mask bit set when the AND is zero.
14734 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
14735 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
14736 (unspec:<avx512fmaskmode>
14737 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14738 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14739 UNSPEC_TESTNM))]
14740 "TARGET_AVX512F"
14741 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
14742 [(set_attr "prefix" "evex")
14743 (set_attr "mode" "<sseinsnmode>")])
14744
;; Zero-extended variants of vptestm/vptestnm: the k-register write
;; already clears the upper mask bits, so a wider-mode zero_extend of the
;; result (optionally ANDed with a write-mask, matching the %{%3%} zero-
;; masking syntax) is still a single instruction.  The condition insists
;; the destination mask mode is strictly wider than the natural one.
14745 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
14746 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14747 (zero_extend:AVX512ZEXTMASK
14748 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14749 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14750 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14751 UNSPEC_TESTM)))]
14752 "TARGET_AVX512BW
14753 && (<AVX512ZEXTMASK:MODE_SIZE>
14754 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14755 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14756 [(set_attr "prefix" "evex")
14757 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
14758
;; Same, but with an explicit AND against mask operand 3 (zero-masking).
14759 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
14760 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14761 (zero_extend:AVX512ZEXTMASK
14762 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
14763 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14764 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14765 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14766 UNSPEC_TESTM)
14767 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
14768 "TARGET_AVX512BW
14769 && (<AVX512ZEXTMASK:MODE_SIZE>
14770 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14771 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
14772 [(set_attr "prefix" "evex")
14773 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
14774
;; vptestnm counterpart of the plain zero-extended test.
14775 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
14776 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14777 (zero_extend:AVX512ZEXTMASK
14778 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14779 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14780 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14781 UNSPEC_TESTNM)))]
14782 "TARGET_AVX512BW
14783 && (<AVX512ZEXTMASK:MODE_SIZE>
14784 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14785 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14786 [(set_attr "prefix" "evex")
14787 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
14788
;; vptestnm counterpart of the write-masked zero-extended test.
14789 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
14790 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14791 (zero_extend:AVX512ZEXTMASK
14792 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
14793 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14794 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14795 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14796 UNSPEC_TESTNM)
14797 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
14798 "TARGET_AVX512BW
14799 && (<AVX512ZEXTMASK:MODE_SIZE>
14800 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14801 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
14802 [(set_attr "prefix" "evex")
14803 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
14804
14805 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14806 ;;
14807 ;; Parallel integral element swizzling
14808 ;;
14809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14810
;; Vector pack-with-truncation: implemented by viewing both inputs in the
;; packed mode and extracting the even elements of their concatenation.
14811 (define_expand "vec_pack_trunc_<mode>"
14812 [(match_operand:<ssepackmode> 0 "register_operand")
14813 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
14814 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
14815 "TARGET_SSE2"
14816 {
14817 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
14818 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
14819 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
14820 DONE;
14821 })
14822
;; Pack two QImode masks into one HImode mask: operand 2 shifted into the
;; high byte, operand 1 in the low byte.
14823 (define_expand "vec_pack_trunc_qi"
14824 [(set (match_operand:HI 0 "register_operand")
14825 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
14826 (const_int 8))
14827 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
14828 "TARGET_AVX512F")
14829
;; Same idea for HI/SI masks: the shift amount is the source mask width.
14830 (define_expand "vec_pack_trunc_<mode>"
14831 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
14832 (ior:<DOUBLEMASKMODE>
14833 (ashift:<DOUBLEMASKMODE>
14834 (zero_extend:<DOUBLEMASKMODE>
14835 (match_operand:SWI24 2 "register_operand"))
14836 (match_dup 3))
14837 (zero_extend:<DOUBLEMASKMODE>
14838 (match_operand:SWI24 1 "register_operand"))))]
14839 "TARGET_AVX512BW"
14840 {
14841 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
14842 })
14843
;; Pack two partial QImode masks (operand 3 = total number of valid bits,
;; only 4 or 8 supported) into one QImode mask using k-register ops:
;; result = (op1 & low-half-mask) | (op2 << nunits/2).
14844 (define_expand "vec_pack_sbool_trunc_qi"
14845 [(match_operand:QI 0 "register_operand")
14846 (match_operand:QI 1 "register_operand")
14847 (match_operand:QI 2 "register_operand")
14848 (match_operand:QI 3 "const_int_operand")]
14849 "TARGET_AVX512F"
14850 {
14851 HOST_WIDE_INT nunits = INTVAL (operands[3]);
14852 rtx mask, tem1, tem2;
14853 if (nunits != 8 && nunits != 4)
14854 FAIL;
14855 mask = gen_reg_rtx (QImode);
14856 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
14857 tem1 = gen_reg_rtx (QImode);
14858 emit_insn (gen_kandqi (tem1, operands[1], mask));
14859 if (TARGET_AVX512DQ)
14860 {
14861 tem2 = gen_reg_rtx (QImode);
14862 emit_insn (gen_kashiftqi (tem2, operands[2],
14863 GEN_INT (nunits / 2)));
14864 }
14865 else
14866 {
;; Without AVX512DQ there is no QImode kshift, so shift in HImode and
;; take the low byte of the result.
14867 tem2 = gen_reg_rtx (HImode);
14868 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
14869 QImode),
14870 GEN_INT (nunits / 2)));
14871 tem2 = lowpart_subreg (QImode, tem2, HImode);
14872 }
14873 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
14874 DONE;
14875 })
14876
;; Signed saturating pack of words to bytes (packsswb/vpacksswb).
14877 (define_insn "<sse2_avx2>_packsswb<mask_name>"
14878 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
14879 (vec_concat:VI1_AVX512
14880 (ss_truncate:<ssehalfvecmode>
14881 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
14882 (ss_truncate:<ssehalfvecmode>
14883 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
14884 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14885 "@
14886 packsswb\t{%2, %0|%0, %2}
14887 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14888 [(set_attr "isa" "noavx,avx")
14889 (set_attr "type" "sselog")
14890 (set_attr "prefix_data16" "1,*")
14891 (set_attr "prefix" "orig,<mask_prefix>")
14892 (set_attr "mode" "<sseinsnmode>")])
14893
;; Signed saturating pack of dwords to words (packssdw/vpackssdw).
14894 (define_insn "<sse2_avx2>_packssdw<mask_name>"
14895 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
14896 (vec_concat:VI2_AVX2
14897 (ss_truncate:<ssehalfvecmode>
14898 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
14899 (ss_truncate:<ssehalfvecmode>
14900 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
14901 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14902 "@
14903 packssdw\t{%2, %0|%0, %2}
14904 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14905 [(set_attr "isa" "noavx,avx")
14906 (set_attr "type" "sselog")
14907 (set_attr "prefix_data16" "1,*")
14908 (set_attr "prefix" "orig,<mask_prefix>")
14909 (set_attr "mode" "<sseinsnmode>")])
14910
;; Unsigned saturating pack of words to bytes (packuswb/vpackuswb).
14911 (define_insn "<sse2_avx2>_packuswb<mask_name>"
14912 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
14913 (vec_concat:VI1_AVX512
14914 (us_truncate:<ssehalfvecmode>
14915 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
14916 (us_truncate:<ssehalfvecmode>
14917 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
14918 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14919 "@
14920 packuswb\t{%2, %0|%0, %2}
14921 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14922 [(set_attr "isa" "noavx,avx")
14923 (set_attr "type" "sselog")
14924 (set_attr "prefix_data16" "1,*")
14925 (set_attr "prefix" "orig,<mask_prefix>")
14926 (set_attr "mode" "<sseinsnmode>")])
14927
;; vpunpckhbw on 512-bit vectors: interleave the high 8 bytes of each
;; 128-bit lane of operands 1 and 2 (hence the lane-grouped selector).
14928 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
14929 [(set (match_operand:V64QI 0 "register_operand" "=v")
14930 (vec_select:V64QI
14931 (vec_concat:V128QI
14932 (match_operand:V64QI 1 "register_operand" "v")
14933 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
14934 (parallel [(const_int 8) (const_int 72)
14935 (const_int 9) (const_int 73)
14936 (const_int 10) (const_int 74)
14937 (const_int 11) (const_int 75)
14938 (const_int 12) (const_int 76)
14939 (const_int 13) (const_int 77)
14940 (const_int 14) (const_int 78)
14941 (const_int 15) (const_int 79)
14942 (const_int 24) (const_int 88)
14943 (const_int 25) (const_int 89)
14944 (const_int 26) (const_int 90)
14945 (const_int 27) (const_int 91)
14946 (const_int 28) (const_int 92)
14947 (const_int 29) (const_int 93)
14948 (const_int 30) (const_int 94)
14949 (const_int 31) (const_int 95)
14950 (const_int 40) (const_int 104)
14951 (const_int 41) (const_int 105)
14952 (const_int 42) (const_int 106)
14953 (const_int 43) (const_int 107)
14954 (const_int 44) (const_int 108)
14955 (const_int 45) (const_int 109)
14956 (const_int 46) (const_int 110)
14957 (const_int 47) (const_int 111)
14958 (const_int 56) (const_int 120)
14959 (const_int 57) (const_int 121)
14960 (const_int 58) (const_int 122)
14961 (const_int 59) (const_int 123)
14962 (const_int 60) (const_int 124)
14963 (const_int 61) (const_int 125)
14964 (const_int 62) (const_int 126)
14965 (const_int 63) (const_int 127)])))]
14966 "TARGET_AVX512BW"
14967 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14968 [(set_attr "type" "sselog")
14969 (set_attr "prefix" "evex")
14970 (set_attr "mode" "XI")])
14971
;; vpunpckhbw on 256-bit vectors: per-128-bit-lane high-byte interleave.
14972 (define_insn "avx2_interleave_highv32qi<mask_name>"
14973 [(set (match_operand:V32QI 0 "register_operand" "=Yw")
14974 (vec_select:V32QI
14975 (vec_concat:V64QI
14976 (match_operand:V32QI 1 "register_operand" "Yw")
14977 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
14978 (parallel [(const_int 8) (const_int 40)
14979 (const_int 9) (const_int 41)
14980 (const_int 10) (const_int 42)
14981 (const_int 11) (const_int 43)
14982 (const_int 12) (const_int 44)
14983 (const_int 13) (const_int 45)
14984 (const_int 14) (const_int 46)
14985 (const_int 15) (const_int 47)
14986 (const_int 24) (const_int 56)
14987 (const_int 25) (const_int 57)
14988 (const_int 26) (const_int 58)
14989 (const_int 27) (const_int 59)
14990 (const_int 28) (const_int 60)
14991 (const_int 29) (const_int 61)
14992 (const_int 30) (const_int 62)
14993 (const_int 31) (const_int 63)])))]
14994 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14995 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14996 [(set_attr "type" "sselog")
14997 (set_attr "prefix" "<mask_prefix>")
14998 (set_attr "mode" "OI")])
14999
;; punpckhbw/vpunpckhbw on 128-bit vectors: interleave high 8 bytes.
15000 (define_insn "vec_interleave_highv16qi<mask_name>"
15001 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
15002 (vec_select:V16QI
15003 (vec_concat:V32QI
15004 (match_operand:V16QI 1 "register_operand" "0,Yw")
15005 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
15006 (parallel [(const_int 8) (const_int 24)
15007 (const_int 9) (const_int 25)
15008 (const_int 10) (const_int 26)
15009 (const_int 11) (const_int 27)
15010 (const_int 12) (const_int 28)
15011 (const_int 13) (const_int 29)
15012 (const_int 14) (const_int 30)
15013 (const_int 15) (const_int 31)])))]
15014 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15015 "@
15016 punpckhbw\t{%2, %0|%0, %2}
15017 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15018 [(set_attr "isa" "noavx,avx")
15019 (set_attr "type" "sselog")
15020 (set_attr "prefix_data16" "1,*")
15021 (set_attr "prefix" "orig,<mask_prefix>")
15022 (set_attr "mode" "TI")])
15023
;; vpunpcklbw on 512-bit vectors: interleave the low 8 bytes of each
;; 128-bit lane of operands 1 and 2.
15024 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
15025 [(set (match_operand:V64QI 0 "register_operand" "=v")
15026 (vec_select:V64QI
15027 (vec_concat:V128QI
15028 (match_operand:V64QI 1 "register_operand" "v")
15029 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
15030 (parallel [(const_int 0) (const_int 64)
15031 (const_int 1) (const_int 65)
15032 (const_int 2) (const_int 66)
15033 (const_int 3) (const_int 67)
15034 (const_int 4) (const_int 68)
15035 (const_int 5) (const_int 69)
15036 (const_int 6) (const_int 70)
15037 (const_int 7) (const_int 71)
15038 (const_int 16) (const_int 80)
15039 (const_int 17) (const_int 81)
15040 (const_int 18) (const_int 82)
15041 (const_int 19) (const_int 83)
15042 (const_int 20) (const_int 84)
15043 (const_int 21) (const_int 85)
15044 (const_int 22) (const_int 86)
15045 (const_int 23) (const_int 87)
15046 (const_int 32) (const_int 96)
15047 (const_int 33) (const_int 97)
15048 (const_int 34) (const_int 98)
15049 (const_int 35) (const_int 99)
15050 (const_int 36) (const_int 100)
15051 (const_int 37) (const_int 101)
15052 (const_int 38) (const_int 102)
15053 (const_int 39) (const_int 103)
15054 (const_int 48) (const_int 112)
15055 (const_int 49) (const_int 113)
15056 (const_int 50) (const_int 114)
15057 (const_int 51) (const_int 115)
15058 (const_int 52) (const_int 116)
15059 (const_int 53) (const_int 117)
15060 (const_int 54) (const_int 118)
15061 (const_int 55) (const_int 119)])))]
15062 "TARGET_AVX512BW"
15063 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15064 [(set_attr "type" "sselog")
15065 (set_attr "prefix" "evex")
15066 (set_attr "mode" "XI")])
15067
;; vpunpcklbw on 256-bit vectors: per-128-bit-lane low-byte interleave.
;; Like its sibling avx2_interleave_highv32qi, the <mask_name> variant
;; carries a write-mask operand and must be EVEX-encoded, so the prefix
;; attribute uses <mask_prefix> (vex unmasked, evex masked) rather than
;; a hard-coded maybe_vex.
15068 (define_insn "avx2_interleave_lowv32qi<mask_name>"
15069 [(set (match_operand:V32QI 0 "register_operand" "=Yw")
15070 (vec_select:V32QI
15071 (vec_concat:V64QI
15072 (match_operand:V32QI 1 "register_operand" "Yw")
15073 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
15074 (parallel [(const_int 0) (const_int 32)
15075 (const_int 1) (const_int 33)
15076 (const_int 2) (const_int 34)
15077 (const_int 3) (const_int 35)
15078 (const_int 4) (const_int 36)
15079 (const_int 5) (const_int 37)
15080 (const_int 6) (const_int 38)
15081 (const_int 7) (const_int 39)
15082 (const_int 16) (const_int 48)
15083 (const_int 17) (const_int 49)
15084 (const_int 18) (const_int 50)
15085 (const_int 19) (const_int 51)
15086 (const_int 20) (const_int 52)
15087 (const_int 21) (const_int 53)
15088 (const_int 22) (const_int 54)
15089 (const_int 23) (const_int 55)])))]
15090 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15091 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15092 [(set_attr "type" "sselog")
15093 (set_attr "prefix" "<mask_prefix>")
15094 (set_attr "mode" "OI")])
15095
;; punpcklbw/vpunpcklbw on 128-bit vectors: interleave low 8 bytes.
;; The AVX alternative of the <mask_name> variant takes a write-mask and
;; therefore needs EVEX encoding; use <mask_prefix> for it, matching
;; vec_interleave_highv16qi, instead of a hard-coded vex.
15096 (define_insn "vec_interleave_lowv16qi<mask_name>"
15097 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
15098 (vec_select:V16QI
15099 (vec_concat:V32QI
15100 (match_operand:V16QI 1 "register_operand" "0,Yw")
15101 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
15102 (parallel [(const_int 0) (const_int 16)
15103 (const_int 1) (const_int 17)
15104 (const_int 2) (const_int 18)
15105 (const_int 3) (const_int 19)
15106 (const_int 4) (const_int 20)
15107 (const_int 5) (const_int 21)
15108 (const_int 6) (const_int 22)
15109 (const_int 7) (const_int 23)])))]
15110 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15111 "@
15112 punpcklbw\t{%2, %0|%0, %2}
15113 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15114 [(set_attr "isa" "noavx,avx")
15115 (set_attr "type" "sselog")
15116 (set_attr "prefix_data16" "1,*")
15117 (set_attr "prefix" "orig,<mask_prefix>")
15118 (set_attr "mode" "TI")])
15119
;; vpunpckhwd on 512-bit vectors: interleave the high 4 words of each
;; 128-bit lane of operands 1 and 2.
15120 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
15121 [(set (match_operand:V32HI 0 "register_operand" "=v")
15122 (vec_select:V32HI
15123 (vec_concat:V64HI
15124 (match_operand:V32HI 1 "register_operand" "v")
15125 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
15126 (parallel [(const_int 4) (const_int 36)
15127 (const_int 5) (const_int 37)
15128 (const_int 6) (const_int 38)
15129 (const_int 7) (const_int 39)
15130 (const_int 12) (const_int 44)
15131 (const_int 13) (const_int 45)
15132 (const_int 14) (const_int 46)
15133 (const_int 15) (const_int 47)
15134 (const_int 20) (const_int 52)
15135 (const_int 21) (const_int 53)
15136 (const_int 22) (const_int 54)
15137 (const_int 23) (const_int 55)
15138 (const_int 28) (const_int 60)
15139 (const_int 29) (const_int 61)
15140 (const_int 30) (const_int 62)
15141 (const_int 31) (const_int 63)])))]
15142 "TARGET_AVX512BW"
15143 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15144 [(set_attr "type" "sselog")
15145 (set_attr "prefix" "evex")
15146 (set_attr "mode" "XI")])
15147
;; vpunpckhwd on 256-bit vectors: per-128-bit-lane high-word interleave.
15148 (define_insn "avx2_interleave_highv16hi<mask_name>"
15149 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
15150 (vec_select:V16HI
15151 (vec_concat:V32HI
15152 (match_operand:V16HI 1 "register_operand" "Yw")
15153 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
15154 (parallel [(const_int 4) (const_int 20)
15155 (const_int 5) (const_int 21)
15156 (const_int 6) (const_int 22)
15157 (const_int 7) (const_int 23)
15158 (const_int 12) (const_int 28)
15159 (const_int 13) (const_int 29)
15160 (const_int 14) (const_int 30)
15161 (const_int 15) (const_int 31)])))]
15162 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15163 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15164 [(set_attr "type" "sselog")
15165 (set_attr "prefix" "maybe_evex")
15166 (set_attr "mode" "OI")])
15167
;; punpckhwd/vpunpckhwd on 128-bit vectors: interleave high 4 words.
15168 (define_insn "vec_interleave_highv8hi<mask_name>"
15169 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
15170 (vec_select:V8HI
15171 (vec_concat:V16HI
15172 (match_operand:V8HI 1 "register_operand" "0,Yw")
15173 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
15174 (parallel [(const_int 4) (const_int 12)
15175 (const_int 5) (const_int 13)
15176 (const_int 6) (const_int 14)
15177 (const_int 7) (const_int 15)])))]
15178 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15179 "@
15180 punpckhwd\t{%2, %0|%0, %2}
15181 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15182 [(set_attr "isa" "noavx,avx")
15183 (set_attr "type" "sselog")
15184 (set_attr "prefix_data16" "1,*")
15185 (set_attr "prefix" "orig,maybe_vex")
15186 (set_attr "mode" "TI")])
15187
;; vpunpcklwd on 512-bit vectors: interleave the low 4 words of each
;; 128-bit lane of operands 1 and 2.
15188 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
15189 [(set (match_operand:V32HI 0 "register_operand" "=v")
15190 (vec_select:V32HI
15191 (vec_concat:V64HI
15192 (match_operand:V32HI 1 "register_operand" "v")
15193 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
15194 (parallel [(const_int 0) (const_int 32)
15195 (const_int 1) (const_int 33)
15196 (const_int 2) (const_int 34)
15197 (const_int 3) (const_int 35)
15198 (const_int 8) (const_int 40)
15199 (const_int 9) (const_int 41)
15200 (const_int 10) (const_int 42)
15201 (const_int 11) (const_int 43)
15202 (const_int 16) (const_int 48)
15203 (const_int 17) (const_int 49)
15204 (const_int 18) (const_int 50)
15205 (const_int 19) (const_int 51)
15206 (const_int 24) (const_int 56)
15207 (const_int 25) (const_int 57)
15208 (const_int 26) (const_int 58)
15209 (const_int 27) (const_int 59)])))]
15210 "TARGET_AVX512BW"
15211 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15212 [(set_attr "type" "sselog")
15213 (set_attr "prefix" "evex")
15214 (set_attr "mode" "XI")])
15215
;; vpunpcklwd on 256-bit vectors: per-128-bit-lane low-word interleave.
15216 (define_insn "avx2_interleave_lowv16hi<mask_name>"
15217 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
15218 (vec_select:V16HI
15219 (vec_concat:V32HI
15220 (match_operand:V16HI 1 "register_operand" "Yw")
15221 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
15222 (parallel [(const_int 0) (const_int 16)
15223 (const_int 1) (const_int 17)
15224 (const_int 2) (const_int 18)
15225 (const_int 3) (const_int 19)
15226 (const_int 8) (const_int 24)
15227 (const_int 9) (const_int 25)
15228 (const_int 10) (const_int 26)
15229 (const_int 11) (const_int 27)])))]
15230 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15231 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15232 [(set_attr "type" "sselog")
15233 (set_attr "prefix" "maybe_evex")
15234 (set_attr "mode" "OI")])
15235
;; punpcklwd/vpunpcklwd on 128-bit vectors: interleave low 4 words.
15236 (define_insn "vec_interleave_lowv8hi<mask_name>"
15237 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
15238 (vec_select:V8HI
15239 (vec_concat:V16HI
15240 (match_operand:V8HI 1 "register_operand" "0,Yw")
15241 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
15242 (parallel [(const_int 0) (const_int 8)
15243 (const_int 1) (const_int 9)
15244 (const_int 2) (const_int 10)
15245 (const_int 3) (const_int 11)])))]
15246 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
15247 "@
15248 punpcklwd\t{%2, %0|%0, %2}
15249 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15250 [(set_attr "isa" "noavx,avx")
15251 (set_attr "type" "sselog")
15252 (set_attr "prefix_data16" "1,*")
15253 (set_attr "prefix" "orig,maybe_evex")
15254 (set_attr "mode" "TI")])
15255
;; vpunpckhdq on 256-bit vectors: per-128-bit-lane high-dword interleave.
15256 (define_insn "avx2_interleave_highv8si<mask_name>"
15257 [(set (match_operand:V8SI 0 "register_operand" "=v")
15258 (vec_select:V8SI
15259 (vec_concat:V16SI
15260 (match_operand:V8SI 1 "register_operand" "v")
15261 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
15262 (parallel [(const_int 2) (const_int 10)
15263 (const_int 3) (const_int 11)
15264 (const_int 6) (const_int 14)
15265 (const_int 7) (const_int 15)])))]
15266 "TARGET_AVX2 && <mask_avx512vl_condition>"
15267 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15268 [(set_attr "type" "sselog")
15269 (set_attr "prefix" "maybe_evex")
15270 (set_attr "mode" "OI")])
15271
;; vpunpckhdq on 512-bit vectors: interleave the high 2 dwords of each
;; 128-bit lane of operands 1 and 2.
15272 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
15273 [(set (match_operand:V16SI 0 "register_operand" "=v")
15274 (vec_select:V16SI
15275 (vec_concat:V32SI
15276 (match_operand:V16SI 1 "register_operand" "v")
15277 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
15278 (parallel [(const_int 2) (const_int 18)
15279 (const_int 3) (const_int 19)
15280 (const_int 6) (const_int 22)
15281 (const_int 7) (const_int 23)
15282 (const_int 10) (const_int 26)
15283 (const_int 11) (const_int 27)
15284 (const_int 14) (const_int 30)
15285 (const_int 15) (const_int 31)])))]
15286 "TARGET_AVX512F"
15287 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15288 [(set_attr "type" "sselog")
15289 (set_attr "prefix" "evex")
15290 (set_attr "mode" "XI")])
15291
15292
;; punpckhdq/vpunpckhdq on 128-bit vectors: interleave high 2 dwords.
15293 (define_insn "vec_interleave_highv4si<mask_name>"
15294 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
15295 (vec_select:V4SI
15296 (vec_concat:V8SI
15297 (match_operand:V4SI 1 "register_operand" "0,v")
15298 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
15299 (parallel [(const_int 2) (const_int 6)
15300 (const_int 3) (const_int 7)])))]
15301 "TARGET_SSE2 && <mask_avx512vl_condition>"
15302 "@
15303 punpckhdq\t{%2, %0|%0, %2}
15304 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15305 [(set_attr "isa" "noavx,avx")
15306 (set_attr "type" "sselog")
15307 (set_attr "prefix_data16" "1,*")
15308 (set_attr "prefix" "orig,maybe_vex")
15309 (set_attr "mode" "TI")])
15310
;; vpunpckldq on 256-bit vectors: per-128-bit-lane low-dword interleave.
15311 (define_insn "avx2_interleave_lowv8si<mask_name>"
15312 [(set (match_operand:V8SI 0 "register_operand" "=v")
15313 (vec_select:V8SI
15314 (vec_concat:V16SI
15315 (match_operand:V8SI 1 "register_operand" "v")
15316 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
15317 (parallel [(const_int 0) (const_int 8)
15318 (const_int 1) (const_int 9)
15319 (const_int 4) (const_int 12)
15320 (const_int 5) (const_int 13)])))]
15321 "TARGET_AVX2 && <mask_avx512vl_condition>"
15322 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15323 [(set_attr "type" "sselog")
15324 (set_attr "prefix" "maybe_evex")
15325 (set_attr "mode" "OI")])
15326
;; vpunpckldq on 512-bit vectors: interleave the low 2 dwords of each
;; 128-bit lane of operands 1 and 2.
15327 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
15328 [(set (match_operand:V16SI 0 "register_operand" "=v")
15329 (vec_select:V16SI
15330 (vec_concat:V32SI
15331 (match_operand:V16SI 1 "register_operand" "v")
15332 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
15333 (parallel [(const_int 0) (const_int 16)
15334 (const_int 1) (const_int 17)
15335 (const_int 4) (const_int 20)
15336 (const_int 5) (const_int 21)
15337 (const_int 8) (const_int 24)
15338 (const_int 9) (const_int 25)
15339 (const_int 12) (const_int 28)
15340 (const_int 13) (const_int 29)])))]
15341 "TARGET_AVX512F"
15342 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15343 [(set_attr "type" "sselog")
15344 (set_attr "prefix" "evex")
15345 (set_attr "mode" "XI")])
15346
;; punpckldq/vpunpckldq on 128-bit vectors: interleave low 2 dwords.
;; The AVX alternative of the <mask_name> variant may carry a write-mask
;; (EVEX-only); use maybe_vex for it, matching vec_interleave_highv4si,
;; rather than a hard-coded vex.
15347 (define_insn "vec_interleave_lowv4si<mask_name>"
15348 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
15349 (vec_select:V4SI
15350 (vec_concat:V8SI
15351 (match_operand:V4SI 1 "register_operand" "0,v")
15352 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
15353 (parallel [(const_int 0) (const_int 4)
15354 (const_int 1) (const_int 5)])))]
15355 "TARGET_SSE2 && <mask_avx512vl_condition>"
15356 "@
15357 punpckldq\t{%2, %0|%0, %2}
15358 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15359 [(set_attr "isa" "noavx,avx")
15360 (set_attr "type" "sselog")
15361 (set_attr "prefix_data16" "1,*")
15362 (set_attr "prefix" "orig,maybe_vex")
15363 (set_attr "mode" "TI")])
15364
;; 256-bit integer interleave-high: the lane-local vpunpck results must be
;; recombined with vperm2i128; selector 0x31 picks the high 128-bit halves
;; of the low- and high-interleaved temporaries.
15365 (define_expand "vec_interleave_high<mode>"
15366 [(match_operand:VI_256 0 "register_operand")
15367 (match_operand:VI_256 1 "register_operand")
15368 (match_operand:VI_256 2 "nonimmediate_operand")]
15369 "TARGET_AVX2"
15370 {
15371 rtx t1 = gen_reg_rtx (<MODE>mode);
15372 rtx t2 = gen_reg_rtx (<MODE>mode);
15373 rtx t3 = gen_reg_rtx (V4DImode);
15374 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
15375 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
15376 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
15377 gen_lowpart (V4DImode, t2),
15378 GEN_INT (1 + (3 << 4))));
15379 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
15380 DONE;
15381 })
15382
;; 256-bit integer interleave-low: same recombination, but selector 0x20
;; picks the low 128-bit halves of the two temporaries.
15383 (define_expand "vec_interleave_low<mode>"
15384 [(match_operand:VI_256 0 "register_operand")
15385 (match_operand:VI_256 1 "register_operand")
15386 (match_operand:VI_256 2 "nonimmediate_operand")]
15387 "TARGET_AVX2"
15388 {
15389 rtx t1 = gen_reg_rtx (<MODE>mode);
15390 rtx t2 = gen_reg_rtx (<MODE>mode);
15391 rtx t3 = gen_reg_rtx (V4DImode);
15392 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
15393 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
15394 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
15395 gen_lowpart (V4DImode, t2),
15396 GEN_INT (0 + (2 << 4))));
15397 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
15398 DONE;
15399 })
15400
15401 ;; Modes handled by pinsr patterns.
15402 (define_mode_iterator PINSR_MODE
15403 [(V16QI "TARGET_SSE4_1") V8HI
15404 (V4SI "TARGET_SSE4_1")
15405 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
15406
;; ISA-name prefix for the pattern name: pinsrw exists since SSE2, the
;; byte/dword/qword variants only since SSE4.1.
15407 (define_mode_attr sse2p4_1
15408 [(V16QI "sse4_1") (V8HI "sse2")
15409 (V4SI "sse4_1") (V2DI "sse4_1")])
15410
;; ISA gate for the EVEX-encoded alternatives of the pinsr pattern.
15411 (define_mode_attr pinsr_evex_isa
15412 [(V16QI "avx512bw") (V8HI "avx512bw")
15413 (V4SI "avx512dq") (V2DI "avx512dq")])
15414
15415 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert a scalar (from GPR or memory) into one element of a vector.
;; Operand 3 arrives as a one-hot merge mask and is converted below to
;; the element index the instruction takes.  Alternatives: 0-1 legacy
;; two-operand, 2-3 VEX, 4-5 EVEX.
15416 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
15417 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
15418 (vec_merge:PINSR_MODE
15419 (vec_duplicate:PINSR_MODE
15420 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
15421 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
15422 (match_operand:SI 3 "const_int_operand")))]
15423 "TARGET_SSE2
15424 && ((unsigned) exact_log2 (INTVAL (operands[3]))
15425 < GET_MODE_NUNITS (<MODE>mode))"
15426 {
15427 /* Turn the one-hot merge mask into the element index.  */
15427 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
15428
15429 switch (which_alternative)
15430 {
15431 case 0:
15432 /* For sub-SI scalars from a register, use the 32-bit view (%k2).  */
15432 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
15433 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
15434 /* FALLTHRU */
15435 case 1:
15436 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
15437 case 2:
15438 case 4:
15439 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
15440 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
15441 /* FALLTHRU */
15442 case 3:
15443 case 5:
15444 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15445 default:
15446 gcc_unreachable ();
15447 }
15448 }
15449 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
15450 (set_attr "type" "sselog")
15451 (set (attr "prefix_rex")
15452 (if_then_else
15453 (and (not (match_test "TARGET_AVX"))
15454 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
15455 (const_string "1")
15456 (const_string "*")))
15457 (set (attr "prefix_data16")
15458 (if_then_else
15459 (and (not (match_test "TARGET_AVX"))
15460 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15461 (const_string "1")
15462 (const_string "*")))
15463 (set (attr "prefix_extra")
15464 (if_then_else
15465 (and (not (match_test "TARGET_AVX"))
15466 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15467 (const_string "*")
15468 (const_string "1")))
15469 (set_attr "length_immediate" "1")
15470 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
15471 (set_attr "mode" "TI")])
15472
;; Masked vinsert{f,i}{32x4,64x2,...}: translate the quarter index
;; (operand 3) into the vec_merge selector — clear the 4 (or 2) element
;; positions of the chosen quarter — and defer to the _1_mask insn.
15473 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
15474 [(match_operand:AVX512_VEC 0 "register_operand")
15475 (match_operand:AVX512_VEC 1 "register_operand")
15476 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
15477 (match_operand:SI 3 "const_0_to_3_operand")
15478 (match_operand:AVX512_VEC 4 "register_operand")
15479 (match_operand:<avx512fmaskmode> 5 "register_operand")]
15480 "TARGET_AVX512F"
15481 {
15482 int mask, selector;
15483 mask = INTVAL (operands[3]);
15484 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
15485 ? 0xFFFF ^ (0x000F << mask * 4)
15486 : 0xFF ^ (0x03 << mask * 2));
15487 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
15488 (operands[0], operands[1], operands[2], GEN_INT (selector),
15489 operands[4], operands[5]));
15490 DONE;
15491 })
15492
;; Insert into quarter 0.  Alternative 0 (operand 1 is a register) is a
;; real vinsert with immediate 0; alternatives 1-2 (operand 1 is zero)
;; degenerate to a plain 128-bit move of operand 2, since the upper bits
;; are zeroed anyway — the move flavor is picked per mode and alignment.
15493 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
15494 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
15495 (vec_merge:AVX512_VEC
15496 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
15497 (vec_duplicate:AVX512_VEC
15498 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
15499 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
15500 "TARGET_AVX512F
15501 && (INTVAL (operands[3])
15502 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
15503 {
15504 if (which_alternative == 0)
15505 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
15506 switch (<MODE>mode)
15507 {
15508 case E_V8DFmode:
15509 if (misaligned_operand (operands[2], <ssequartermode>mode))
15510 return "vmovupd\t{%2, %x0|%x0, %2}";
15511 else
15512 return "vmovapd\t{%2, %x0|%x0, %2}";
15513 case E_V16SFmode:
15514 if (misaligned_operand (operands[2], <ssequartermode>mode))
15515 return "vmovups\t{%2, %x0|%x0, %2}";
15516 else
15517 return "vmovaps\t{%2, %x0|%x0, %2}";
15518 case E_V8DImode:
/* Alternative 2 allows EVEX-only registers, which require the
   element-suffixed EVEX move mnemonics.  */
15519 if (misaligned_operand (operands[2], <ssequartermode>mode))
15520 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
15521 : "vmovdqu\t{%2, %x0|%x0, %2}";
15522 else
15523 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
15524 : "vmovdqa\t{%2, %x0|%x0, %2}";
15525 case E_V16SImode:
15526 if (misaligned_operand (operands[2], <ssequartermode>mode))
15527 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
15528 : "vmovdqu\t{%2, %x0|%x0, %2}";
15529 else
15530 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
15531 : "vmovdqa\t{%2, %x0|%x0, %2}";
15532 default:
15533 gcc_unreachable ();
15534 }
15535 }
15536 [(set_attr "type" "sselog,ssemov,ssemov")
15537 (set_attr "length_immediate" "1,0,0")
15538 (set_attr "prefix" "evex,vex,evex")
15539 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
15540
;; 128-bit insert into a 512-bit vector, optionally masked.  The
;; vec_merge selector (operand 3) is decoded back to the 2-bit lane
;; number that vinsert{32x4,64x2} expects: each accepted selector has
;; exactly one 128-bit lane's element bits cleared; any other selector
;; value cannot have been generated and hits gcc_unreachable.
15541 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
15542 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
15543 (vec_merge:AVX512_VEC
15544 (match_operand:AVX512_VEC 1 "register_operand" "v")
15545 (vec_duplicate:AVX512_VEC
15546 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
15547 (match_operand:SI 3 "const_int_operand" "n")))]
15548 "TARGET_AVX512F"
15549 {
15550 int mask;
15551 int selector = INTVAL (operands[3]);
15552
;; First operand of each ?: pair is the 16-element (4-byte unit) form,
;; second is the 8-element (8-byte unit) form of the same lane.
15553 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
15554 mask = 0;
15555 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
15556 mask = 1;
15557 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
15558 mask = 2;
15559 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
15560 mask = 3;
15561 else
15562 gcc_unreachable ();
15563
15564 operands[3] = GEN_INT (mask);
15565
15566 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
15567 }
15568 [(set_attr "type" "sselog")
15569 (set_attr "length_immediate" "1")
15570 (set_attr "prefix" "evex")
15571 (set_attr "mode" "<sseinsnmode>")])
15572
;; Expand a masked insert of a 256-bit (half-width) value into a 512-bit
;; vector.  Operand 3 is 0 or 1 and simply dispatches to the
;; vec_set_lo_* or vec_set_hi_* pattern for the mode.
15573 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
15574 [(match_operand:AVX512_VEC_2 0 "register_operand")
15575 (match_operand:AVX512_VEC_2 1 "register_operand")
15576 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
15577 (match_operand:SI 3 "const_0_to_1_operand")
15578 (match_operand:AVX512_VEC_2 4 "register_operand")
15579 (match_operand:<avx512fmaskmode> 5 "register_operand")]
15580 "TARGET_AVX512F"
15581 {
15582 int mask = INTVAL (operands[3]);
15583 if (mask == 0)
15584 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
15585 operands[2], operands[4],
15586 operands[5]));
15587 else
15588 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
15589 operands[2], operands[4],
15590 operands[5]));
15591 DONE;
15592 })
15593
;; Replace the low 256 bits of a 512-bit 32-bit-element vector
;; (vinsert{f,i}32x8 with immediate 0).  The high half of operand 1 is
;; preserved via the explicit vec_select of elements 8..15.
15594 (define_insn "vec_set_lo_<mode><mask_name>"
15595 [(set (match_operand:V16FI 0 "register_operand" "=v")
15596 (vec_concat:V16FI
15597 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
15598 (vec_select:<ssehalfvecmode>
15599 (match_operand:V16FI 1 "register_operand" "v")
15600 (parallel [(const_int 8) (const_int 9)
15601 (const_int 10) (const_int 11)
15602 (const_int 12) (const_int 13)
15603 (const_int 14) (const_int 15)]))))]
15604 "TARGET_AVX512DQ"
15605 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
15606 [(set_attr "type" "sselog")
15607 (set_attr "length_immediate" "1")
15608 (set_attr "prefix" "evex")
15609 (set_attr "mode" "<sseinsnmode>")])
15610
;; Counterpart for the high 256 bits: elements 0..7 of operand 1 are
;; kept and operand 2 is concatenated on top (immediate 0x1).
15611 (define_insn "vec_set_hi_<mode><mask_name>"
15612 [(set (match_operand:V16FI 0 "register_operand" "=v")
15613 (vec_concat:V16FI
15614 (vec_select:<ssehalfvecmode>
15615 (match_operand:V16FI 1 "register_operand" "v")
15616 (parallel [(const_int 0) (const_int 1)
15617 (const_int 2) (const_int 3)
15618 (const_int 4) (const_int 5)
15619 (const_int 6) (const_int 7)]))
15620 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
15621 "TARGET_AVX512DQ"
15622 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
15623 [(set_attr "type" "sselog")
15624 (set_attr "length_immediate" "1")
15625 (set_attr "prefix" "evex")
15626 (set_attr "mode" "<sseinsnmode>")])
15627
;; Replace the low 256 bits of a 512-bit 64-bit-element vector
;; (vinsert{f,i}64x4 with immediate 0); elements 4..7 of operand 1 are
;; preserved.  Only AVX512F is needed for the 64x4 forms.
15628 (define_insn "vec_set_lo_<mode><mask_name>"
15629 [(set (match_operand:V8FI 0 "register_operand" "=v")
15630 (vec_concat:V8FI
15631 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
15632 (vec_select:<ssehalfvecmode>
15633 (match_operand:V8FI 1 "register_operand" "v")
15634 (parallel [(const_int 4) (const_int 5)
15635 (const_int 6) (const_int 7)]))))]
15636 "TARGET_AVX512F"
15637 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
15638 [(set_attr "type" "sselog")
15639 (set_attr "length_immediate" "1")
15640 (set_attr "prefix" "evex")
15641 (set_attr "mode" "XI")])
15642
;; Counterpart for the high 256 bits: elements 0..3 of operand 1 are
;; kept and operand 2 becomes the upper half (immediate 0x1).
15643 (define_insn "vec_set_hi_<mode><mask_name>"
15644 [(set (match_operand:V8FI 0 "register_operand" "=v")
15645 (vec_concat:V8FI
15646 (vec_select:<ssehalfvecmode>
15647 (match_operand:V8FI 1 "register_operand" "v")
15648 (parallel [(const_int 0) (const_int 1)
15649 (const_int 2) (const_int 3)]))
15650 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
15651 "TARGET_AVX512F"
15652 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
15653 [(set_attr "type" "sselog")
15654 (set_attr "length_immediate" "1")
15655 (set_attr "prefix" "evex")
15656 (set_attr "mode" "XI")])
15657
;; Expand the masked 64x2 shuffle for 256-bit vectors.  Each of the two
;; low bits of the immediate (operand 3) selects one 128-bit chunk; the
;; bits are turned into explicit element indices for the _1 insn, where
;; indices 0..3 address operand 1 and 4..7 address operand 2.
15658 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
15659 [(match_operand:VI8F_256 0 "register_operand")
15660 (match_operand:VI8F_256 1 "register_operand")
15661 (match_operand:VI8F_256 2 "nonimmediate_operand")
15662 (match_operand:SI 3 "const_0_to_3_operand")
15663 (match_operand:VI8F_256 4 "register_operand")
15664 (match_operand:QI 5 "register_operand")]
15665 "TARGET_AVX512DQ"
15666 {
15667 int mask = INTVAL (operands[3]);
15668 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
15669 (operands[0], operands[1], operands[2],
15670 GEN_INT (((mask >> 0) & 1) * 2 + 0),
15671 GEN_INT (((mask >> 0) & 1) * 2 + 1),
15672 GEN_INT (((mask >> 1) & 1) * 2 + 4),
15673 GEN_INT (((mask >> 1) & 1) * 2 + 5),
15674 operands[4], operands[5]));
15675 DONE;
15676 })
15677
;; vshuf{f,i}64x2 for 256-bit modes.  The insn condition requires each
;; index pair to name an aligned 128-bit chunk (even index, followed by
;; index+1); the chunk numbers are re-packed into the 2-bit immediate.
;; Note the insn itself is gated on AVX512VL, while the expander above
;; is gated on AVX512DQ.
15678 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
15679 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
15680 (vec_select:VI8F_256
15681 (vec_concat:<ssedoublemode>
15682 (match_operand:VI8F_256 1 "register_operand" "v")
15683 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
15684 (parallel [(match_operand 3 "const_0_to_3_operand")
15685 (match_operand 4 "const_0_to_3_operand")
15686 (match_operand 5 "const_4_to_7_operand")
15687 (match_operand 6 "const_4_to_7_operand")])))]
15688 "TARGET_AVX512VL
15689 && (INTVAL (operands[3]) & 1) == 0
15690 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15691 && (INTVAL (operands[5]) & 1) == 0
15692 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
15693 {
15694 int mask;
15695 mask = INTVAL (operands[3]) / 2;
15696 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
15697 operands[3] = GEN_INT (mask);
15698 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
15699 }
15700 [(set_attr "type" "sselog")
15701 (set_attr "length_immediate" "1")
15702 (set_attr "prefix" "evex")
15703 (set_attr "mode" "XI")])
15704
;; Expand the masked 64x2 shuffle for 512-bit vectors.  Each 2-bit field
;; of the immediate selects one of four 128-bit chunks; the two low
;; fields read operand 1 (element indices 0..7), the two high fields
;; read operand 2 (indices 8..15).
15705 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
15706 [(match_operand:V8FI 0 "register_operand")
15707 (match_operand:V8FI 1 "register_operand")
15708 (match_operand:V8FI 2 "nonimmediate_operand")
15709 (match_operand:SI 3 "const_0_to_255_operand")
15710 (match_operand:V8FI 4 "register_operand")
15711 (match_operand:QI 5 "register_operand")]
15712 "TARGET_AVX512F"
15713 {
15714 int mask = INTVAL (operands[3]);
15715 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
15716 (operands[0], operands[1], operands[2],
15717 GEN_INT (((mask >> 0) & 3) * 2),
15718 GEN_INT (((mask >> 0) & 3) * 2 + 1),
15719 GEN_INT (((mask >> 2) & 3) * 2),
15720 GEN_INT (((mask >> 2) & 3) * 2 + 1),
15721 GEN_INT (((mask >> 4) & 3) * 2 + 8),
15722 GEN_INT (((mask >> 4) & 3) * 2 + 9),
15723 GEN_INT (((mask >> 6) & 3) * 2 + 8),
15724 GEN_INT (((mask >> 6) & 3) * 2 + 9),
15725 operands[4], operands[5]));
15726 DONE;
15727 })
15728
;; vshuf{f,i}64x2 for 512-bit modes.  The condition verifies that every
;; index pair denotes an aligned 128-bit chunk (even index, then
;; index+1); the four chunk numbers are re-encoded into the 8-bit
;; immediate, subtracting 8 for the chunks taken from operand 2.
15729 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
15730 [(set (match_operand:V8FI 0 "register_operand" "=v")
15731 (vec_select:V8FI
15732 (vec_concat:<ssedoublemode>
15733 (match_operand:V8FI 1 "register_operand" "v")
15734 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
15735 (parallel [(match_operand 3 "const_0_to_7_operand")
15736 (match_operand 4 "const_0_to_7_operand")
15737 (match_operand 5 "const_0_to_7_operand")
15738 (match_operand 6 "const_0_to_7_operand")
15739 (match_operand 7 "const_8_to_15_operand")
15740 (match_operand 8 "const_8_to_15_operand")
15741 (match_operand 9 "const_8_to_15_operand")
15742 (match_operand 10 "const_8_to_15_operand")])))]
15743 "TARGET_AVX512F
15744 && (INTVAL (operands[3]) & 1) == 0
15745 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15746 && (INTVAL (operands[5]) & 1) == 0
15747 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
15748 && (INTVAL (operands[7]) & 1) == 0
15749 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
15750 && (INTVAL (operands[9]) & 1) == 0
15751 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
15752 {
15753 int mask;
15754 mask = INTVAL (operands[3]) / 2;
15755 mask |= INTVAL (operands[5]) / 2 << 2;
15756 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
15757 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
15758 operands[3] = GEN_INT (mask);
15759
15760 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
15761 }
15762 [(set_attr "type" "sselog")
15763 (set_attr "length_immediate" "1")
15764 (set_attr "prefix" "evex")
15765 (set_attr "mode" "<sseinsnmode>")])
15766
;; Single-input form of the 512-bit 64x2 shuffle: a vec_select of
;; aligned 128-bit chunks out of one register is emitted as
;; vshuf{f,i}64x2 with the same register used for both sources, so no
;; offset subtraction is needed when building the immediate.
15767 (define_insn "*avx512f_shuf_<shuffletype>64x2_1<mask_name>_1"
15768 [(set (match_operand:V8FI 0 "register_operand" "=v")
15769 (vec_select:V8FI
15770 (match_operand:V8FI 1 "register_operand" "v")
15771 (parallel [(match_operand 2 "const_0_to_7_operand")
15772 (match_operand 3 "const_0_to_7_operand")
15773 (match_operand 4 "const_0_to_7_operand")
15774 (match_operand 5 "const_0_to_7_operand")
15775 (match_operand 6 "const_0_to_7_operand")
15776 (match_operand 7 "const_0_to_7_operand")
15777 (match_operand 8 "const_0_to_7_operand")
15778 (match_operand 9 "const_0_to_7_operand")])))]
15779 "TARGET_AVX512F
15780 && (INTVAL (operands[2]) & 1) == 0
15781 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
15782 && (INTVAL (operands[4]) & 1) == 0
15783 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
15784 && (INTVAL (operands[6]) & 1) == 0
15785 && INTVAL (operands[6]) == INTVAL (operands[7]) - 1
15786 && (INTVAL (operands[8]) & 1) == 0
15787 && INTVAL (operands[8]) == INTVAL (operands[9]) - 1"
15788 {
15789 int mask;
15790 mask = INTVAL (operands[2]) / 2;
15791 mask |= INTVAL (operands[4]) / 2 << 2;
15792 mask |= INTVAL (operands[6]) / 2 << 4;
15793 mask |= INTVAL (operands[8]) / 2 << 6;
15794 operands[2] = GEN_INT (mask);
15795
15796 return "vshuf<shuffletype>64x2\t{%2, %1, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %1, %2}";
15797 }
15798 [(set_attr "type" "sselog")
15799 (set_attr "length_immediate" "1")
15800 (set_attr "prefix" "evex")
15801 (set_attr "mode" "<sseinsnmode>")])
15802
;; Expand the masked 32x4 shuffle for 256-bit vectors.  Each of the two
;; low bits of the immediate picks a 128-bit chunk of four 32-bit
;; elements: bit 0 addresses operand 1 (indices 0..7), bit 1 addresses
;; operand 2 (indices 8..15).
15803 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
15804 [(match_operand:VI4F_256 0 "register_operand")
15805 (match_operand:VI4F_256 1 "register_operand")
15806 (match_operand:VI4F_256 2 "nonimmediate_operand")
15807 (match_operand:SI 3 "const_0_to_3_operand")
15808 (match_operand:VI4F_256 4 "register_operand")
15809 (match_operand:QI 5 "register_operand")]
15810 "TARGET_AVX512VL"
15811 {
15812 int mask = INTVAL (operands[3]);
15813 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
15814 (operands[0], operands[1], operands[2],
15815 GEN_INT (((mask >> 0) & 1) * 4 + 0),
15816 GEN_INT (((mask >> 0) & 1) * 4 + 1),
15817 GEN_INT (((mask >> 0) & 1) * 4 + 2),
15818 GEN_INT (((mask >> 0) & 1) * 4 + 3),
15819 GEN_INT (((mask >> 1) & 1) * 4 + 8),
15820 GEN_INT (((mask >> 1) & 1) * 4 + 9),
15821 GEN_INT (((mask >> 1) & 1) * 4 + 10),
15822 GEN_INT (((mask >> 1) & 1) * 4 + 11),
15823 operands[4], operands[5]));
15824 DONE;
15825 })
15826
;; vshuf{f,i}32x4 for 256-bit modes.  The condition requires each group
;; of four indices to be a consecutive, 4-aligned run (one 128-bit
;; chunk); the chunk numbers are re-packed into the 2-bit immediate.
15827 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
15828 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
15829 (vec_select:VI4F_256
15830 (vec_concat:<ssedoublemode>
15831 (match_operand:VI4F_256 1 "register_operand" "v")
15832 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
15833 (parallel [(match_operand 3 "const_0_to_7_operand")
15834 (match_operand 4 "const_0_to_7_operand")
15835 (match_operand 5 "const_0_to_7_operand")
15836 (match_operand 6 "const_0_to_7_operand")
15837 (match_operand 7 "const_8_to_15_operand")
15838 (match_operand 8 "const_8_to_15_operand")
15839 (match_operand 9 "const_8_to_15_operand")
15840 (match_operand 10 "const_8_to_15_operand")])))]
15841 "TARGET_AVX512VL
15842 && (INTVAL (operands[3]) & 3) == 0
15843 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15844 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
15845 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
15846 && (INTVAL (operands[7]) & 3) == 0
15847 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
15848 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
15849 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
15850 {
15851 int mask;
15852 mask = INTVAL (operands[3]) / 4;
15853 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
15854 operands[3] = GEN_INT (mask);
15855
15856 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
15857 }
15858 [(set_attr "type" "sselog")
15859 (set_attr "length_immediate" "1")
15860 (set_attr "prefix" "evex")
15861 (set_attr "mode" "<sseinsnmode>")])
15862
;; Expand the masked 32x4 shuffle for 512-bit vectors.  Each 2-bit field
;; of the immediate selects one of four 128-bit chunks of four 32-bit
;; elements; the two low fields read operand 1 (indices 0..15), the two
;; high fields read operand 2 (indices 16..31).
15863 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
15864 [(match_operand:V16FI 0 "register_operand")
15865 (match_operand:V16FI 1 "register_operand")
15866 (match_operand:V16FI 2 "nonimmediate_operand")
15867 (match_operand:SI 3 "const_0_to_255_operand")
15868 (match_operand:V16FI 4 "register_operand")
15869 (match_operand:HI 5 "register_operand")]
15870 "TARGET_AVX512F"
15871 {
15872 int mask = INTVAL (operands[3]);
15873 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
15874 (operands[0], operands[1], operands[2],
15875 GEN_INT (((mask >> 0) & 3) * 4),
15876 GEN_INT (((mask >> 0) & 3) * 4 + 1),
15877 GEN_INT (((mask >> 0) & 3) * 4 + 2),
15878 GEN_INT (((mask >> 0) & 3) * 4 + 3),
15879 GEN_INT (((mask >> 2) & 3) * 4),
15880 GEN_INT (((mask >> 2) & 3) * 4 + 1),
15881 GEN_INT (((mask >> 2) & 3) * 4 + 2),
15882 GEN_INT (((mask >> 2) & 3) * 4 + 3),
15883 GEN_INT (((mask >> 4) & 3) * 4 + 16),
15884 GEN_INT (((mask >> 4) & 3) * 4 + 17),
15885 GEN_INT (((mask >> 4) & 3) * 4 + 18),
15886 GEN_INT (((mask >> 4) & 3) * 4 + 19),
15887 GEN_INT (((mask >> 6) & 3) * 4 + 16),
15888 GEN_INT (((mask >> 6) & 3) * 4 + 17),
15889 GEN_INT (((mask >> 6) & 3) * 4 + 18),
15890 GEN_INT (((mask >> 6) & 3) * 4 + 19),
15891 operands[4], operands[5]));
15892 DONE;
15893 })
15894
;; vshuf{f,i}32x4 for 512-bit modes.  The condition requires each group
;; of four indices to be a consecutive, 4-aligned run (an aligned
;; 128-bit chunk); the chunk numbers are re-encoded into the 8-bit
;; immediate, subtracting 16 for chunks taken from operand 2.
15895 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
15896 [(set (match_operand:V16FI 0 "register_operand" "=v")
15897 (vec_select:V16FI
15898 (vec_concat:<ssedoublemode>
15899 (match_operand:V16FI 1 "register_operand" "v")
15900 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
15901 (parallel [(match_operand 3 "const_0_to_15_operand")
15902 (match_operand 4 "const_0_to_15_operand")
15903 (match_operand 5 "const_0_to_15_operand")
15904 (match_operand 6 "const_0_to_15_operand")
15905 (match_operand 7 "const_0_to_15_operand")
15906 (match_operand 8 "const_0_to_15_operand")
15907 (match_operand 9 "const_0_to_15_operand")
15908 (match_operand 10 "const_0_to_15_operand")
15909 (match_operand 11 "const_16_to_31_operand")
15910 (match_operand 12 "const_16_to_31_operand")
15911 (match_operand 13 "const_16_to_31_operand")
15912 (match_operand 14 "const_16_to_31_operand")
15913 (match_operand 15 "const_16_to_31_operand")
15914 (match_operand 16 "const_16_to_31_operand")
15915 (match_operand 17 "const_16_to_31_operand")
15916 (match_operand 18 "const_16_to_31_operand")])))]
15917 "TARGET_AVX512F
15918 && (INTVAL (operands[3]) & 3) == 0
15919 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15920 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
15921 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
15922 && (INTVAL (operands[7]) & 3) == 0
15923 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
15924 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
15925 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
15926 && (INTVAL (operands[11]) & 3) == 0
15927 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
15928 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
15929 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
15930 && (INTVAL (operands[15]) & 3) == 0
15931 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
15932 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
15933 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
15934 {
15935 int mask;
15936 mask = INTVAL (operands[3]) / 4;
15937 mask |= INTVAL (operands[7]) / 4 << 2;
15938 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
15939 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
15940 operands[3] = GEN_INT (mask);
15941
15942 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
15943 }
15944 [(set_attr "type" "sselog")
15945 (set_attr "length_immediate" "1")
15946 (set_attr "prefix" "evex")
15947 (set_attr "mode" "<sseinsnmode>")])
15948
;; Single-input form of the 512-bit 32x4 shuffle: a vec_select of
;; aligned 128-bit chunks from one register is emitted as
;; vshuf{f,i}32x4 with the same register for both sources, so the chunk
;; numbers are encoded without any offset subtraction.
15949 (define_insn "*avx512f_shuf_<shuffletype>32x4_1<mask_name>_1"
15950 [(set (match_operand:V16FI 0 "register_operand" "=v")
15951 (vec_select:V16FI
15952 (match_operand:V16FI 1 "register_operand" "v")
15953 (parallel [(match_operand 2 "const_0_to_15_operand")
15954 (match_operand 3 "const_0_to_15_operand")
15955 (match_operand 4 "const_0_to_15_operand")
15956 (match_operand 5 "const_0_to_15_operand")
15957 (match_operand 6 "const_0_to_15_operand")
15958 (match_operand 7 "const_0_to_15_operand")
15959 (match_operand 8 "const_0_to_15_operand")
15960 (match_operand 9 "const_0_to_15_operand")
15961 (match_operand 10 "const_0_to_15_operand")
15962 (match_operand 11 "const_0_to_15_operand")
15963 (match_operand 12 "const_0_to_15_operand")
15964 (match_operand 13 "const_0_to_15_operand")
15965 (match_operand 14 "const_0_to_15_operand")
15966 (match_operand 15 "const_0_to_15_operand")
15967 (match_operand 16 "const_0_to_15_operand")
15968 (match_operand 17 "const_0_to_15_operand")])))]
15969 "TARGET_AVX512F
15970 && (INTVAL (operands[2]) & 3) == 0
15971 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
15972 && INTVAL (operands[2]) == INTVAL (operands[4]) - 2
15973 && INTVAL (operands[2]) == INTVAL (operands[5]) - 3
15974 && (INTVAL (operands[6]) & 3) == 0
15975 && INTVAL (operands[6]) == INTVAL (operands[7]) - 1
15976 && INTVAL (operands[6]) == INTVAL (operands[8]) - 2
15977 && INTVAL (operands[6]) == INTVAL (operands[9]) - 3
15978 && (INTVAL (operands[10]) & 3) == 0
15979 && INTVAL (operands[10]) == INTVAL (operands[11]) - 1
15980 && INTVAL (operands[10]) == INTVAL (operands[12]) - 2
15981 && INTVAL (operands[10]) == INTVAL (operands[13]) - 3
15982 && (INTVAL (operands[14]) & 3) == 0
15983 && INTVAL (operands[14]) == INTVAL (operands[15]) - 1
15984 && INTVAL (operands[14]) == INTVAL (operands[16]) - 2
15985 && INTVAL (operands[14]) == INTVAL (operands[17]) - 3"
15986 {
15987 int mask;
15988 mask = INTVAL (operands[2]) / 4;
15989 mask |= INTVAL (operands[6]) / 4 << 2;
15990 mask |= INTVAL (operands[10]) / 4 << 4;
15991 mask |= INTVAL (operands[14]) / 4 << 6;
15992 operands[2] = GEN_INT (mask);
15993
15994 return "vshuf<shuffletype>32x4\t{%2, %1, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %1, %2}";
15995 }
15996 [(set_attr "type" "sselog")
15997 (set_attr "length_immediate" "1")
15998 (set_attr "prefix" "evex")
15999 (set_attr "mode" "<sseinsnmode>")])
16000
;; Expand masked vpshufd for V16SI.  The 8-bit immediate describes the
;; same 4x2-bit dword shuffle applied within every 128-bit lane, so the
;; per-lane element indices are materialized with lane offsets 0, 4, 8
;; and 12 for the _1 insn.
16001 (define_expand "avx512f_pshufdv3_mask"
16002 [(match_operand:V16SI 0 "register_operand")
16003 (match_operand:V16SI 1 "nonimmediate_operand")
16004 (match_operand:SI 2 "const_0_to_255_operand")
16005 (match_operand:V16SI 3 "register_operand")
16006 (match_operand:HI 4 "register_operand")]
16007 "TARGET_AVX512F"
16008 {
16009 int mask = INTVAL (operands[2]);
16010 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
16011 GEN_INT ((mask >> 0) & 3),
16012 GEN_INT ((mask >> 2) & 3),
16013 GEN_INT ((mask >> 4) & 3),
16014 GEN_INT ((mask >> 6) & 3),
16015 GEN_INT (((mask >> 0) & 3) + 4),
16016 GEN_INT (((mask >> 2) & 3) + 4),
16017 GEN_INT (((mask >> 4) & 3) + 4),
16018 GEN_INT (((mask >> 6) & 3) + 4),
16019 GEN_INT (((mask >> 0) & 3) + 8),
16020 GEN_INT (((mask >> 2) & 3) + 8),
16021 GEN_INT (((mask >> 4) & 3) + 8),
16022 GEN_INT (((mask >> 6) & 3) + 8),
16023 GEN_INT (((mask >> 0) & 3) + 12),
16024 GEN_INT (((mask >> 2) & 3) + 12),
16025 GEN_INT (((mask >> 4) & 3) + 12),
16026 GEN_INT (((mask >> 6) & 3) + 12),
16027 operands[3], operands[4]));
16028 DONE;
16029 })
16030
;; vpshufd for V16SI.  The insn condition enforces that the four-element
;; pattern of lane 0 repeats (shifted by 4, 8 and 12) in each of the
;; other three 128-bit lanes; the lane-0 indices are packed back into
;; the 8-bit immediate.
16031 (define_insn "avx512f_pshufd_1<mask_name>"
16032 [(set (match_operand:V16SI 0 "register_operand" "=v")
16033 (vec_select:V16SI
16034 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
16035 (parallel [(match_operand 2 "const_0_to_3_operand")
16036 (match_operand 3 "const_0_to_3_operand")
16037 (match_operand 4 "const_0_to_3_operand")
16038 (match_operand 5 "const_0_to_3_operand")
16039 (match_operand 6 "const_4_to_7_operand")
16040 (match_operand 7 "const_4_to_7_operand")
16041 (match_operand 8 "const_4_to_7_operand")
16042 (match_operand 9 "const_4_to_7_operand")
16043 (match_operand 10 "const_8_to_11_operand")
16044 (match_operand 11 "const_8_to_11_operand")
16045 (match_operand 12 "const_8_to_11_operand")
16046 (match_operand 13 "const_8_to_11_operand")
16047 (match_operand 14 "const_12_to_15_operand")
16048 (match_operand 15 "const_12_to_15_operand")
16049 (match_operand 16 "const_12_to_15_operand")
16050 (match_operand 17 "const_12_to_15_operand")])))]
16051 "TARGET_AVX512F
16052 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
16053 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
16054 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
16055 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
16056 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
16057 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
16058 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
16059 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
16060 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
16061 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
16062 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
16063 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
16064 {
16065 int mask = 0;
16066 mask |= INTVAL (operands[2]) << 0;
16067 mask |= INTVAL (operands[3]) << 2;
16068 mask |= INTVAL (operands[4]) << 4;
16069 mask |= INTVAL (operands[5]) << 6;
16070 operands[2] = GEN_INT (mask);
16071
16072 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
16073 }
16074 [(set_attr "type" "sselog1")
16075 (set_attr "prefix" "evex")
16076 (set_attr "length_immediate" "1")
16077 (set_attr "mode" "XI")])
16078
;; Expand masked vpshufd for V8SI (AVX512VL): decompose the 8-bit
;; immediate into per-lane indices (lane 1 offset by 4) for
;; avx2_pshufd_1's masked variant.
16079 (define_expand "avx512vl_pshufdv3_mask"
16080 [(match_operand:V8SI 0 "register_operand")
16081 (match_operand:V8SI 1 "nonimmediate_operand")
16082 (match_operand:SI 2 "const_0_to_255_operand")
16083 (match_operand:V8SI 3 "register_operand")
16084 (match_operand:QI 4 "register_operand")]
16085 "TARGET_AVX512VL"
16086 {
16087 int mask = INTVAL (operands[2]);
16088 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
16089 GEN_INT ((mask >> 0) & 3),
16090 GEN_INT ((mask >> 2) & 3),
16091 GEN_INT ((mask >> 4) & 3),
16092 GEN_INT ((mask >> 6) & 3),
16093 GEN_INT (((mask >> 0) & 3) + 4),
16094 GEN_INT (((mask >> 2) & 3) + 4),
16095 GEN_INT (((mask >> 4) & 3) + 4),
16096 GEN_INT (((mask >> 6) & 3) + 4),
16097 operands[3], operands[4]));
16098 DONE;
16099 })
16100
;; Unmasked AVX2 vpshufd for V8SI: same immediate decomposition as the
;; masked expander above, targeting the plain avx2_pshufd_1 insn.
16101 (define_expand "avx2_pshufdv3"
16102 [(match_operand:V8SI 0 "register_operand")
16103 (match_operand:V8SI 1 "nonimmediate_operand")
16104 (match_operand:SI 2 "const_0_to_255_operand")]
16105 "TARGET_AVX2"
16106 {
16107 int mask = INTVAL (operands[2]);
16108 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
16109 GEN_INT ((mask >> 0) & 3),
16110 GEN_INT ((mask >> 2) & 3),
16111 GEN_INT ((mask >> 4) & 3),
16112 GEN_INT ((mask >> 6) & 3),
16113 GEN_INT (((mask >> 0) & 3) + 4),
16114 GEN_INT (((mask >> 2) & 3) + 4),
16115 GEN_INT (((mask >> 4) & 3) + 4),
16116 GEN_INT (((mask >> 6) & 3) + 4)));
16117 DONE;
16118 })
16119
;; vpshufd for V8SI.  The condition requires the lane-1 indices
;; (operands 6..9) to equal the lane-0 indices (operands 2..5) plus 4,
;; i.e. the same shuffle in both 128-bit lanes; the lane-0 indices are
;; packed into the 8-bit immediate.
16120 (define_insn "avx2_pshufd_1<mask_name>"
16121 [(set (match_operand:V8SI 0 "register_operand" "=v")
16122 (vec_select:V8SI
16123 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
16124 (parallel [(match_operand 2 "const_0_to_3_operand")
16125 (match_operand 3 "const_0_to_3_operand")
16126 (match_operand 4 "const_0_to_3_operand")
16127 (match_operand 5 "const_0_to_3_operand")
16128 (match_operand 6 "const_4_to_7_operand")
16129 (match_operand 7 "const_4_to_7_operand")
16130 (match_operand 8 "const_4_to_7_operand")
16131 (match_operand 9 "const_4_to_7_operand")])))]
16132 "TARGET_AVX2
16133 && <mask_avx512vl_condition>
16134 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
16135 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
16136 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
16137 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
16138 {
16139 int mask = 0;
16140 mask |= INTVAL (operands[2]) << 0;
16141 mask |= INTVAL (operands[3]) << 2;
16142 mask |= INTVAL (operands[4]) << 4;
16143 mask |= INTVAL (operands[5]) << 6;
16144 operands[2] = GEN_INT (mask);
16145
16146 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
16147 }
16148 [(set_attr "type" "sselog1")
16149 (set_attr "prefix" "maybe_evex")
16150 (set_attr "length_immediate" "1")
16151 (set_attr "mode" "OI")])
16152
;; Expand masked pshufd for V4SI (AVX512VL): split the 8-bit immediate
;; into four 2-bit element indices for sse2_pshufd_1's masked variant.
16153 (define_expand "avx512vl_pshufd_mask"
16154 [(match_operand:V4SI 0 "register_operand")
16155 (match_operand:V4SI 1 "nonimmediate_operand")
16156 (match_operand:SI 2 "const_0_to_255_operand")
16157 (match_operand:V4SI 3 "register_operand")
16158 (match_operand:QI 4 "register_operand")]
16159 "TARGET_AVX512VL"
16160 {
16161 int mask = INTVAL (operands[2]);
16162 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
16163 GEN_INT ((mask >> 0) & 3),
16164 GEN_INT ((mask >> 2) & 3),
16165 GEN_INT ((mask >> 4) & 3),
16166 GEN_INT ((mask >> 6) & 3),
16167 operands[3], operands[4]));
16168 DONE;
16169 })
16170
;; Unmasked SSE2 pshufd: same immediate decomposition, targeting the
;; plain sse2_pshufd_1 insn.
16171 (define_expand "sse2_pshufd"
16172 [(match_operand:V4SI 0 "register_operand")
16173 (match_operand:V4SI 1 "vector_operand")
16174 (match_operand:SI 2 "const_int_operand")]
16175 "TARGET_SSE2"
16176 {
16177 int mask = INTVAL (operands[2]);
16178 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
16179 GEN_INT ((mask >> 0) & 3),
16180 GEN_INT ((mask >> 2) & 3),
16181 GEN_INT ((mask >> 4) & 3),
16182 GEN_INT ((mask >> 6) & 3)));
16183 DONE;
16184 })
16185
;; pshufd for V4SI: an arbitrary vec_select of four 2-bit element
;; indices is re-packed into the instruction's 8-bit immediate.  The %v
;; prefix selects the VEX-encoded form when AVX is in use.
16186 (define_insn "sse2_pshufd_1<mask_name>"
16187 [(set (match_operand:V4SI 0 "register_operand" "=v")
16188 (vec_select:V4SI
16189 (match_operand:V4SI 1 "vector_operand" "vBm")
16190 (parallel [(match_operand 2 "const_0_to_3_operand")
16191 (match_operand 3 "const_0_to_3_operand")
16192 (match_operand 4 "const_0_to_3_operand")
16193 (match_operand 5 "const_0_to_3_operand")])))]
16194 "TARGET_SSE2 && <mask_avx512vl_condition>"
16195 {
16196 int mask = 0;
16197 mask |= INTVAL (operands[2]) << 0;
16198 mask |= INTVAL (operands[3]) << 2;
16199 mask |= INTVAL (operands[4]) << 4;
16200 mask |= INTVAL (operands[5]) << 6;
16201 operands[2] = GEN_INT (mask);
16202
16203 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16204 }
16205 [(set_attr "type" "sselog1")
16206 (set_attr "prefix_data16" "1")
16207 (set_attr "prefix" "<mask_prefix2>")
16208 (set_attr "length_immediate" "1")
16209 (set_attr "mode" "TI")])
16210
;; vpshuflw for V32HI (AVX512BW), kept as an unspec with the raw 8-bit
;; immediate rather than an explicit vec_select, optionally masked.
16211 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
16212 [(set (match_operand:V32HI 0 "register_operand" "=v")
16213 (unspec:V32HI
16214 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
16215 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16216 UNSPEC_PSHUFLW))]
16217 "TARGET_AVX512BW"
16218 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16219 [(set_attr "type" "sselog")
16220 (set_attr "prefix" "evex")
16221 (set_attr "mode" "XI")])
16222
;; Expand masked vpshuflw for V16HI: split the immediate into the
;; low-quadword word indices of each 128-bit lane (lane 1 offset by 8)
;; for avx2_pshuflw_1's masked variant.
16223 (define_expand "avx512vl_pshuflwv3_mask"
16224 [(match_operand:V16HI 0 "register_operand")
16225 (match_operand:V16HI 1 "nonimmediate_operand")
16226 (match_operand:SI 2 "const_0_to_255_operand")
16227 (match_operand:V16HI 3 "register_operand")
16228 (match_operand:HI 4 "register_operand")]
16229 "TARGET_AVX512VL && TARGET_AVX512BW"
16230 {
16231 int mask = INTVAL (operands[2]);
16232 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
16233 GEN_INT ((mask >> 0) & 3),
16234 GEN_INT ((mask >> 2) & 3),
16235 GEN_INT ((mask >> 4) & 3),
16236 GEN_INT ((mask >> 6) & 3),
16237 GEN_INT (((mask >> 0) & 3) + 8),
16238 GEN_INT (((mask >> 2) & 3) + 8),
16239 GEN_INT (((mask >> 4) & 3) + 8),
16240 GEN_INT (((mask >> 6) & 3) + 8),
16241 operands[3], operands[4]));
16242 DONE;
16243 })
16244
;; Unmasked AVX2 vpshuflw for V16HI: the immediate shuffles the four
;; low words of each 128-bit lane; lane-1 indices are lane-0 indices
;; plus 8.
16245 (define_expand "avx2_pshuflwv3"
16246 [(match_operand:V16HI 0 "register_operand")
16247 (match_operand:V16HI 1 "nonimmediate_operand")
16248 (match_operand:SI 2 "const_0_to_255_operand")]
16249 "TARGET_AVX2"
16250 {
16251 int mask = INTVAL (operands[2]);
16252 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
16253 GEN_INT ((mask >> 0) & 3),
16254 GEN_INT ((mask >> 2) & 3),
16255 GEN_INT ((mask >> 4) & 3),
16256 GEN_INT ((mask >> 6) & 3),
16257 GEN_INT (((mask >> 0) & 3) + 8),
16258 GEN_INT (((mask >> 2) & 3) + 8),
16259 GEN_INT (((mask >> 4) & 3) + 8),
16260 GEN_INT (((mask >> 6) & 3) + 8)));
16261 DONE;
16262 })
16263
;; vpshuflw for V16HI.  The high four words of each lane (elements 4..7
;; and 12..15) are fixed, matching vpshuflw's behavior of leaving the
;; upper quadwords untouched; the condition requires the lane-1 indices
;; to equal the lane-0 indices plus 8, and the lane-0 indices are
;; packed into the 8-bit immediate.
16264 (define_insn "avx2_pshuflw_1<mask_name>"
16265 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
16266 (vec_select:V16HI
16267 (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
16268 (parallel [(match_operand 2 "const_0_to_3_operand")
16269 (match_operand 3 "const_0_to_3_operand")
16270 (match_operand 4 "const_0_to_3_operand")
16271 (match_operand 5 "const_0_to_3_operand")
16272 (const_int 4)
16273 (const_int 5)
16274 (const_int 6)
16275 (const_int 7)
16276 (match_operand 6 "const_8_to_11_operand")
16277 (match_operand 7 "const_8_to_11_operand")
16278 (match_operand 8 "const_8_to_11_operand")
16279 (match_operand 9 "const_8_to_11_operand")
16280 (const_int 12)
16281 (const_int 13)
16282 (const_int 14)
16283 (const_int 15)])))]
16284 "TARGET_AVX2
16285 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16286 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
16287 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
16288 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
16289 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
16290 {
16291 int mask = 0;
16292 mask |= INTVAL (operands[2]) << 0;
16293 mask |= INTVAL (operands[3]) << 2;
16294 mask |= INTVAL (operands[4]) << 4;
16295 mask |= INTVAL (operands[5]) << 6;
16296 operands[2] = GEN_INT (mask);
16297
16298 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
16299 }
16300 [(set_attr "type" "sselog")
16301 (set_attr "prefix" "maybe_evex")
16302 (set_attr "length_immediate" "1")
16303 (set_attr "mode" "OI")])
16304
;; Masked 128-bit pshuflw expander: decode the imm8 into four 2-bit word
;; selectors and forward the merge source (operand 3) and mask register
;; (operand 4) to sse2_pshuflw_1_mask.
16305 (define_expand "avx512vl_pshuflw_mask"
16306 [(match_operand:V8HI 0 "register_operand")
16307 (match_operand:V8HI 1 "nonimmediate_operand")
16308 (match_operand:SI 2 "const_0_to_255_operand")
16309 (match_operand:V8HI 3 "register_operand")
16310 (match_operand:QI 4 "register_operand")]
16311 "TARGET_AVX512VL && TARGET_AVX512BW"
16312 {
16313 int mask = INTVAL (operands[2]);
16314 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
16315 GEN_INT ((mask >> 0) & 3),
16316 GEN_INT ((mask >> 2) & 3),
16317 GEN_INT ((mask >> 4) & 3),
16318 GEN_INT ((mask >> 6) & 3),
16319 operands[3], operands[4]));
16320 DONE;
16321 })
16322
;; Expand 128-bit pshuflw: decode the imm8 into the four 2-bit selectors
;; for words 0-3 used by the sse2_pshuflw_1 vec_select pattern.
16323 (define_expand "sse2_pshuflw"
16324 [(match_operand:V8HI 0 "register_operand")
16325 (match_operand:V8HI 1 "vector_operand")
16326 (match_operand:SI 2 "const_int_operand")]
16327 "TARGET_SSE2"
16328 {
16329 int mask = INTVAL (operands[2]);
16330 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
16331 GEN_INT ((mask >> 0) & 3),
16332 GEN_INT ((mask >> 2) & 3),
16333 GEN_INT ((mask >> 4) & 3),
16334 GEN_INT ((mask >> 6) & 3)));
16335 DONE;
16336 })
16337
;; 128-bit pshuflw as a vec_select: words 0-3 are shuffled by operands 2-5,
;; words 4-7 stay in place.  The output routine packs the selectors back
;; into the instruction's imm8.
16338 (define_insn "sse2_pshuflw_1<mask_name>"
16339 [(set (match_operand:V8HI 0 "register_operand" "=Yw")
16340 (vec_select:V8HI
16341 (match_operand:V8HI 1 "vector_operand" "YwBm")
16342 (parallel [(match_operand 2 "const_0_to_3_operand")
16343 (match_operand 3 "const_0_to_3_operand")
16344 (match_operand 4 "const_0_to_3_operand")
16345 (match_operand 5 "const_0_to_3_operand")
16346 (const_int 4)
16347 (const_int 5)
16348 (const_int 6)
16349 (const_int 7)])))]
16350 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16351 {
;; Re-encode the four 2-bit selectors into the imm8 operand.
16352 int mask = 0;
16353 mask |= INTVAL (operands[2]) << 0;
16354 mask |= INTVAL (operands[3]) << 2;
16355 mask |= INTVAL (operands[4]) << 4;
16356 mask |= INTVAL (operands[5]) << 6;
16357 operands[2] = GEN_INT (mask);
16358
16359 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16360 }
16361 [(set_attr "type" "sselog")
16362 (set_attr "prefix_data16" "0")
16363 (set_attr "prefix_rep" "1")
16364 (set_attr "prefix" "maybe_vex")
16365 (set_attr "length_immediate" "1")
16366 (set_attr "mode" "TI")])
16367
;; Expand a 256-bit vpshufhw: selectors are biased by +4 (low lane) and +12
;; (high lane) because the HIGH four words of each 128-bit lane are shuffled
;; while words 0-3 and 8-11 stay in place.
16368 (define_expand "avx2_pshufhwv3"
16369 [(match_operand:V16HI 0 "register_operand")
16370 (match_operand:V16HI 1 "nonimmediate_operand")
16371 (match_operand:SI 2 "const_0_to_255_operand")]
16372 "TARGET_AVX2"
16373 {
16374 int mask = INTVAL (operands[2]);
16375 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
16376 GEN_INT (((mask >> 0) & 3) + 4),
16377 GEN_INT (((mask >> 2) & 3) + 4),
16378 GEN_INT (((mask >> 4) & 3) + 4),
16379 GEN_INT (((mask >> 6) & 3) + 4),
16380 GEN_INT (((mask >> 0) & 3) + 12),
16381 GEN_INT (((mask >> 2) & 3) + 12),
16382 GEN_INT (((mask >> 4) & 3) + 12),
16383 GEN_INT (((mask >> 6) & 3) + 12)));
16384 DONE;
16385 })
16386
;; 512-bit vpshufhw.  Modeled as an unspec carrying the raw imm8 rather than
;; an explicit vec_select (a V32HI parallel would be unwieldy).
16387 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
16388 [(set (match_operand:V32HI 0 "register_operand" "=v")
16389 (unspec:V32HI
16390 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
16391 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16392 UNSPEC_PSHUFHW))]
16393 "TARGET_AVX512BW"
16394 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16395 [(set_attr "type" "sselog")
16396 (set_attr "prefix" "evex")
16397 (set_attr "mode" "XI")])
16398
;; Masked 256-bit vpshufhw expander: decode the imm8 (selectors biased by
;; +4 / +12 for the two lanes) and forward the merge source (operand 3)
;; and mask register (operand 4) to avx2_pshufhw_1_mask.
16399 (define_expand "avx512vl_pshufhwv3_mask"
16400 [(match_operand:V16HI 0 "register_operand")
16401 (match_operand:V16HI 1 "nonimmediate_operand")
16402 (match_operand:SI 2 "const_0_to_255_operand")
16403 (match_operand:V16HI 3 "register_operand")
16404 (match_operand:HI 4 "register_operand")]
16405 "TARGET_AVX512VL && TARGET_AVX512BW"
16406 {
16407 int mask = INTVAL (operands[2]);
16408 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
16409 GEN_INT (((mask >> 0) & 3) + 4),
16410 GEN_INT (((mask >> 2) & 3) + 4),
16411 GEN_INT (((mask >> 4) & 3) + 4),
16412 GEN_INT (((mask >> 6) & 3) + 4),
16413 GEN_INT (((mask >> 0) & 3) + 12),
16414 GEN_INT (((mask >> 2) & 3) + 12),
16415 GEN_INT (((mask >> 4) & 3) + 12),
16416 GEN_INT (((mask >> 6) & 3) + 12),
16417 operands[3], operands[4]));
16418 DONE;
16419 })
16420
;; 256-bit vpshufhw as a vec_select: words 4-7 of each 128-bit lane are
;; shuffled, words 0-3 and 8-11 stay in place.  Both lanes must use the same
;; selectors (operands 6-9 equal operands 2-5 plus 8); the output routine
;; subtracts the +4 bias before packing the selectors into the imm8.
16421 (define_insn "avx2_pshufhw_1<mask_name>"
16422 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
16423 (vec_select:V16HI
16424 (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
16425 (parallel [(const_int 0)
16426 (const_int 1)
16427 (const_int 2)
16428 (const_int 3)
16429 (match_operand 2 "const_4_to_7_operand")
16430 (match_operand 3 "const_4_to_7_operand")
16431 (match_operand 4 "const_4_to_7_operand")
16432 (match_operand 5 "const_4_to_7_operand")
16433 (const_int 8)
16434 (const_int 9)
16435 (const_int 10)
16436 (const_int 11)
16437 (match_operand 6 "const_12_to_15_operand")
16438 (match_operand 7 "const_12_to_15_operand")
16439 (match_operand 8 "const_12_to_15_operand")
16440 (match_operand 9 "const_12_to_15_operand")])))]
16441 "TARGET_AVX2
16442 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16443 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
16444 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
16445 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
16446 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
16447 {
;; Remove the +4 bias and re-encode the selectors into the imm8 operand.
16448 int mask = 0;
16449 mask |= (INTVAL (operands[2]) - 4) << 0;
16450 mask |= (INTVAL (operands[3]) - 4) << 2;
16451 mask |= (INTVAL (operands[4]) - 4) << 4;
16452 mask |= (INTVAL (operands[5]) - 4) << 6;
16453 operands[2] = GEN_INT (mask);
16454
16455 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
16456 }
16457 [(set_attr "type" "sselog")
16458 (set_attr "prefix" "maybe_evex")
16459 (set_attr "length_immediate" "1")
16460 (set_attr "mode" "OI")])
16461
;; Masked 128-bit pshufhw expander: decode the imm8 (selectors biased by +4
;; since the high words are shuffled) and forward the merge source and mask
;; register to sse2_pshufhw_1_mask.
16462 (define_expand "avx512vl_pshufhw_mask"
16463 [(match_operand:V8HI 0 "register_operand")
16464 (match_operand:V8HI 1 "nonimmediate_operand")
16465 (match_operand:SI 2 "const_0_to_255_operand")
16466 (match_operand:V8HI 3 "register_operand")
16467 (match_operand:QI 4 "register_operand")]
16468 "TARGET_AVX512VL && TARGET_AVX512BW"
16469 {
16470 int mask = INTVAL (operands[2]);
16471 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
16472 GEN_INT (((mask >> 0) & 3) + 4),
16473 GEN_INT (((mask >> 2) & 3) + 4),
16474 GEN_INT (((mask >> 4) & 3) + 4),
16475 GEN_INT (((mask >> 6) & 3) + 4),
16476 operands[3], operands[4]));
16477 DONE;
16478 })
16479
;; Expand 128-bit pshufhw: decode the imm8 into four selectors biased by +4
;; (the high words 4-7 are the ones shuffled).
16480 (define_expand "sse2_pshufhw"
16481 [(match_operand:V8HI 0 "register_operand")
16482 (match_operand:V8HI 1 "vector_operand")
16483 (match_operand:SI 2 "const_int_operand")]
16484 "TARGET_SSE2"
16485 {
16486 int mask = INTVAL (operands[2]);
16487 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
16488 GEN_INT (((mask >> 0) & 3) + 4),
16489 GEN_INT (((mask >> 2) & 3) + 4),
16490 GEN_INT (((mask >> 4) & 3) + 4),
16491 GEN_INT (((mask >> 6) & 3) + 4)));
16492 DONE;
16493 })
16494
;; 128-bit pshufhw as a vec_select: words 4-7 are shuffled by operands 2-5,
;; words 0-3 stay in place.  The output routine subtracts the +4 bias before
;; packing the selectors back into the imm8.
16495 (define_insn "sse2_pshufhw_1<mask_name>"
16496 [(set (match_operand:V8HI 0 "register_operand" "=Yw")
16497 (vec_select:V8HI
16498 (match_operand:V8HI 1 "vector_operand" "YwBm")
16499 (parallel [(const_int 0)
16500 (const_int 1)
16501 (const_int 2)
16502 (const_int 3)
16503 (match_operand 2 "const_4_to_7_operand")
16504 (match_operand 3 "const_4_to_7_operand")
16505 (match_operand 4 "const_4_to_7_operand")
16506 (match_operand 5 "const_4_to_7_operand")])))]
16507 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16508 {
;; Remove the +4 bias and re-encode the selectors into the imm8 operand.
16509 int mask = 0;
16510 mask |= (INTVAL (operands[2]) - 4) << 0;
16511 mask |= (INTVAL (operands[3]) - 4) << 2;
16512 mask |= (INTVAL (operands[4]) - 4) << 4;
16513 mask |= (INTVAL (operands[5]) - 4) << 6;
16514 operands[2] = GEN_INT (mask);
16515
16516 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16517 }
16518 [(set_attr "type" "sselog")
16519 (set_attr "prefix_rep" "1")
16520 (set_attr "prefix_data16" "0")
16521 (set_attr "prefix" "maybe_vex")
16522 (set_attr "length_immediate" "1")
16523 (set_attr "mode" "TI")])
16524
;; Load a scalar SI into element 0 of a V4SI, zeroing the other elements
;; (the vec_merge second arm is materialized as the V4SI zero constant).
16525 (define_expand "sse2_loadd"
16526 [(set (match_operand:V4SI 0 "register_operand")
16527 (vec_merge:V4SI
16528 (vec_duplicate:V4SI
16529 (match_operand:SI 1 "nonimmediate_operand"))
16530 (match_dup 2)
16531 (const_int 1)))]
16532 "TARGET_SSE"
16533 "operands[2] = CONST0_RTX (V4SImode);")
16534
;; Insert scalar SI into element 0 of a V4SI, merging with operand 1
;; (which may be the zero vector).  Alternatives: movd from mem/GPR when
;; merging with zero, movss for SSE merge, vmovss for the AVX 3-operand
;; form.  The GPR alternative is gated on TARGET_INTER_UNIT_MOVES_TO_VEC.
16535 (define_insn "sse2_loadld"
16536 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
16537 (vec_merge:V4SI
16538 (vec_duplicate:V4SI
16539 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
16540 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
16541 (const_int 1)))]
16542 "TARGET_SSE"
16543 "@
16544 %vmovd\t{%2, %0|%0, %2}
16545 %vmovd\t{%2, %0|%0, %2}
16546 movss\t{%2, %0|%0, %2}
16547 movss\t{%2, %0|%0, %2}
16548 vmovss\t{%2, %1, %0|%0, %1, %2}"
16549 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
16550 (set_attr "type" "ssemov")
16551 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
16552 (set_attr "mode" "TI,TI,V4SF,SF,SF")
16553 (set (attr "preferred_for_speed")
16554 (cond [(eq_attr "alternative" "1")
16555 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16556 ]
16557 (symbol_ref "true")))])
16558
16559 ;; QI and HI modes handled by pextr patterns.
;; V16QI requires SSE4.1 (pextrb); V8HI works with plain SSE2 (pextrw).
16560 (define_mode_iterator PEXTR_MODE12
16561 [(V16QI "TARGET_SSE4_1") V8HI])
16562
;; Extract a byte/word element to a GPR (%k0 = 32-bit low part) or, with
;; SSE4, directly to memory.  prefix_extra is suppressed for the pextrw
;; register form (V8HImode, alternative 0), which has no 0F 3A escape.
16563 (define_insn "*vec_extract<mode>"
16564 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
16565 (vec_select:<ssescalarmode>
16566 (match_operand:PEXTR_MODE12 1 "register_operand" "YW,YW")
16567 (parallel
16568 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
16569 "TARGET_SSE2"
16570 "@
16571 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
16572 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16573 [(set_attr "isa" "*,sse4")
16574 (set_attr "type" "sselog1")
16575 (set_attr "prefix_data16" "1")
16576 (set (attr "prefix_extra")
16577 (if_then_else
16578 (and (eq_attr "alternative" "0,2")
16579 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
16580 (const_string "*")
16581 (const_string "1")))
16582 (set_attr "length_immediate" "1")
16583 (set_attr "prefix" "maybe_vex,maybe_vex")
16584 (set_attr "mode" "TI")])
16585
;; Byte/word element extract zero-extended into a 32/64-bit GPR; pextr
;; writing %k0 already zero-extends, so the zext is free.
16586 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
16587 [(set (match_operand:SWI48 0 "register_operand" "=r")
16588 (zero_extend:SWI48
16589 (vec_select:<PEXTR_MODE12:ssescalarmode>
16590 (match_operand:PEXTR_MODE12 1 "register_operand" "YW")
16591 (parallel
16592 [(match_operand:SI 2
16593 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
16594 "TARGET_SSE2"
16595 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
16596 [(set_attr "type" "sselog1")
16597 (set_attr "prefix_data16" "1")
16598 (set (attr "prefix_extra")
16599 (if_then_else
16600 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
16601 (const_string "*")
16602 (const_string "1")))
16603 (set_attr "length_immediate" "1")
16604 (set_attr "prefix" "maybe_vex")
16605 (set_attr "mode" "TI")])
16606
;; Byte element extract zero-extended to HImode via pextrb (SSE4.1).
16607 (define_insn "*vec_extractv16qi_zext"
16608 [(set (match_operand:HI 0 "register_operand" "=r")
16609 (zero_extend:HI
16610 (vec_select:QI
16611 (match_operand:V16QI 1 "register_operand" "YW")
16612 (parallel
16613 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
16614 "TARGET_SSE4_1"
16615 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
16616 [(set_attr "type" "sselog1")
16617 (set_attr "prefix_data16" "1")
16618 (set_attr "prefix_extra" "1")
16619 (set_attr "length_immediate" "1")
16620 (set_attr "prefix" "maybe_vex")
16621 (set_attr "mode" "TI")])
16622
;; Byte/word element extract from a vector in memory; always split
;; (see the define_split below) into a plain scalar load at an offset.
16623 (define_insn "*vec_extract<mode>_mem"
16624 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
16625 (vec_select:<ssescalarmode>
16626 (match_operand:VI12_128 1 "memory_operand" "o")
16627 (parallel
16628 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
16629 "TARGET_SSE"
16630 "#")
16631
;; Extract element 0 of an SI/DI vector; always split to a lowpart move.
;; The vector-reg-to-GPR alternative is gated on inter-unit move tuning.
16632 (define_insn "*vec_extract<ssevecmodelower>_0"
16633 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
16634 (vec_select:SWI48
16635 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
16636 (parallel [(const_int 0)])))]
16637 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
16638 "#"
16639 [(set_attr "isa" "*,sse2,*,*")
16640 (set (attr "preferred_for_speed")
16641 (cond [(eq_attr "alternative" "1")
16642 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
16643 ]
16644 (symbol_ref "true")))])
16645
;; 32-bit-only variant: extract element 0 of V2DI (no DImode GPR exists,
;; so the GPR alternative needs SSE4 pextrd via the split below).
16646 (define_insn "*vec_extractv2di_0_sse"
16647 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
16648 (vec_select:DI
16649 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
16650 (parallel [(const_int 0)])))]
16651 "TARGET_SSE && !TARGET_64BIT
16652 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
16653 "#"
16654 [(set_attr "isa" "sse4,*,*")
16655 (set (attr "preferred_for_speed")
16656 (cond [(eq_attr "alternative" "0")
16657 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
16658 ]
16659 (symbol_ref "true")))])
16660
;; 32-bit split for the insn above: the DI result goes into a GPR pair;
;; the low half is a lowpart SImode move, the high half is extracted as
;; element 1 of the V4SI view of the same register.
16661 (define_split
16662 [(set (match_operand:DI 0 "general_reg_operand")
16663 (vec_select:DI
16664 (match_operand:V2DI 1 "register_operand")
16665 (parallel [(const_int 0)])))]
16666 "TARGET_SSE4_1 && !TARGET_64BIT
16667 && reload_completed"
16668 [(set (match_dup 2) (match_dup 4))
16669 (set (match_dup 3)
16670 (vec_select:SI
16671 (match_dup 5)
16672 (parallel [(const_int 1)])))]
16673 {
16674 operands[4] = gen_lowpart (SImode, operands[1]);
16675 operands[5] = gen_lowpart (V4SImode, operands[1]);
16676 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
16677 })
16678
;; Element-0 extraction is just a lowpart move of the vector register.
16679 (define_split
16680 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
16681 (vec_select:SWI48x
16682 (match_operand:<ssevecmode> 1 "register_operand")
16683 (parallel [(const_int 0)])))]
16684 "TARGET_SSE && reload_completed"
16685 [(set (match_dup 0) (match_dup 1))]
16686 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
16687
;; Element 0 of V4SI zero-extended to DI; split later into a zero-extending
;; SImode move.  GPR alternative needs 64-bit; vector-to-vector forms stay
;; in SSE/AVX512 regs.
16688 (define_insn "*vec_extractv4si_0_zext_sse4"
16689 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
16690 (zero_extend:DI
16691 (vec_select:SI
16692 (match_operand:V4SI 1 "register_operand" "v,x,v")
16693 (parallel [(const_int 0)]))))]
16694 "TARGET_SSE4_1"
16695 "#"
16696 [(set_attr "isa" "x64,*,avx512f")
16697 (set (attr "preferred_for_speed")
16698 (cond [(eq_attr "alternative" "0")
16699 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
16700 ]
16701 (symbol_ref "true")))])
16702
;; Pre-SSE4 variant of the pattern above, GPR destination only; relies on
;; fast vector-to-GPR moves being enabled.
16703 (define_insn "*vec_extractv4si_0_zext"
16704 [(set (match_operand:DI 0 "register_operand" "=r")
16705 (zero_extend:DI
16706 (vec_select:SI
16707 (match_operand:V4SI 1 "register_operand" "x")
16708 (parallel [(const_int 0)]))))]
16709 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
16710 "#")
16711
;; Split the two zero-extending element-0 patterns above into
;; zero_extend of the SImode lowpart of the vector register.
16712 (define_split
16713 [(set (match_operand:DI 0 "register_operand")
16714 (zero_extend:DI
16715 (vec_select:SI
16716 (match_operand:V4SI 1 "register_operand")
16717 (parallel [(const_int 0)]))))]
16718 "TARGET_SSE2 && reload_completed"
16719 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
16720 "operands[1] = gen_lowpart (SImode, operands[1]);")
16721
;; Extract an arbitrary V4SI element.  Alternatives 0/1 use pextrd
;; (SSE4.1 / AVX512DQ for EVEX regs); alternatives 2-4 keep the value in
;; a vector register by shifting the wanted element down with psrldq
;; (element index * 4 bytes).
16722 (define_insn "*vec_extractv4si"
16723 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,Yw")
16724 (vec_select:SI
16725 (match_operand:V4SI 1 "register_operand" " x, v, 0, 0,Yw")
16726 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
16727 "TARGET_SSE4_1"
16728 {
16729 switch (which_alternative)
16730 {
16731 case 0:
16732 case 1:
16733 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
16734
16735 case 2:
16736 case 3:
;; Convert the element index into a byte shift count.
16737 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
16738 return "psrldq\t{%2, %0|%0, %2}";
16739
16740 case 4:
16741 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
16742 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
16743
16744 default:
16745 gcc_unreachable ();
16746 }
16747 }
16748 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx")
16749 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1")
16750 (set (attr "prefix_extra")
16751 (if_then_else (eq_attr "alternative" "0,1")
16752 (const_string "1")
16753 (const_string "*")))
16754 (set_attr "length_immediate" "1")
16755 (set_attr "prefix" "maybe_vex,evex,orig,orig,maybe_vex")
16756 (set_attr "mode" "TI")])
16757
;; Any V4SI element zero-extended to DI: pextrd to %k0 already clears the
;; upper 32 bits.  EVEX-only registers need AVX512DQ.
16758 (define_insn "*vec_extractv4si_zext"
16759 [(set (match_operand:DI 0 "register_operand" "=r,r")
16760 (zero_extend:DI
16761 (vec_select:SI
16762 (match_operand:V4SI 1 "register_operand" "x,v")
16763 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
16764 "TARGET_64BIT && TARGET_SSE4_1"
16765 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
16766 [(set_attr "isa" "*,avx512dq")
16767 (set_attr "type" "sselog1")
16768 (set_attr "prefix_extra" "1")
16769 (set_attr "length_immediate" "1")
16770 (set_attr "prefix" "maybe_vex")
16771 (set_attr "mode" "TI")])
16772
;; V4SI element extract from memory; split into an offset scalar load.
16773 (define_insn "*vec_extractv4si_mem"
16774 [(set (match_operand:SI 0 "register_operand" "=x,r")
16775 (vec_select:SI
16776 (match_operand:V4SI 1 "memory_operand" "o,o")
16777 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
16778 "TARGET_SSE"
16779 "#")
16780
;; Zero-extended V4SI element extract from memory: after reload, becomes a
;; zero-extending SImode load from the element's byte offset (index * 4).
16781 (define_insn_and_split "*vec_extractv4si_zext_mem"
16782 [(set (match_operand:DI 0 "register_operand" "=x,r")
16783 (zero_extend:DI
16784 (vec_select:SI
16785 (match_operand:V4SI 1 "memory_operand" "o,o")
16786 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
16787 "TARGET_64BIT && TARGET_SSE"
16788 "#"
16789 "&& reload_completed"
16790 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
16791 {
16792 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
16793 })
16794
;; Extract element 1 (the high quadword) of a V2DI.  Choices: pextrq to a
;; GPR (64-bit SSE4 / AVX512DQ), movhps to memory, psrldq/vpsrldq by 8 to
;; keep it in a vector register, movhlps, or a split for memory/GPR forms
;; (alternatives 7-8 emit "#").
16795 (define_insn "*vec_extractv2di_1"
16796 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
16797 (vec_select:DI
16798 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
16799 (parallel [(const_int 1)])))]
16800 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
16801 "@
16802 %vpextrq\t{$1, %1, %0|%0, %1, 1}
16803 vpextrq\t{$1, %1, %0|%0, %1, 1}
16804 %vmovhps\t{%1, %0|%0, %1}
16805 psrldq\t{$8, %0|%0, 8}
16806 vpsrldq\t{$8, %1, %0|%0, %1, 8}
16807 vpsrldq\t{$8, %1, %0|%0, %1, 8}
16808 movhlps\t{%1, %0|%0, %1}
16809 #
16810 #"
16811 [(set (attr "isa")
16812 (cond [(eq_attr "alternative" "0")
16813 (const_string "x64_sse4")
16814 (eq_attr "alternative" "1")
16815 (const_string "x64_avx512dq")
16816 (eq_attr "alternative" "3")
16817 (const_string "sse2_noavx")
16818 (eq_attr "alternative" "4")
16819 (const_string "avx")
16820 (eq_attr "alternative" "5")
16821 (const_string "avx512bw")
16822 (eq_attr "alternative" "6")
16823 (const_string "noavx")
16824 (eq_attr "alternative" "8")
16825 (const_string "x64")
16826 ]
16827 (const_string "*")))
16828 (set (attr "type")
16829 (cond [(eq_attr "alternative" "2,6,7")
16830 (const_string "ssemov")
16831 (eq_attr "alternative" "3,4,5")
16832 (const_string "sseishft1")
16833 (eq_attr "alternative" "8")
16834 (const_string "imov")
16835 ]
16836 (const_string "sselog1")))
16837 (set (attr "length_immediate")
16838 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
16839 (const_string "1")
16840 (const_string "*")))
16841 (set (attr "prefix_rex")
16842 (if_then_else (eq_attr "alternative" "0,1")
16843 (const_string "1")
16844 (const_string "*")))
16845 (set (attr "prefix_extra")
16846 (if_then_else (eq_attr "alternative" "0,1")
16847 (const_string "1")
16848 (const_string "*")))
16849 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
16850 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
16851
;; Split element extraction from a vector in memory into a plain scalar
;; load at offset index * element size.
16852 (define_split
16853 [(set (match_operand:<ssescalarmode> 0 "register_operand")
16854 (vec_select:<ssescalarmode>
16855 (match_operand:VI_128 1 "memory_operand")
16856 (parallel
16857 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
16858 "TARGET_SSE && reload_completed"
16859 [(set (match_dup 0) (match_dup 1))]
16860 {
16861 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
16862
16863 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
16864 })
16865
;; Extract one 128-bit half of a 256-bit vector: vextracti128/vextractf128
;; for VEX registers, vextracti32x4 (on the zmm view %g1) for EVEX-only
;; registers.
16866 (define_insn "*vec_extractv2ti"
16867 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
16868 (vec_select:TI
16869 (match_operand:V2TI 1 "register_operand" "x,v")
16870 (parallel
16871 [(match_operand:SI 2 "const_0_to_1_operand")])))]
16872 "TARGET_AVX"
16873 "@
16874 vextract%~128\t{%2, %1, %0|%0, %1, %2}
16875 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
16876 [(set_attr "type" "sselog")
16877 (set_attr "prefix_extra" "1")
16878 (set_attr "length_immediate" "1")
16879 (set_attr "prefix" "vex,evex")
16880 (set_attr "mode" "OI")])
16881
;; Extract one 128-bit quarter of a 512-bit vector via vextracti32x4.
16882 (define_insn "*vec_extractv4ti"
16883 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
16884 (vec_select:TI
16885 (match_operand:V4TI 1 "register_operand" "v")
16886 (parallel
16887 [(match_operand:SI 2 "const_0_to_3_operand")])))]
16888 "TARGET_AVX512F"
16889 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
16890 [(set_attr "type" "sselog")
16891 (set_attr "prefix_extra" "1")
16892 (set_attr "length_immediate" "1")
16893 (set_attr "prefix" "evex")
16894 (set_attr "mode" "XI")])
16895
;; Vector modes whose low 128 bits can be extracted as a TImode lowpart.
16896 (define_mode_iterator VEXTRACTI128_MODE
16897 [(V4TI "TARGET_AVX512F") V2TI])
16898
;; Extracting chunk 0 is just a lowpart move; for EXT_REX (xmm16+)
;; registers this needs AVX512VL, since the TImode lowpart must be
;; addressable as an xmm register.
16899 (define_split
16900 [(set (match_operand:TI 0 "nonimmediate_operand")
16901 (vec_select:TI
16902 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
16903 (parallel [(const_int 0)])))]
16904 "TARGET_AVX
16905 && reload_completed
16906 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
16907 [(set (match_dup 0) (match_dup 1))]
16908 "operands[1] = gen_lowpart (TImode, operands[1]);")
16909
16910 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
16911 ;; vector modes into vec_extract*.
;; For 256/512-bit sources the preparation code first narrows to the
;; 128-bit low part with vec_extract_lo_* (the 512-bit case falls through
;; into the 256-bit case), then the vec_select grabs element 0.
16912 (define_split
16913 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
16914 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
16915 "can_create_pseudo_p ()
16916 && REG_P (operands[1])
16917 && VECTOR_MODE_P (GET_MODE (operands[1]))
16918 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
16919 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
16920 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
16921 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
16922 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
16923 (parallel [(const_int 0)])))]
16924 {
16925 rtx tmp;
16926
16927 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
16928 {
16929 case 64:
;; 512-bit source: extract the low 256 bits first.
16930 if (<MODE>mode == SImode)
16931 {
16932 tmp = gen_reg_rtx (V8SImode);
16933 emit_insn (gen_vec_extract_lo_v16si (tmp,
16934 gen_lowpart (V16SImode,
16935 operands[1])));
16936 }
16937 else
16938 {
16939 tmp = gen_reg_rtx (V4DImode);
16940 emit_insn (gen_vec_extract_lo_v8di (tmp,
16941 gen_lowpart (V8DImode,
16942 operands[1])));
16943 }
16944 operands[1] = tmp;
16945 /* FALLTHRU */
16946 case 32:
;; 256-bit source: extract the low 128 bits.
16947 tmp = gen_reg_rtx (<ssevecmode>mode);
16948 if (<MODE>mode == SImode)
16949 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
16950 operands[1])));
16951 else
16952 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
16953 operands[1])));
16954 operands[1] = tmp;
16955 break;
16956 case 16:
;; 128-bit source: just reinterpret in the matching vector mode.
16957 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
16958 break;
16959 }
16960 })
16961
;; Concatenate two SImode values into V2SI (SSE4.1+).  Alternatives cover
;; pinsrd (SSE4 / AVX 3-operand), punpckldq, movd when the second element
;; is zero, and the MMX register forms.
16962 (define_insn "*vec_concatv2si_sse4_1"
16963 [(set (match_operand:V2SI 0 "register_operand"
16964 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
16965 (vec_concat:V2SI
16966 (match_operand:SI 1 "nonimmediate_operand"
16967 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
16968 (match_operand:SI 2 "nonimm_or_0_operand"
16969 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
16970 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16971 "@
16972 pinsrd\t{$1, %2, %0|%0, %2, 1}
16973 pinsrd\t{$1, %2, %0|%0, %2, 1}
16974 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
16975 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
16976 punpckldq\t{%2, %0|%0, %2}
16977 punpckldq\t{%2, %0|%0, %2}
16978 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
16979 %vmovd\t{%1, %0|%0, %1}
16980 punpckldq\t{%2, %0|%0, %2}
16981 movd\t{%1, %0|%0, %1}"
16982 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
16983 (set (attr "mmx_isa")
16984 (if_then_else (eq_attr "alternative" "8,9")
16985 (const_string "native")
16986 (const_string "*")))
16987 (set (attr "type")
16988 (cond [(eq_attr "alternative" "7")
16989 (const_string "ssemov")
16990 (eq_attr "alternative" "8")
16991 (const_string "mmxcvt")
16992 (eq_attr "alternative" "9")
16993 (const_string "mmxmov")
16994 ]
16995 (const_string "sselog")))
16996 (set (attr "prefix_extra")
16997 (if_then_else (eq_attr "alternative" "0,1,2,3")
16998 (const_string "1")
16999 (const_string "*")))
17000 (set (attr "length_immediate")
17001 (if_then_else (eq_attr "alternative" "0,1,2,3")
17002 (const_string "1")
17003 (const_string "*")))
17004 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
17005 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
17006
17007 ;; ??? In theory we can match memory for the MMX alternative, but allowing
17008 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
17009 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concatenation: punpckldq/unpcklps for two registers,
;; movd/movss when the second element is the zero constant.
17010 (define_insn "*vec_concatv2si"
17011 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
17012 (vec_concat:V2SI
17013 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
17014 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
17015 "TARGET_SSE && !TARGET_SSE4_1"
17016 "@
17017 punpckldq\t{%2, %0|%0, %2}
17018 movd\t{%1, %0|%0, %1}
17019 unpcklps\t{%2, %0|%0, %2}
17020 movss\t{%1, %0|%0, %1}
17021 punpckldq\t{%2, %0|%0, %2}
17022 movd\t{%1, %0|%0, %1}"
17023 [(set_attr "isa" "sse2,sse2,*,*,*,*")
17024 (set_attr "mmx_isa" "*,*,*,*,native,native")
17025 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
17026 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
17027
;; Concatenate two V2SI halves into V4SI: punpcklqdq (integer domain) or
;; movlhps/movhps (FP domain, memory-capable) variants.
17028 (define_insn "*vec_concatv4si"
17029 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
17030 (vec_concat:V4SI
17031 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
17032 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
17033 "TARGET_SSE"
17034 "@
17035 punpcklqdq\t{%2, %0|%0, %2}
17036 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
17037 movlhps\t{%2, %0|%0, %2}
17038 movhps\t{%2, %0|%0, %q2}
17039 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
17040 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
17041 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
17042 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
17043 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
17044
;; Concatenate a half-width vector with zero: movq zero-extends the low
;; 64 bits; movq2dq handles the MMX-register source.
17045 (define_insn "*vec_concat<mode>_0"
17046 [(set (match_operand:VI124_128 0 "register_operand" "=v,x")
17047 (vec_concat:VI124_128
17048 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm,?!*y")
17049 (match_operand:<ssehalfvecmode> 2 "const0_operand" " C,C")))]
17050 "TARGET_SSE2"
17051 "@
17052 %vmovq\t{%1, %0|%0, %1}
17053 movq2dq\t{%1, %0|%0, %1}"
17054 [(set_attr "mmx_isa" "*,native")
17055 (set_attr "type" "ssemov")
17056 (set_attr "prefix" "maybe_vex,orig")
17057 (set_attr "mode" "TI")])
17058
;; Concatenate two DImode values into V2DI.  Alternatives: pinsrq
;; (64-bit SSE4 / AVX / AVX512DQ), punpcklqdq, and the FP-domain
;; movlhps/movhps forms for register/memory second operands.
17059 (define_insn "vec_concatv2di"
17060 [(set (match_operand:V2DI 0 "register_operand"
17061 "=Yr,*x,x ,v ,x,v ,x,x,v")
17062 (vec_concat:V2DI
17063 (match_operand:DI 1 "register_operand"
17064 " 0, 0,x ,Yv,0,Yv,0,0,v")
17065 (match_operand:DI 2 "nonimmediate_operand"
17066 " rm,rm,rm,rm,x,Yv,x,m,m")))]
17067 "TARGET_SSE"
17068 "@
17069 pinsrq\t{$1, %2, %0|%0, %2, 1}
17070 pinsrq\t{$1, %2, %0|%0, %2, 1}
17071 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
17072 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
17073 punpcklqdq\t{%2, %0|%0, %2}
17074 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
17075 movlhps\t{%2, %0|%0, %2}
17076 movhps\t{%2, %0|%0, %2}
17077 vmovhps\t{%2, %1, %0|%0, %1, %2}"
17078 [(set (attr "isa")
17079 (cond [(eq_attr "alternative" "0,1")
17080 (const_string "x64_sse4_noavx")
17081 (eq_attr "alternative" "2")
17082 (const_string "x64_avx")
17083 (eq_attr "alternative" "3")
17084 (const_string "x64_avx512dq")
17085 (eq_attr "alternative" "4")
17086 (const_string "sse2_noavx")
17087 (eq_attr "alternative" "5,8")
17088 (const_string "avx")
17089 ]
17090 (const_string "noavx")))
17091 (set (attr "type")
17092 (if_then_else
17093 (eq_attr "alternative" "0,1,2,3,4,5")
17094 (const_string "sselog")
17095 (const_string "ssemov")))
17096 (set (attr "prefix_rex")
17097 (if_then_else (eq_attr "alternative" "0,1,2,3")
17098 (const_string "1")
17099 (const_string "*")))
17100 (set (attr "prefix_extra")
17101 (if_then_else (eq_attr "alternative" "0,1,2,3")
17102 (const_string "1")
17103 (const_string "*")))
17104 (set (attr "length_immediate")
17105 (if_then_else (eq_attr "alternative" "0,1,2,3")
17106 (const_string "1")
17107 (const_string "*")))
17108 (set (attr "prefix")
17109 (cond [(eq_attr "alternative" "2")
17110 (const_string "vex")
17111 (eq_attr "alternative" "3")
17112 (const_string "evex")
17113 (eq_attr "alternative" "5,8")
17114 (const_string "maybe_evex")
17115 ]
17116 (const_string "orig")))
17117 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
17118
;; DImode value concatenated with zero.  The GPR source uses movq when the
;; assembler supports inter-unit movq, otherwise movd with a 64-bit operand;
;; movq2dq covers the MMX-register source.
17119 (define_insn "*vec_concatv2di_0"
17120 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
17121 (vec_concat:V2DI
17122 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
17123 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
17124 "TARGET_SSE2"
17125 "@
17126 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
17127 %vmovq\t{%1, %0|%0, %1}
17128 movq2dq\t{%1, %0|%0, %1}"
17129 [(set_attr "isa" "x64,*,*")
17130 (set_attr "mmx_isa" "*,*,native")
17131 (set_attr "type" "ssemov")
17132 (set_attr "prefix_rex" "1,*,*")
17133 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
17134 (set_attr "mode" "TI")
17135 (set (attr "preferred_for_speed")
17136 (cond [(eq_attr "alternative" "0")
17137 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
17138 ]
17139 (symbol_ref "true")))])
17140
17141 ;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed 64-bit-element vector via vmovq on the xmm
;; lowpart (%x0); GPR source gated on inter-unit move tuning.
17142 (define_insn "vec_set<mode>_0"
17143 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
17144 (vec_merge:VI8_AVX_AVX512F
17145 (vec_duplicate:VI8_AVX_AVX512F
17146 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
17147 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
17148 (const_int 1)))]
17149 "TARGET_AVX"
17150 "vmovq\t{%2, %x0|%x0, %2}"
17151 [(set_attr "isa" "x64,*")
17152 (set_attr "type" "ssemov")
17153 (set_attr "prefix_rex" "1,*")
17154 (set_attr "prefix" "maybe_evex")
17155 (set_attr "mode" "TI")
17156 (set (attr "preferred_for_speed")
17157 (cond [(eq_attr "alternative" "0")
17158 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
17159 ]
17160 (symbol_ref "true")))])
17161
;; Standard-named vector unpack expanders: widen the low/high half of the
;; input, signed (vec_unpacks_*) or unsigned (vec_unpacku_*), all routed
;; through ix86_expand_sse_unpack (args: unsigned_p, high_p).
17162 (define_expand "vec_unpacks_lo_<mode>"
17163 [(match_operand:<sseunpackmode> 0 "register_operand")
17164 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
17165 "TARGET_SSE2"
17166 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
17167
17168 (define_expand "vec_unpacks_hi_<mode>"
17169 [(match_operand:<sseunpackmode> 0 "register_operand")
17170 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
17171 "TARGET_SSE2"
17172 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
17173
17174 (define_expand "vec_unpacku_lo_<mode>"
17175 [(match_operand:<sseunpackmode> 0 "register_operand")
17176 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
17177 "TARGET_SSE2"
17178 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
17179
;; Scalar-mask (QImode) variant: for 8- or 4-bit masks the low half is the
;; mask itself, so this is just a register copy; other widths FAIL.
17180 (define_expand "vec_unpacks_sbool_lo_qi"
17181 [(match_operand:QI 0 "register_operand")
17182 (match_operand:QI 1 "register_operand")
17183 (match_operand:QI 2 "const_int_operand")]
17184 "TARGET_AVX512F"
17185 {
17186 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
17187 FAIL;
17188 emit_move_insn (operands[0], operands[1]);
17189 DONE;
17190 })
17191
;; Mask-register unpacks: the low half of a wider mask is its low subreg.
17192 (define_expand "vec_unpacks_lo_hi"
17193 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
17194 (match_operand:HI 1 "register_operand"))]
17195 "TARGET_AVX512F")
17196
17197 (define_expand "vec_unpacks_lo_si"
17198 [(set (match_operand:HI 0 "register_operand")
17199 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
17200 "TARGET_AVX512F")
17201
17202 (define_expand "vec_unpacks_lo_di"
17203 [(set (match_operand:SI 0 "register_operand")
17204 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
17205 "TARGET_AVX512BW")
17206
17207 (define_expand "vec_unpacku_hi_<mode>"
17208 [(match_operand:<sseunpackmode> 0 "register_operand")
17209 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
17210 "TARGET_SSE2"
17211 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
17212
;; Unpack the HIGH half of a sub-byte mask: shift the mask right by
;; half its element count.  With AVX512DQ a QImode mask shift exists;
;; otherwise widen to HImode, shift there, and truncate back.
17213 (define_expand "vec_unpacks_sbool_hi_qi"
17214 [(match_operand:QI 0 "register_operand")
17215 (match_operand:QI 1 "register_operand")
17216 (match_operand:QI 2 "const_int_operand")]
17217 "TARGET_AVX512F"
17218 {
17219 HOST_WIDE_INT nunits = INTVAL (operands[2]);
17220 if (nunits != 8 && nunits != 4)
17221 FAIL;
17222 if (TARGET_AVX512DQ)
17223 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
17224 GEN_INT (nunits / 2)));
17225 else
17226 {
17227 rtx tem = gen_reg_rtx (HImode);
17228 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
17229 QImode),
17230 GEN_INT (nunits / 2)));
17231 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
17232 }
17233 DONE;
17234 })
17235
;; High half of a 16-bit mask: logical shift right by 8, as a mask-reg
;; operation (UNSPEC_MASKOP keeps it on the k-register unit).
17236 (define_expand "vec_unpacks_hi_hi"
17237 [(parallel
17238 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
17239 (lshiftrt:HI (match_operand:HI 1 "register_operand")
17240 (const_int 8)))
17241 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
17242 "TARGET_AVX512F")
17243
;; Same for SI/DI masks; the shift count is the bit size of the half mask.
17244 (define_expand "vec_unpacks_hi_<mode>"
17245 [(parallel
17246 [(set (subreg:SWI48x
17247 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
17248 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
17249 (match_dup 2)))
17250 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
17251 "TARGET_AVX512BW"
17252 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
17253
17254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17255 ;;
17256 ;; Miscellaneous
17257 ;;
17258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17259
;; Unsigned byte/word average: (a + b + 1) >> 1, modeled exactly via
;; zero-extension to the double-wide mode.  operands[3] (via
;; <mask_expand_op3>) is the rounding constant-1 vector.
17260 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
17261 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
17262 (truncate:VI12_AVX2_AVX512BW
17263 (lshiftrt:<ssedoublemode>
17264 (plus:<ssedoublemode>
17265 (plus:<ssedoublemode>
17266 (zero_extend:<ssedoublemode>
17267 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
17268 (zero_extend:<ssedoublemode>
17269 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
17270 (match_dup <mask_expand_op3>))
17271 (const_int 1))))]
17272 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17273 {
17274 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
;; PLUS is commutative; canonicalize so a memory operand ends up second.
17275 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
17276 })
17277
;; Matcher for the uavg expansion above: emits pavgb/pavgw (legacy SSE)
;; or vpavgb/vpavgw (VEX/EVEX, optionally masked).
17278 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
17279 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
17280 (truncate:VI12_AVX2_AVX512BW
17281 (lshiftrt:<ssedoublemode>
17282 (plus:<ssedoublemode>
17283 (plus:<ssedoublemode>
17284 (zero_extend:<ssedoublemode>
17285 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>"))
17286 (zero_extend:<ssedoublemode>
17287 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))
17288 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
17289 (const_int 1))))]
17290 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
17291 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17292 "@
17293 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
17294 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17295 [(set_attr "isa" "noavx,avx")
17296 (set_attr "type" "sseiadd")
17297 (set_attr "prefix_data16" "1,*")
17298 (set_attr "prefix" "orig,<mask_prefix>")
17299 (set_attr "mode" "<sseinsnmode>")])
17300
;; psadbw (sum of absolute differences of unsigned bytes) is kept as an
;; UNSPEC rather than modeled in RTL:
17301 ;; The correct representation for this is absolutely enormous, and
17302 ;; surely not generally useful.
17303 (define_insn "<sse2_avx2>_psadbw"
17304 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW")
17305 (unspec:VI8_AVX2_AVX512BW
17306 [(match_operand:<ssebytemode> 1 "register_operand" "0,YW")
17307 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")]
17308 UNSPEC_PSADBW))]
17309 "TARGET_SSE2"
17310 "@
17311 psadbw\t{%2, %0|%0, %2}
17312 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
17313 [(set_attr "isa" "noavx,avx")
17314 (set_attr "type" "sseiadd")
17315 (set_attr "atom_unit" "simul")
17316 (set_attr "prefix_data16" "1,*")
17317 (set_attr "prefix" "orig,maybe_evex")
17318 (set_attr "mode" "<sseinsnmode>")])
17319
;; movmskps/movmskpd: collect the sign bits of each float/double element
;; into the low bits of a GPR.
17320 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
17321 [(set (match_operand:SI 0 "register_operand" "=r")
17322 (unspec:SI
17323 [(match_operand:VF_128_256 1 "register_operand" "x")]
17324 UNSPEC_MOVMSK))]
17325 "TARGET_SSE"
17326 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
17327 [(set_attr "type" "ssemov")
17328 (set_attr "prefix" "maybe_vex")
17329 (set_attr "mode" "<MODE>")])
17330
;; Same, with the SImode result zero/sign-extended to DImode; the
;; instruction writing %k0 already produces the extended value.
17331 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
17332 [(set (match_operand:DI 0 "register_operand" "=r")
17333 (any_extend:DI
17334 (unspec:SI
17335 [(match_operand:VF_128_256 1 "register_operand" "x")]
17336 UNSPEC_MOVMSK)))]
17337 "TARGET_64BIT && TARGET_SSE"
17338 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
17339 [(set_attr "type" "ssemov")
17340 (set_attr "prefix" "maybe_vex")
17341 (set_attr "mode" "<MODE>")])
17342
;; movmsk reads only the sign bit of each element, so movmsk (x < 0)
;; and movmsk (x >> n reinterpreted as FP) can be simplified to a plain
;; movmsk of x (suitably lowpart-cast).  These four splitters do that
;; for the SI result and the DI any_extend variants.
17343 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
17344 [(set (match_operand:SI 0 "register_operand" "=r")
17345 (unspec:SI
17346 [(lt:VF_128_256
17347 (match_operand:<sseintvecmode> 1 "register_operand" "x")
17348 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
17349 UNSPEC_MOVMSK))]
17350 "TARGET_SSE"
17351 "#"
17352 "&& reload_completed"
17353 [(set (match_dup 0)
17354 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
17355 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
17356 [(set_attr "type" "ssemov")
17357 (set_attr "prefix" "maybe_vex")
17358 (set_attr "mode" "<MODE>")])
17359
17360 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
17361 [(set (match_operand:DI 0 "register_operand" "=r")
17362 (any_extend:DI
17363 (unspec:SI
17364 [(lt:VF_128_256
17365 (match_operand:<sseintvecmode> 1 "register_operand" "x")
17366 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
17367 UNSPEC_MOVMSK)))]
17368 "TARGET_64BIT && TARGET_SSE"
17369 "#"
17370 "&& reload_completed"
17371 [(set (match_dup 0)
17372 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
17373 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
17374 [(set_attr "type" "ssemov")
17375 (set_attr "prefix" "maybe_vex")
17376 (set_attr "mode" "<MODE>")])
17377
;; An arithmetic right shift feeding movmsk leaves the sign bits
;; unchanged, so the shift can be dropped.
17378 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
17379 [(set (match_operand:SI 0 "register_operand" "=r")
17380 (unspec:SI
17381 [(subreg:VF_128_256
17382 (ashiftrt:<sseintvecmode>
17383 (match_operand:<sseintvecmode> 1 "register_operand" "x")
17384 (match_operand:QI 2 "const_int_operand" "n")) 0)]
17385 UNSPEC_MOVMSK))]
17386 "TARGET_SSE"
17387 "#"
17388 "&& reload_completed"
17389 [(set (match_dup 0)
17390 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
17391 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
17392 [(set_attr "type" "ssemov")
17393 (set_attr "prefix" "maybe_vex")
17394 (set_attr "mode" "<MODE>")])
17395
17396 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
17397 [(set (match_operand:DI 0 "register_operand" "=r")
17398 (any_extend:DI
17399 (unspec:SI
17400 [(subreg:VF_128_256
17401 (ashiftrt:<sseintvecmode>
17402 (match_operand:<sseintvecmode> 1 "register_operand" "x")
17403 (match_operand:QI 2 "const_int_operand" "n")) 0)]
17404 UNSPEC_MOVMSK)))]
17405 "TARGET_64BIT && TARGET_SSE"
17406 "#"
17407 "&& reload_completed"
17408 [(set (match_dup 0)
17409 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
17410 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
17411 [(set_attr "type" "ssemov")
17412 (set_attr "prefix" "maybe_vex")
17413 (set_attr "mode" "<MODE>")])
17414
;; pmovmskb: collect the sign bit of each byte element into a GPR.
17415 (define_insn "<sse2_avx2>_pmovmskb"
17416 [(set (match_operand:SI 0 "register_operand" "=r")
17417 (unspec:SI
17418 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
17419 UNSPEC_MOVMSK))]
17420 "TARGET_SSE2"
17421 "%vpmovmskb\t{%1, %0|%0, %1}"
17422 [(set_attr "type" "ssemov")
17423 (set (attr "prefix_data16")
17424 (if_then_else
17425 (match_test "TARGET_AVX")
17426 (const_string "*")
17427 (const_string "1")))
17428 (set_attr "prefix" "maybe_vex")
17429 (set_attr "mode" "SI")])
17430
;; DImode zero-extended form: writing %k0 already zero-extends.
17431 (define_insn "*<sse2_avx2>_pmovmskb_zext"
17432 [(set (match_operand:DI 0 "register_operand" "=r")
17433 (zero_extend:DI
17434 (unspec:SI
17435 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
17436 UNSPEC_MOVMSK)))]
17437 "TARGET_64BIT && TARGET_SSE2"
17438 "%vpmovmskb\t{%1, %k0|%k0, %1}"
17439 [(set_attr "type" "ssemov")
17440 (set (attr "prefix_data16")
17441 (if_then_else
17442 (match_test "TARGET_AVX")
17443 (const_string "*")
17444 (const_string "1")))
17445 (set_attr "prefix" "maybe_vex")
17446 (set_attr "mode" "SI")])
17447
;; Sign-extended form for the 16-byte case: the 16-bit result has its
;; upper GPR bits clear, so the 32-bit write doubles as the extension.
17448 (define_insn "*sse2_pmovmskb_ext"
17449 [(set (match_operand:DI 0 "register_operand" "=r")
17450 (sign_extend:DI
17451 (unspec:SI
17452 [(match_operand:V16QI 1 "register_operand" "x")]
17453 UNSPEC_MOVMSK)))]
17454 "TARGET_64BIT && TARGET_SSE2"
17455 "%vpmovmskb\t{%1, %k0|%k0, %1}"
17456 [(set_attr "type" "ssemov")
17457 (set (attr "prefix_data16")
17458 (if_then_else
17459 (match_test "TARGET_AVX")
17460 (const_string "*")
17461 (const_string "1")))
17462 (set_attr "prefix" "maybe_vex")
17463 (set_attr "mode" "SI")])
17464
;; For V16QI, pmovmskb only sets the low 16 result bits, so
;; zero-extending the HI lowpart of its result is a no-op; drop the
;; extension before reload.  (Name fixed: "pmovskb" -> "pmovmskb".
;; Names starting with '*' generate no gen_* function, so the rename
;; is safe.)
17465 (define_insn_and_split "*sse2_pmovmskb_zexthisi"
17466 [(set (match_operand:SI 0 "register_operand")
17467 (zero_extend:SI
17468 (subreg:HI
17469 (unspec:SI
17470 [(match_operand:V16QI 1 "register_operand")]
17471 UNSPEC_MOVMSK) 0)))]
17472 "TARGET_SSE2 && ix86_pre_reload_split ()"
17473 "#"
17474 "&& 1"
17475 [(set (match_dup 0)
17476 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
17477
;; Fold a NOT around (or after) pmovmskb into an integer complement of
;; the mask result, which is cheaper than complementing the vector.
;;
;; (zero_extend (not (HI-subreg of pmovmskb))) -> pmovmskb; xor 0xffff.
17478 (define_split
17479 [(set (match_operand:SI 0 "register_operand")
17480 (zero_extend:SI
17481 (not:HI
17482 (subreg:HI
17483 (unspec:SI
17484 [(match_operand:V16QI 1 "register_operand")]
17485 UNSPEC_MOVMSK) 0))))]
17486 "TARGET_SSE2"
17487 [(set (match_dup 2)
17488 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
17489 (set (match_dup 0)
17490 (xor:SI (match_dup 2) (const_int 65535)))]
17491 "operands[2] = gen_reg_rtx (SImode);")
17492
;; pmovmskb (~x) -> ~pmovmskb (x).  For 32 elements the mask fills the
;; whole SImode result, so a plain NOT works; otherwise XOR with the
;; (1 << nunits) - 1 mask of valid bits.
17493 (define_split
17494 [(set (match_operand:SI 0 "register_operand")
17495 (unspec:SI
17496 [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
17497 UNSPEC_MOVMSK))]
17498 "TARGET_SSE2"
17499 [(set (match_dup 2)
17500 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
17501 (set (match_dup 0) (match_dup 3))]
17502 {
17503 operands[2] = gen_reg_rtx (SImode);
17504 if (GET_MODE_NUNITS (<MODE>mode) == 32)
17505 operands[3] = gen_rtx_NOT (SImode, operands[2]);
17506 else
17507 {
17508 operands[3]
17509 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
17510 SImode);
17511 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
17512 }
17513 })
17514
;; Same fold when the NOT is performed in another same-size integer
;; vector mode and viewed through a subreg.
17515 (define_split
17516 [(set (match_operand:SI 0 "register_operand")
17517 (unspec:SI
17518 [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
17519 UNSPEC_MOVMSK))]
17520 "TARGET_SSE2
17521 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
17522 && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
17523 [(set (match_dup 2)
17524 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
17525 (set (match_dup 0) (match_dup 3))]
17526 {
17527 operands[2] = gen_reg_rtx (SImode);
17528 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
17529 if (GET_MODE_NUNITS (<MODE>mode) == 32)
17530 operands[3] = gen_rtx_NOT (SImode, operands[2]);
17531 else
17532 {
17533 operands[3]
17534 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
17535 SImode);
17536 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
17537 }
17538 })
17539
;; As with movmskps: pmovmskb (x < 0) == pmovmskb (x), because only the
;; byte sign bits are read.  SI, zero-extended DI and sign-extended DI
;; variants.
17540 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
17541 [(set (match_operand:SI 0 "register_operand" "=r")
17542 (unspec:SI
17543 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
17544 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
17545 UNSPEC_MOVMSK))]
17546 "TARGET_SSE2"
17547 "#"
17548 "&& 1"
17549 [(set (match_dup 0)
17550 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
17551 ""
17552 [(set_attr "type" "ssemov")
17553 (set (attr "prefix_data16")
17554 (if_then_else
17555 (match_test "TARGET_AVX")
17556 (const_string "*")
17557 (const_string "1")))
17558 (set_attr "prefix" "maybe_vex")
17559 (set_attr "mode" "SI")])
17560
17561 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
17562 [(set (match_operand:DI 0 "register_operand" "=r")
17563 (zero_extend:DI
17564 (unspec:SI
17565 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
17566 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
17567 UNSPEC_MOVMSK)))]
17568 "TARGET_64BIT && TARGET_SSE2"
17569 "#"
17570 "&& 1"
17571 [(set (match_dup 0)
17572 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
17573 ""
17574 [(set_attr "type" "ssemov")
17575 (set (attr "prefix_data16")
17576 (if_then_else
17577 (match_test "TARGET_AVX")
17578 (const_string "*")
17579 (const_string "1")))
17580 (set_attr "prefix" "maybe_vex")
17581 (set_attr "mode" "SI")])
17582
17583 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
17584 [(set (match_operand:DI 0 "register_operand" "=r")
17585 (sign_extend:DI
17586 (unspec:SI
17587 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
17588 (match_operand:V16QI 2 "const0_operand" "C"))]
17589 UNSPEC_MOVMSK)))]
17590 "TARGET_64BIT && TARGET_SSE2"
17591 "#"
17592 "&& 1"
17593 [(set (match_dup 0)
17594 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
17595 ""
17596 [(set_attr "type" "ssemov")
17597 (set (attr "prefix_data16")
17598 (if_then_else
17599 (match_test "TARGET_AVX")
17600 (const_string "*")
17601 (const_string "1")))
17602 (set_attr "prefix" "maybe_vex")
17603 (set_attr "mode" "SI")])
17604
;; maskmovdqu: byte-masked store of operand 1 under mask operand 2 to
;; the address implicitly in %(r)di.  The destination memory is also an
;; input of the unspec since unmasked bytes are preserved.
17605 (define_expand "sse2_maskmovdqu"
17606 [(set (match_operand:V16QI 0 "memory_operand")
17607 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
17608 (match_operand:V16QI 2 "register_operand")
17609 (match_dup 0)]
17610 UNSPEC_MASKMOV))]
17611 "TARGET_SSE2")
17612
17613 (define_insn "*sse2_maskmovdqu"
17614 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
17615 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
17616 (match_operand:V16QI 2 "register_operand" "x")
17617 (mem:V16QI (match_dup 0))]
17618 UNSPEC_MASKMOV))]
17619 "TARGET_SSE2"
17620 {
17621 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
17622 that requires %v to be at the beginning of the opcode name. */
;; x32: the implicit %edi address needs an explicit addr32 prefix.
17623 if (Pmode != word_mode)
17624 fputs ("\taddr32", asm_out_file);
17625 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
17626 }
17627 [(set_attr "type" "ssemov")
17628 (set_attr "prefix_data16" "1")
17629 (set (attr "length_address")
17630 (symbol_ref ("Pmode != word_mode")))
17631 ;; The implicit %rdi operand confuses default length_vex computation.
17632 (set (attr "length_vex")
17633 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
17634 (set_attr "prefix" "maybe_vex")
17635 (set_attr "znver1_decode" "vector")
17636 (set_attr "mode" "TI")])
17637
;; MXCSR load/store and cache-line flush; all volatile since they touch
;; processor state not visible to the RTL dataflow.
17638 (define_insn "sse_ldmxcsr"
17639 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
17640 UNSPECV_LDMXCSR)]
17641 "TARGET_SSE"
17642 "%vldmxcsr\t%0"
17643 [(set_attr "type" "sse")
17644 (set_attr "atom_sse_attr" "mxcsr")
17645 (set_attr "prefix" "maybe_vex")
17646 (set_attr "memory" "load")])
17647
17648 (define_insn "sse_stmxcsr"
17649 [(set (match_operand:SI 0 "memory_operand" "=m")
17650 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
17651 "TARGET_SSE"
17652 "%vstmxcsr\t%0"
17653 [(set_attr "type" "sse")
17654 (set_attr "atom_sse_attr" "mxcsr")
17655 (set_attr "prefix" "maybe_vex")
17656 (set_attr "memory" "store")])
17657
17658 (define_insn "sse2_clflush"
17659 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
17660 UNSPECV_CLFLUSH)]
17661 "TARGET_SSE2"
17662 "clflush\t%a0"
17663 [(set_attr "type" "sse")
17664 (set_attr "atom_sse_attr" "fence")
17665 (set_attr "memory" "unknown")])
17666
17667 ;; As per AMD and Intel ISA manuals, the first operand is extensions
17668 ;; and it goes to %ecx. The second operand received is hints and it goes
17669 ;; to %eax.
17670 (define_insn "sse3_mwait"
17671 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
17672 (match_operand:SI 1 "register_operand" "a")]
17673 UNSPECV_MWAIT)]
17674 "TARGET_MWAIT"
17675 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
17676 ;; Since 32bit register operands are implicitly zero extended to 64bit,
17677 ;; we only need to set up 32bit registers.
17678 "mwait"
17679 [(set_attr "length" "3")])
17680
;; monitor: address in %(e/r)ax, extensions in %ecx, hints in %edx.
17681 (define_insn "@sse3_monitor_<mode>"
17682 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
17683 (match_operand:SI 1 "register_operand" "c")
17684 (match_operand:SI 2 "register_operand" "d")]
17685 UNSPECV_MONITOR)]
17686 "TARGET_MWAIT"
17687 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
17688 ;; RCX and RDX are used. Since 32bit register operands are implicitly
17689 ;; zero extended to 64bit, we only need to set up 32bit registers.
17690 "%^monitor"
;; %^ emits addr32 when Pmode != word_mode, adding one prefix byte.
17691 [(set (attr "length")
17692 (symbol_ref ("(Pmode != word_mode) + 3")))])
17693
17694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17695 ;;
17696 ;; SSSE3 instructions
17697 ;;
17698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17699
;; Horizontal add/sub code iterator: plain and saturating variants.
17700 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
17701
;; 256-bit horizontal word add/sub.  The vec_select parallels encode
;; vphadd/vphsubw's within-128-bit-lane pairing of even/odd elements.
17702 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
17703 [(set (match_operand:V16HI 0 "register_operand" "=x")
17704 (ssse3_plusminus:V16HI
17705 (vec_select:V16HI
17706 (vec_concat:V32HI
17707 (match_operand:V16HI 1 "register_operand" "x")
17708 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
17709 (parallel
17710 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
17711 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
17712 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
17713 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
17714 (vec_select:V16HI
17715 (vec_concat:V32HI (match_dup 1) (match_dup 2))
17716 (parallel
17717 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
17718 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
17719 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
17720 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
17721 "TARGET_AVX2"
17722 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
17723 [(set_attr "type" "sseiadd")
17724 (set_attr "prefix_extra" "1")
17725 (set_attr "prefix" "vex")
17726 (set_attr "mode" "OI")])
17727
;; 128-bit horizontal word add/sub: each result element is the sum
;; (or difference) of an adjacent even/odd pair from op1 then op2.
17728 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
17729 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
17730 (ssse3_plusminus:V8HI
17731 (vec_select:V8HI
17732 (vec_concat:V16HI
17733 (match_operand:V8HI 1 "register_operand" "0,x")
17734 (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
17735 (parallel
17736 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
17737 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
17738 (vec_select:V8HI
17739 (vec_concat:V16HI (match_dup 1) (match_dup 2))
17740 (parallel
17741 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
17742 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
17743 "TARGET_SSSE3"
17744 "@
17745 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
17746 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
17747 [(set_attr "isa" "noavx,avx")
17748 (set_attr "type" "sseiadd")
17749 (set_attr "atom_unit" "complex")
17750 (set_attr "prefix_data16" "1,*")
17751 (set_attr "prefix_extra" "1")
17752 (set_attr "prefix" "orig,vex")
17753 (set_attr "mode" "TI")])
17754
;; MMX-sized (V4HI) horizontal word add/sub.  Alternative 0 is native
;; MMX; alternatives 1/2 are matched on SSE registers and split after
;; reload into the V8HI SSE pattern, then the high half is fixed up.
17755 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
17756 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
17757 (ssse3_plusminus:V4HI
17758 (vec_select:V4HI
17759 (vec_concat:V8HI
17760 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
17761 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
17762 (parallel
17763 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
17764 (vec_select:V4HI
17765 (vec_concat:V8HI (match_dup 1) (match_dup 2))
17766 (parallel
17767 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
17768 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17769 "@
17770 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
17771 #
17772 #"
17773 "TARGET_SSSE3 && reload_completed
17774 && SSE_REGNO_P (REGNO (operands[0]))"
17775 [(const_int 0)]
17776 {
17777 /* Generate SSE version of the operation. */
17778 rtx op0 = lowpart_subreg (V8HImode, operands[0],
17779 GET_MODE (operands[0]));
17780 rtx op1 = lowpart_subreg (V8HImode, operands[1],
17781 GET_MODE (operands[1]));
17782 rtx op2 = lowpart_subreg (V8HImode, operands[2],
17783 GET_MODE (operands[2]));
17784 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
17785 ix86_move_vector_high_sse_to_mmx (op0);
17786 DONE;
17787 }
17788 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17789 (set_attr "type" "sseiadd")
17790 (set_attr "atom_unit" "complex")
17791 (set_attr "prefix_extra" "1")
17792 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17793 (set_attr "mode" "DI,TI,TI")])
17794
;; Horizontal dword add/sub (non-saturating only; 'plusminus', not the
;; ssse3_plusminus iterator).  256-bit form pairs within 128-bit lanes.
17795 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
17796 [(set (match_operand:V8SI 0 "register_operand" "=x")
17797 (plusminus:V8SI
17798 (vec_select:V8SI
17799 (vec_concat:V16SI
17800 (match_operand:V8SI 1 "register_operand" "x")
17801 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
17802 (parallel
17803 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
17804 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
17805 (vec_select:V8SI
17806 (vec_concat:V16SI (match_dup 1) (match_dup 2))
17807 (parallel
17808 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
17809 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
17810 "TARGET_AVX2"
17811 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
17812 [(set_attr "type" "sseiadd")
17813 (set_attr "prefix_extra" "1")
17814 (set_attr "prefix" "vex")
17815 (set_attr "mode" "OI")])
17816
;; 128-bit horizontal dword add/sub.
17817 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
17818 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
17819 (plusminus:V4SI
17820 (vec_select:V4SI
17821 (vec_concat:V8SI
17822 (match_operand:V4SI 1 "register_operand" "0,x")
17823 (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
17824 (parallel
17825 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
17826 (vec_select:V4SI
17827 (vec_concat:V8SI (match_dup 1) (match_dup 2))
17828 (parallel
17829 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
17830 "TARGET_SSSE3"
17831 "@
17832 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
17833 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
17834 [(set_attr "isa" "noavx,avx")
17835 (set_attr "type" "sseiadd")
17836 (set_attr "atom_unit" "complex")
17837 (set_attr "prefix_data16" "1,*")
17838 (set_attr "prefix_extra" "1")
17839 (set_attr "prefix" "orig,vex")
17840 (set_attr "mode" "TI")])
17841
;; MMX-sized (V2SI) horizontal dword add/sub; same split-to-SSE scheme
;; as the V4HI pattern above.
17842 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
17843 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
17844 (plusminus:V2SI
17845 (vec_select:V2SI
17846 (vec_concat:V4SI
17847 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
17848 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
17849 (parallel [(const_int 0) (const_int 2)]))
17850 (vec_select:V2SI
17851 (vec_concat:V4SI (match_dup 1) (match_dup 2))
17852 (parallel [(const_int 1) (const_int 3)]))))]
17853 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17854 "@
17855 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
17856 #
17857 #"
17858 "TARGET_SSSE3 && reload_completed
17859 && SSE_REGNO_P (REGNO (operands[0]))"
17860 [(const_int 0)]
17861 {
17862 /* Generate SSE version of the operation. */
17863 rtx op0 = lowpart_subreg (V4SImode, operands[0],
17864 GET_MODE (operands[0]));
17865 rtx op1 = lowpart_subreg (V4SImode, operands[1],
17866 GET_MODE (operands[1]));
17867 rtx op2 = lowpart_subreg (V4SImode, operands[2],
17868 GET_MODE (operands[2]));
17869 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
17870 ix86_move_vector_high_sse_to_mmx (op0);
17871 DONE;
17872 }
17873 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17874 (set_attr "type" "sseiadd")
17875 (set_attr "atom_unit" "complex")
17876 (set_attr "prefix_extra" "1")
17877 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17878 (set_attr "mode" "DI,TI,TI")])
17879
;; vpmaddubsw (256-bit): multiply unsigned bytes of op1 by signed bytes
;; of op2 and add adjacent even/odd products with signed saturation
;; (ss_plus) into words.
17880 (define_insn "avx2_pmaddubsw256"
17881 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
17882 (ss_plus:V16HI
17883 (mult:V16HI
17884 (zero_extend:V16HI
17885 (vec_select:V16QI
17886 (match_operand:V32QI 1 "register_operand" "Yw")
17887 (parallel [(const_int 0) (const_int 2)
17888 (const_int 4) (const_int 6)
17889 (const_int 8) (const_int 10)
17890 (const_int 12) (const_int 14)
17891 (const_int 16) (const_int 18)
17892 (const_int 20) (const_int 22)
17893 (const_int 24) (const_int 26)
17894 (const_int 28) (const_int 30)])))
17895 (sign_extend:V16HI
17896 (vec_select:V16QI
17897 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")
17898 (parallel [(const_int 0) (const_int 2)
17899 (const_int 4) (const_int 6)
17900 (const_int 8) (const_int 10)
17901 (const_int 12) (const_int 14)
17902 (const_int 16) (const_int 18)
17903 (const_int 20) (const_int 22)
17904 (const_int 24) (const_int 26)
17905 (const_int 28) (const_int 30)]))))
17906 (mult:V16HI
17907 (zero_extend:V16HI
17908 (vec_select:V16QI (match_dup 1)
17909 (parallel [(const_int 1) (const_int 3)
17910 (const_int 5) (const_int 7)
17911 (const_int 9) (const_int 11)
17912 (const_int 13) (const_int 15)
17913 (const_int 17) (const_int 19)
17914 (const_int 21) (const_int 23)
17915 (const_int 25) (const_int 27)
17916 (const_int 29) (const_int 31)])))
17917 (sign_extend:V16HI
17918 (vec_select:V16QI (match_dup 2)
17919 (parallel [(const_int 1) (const_int 3)
17920 (const_int 5) (const_int 7)
17921 (const_int 9) (const_int 11)
17922 (const_int 13) (const_int 15)
17923 (const_int 17) (const_int 19)
17924 (const_int 21) (const_int 23)
17925 (const_int 25) (const_int 27)
17926 (const_int 29) (const_int 31)]))))))]
17927 "TARGET_AVX2"
17928 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
17929 [(set_attr "type" "sseiadd")
17930 (set_attr "prefix_extra" "1")
17931 (set_attr "prefix" "vex")
17932 (set_attr "mode" "OI")])
17933
;; 512-bit vpmaddubsw, kept as an UNSPEC (optionally masked via
;; <mask_name>/<mask_operand3>):
17934 ;; The correct representation for this is absolutely enormous, and
17935 ;; surely not generally useful.
17936 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
17937 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17938 (unspec:VI2_AVX512VL
17939 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
17940 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
17941 UNSPEC_PMADDUBSW512))]
17942 "TARGET_AVX512BW"
;; Stray ';' after the template string removed: it started an md-file
;; comment (dead text) and was inconsistent with every other define_insn.
17943 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17944 [(set_attr "type" "sseiadd")
17945 (set_attr "prefix" "evex")
17946 (set_attr "mode" "XI")])
17947
;; vpmulhrsw (512-bit): rounded high half of signed 16-bit multiply,
;; modeled exactly as ((a*b >> 14) + 1) >> 1 in the double-wide mode.
17948 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
17949 [(set (match_operand:V32HI 0 "register_operand" "=v")
17950 (truncate:V32HI
17951 (lshiftrt:V32SI
17952 (plus:V32SI
17953 (lshiftrt:V32SI
17954 (mult:V32SI
17955 (sign_extend:V32SI
17956 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
17957 (sign_extend:V32SI
17958 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
17959 (const_int 14))
;; Rounding constant: all-ones vector of 16-bit 1s.
17960 (const_vector:V32HI [(const_int 1) (const_int 1)
17961 (const_int 1) (const_int 1)
17962 (const_int 1) (const_int 1)
17963 (const_int 1) (const_int 1)
17964 (const_int 1) (const_int 1)
17965 (const_int 1) (const_int 1)
17966 (const_int 1) (const_int 1)
17967 (const_int 1) (const_int 1)
17968 (const_int 1) (const_int 1)
17969 (const_int 1) (const_int 1)
17970 (const_int 1) (const_int 1)
17971 (const_int 1) (const_int 1)
17972 (const_int 1) (const_int 1)
17973 (const_int 1) (const_int 1)
17974 (const_int 1) (const_int 1)
17975 (const_int 1) (const_int 1)]))
17976 (const_int 1))))]
17977 "TARGET_AVX512BW"
17978 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17979 [(set_attr "type" "sseimul")
17980 (set_attr "prefix" "evex")
17981 (set_attr "mode" "XI")])
17982
;; pmaddubsw (128-bit): unsigned-byte x signed-byte multiply, adjacent
;; products added pairwise into words with signed saturation.
17983 (define_insn "ssse3_pmaddubsw128"
17984 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
17985 (ss_plus:V8HI
17986 (mult:V8HI
17987 (zero_extend:V8HI
17988 (vec_select:V8QI
17989 (match_operand:V16QI 1 "register_operand" "0,Yw")
17990 (parallel [(const_int 0) (const_int 2)
17991 (const_int 4) (const_int 6)
17992 (const_int 8) (const_int 10)
17993 (const_int 12) (const_int 14)])))
17994 (sign_extend:V8HI
17995 (vec_select:V8QI
17996 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")
17997 (parallel [(const_int 0) (const_int 2)
17998 (const_int 4) (const_int 6)
17999 (const_int 8) (const_int 10)
18000 (const_int 12) (const_int 14)]))))
18001 (mult:V8HI
18002 (zero_extend:V8HI
18003 (vec_select:V8QI (match_dup 1)
18004 (parallel [(const_int 1) (const_int 3)
18005 (const_int 5) (const_int 7)
18006 (const_int 9) (const_int 11)
18007 (const_int 13) (const_int 15)])))
18008 (sign_extend:V8HI
18009 (vec_select:V8QI (match_dup 2)
18010 (parallel [(const_int 1) (const_int 3)
18011 (const_int 5) (const_int 7)
18012 (const_int 9) (const_int 11)
18013 (const_int 13) (const_int 15)]))))))]
18014 "TARGET_SSSE3"
18015 "@
18016 pmaddubsw\t{%2, %0|%0, %2}
18017 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
18018 [(set_attr "isa" "noavx,avx")
18019 (set_attr "type" "sseiadd")
18020 (set_attr "atom_unit" "simul")
18021 (set_attr "prefix_data16" "1,*")
18022 (set_attr "prefix_extra" "1")
18023 (set_attr "prefix" "orig,vex")
18024 (set_attr "mode" "TI")])
18025
;; MMX-sized (V4HI result) pmaddubsw: native MMX alternative plus two
;; SSE-register alternatives that use the same mnemonic.
18026 (define_insn "ssse3_pmaddubsw"
18027 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
18028 (ss_plus:V4HI
18029 (mult:V4HI
18030 (zero_extend:V4HI
18031 (vec_select:V4QI
18032 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
18033 (parallel [(const_int 0) (const_int 2)
18034 (const_int 4) (const_int 6)])))
18035 (sign_extend:V4HI
18036 (vec_select:V4QI
18037 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
18038 (parallel [(const_int 0) (const_int 2)
18039 (const_int 4) (const_int 6)]))))
18040 (mult:V4HI
18041 (zero_extend:V4HI
18042 (vec_select:V4QI (match_dup 1)
18043 (parallel [(const_int 1) (const_int 3)
18044 (const_int 5) (const_int 7)])))
18045 (sign_extend:V4HI
18046 (vec_select:V4QI (match_dup 2)
18047 (parallel [(const_int 1) (const_int 3)
18048 (const_int 5) (const_int 7)]))))))]
18049 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18050 "@
18051 pmaddubsw\t{%2, %0|%0, %2}
18052 pmaddubsw\t{%2, %0|%0, %2}
18053 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
18054 [(set_attr "isa" "*,noavx,avx")
18055 (set_attr "mmx_isa" "native,*,*")
18056 (set_attr "type" "sseiadd")
18057 (set_attr "atom_unit" "simul")
18058 (set_attr "prefix_extra" "1")
18059 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18060 (set_attr "mode" "DI,TI,TI")])
18061
;; pmulhrsw computes a rounded high-half product of signed 16-bit lanes:
;;   dst = ((src1 * src2 >> 14) + 1) >> 1
;; The "+ 1" rounding term is supplied as a CONST1_RTX vector via match_dup
;; (operands[5] in the masked expander, operands[3] in the others).
18062 (define_mode_iterator PMULHRSW
18063 [V8HI (V16HI "TARGET_AVX2")])
18064
;; AVX512BW/VL write-masked variant: the rounded product is merged with
;; operand 3 under mask operand 4 (vec_merge).
18065 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
18066 [(set (match_operand:PMULHRSW 0 "register_operand")
18067 (vec_merge:PMULHRSW
18068 (truncate:PMULHRSW
18069 (lshiftrt:<ssedoublemode>
18070 (plus:<ssedoublemode>
18071 (lshiftrt:<ssedoublemode>
18072 (mult:<ssedoublemode>
18073 (sign_extend:<ssedoublemode>
18074 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
18075 (sign_extend:<ssedoublemode>
18076 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
18077 (const_int 14))
18078 (match_dup 5))
18079 (const_int 1)))
18080 (match_operand:PMULHRSW 3 "register_operand")
18081 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
18082 "TARGET_AVX512BW && TARGET_AVX512VL"
18083 {
18084 operands[5] = CONST1_RTX(<MODE>mode);
18085 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
18086 })
18087
;; Unmasked builtin expander for pmulhrsw.
18088 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
18089 [(set (match_operand:PMULHRSW 0 "register_operand")
18090 (truncate:PMULHRSW
18091 (lshiftrt:<ssedoublemode>
18092 (plus:<ssedoublemode>
18093 (lshiftrt:<ssedoublemode>
18094 (mult:<ssedoublemode>
18095 (sign_extend:<ssedoublemode>
18096 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
18097 (sign_extend:<ssedoublemode>
18098 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
18099 (const_int 14))
18100 (match_dup 3))
18101 (const_int 1))))]
18102 "TARGET_SSSE3"
18103 {
18104 operands[3] = CONST1_RTX(<MODE>mode);
18105 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
18106 })
18107
;; Standard-name expander (smulhrs<mode>3) mapping the middle-end's
;; rounded-high-half-multiply optab onto the same pattern.
18108 (define_expand "smulhrs<mode>3"
18109 [(set (match_operand:VI2_AVX2 0 "register_operand")
18110 (truncate:VI2_AVX2
18111 (lshiftrt:<ssedoublemode>
18112 (plus:<ssedoublemode>
18113 (lshiftrt:<ssedoublemode>
18114 (mult:<ssedoublemode>
18115 (sign_extend:<ssedoublemode>
18116 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
18117 (sign_extend:<ssedoublemode>
18118 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
18119 (const_int 14))
18120 (match_dup 3))
18121 (const_int 1))))]
18122 "TARGET_SSSE3"
18123 {
18124 operands[3] = CONST1_RTX(<MODE>mode);
18125 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
18126 })
18127
;; Matching insn for the pmulhrsw expanders above; const1_operand matches
;; the rounding vector the expanders generated.  Supports AVX512 masking
;; through the <mask_name>/<mask_operand4> substitutions.
18128 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
18129 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
18130 (truncate:VI2_AVX2
18131 (lshiftrt:<ssedoublemode>
18132 (plus:<ssedoublemode>
18133 (lshiftrt:<ssedoublemode>
18134 (mult:<ssedoublemode>
18135 (sign_extend:<ssedoublemode>
18136 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
18137 (sign_extend:<ssedoublemode>
18138 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
18139 (const_int 14))
18140 (match_operand:VI2_AVX2 3 "const1_operand"))
18141 (const_int 1))))]
18142 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
18143 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18144 "@
18145 pmulhrsw\t{%2, %0|%0, %2}
18146 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
18147 [(set_attr "isa" "noavx,avx")
18148 (set_attr "type" "sseimul")
18149 (set_attr "prefix_data16" "1,*")
18150 (set_attr "prefix_extra" "1")
18151 (set_attr "prefix" "orig,maybe_evex")
18152 (set_attr "mode" "<sseinsnmode>")])
18153
;; 64-bit (V4HI) rounded high-half multiply.  Same formula as the wider
;; patterns: ((a * b >> 14) + 1) >> 1 on sign-extended 16-bit lanes.
;; Standard-name expander, SSE-registers-only (TARGET_MMX_WITH_SSE).
18154 (define_expand "smulhrsv4hi3"
18155 [(set (match_operand:V4HI 0 "register_operand")
18156 (truncate:V4HI
18157 (lshiftrt:V4SI
18158 (plus:V4SI
18159 (lshiftrt:V4SI
18160 (mult:V4SI
18161 (sign_extend:V4SI
18162 (match_operand:V4HI 1 "register_operand"))
18163 (sign_extend:V4SI
18164 (match_operand:V4HI 2 "register_operand")))
18165 (const_int 14))
18166 (match_dup 3))
18167 (const_int 1))))]
18168 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
18169 "operands[3] = CONST1_RTX(V4HImode);")
18170
;; Builtin expander; also allows native MMX operands.
18171 (define_expand "ssse3_pmulhrswv4hi3"
18172 [(set (match_operand:V4HI 0 "register_operand")
18173 (truncate:V4HI
18174 (lshiftrt:V4SI
18175 (plus:V4SI
18176 (lshiftrt:V4SI
18177 (mult:V4SI
18178 (sign_extend:V4SI
18179 (match_operand:V4HI 1 "register_mmxmem_operand"))
18180 (sign_extend:V4SI
18181 (match_operand:V4HI 2 "register_mmxmem_operand")))
18182 (const_int 14))
18183 (match_dup 3))
18184 (const_int 1))))]
18185 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18186 {
18187 operands[3] = CONST1_RTX(V4HImode);
18188 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
18189 })
18190
;; Matching insn: alternative 0 is native MMX, 1/2 execute in SSE regs.
18191 (define_insn "*ssse3_pmulhrswv4hi3"
18192 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
18193 (truncate:V4HI
18194 (lshiftrt:V4SI
18195 (plus:V4SI
18196 (lshiftrt:V4SI
18197 (mult:V4SI
18198 (sign_extend:V4SI
18199 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
18200 (sign_extend:V4SI
18201 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
18202 (const_int 14))
18203 (match_operand:V4HI 3 "const1_operand"))
18204 (const_int 1))))]
18205 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
18206 && TARGET_SSSE3
18207 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18208 "@
18209 pmulhrsw\t{%2, %0|%0, %2}
18210 pmulhrsw\t{%2, %0|%0, %2}
18211 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
18212 [(set_attr "isa" "*,noavx,avx")
18213 (set_attr "mmx_isa" "native,*,*")
18214 (set_attr "type" "sseimul")
18215 (set_attr "prefix_extra" "1")
18216 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18217 (set_attr "mode" "DI,TI,TI")])
18218
;; 32-bit (V2HI) rounded high-half multiply, executed in SSE registers.
;; Same ((a * b >> 14) + 1) >> 1 formula as the wider variants.
18219 (define_expand "smulhrsv2hi3"
18220 [(set (match_operand:V2HI 0 "register_operand")
18221 (truncate:V2HI
18222 (lshiftrt:V2SI
18223 (plus:V2SI
18224 (lshiftrt:V2SI
18225 (mult:V2SI
18226 (sign_extend:V2SI
18227 (match_operand:V2HI 1 "register_operand"))
18228 (sign_extend:V2SI
18229 (match_operand:V2HI 2 "register_operand")))
18230 (const_int 14))
18231 (match_dup 3))
18232 (const_int 1))))]
18233 "TARGET_SSSE3"
18234 "operands[3] = CONST1_RTX(V2HImode);")
18235
;; Matching insn for the V2HI expander above.
18236 (define_insn "*smulhrsv2hi3"
18237 [(set (match_operand:V2HI 0 "register_operand" "=x,Yv")
18238 (truncate:V2HI
18239 (lshiftrt:V2SI
18240 (plus:V2SI
18241 (lshiftrt:V2SI
18242 (mult:V2SI
18243 (sign_extend:V2SI
18244 (match_operand:V2HI 1 "register_operand" "%0,Yv"))
18245 (sign_extend:V2SI
18246 (match_operand:V2HI 2 "register_operand" "x,Yv")))
18247 (const_int 14))
18248 (match_operand:V2HI 3 "const1_operand"))
18249 (const_int 1))))]
18250 "TARGET_SSSE3
18251 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18252 "@
18253 pmulhrsw\t{%2, %0|%0, %2}
18254 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
18255 [(set_attr "isa" "noavx,avx")
18256 (set_attr "type" "sseimul")
18257 (set_attr "prefix_extra" "1")
18258 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18259 (set_attr "mode" "TI")])
18260
;; pshufb byte shuffle: operand 2 supplies per-lane control bytes selecting
;; bytes from operand 1 (opaque UNSPEC_PSHUFB; exact semantics are the
;; hardware instruction's).  Supports AVX512 masking via <mask_name>.
18261 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
18262 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
18263 (unspec:VI1_AVX512
18264 [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>")
18265 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")]
18266 UNSPEC_PSHUFB))]
18267 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
18268 "@
18269 pshufb\t{%2, %0|%0, %2}
18270 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18271 [(set_attr "isa" "noavx,avx")
18272 (set_attr "type" "sselog1")
18273 (set_attr "prefix_data16" "1,*")
18274 (set_attr "prefix_extra" "1")
18275 (set_attr "prefix" "orig,maybe_evex")
18276 (set_attr "btver2_decode" "vector")
18277 (set_attr "mode" "<sseinsnmode>")])
18278
;; 64-bit pshufb.  The expander attaches a constant V4SI mask of
;; 0xf7f7f7f7 repeated; when the insn is split for SSE registers below,
;; this mask is ANDed into the control vector to clear bit 3 of every
;; control byte before using the 128-bit pshufb.
18279 (define_expand "ssse3_pshufbv8qi3"
18280 [(parallel
18281 [(set (match_operand:V8QI 0 "register_operand")
18282 (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
18283 (match_operand:V8QI 2 "register_mmxmem_operand")
18284 (match_dup 3)] UNSPEC_PSHUFB))
18285 (clobber (match_scratch:V4SI 4))])]
18286 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18287 {
18288 operands[3] = ix86_build_const_vector (V4SImode, true,
18289 gen_int_mode (0xf7f7f7f7, SImode));
18290 })
18291
;; Alternative 0 emits native MMX pshufb directly; alternatives 1/2 are
;; split after reload into mask-AND + 128-bit pshufb on the SSE side.
18292 (define_insn_and_split "*ssse3_pshufbv8qi3"
18293 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
18294 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
18295 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
18296 (match_operand:V4SI 4 "reg_or_const_vector_operand"
18297 "i,3,3")]
18298 UNSPEC_PSHUFB))
18299 (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
18300 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18301 "@
18302 pshufb\t{%2, %0|%0, %2}
18303 #
18304 #"
18305 "TARGET_SSSE3 && reload_completed
18306 && SSE_REGNO_P (REGNO (operands[0]))"
18307 [(set (match_dup 3)
18308 (and:V4SI (match_dup 3) (match_dup 2)))
18309 (set (match_dup 0)
18310 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
18311 {
18312 /* Emulate MMX version of pshufb with SSE version by masking out the
18313 bit 3 of the shuffle control byte.  */
18314 operands[0] = lowpart_subreg (V16QImode, operands[0],
18315 GET_MODE (operands[0]));
18316 operands[1] = lowpart_subreg (V16QImode, operands[1],
18317 GET_MODE (operands[1]));
18318 operands[2] = lowpart_subreg (V4SImode, operands[2],
18319 GET_MODE (operands[2]));
18320 operands[4] = lowpart_subreg (V16QImode, operands[3],
18321 GET_MODE (operands[3]));
18322 }
18323 [(set_attr "mmx_isa" "native,sse_noavx,avx")
18324 (set_attr "prefix_extra" "1")
18325 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18326 (set_attr "mode" "DI,TI,TI")])
18327
;; psignb/w/d: SSE/AVX form over the VI124_AVX2 modes (opaque
;; UNSPEC_PSIGN; semantics are the hardware instruction's).
18328 (define_insn "<ssse3_avx2>_psign<mode>3"
18329 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
18330 (unspec:VI124_AVX2
18331 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
18332 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
18333 UNSPEC_PSIGN))]
18334 "TARGET_SSSE3"
18335 "@
18336 psign<ssemodesuffix>\t{%2, %0|%0, %2}
18337 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18338 [(set_attr "isa" "noavx,avx")
18339 (set_attr "type" "sselog1")
18340 (set_attr "prefix_data16" "1,*")
18341 (set_attr "prefix_extra" "1")
18342 (set_attr "prefix" "orig,vex")
18343 (set_attr "mode" "<sseinsnmode>")])
18344
;; MMX-mode psign; alternative 0 is native MMX, 1/2 run in SSE registers.
18345 (define_insn "ssse3_psign<mode>3"
18346 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
18347 (unspec:MMXMODEI
18348 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
18349 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
18350 UNSPEC_PSIGN))]
18351 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18352 "@
18353 psign<mmxvecsize>\t{%2, %0|%0, %2}
18354 psign<mmxvecsize>\t{%2, %0|%0, %2}
18355 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
18356 [(set_attr "isa" "*,noavx,avx")
18357 (set_attr "mmx_isa" "native,*,*")
18358 (set_attr "type" "sselog1")
18359 (set_attr "prefix_extra" "1")
18360 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18361 (set_attr "mode" "DI,TI,TI")])
18362
;; palignr with AVX512BW write-masking.  Operand 3 is the shift amount in
;; BITS (const_0_to_255_mul_8); the output template divides it by 8 to get
;; the byte immediate the instruction encodes.
18363 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
18364 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
18365 (vec_merge:VI1_AVX512
18366 (unspec:VI1_AVX512
18367 [(match_operand:VI1_AVX512 1 "register_operand" "v")
18368 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
18369 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
18370 UNSPEC_PALIGNR)
18371 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
18372 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
18373 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
18374 {
18375 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
18376 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
18377 }
18378 [(set_attr "type" "sseishft")
18379 (set_attr "atom_unit" "sishuf")
18380 (set_attr "prefix_extra" "1")
18381 (set_attr "length_immediate" "1")
18382 (set_attr "prefix" "evex")
18383 (set_attr "mode" "<sseinsnmode>")])
18384
;; Unmasked palignr; operand 3 again carries the bit count, converted to
;; bytes before emitting the instruction.
18385 (define_insn "<ssse3_avx2>_palignr<mode>"
18386 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,<v_Yw>")
18387 (unspec:SSESCALARMODE
18388 [(match_operand:SSESCALARMODE 1 "register_operand" "0,<v_Yw>")
18389 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,<v_Yw>m")
18390 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
18391 UNSPEC_PALIGNR))]
18392 "TARGET_SSSE3"
18393 {
18394 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
18395
18396 switch (which_alternative)
18397 {
18398 case 0:
18399 return "palignr\t{%3, %2, %0|%0, %2, %3}";
18400 case 1:
18401 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18402 default:
18403 gcc_unreachable ();
18404 }
18405 }
18406 [(set_attr "isa" "noavx,avx")
18407 (set_attr "type" "sseishft")
18408 (set_attr "atom_unit" "sishuf")
18409 (set_attr "prefix_data16" "1,*")
18410 (set_attr "prefix_extra" "1")
18411 (set_attr "length_immediate" "1")
18412 (set_attr "prefix" "orig,vex")
18413 (set_attr "mode" "<sseinsnmode>")])
18414
;; DImode (MMX) palignr.  Alternative 0 emits native MMX palignr;
;; the SSE-register alternatives are split after reload: the two 64-bit
;; inputs are concatenated into a V2DI and the result is produced by a
;; V1TI logical right shift (psrldq) by operand 3 (already in bits).
18415 (define_insn_and_split "ssse3_palignrdi"
18416 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
18417 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
18418 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
18419 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
18420 UNSPEC_PALIGNR))]
18421 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18422 {
18423 switch (which_alternative)
18424 {
18425 case 0:
18426 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
18427 return "palignr\t{%3, %2, %0|%0, %2, %3}";
18428 case 1:
18429 case 2:
18430 return "#";
18431 default:
18432 gcc_unreachable ();
18433 }
18434 }
18435 "TARGET_SSSE3 && reload_completed
18436 && SSE_REGNO_P (REGNO (operands[0]))"
18437 [(set (match_dup 0)
18438 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
18439 {
18440 /* Emulate MMX palignrdi with SSE psrldq.  */
18441 rtx op0 = lowpart_subreg (V2DImode, operands[0],
18442 GET_MODE (operands[0]));
18443 if (TARGET_AVX)
18444 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
18445 else
18446 {
18447 /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
18448 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
18449 /* Swap bits 0:63 with bits 64:127.  */
18450 rtx mask = gen_rtx_PARALLEL (VOIDmode,
18451 gen_rtvec (4, GEN_INT (2),
18452 GEN_INT (3),
18453 GEN_INT (0),
18454 GEN_INT (1)));
18455 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
18456 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
18457 emit_insn (gen_rtx_SET (op1, op2));
18458 }
18459 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
18460 }
18461 [(set_attr "mmx_isa" "native,sse_noavx,avx")
18462 (set_attr "type" "sseishft")
18463 (set_attr "atom_unit" "sishuf")
18464 (set_attr "prefix_extra" "1")
18465 (set_attr "length_immediate" "1")
18466 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18467 (set_attr "mode" "DI,TI,TI")])
18468
18469 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
18470 ;; modes for abs instruction on pre AVX-512 targets.
18471 (define_mode_iterator VI1248_AVX512VL_AVX512BW
18472 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
18473 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
18474 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
18475 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
18476
;; Unmasked vpabs; %v picks the legacy or VEX/EVEX mnemonic form.
18477 (define_insn "*abs<mode>2"
18478 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>")
18479 (abs:VI1248_AVX512VL_AVX512BW
18480 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))]
18481 "TARGET_SSSE3"
18482 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
18483 [(set_attr "type" "sselog1")
18484 (set_attr "prefix_data16" "1")
18485 (set_attr "prefix_extra" "1")
18486 (set_attr "prefix" "maybe_vex")
18487 (set_attr "mode" "<sseinsnmode>")])
18488
;; Write-masked vpabs for dword/qword elements (AVX512F).
18489 (define_insn "abs<mode>2_mask"
18490 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18491 (vec_merge:VI48_AVX512VL
18492 (abs:VI48_AVX512VL
18493 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
18494 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
18495 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
18496 "TARGET_AVX512F"
18497 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18498 [(set_attr "type" "sselog1")
18499 (set_attr "prefix" "evex")
18500 (set_attr "mode" "<sseinsnmode>")])
18501
;; Write-masked vpabs for byte/word elements (AVX512BW).
18502 (define_insn "abs<mode>2_mask"
18503 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
18504 (vec_merge:VI12_AVX512VL
18505 (abs:VI12_AVX512VL
18506 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
18507 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
18508 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
18509 "TARGET_AVX512BW"
18510 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18511 [(set_attr "type" "sselog1")
18512 (set_attr "prefix" "evex")
18513 (set_attr "mode" "<sseinsnmode>")])
18514
;; Standard-name abs expander.  Falls back to ix86_expand_sse2_abs when
;; no single pabs instruction exists: pre-SSSE3, or 64-bit elements
;; without AVX512VL (no V2DI/V4DI pabsq).
18515 (define_expand "abs<mode>2"
18516 [(set (match_operand:VI_AVX2 0 "register_operand")
18517 (abs:VI_AVX2
18518 (match_operand:VI_AVX2 1 "vector_operand")))]
18519 "TARGET_SSE2"
18520 {
18521 if (!TARGET_SSSE3
18522 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
18523 && !TARGET_AVX512VL))
18524 {
18525 ix86_expand_sse2_abs (operands[0], operands[1]);
18526 DONE;
18527 }
18528 })
18529
18530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18531 ;;
18532 ;; AMD SSE4A instructions
18533 ;;
18534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18535
;; SSE4A non-temporal scalar FP store (movntss/movntsd).
18536 (define_insn "sse4a_movnt<mode>"
18537 [(set (match_operand:MODEF 0 "memory_operand" "=m")
18538 (unspec:MODEF
18539 [(match_operand:MODEF 1 "register_operand" "x")]
18540 UNSPEC_MOVNT))]
18541 "TARGET_SSE4A"
18542 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
18543 [(set_attr "type" "ssemov")
18544 (set_attr "mode" "<MODE>")])
18545
;; Non-temporal store of element 0 of a 128-bit FP vector.
18546 (define_insn "sse4a_vmmovnt<mode>"
18547 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
18548 (unspec:<ssescalarmode>
18549 [(vec_select:<ssescalarmode>
18550 (match_operand:VF_128 1 "register_operand" "x")
18551 (parallel [(const_int 0)]))]
18552 UNSPEC_MOVNT))]
18553 "TARGET_SSE4A"
18554 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
18555 [(set_attr "type" "ssemov")
18556 (set_attr "mode" "<ssescalarmode>")])
18557
;; extrq with immediate length (operand 2) and index (operand 3).
18558 (define_insn "sse4a_extrqi"
18559 [(set (match_operand:V2DI 0 "register_operand" "=x")
18560 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18561 (match_operand 2 "const_0_to_255_operand")
18562 (match_operand 3 "const_0_to_255_operand")]
18563 UNSPEC_EXTRQI))]
18564 "TARGET_SSE4A"
18565 "extrq\t{%3, %2, %0|%0, %2, %3}"
18566 [(set_attr "type" "sse")
18567 (set_attr "prefix_data16" "1")
18568 (set_attr "length_immediate" "2")
18569 (set_attr "mode" "TI")])
18570
;; extrq with length/index taken from register operand 2.
18571 (define_insn "sse4a_extrq"
18572 [(set (match_operand:V2DI 0 "register_operand" "=x")
18573 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18574 (match_operand:V16QI 2 "register_operand" "x")]
18575 UNSPEC_EXTRQ))]
18576 "TARGET_SSE4A"
18577 "extrq\t{%2, %0|%0, %2}"
18578 [(set_attr "type" "sse")
18579 (set_attr "prefix_data16" "1")
18580 (set_attr "mode" "TI")])
18581
;; insertq with immediate length (operand 3) and index (operand 4).
18582 (define_insn "sse4a_insertqi"
18583 [(set (match_operand:V2DI 0 "register_operand" "=x")
18584 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18585 (match_operand:V2DI 2 "register_operand" "x")
18586 (match_operand 3 "const_0_to_255_operand")
18587 (match_operand 4 "const_0_to_255_operand")]
18588 UNSPEC_INSERTQI))]
18589 "TARGET_SSE4A"
18590 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
18591 [(set_attr "type" "sseins")
18592 (set_attr "prefix_data16" "0")
18593 (set_attr "prefix_rep" "1")
18594 (set_attr "length_immediate" "2")
18595 (set_attr "mode" "TI")])
18596
;; insertq with length/index encoded in register operand 2.
18597 (define_insn "sse4a_insertq"
18598 [(set (match_operand:V2DI 0 "register_operand" "=x")
18599 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18600 (match_operand:V2DI 2 "register_operand" "x")]
18601 UNSPEC_INSERTQ))]
18602 "TARGET_SSE4A"
18603 "insertq\t{%2, %0|%0, %2}"
18604 [(set_attr "type" "sseins")
18605 (set_attr "prefix_data16" "0")
18606 (set_attr "prefix_rep" "1")
18607 (set_attr "mode" "TI")])
18608
18609 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18610 ;;
18611 ;; Intel SSE4.1 instructions
18612 ;;
18613 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18614
18615 ;; Mapping of immediate bits for blend instructions
18616 (define_mode_attr blendbits
18617 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
18618
;; blendps/blendpd: immediate-controlled vec_merge of operands 2 and 1.
;; The immediate range per mode comes from <blendbits> above.
18619 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
18620 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18621 (vec_merge:VF_128_256
18622 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18623 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
18624 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
18625 "TARGET_SSE4_1"
18626 "@
18627 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18628 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18629 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18630 [(set_attr "isa" "noavx,noavx,avx")
18631 (set_attr "type" "ssemov")
18632 (set_attr "length_immediate" "1")
18633 (set_attr "prefix_data16" "1,1,*")
18634 (set_attr "prefix_extra" "1")
18635 (set_attr "prefix" "orig,orig,vex")
18636 (set_attr "mode" "<MODE>")])
18637
;; blendvps/blendvpd: variable blend under the mask in operand 3 (opaque
;; UNSPEC_BLENDV).  The non-AVX forms require the mask in xmm0 (Yz).
18638 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
18639 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18640 (unspec:VF_128_256
18641 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
18642 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18643 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
18644 UNSPEC_BLENDV))]
18645 "TARGET_SSE4_1"
18646 "@
18647 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18648 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18649 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18650 [(set_attr "isa" "noavx,noavx,avx")
18651 (set_attr "type" "ssemov")
18652 (set_attr "length_immediate" "1")
18653 (set_attr "prefix_data16" "1,1,*")
18654 (set_attr "prefix_extra" "1")
18655 (set_attr "prefix" "orig,orig,vex")
18656 (set_attr "btver2_decode" "vector,vector,vector")
18657 (set_attr "mode" "<MODE>")])
18658
18659 ;; Also define scalar versions.  These are used for conditional move.
18660 ;; Using subregs into vector modes causes register allocation lossage.
18661 ;; These patterns do not allow memory operands because the native
18662 ;; instructions read the full 128-bits.
18663
;; Scalar (SF/DF) blendv used for FP conditional moves.  The mode attr
;; below chooses between the natural packed mode and V4SF: blendvps is
;; preferred when optimizing for size or when packed-single is optimal.
18664 (define_insn "sse4_1_blendv<ssemodesuffix>"
18665 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
18666 (unspec:MODEF
18667 [(match_operand:MODEF 1 "register_operand" "0,0,x")
18668 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
18669 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
18670 UNSPEC_BLENDV))]
18671 "TARGET_SSE4_1"
18672 {
18673 if (get_attr_mode (insn) == MODE_V4SF)
18674 return (which_alternative == 2
18675 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18676 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
18677 else
18678 return (which_alternative == 2
18679 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18680 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
18681 }
18682 [(set_attr "isa" "noavx,noavx,avx")
18683 (set_attr "type" "ssemov")
18684 (set_attr "length_immediate" "1")
18685 (set_attr "prefix_data16" "1,1,*")
18686 (set_attr "prefix_extra" "1")
18687 (set_attr "prefix" "orig,orig,vex")
18688 (set_attr "btver2_decode" "vector,vector,vector")
18689 (set (attr "mode")
18690 (cond [(match_test "TARGET_AVX")
18691 (const_string "<ssevecmode>")
18692 (match_test "optimize_function_for_size_p (cfun)")
18693 (const_string "V4SF")
18694 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
18695 (const_string "V4SF")
18696 ]
18697 (const_string "<ssevecmode>")))])
18698
;; Combine-style pattern: blendv whose mask is "x < 0".  After reload it
;; splits to a plain UNSPEC_BLENDV using the comparison input (operand 3)
;; directly, dropping the redundant compare against zero.
18699 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
18700 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18701 (unspec:VF_128_256
18702 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
18703 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18704 (lt:VF_128_256
18705 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
18706 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
18707 UNSPEC_BLENDV))]
18708 "TARGET_SSE4_1"
18709 "#"
18710 "&& reload_completed"
18711 [(set (match_dup 0)
18712 (unspec:VF_128_256
18713 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
18714 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
18715 [(set_attr "isa" "noavx,noavx,avx")
18716 (set_attr "type" "ssemov")
18717 (set_attr "length_immediate" "1")
18718 (set_attr "prefix_data16" "1,1,*")
18719 (set_attr "prefix_extra" "1")
18720 (set_attr "prefix" "orig,orig,vex")
18721 (set_attr "btver2_decode" "vector,vector,vector")
18722 (set_attr "mode" "<MODE>")])
18723
;; FP suffix/mode used when an integer-mode blendv is emitted as the
;; corresponding blendps/blendpd form.
18724 (define_mode_attr ssefltmodesuffix
18725 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
18726
18727 (define_mode_attr ssefltvecmode
18728 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
18729
;; Same <0 folding for integer data viewed through a subreg; all operands
;; are re-cast to the matching FP vector mode for the split blendv.
18730 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
18731 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
18732 (unspec:<ssebytemode>
18733 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
18734 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
18735 (subreg:<ssebytemode>
18736 (lt:VI48_AVX
18737 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
18738 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
18739 UNSPEC_BLENDV))]
18740 "TARGET_SSE4_1"
18741 "#"
18742 "&& reload_completed"
18743 [(set (match_dup 0)
18744 (unspec:<ssefltvecmode>
18745 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
18746 {
18747 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
18748 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
18749 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
18750 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
18751 }
18752 [(set_attr "isa" "noavx,noavx,avx")
18753 (set_attr "type" "ssemov")
18754 (set_attr "length_immediate" "1")
18755 (set_attr "prefix_data16" "1,1,*")
18756 (set_attr "prefix_extra" "1")
18757 (set_attr "prefix" "orig,orig,vex")
18758 (set_attr "btver2_decode" "vector,vector,vector")
18759 (set_attr "mode" "<ssefltvecmode>")])
18760
;; dpps/dppd dot product; operand 3 is the input/output lane selection
;; immediate (opaque UNSPEC_DP).
18761 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
18762 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18763 (unspec:VF_128_256
18764 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
18765 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18766 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
18767 UNSPEC_DP))]
18768 "TARGET_SSE4_1"
18769 "@
18770 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18771 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18772 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18773 [(set_attr "isa" "noavx,noavx,avx")
18774 (set_attr "type" "ssemul")
18775 (set_attr "length_immediate" "1")
18776 (set_attr "prefix_data16" "1,1,*")
18777 (set_attr "prefix_extra" "1")
18778 (set_attr "prefix" "orig,orig,vex")
18779 (set_attr "btver2_decode" "vector,vector,vector")
18780 (set_attr "znver1_decode" "vector,vector,vector")
18781 (set_attr "mode" "<MODE>")])
18782
18783 ;; Mode attribute used by `vmovntdqa' pattern
18784 (define_mode_attr vi8_sse4_1_avx2_avx512
18785 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
18786
;; Non-temporal aligned vector load (movntdqa).
18787 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
18788 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
18789 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
18790 UNSPEC_MOVNTDQA))]
18791 "TARGET_SSE4_1"
18792 "%vmovntdqa\t{%1, %0|%0, %1}"
18793 [(set_attr "isa" "noavx,noavx,avx")
18794 (set_attr "type" "ssemov")
18795 (set_attr "prefix_extra" "1,1,*")
18796 (set_attr "prefix" "orig,orig,maybe_evex")
18797 (set_attr "mode" "<sseinsnmode>")])
18798
;; mpsadbw: multiple sums of absolute differences; operand 3 is the
;; block-selection immediate (opaque UNSPEC_MPSADBW).
18799 (define_insn "<sse4_1_avx2>_mpsadbw"
18800 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
18801 (unspec:VI1_AVX2
18802 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
18803 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
18804 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
18805 UNSPEC_MPSADBW))]
18806 "TARGET_SSE4_1"
18807 "@
18808 mpsadbw\t{%3, %2, %0|%0, %2, %3}
18809 mpsadbw\t{%3, %2, %0|%0, %2, %3}
18810 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18811 [(set_attr "isa" "noavx,noavx,avx")
18812 (set_attr "type" "sselog1")
18813 (set_attr "length_immediate" "1")
18814 (set_attr "prefix_extra" "1")
18815 (set_attr "prefix" "orig,orig,vex")
18816 (set_attr "btver2_decode" "vector,vector,vector")
18817 (set_attr "znver1_decode" "vector,vector,vector")
18818 (set_attr "mode" "<sseinsnmode>")])
18819
;; packusdw: concatenate the unsigned-saturating truncations of the two
;; double-width inputs.  Supports AVX512 masking via <mask_name>.
18820 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
18821 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
18822 (vec_concat:VI2_AVX2
18823 (us_truncate:<ssehalfvecmode>
18824 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
18825 (us_truncate:<ssehalfvecmode>
18826 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
18827 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
18828 "@
18829 packusdw\t{%2, %0|%0, %2}
18830 packusdw\t{%2, %0|%0, %2}
18831 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18832 [(set_attr "isa" "noavx,noavx,avx")
18833 (set_attr "type" "sselog")
18834 (set_attr "prefix_extra" "1")
18835 (set_attr "prefix" "orig,orig,<mask_prefix>")
18836 (set_attr "mode" "<sseinsnmode>")])
18837
;; pblendvb byte blend under the mask in operand 3; the non-AVX forms
;; require the mask in xmm0 (Yz).
18838 (define_insn "<sse4_1_avx2>_pblendvb"
18839 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
18840 (unspec:VI1_AVX2
18841 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
18842 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
18843 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
18844 UNSPEC_BLENDV))]
18845 "TARGET_SSE4_1"
18846 "@
18847 pblendvb\t{%3, %2, %0|%0, %2, %3}
18848 pblendvb\t{%3, %2, %0|%0, %2, %3}
18849 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18850 [(set_attr "isa" "noavx,noavx,avx")
18851 (set_attr "type" "ssemov")
18852 (set_attr "prefix_extra" "1")
18853 (set_attr "length_immediate" "*,*,1")
18854 (set_attr "prefix" "orig,orig,vex")
18855 (set_attr "btver2_decode" "vector,vector,vector")
18856 (set_attr "mode" "<sseinsnmode>")])
18857
;; Remove a bitwise NOT on the blend mask by swapping the two data
;; operands: blendv (a, b, ~m) == blendv (b, a, m).  Saves the
;; instruction that would materialize the inverted mask.
(define_split
  [(set (match_operand:VI1_AVX2 0 "register_operand")
	(unspec:VI1_AVX2
	  [(match_operand:VI1_AVX2 1 "vector_operand")
	   (match_operand:VI1_AVX2 2 "register_operand")
	   (not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))]
	  UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  [(set (match_dup 0)
	(unspec:VI1_AVX2
	  [(match_dup 2) (match_dup 1) (match_dup 3)]
	  UNSPEC_BLENDV))])

;; As above, but the NOT happens in some other same-sized integer
;; vector mode and reaches the blend through a lowpart subreg.  The
;; split swaps the data operands and re-expresses the un-negated mask
;; in <MODE>mode via gen_lowpart.
(define_split
  [(set (match_operand:VI1_AVX2 0 "register_operand")
	(unspec:VI1_AVX2
	  [(match_operand:VI1_AVX2 1 "vector_operand")
	   (match_operand:VI1_AVX2 2 "register_operand")
	   (subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)]
	  UNSPEC_BLENDV))]
  "TARGET_SSE4_1
   && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
   && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>"
  [(set (match_dup 0)
	(unspec:VI1_AVX2
	  [(match_dup 2) (match_dup 1) (match_dup 4)]
	  UNSPEC_BLENDV))]
  "operands[4] = gen_lowpart (<MODE>mode, operands[3]);")
18886
;; Blend whose mask is a signed (lt x 0) comparison.  That comparison
;; tests exactly the per-byte sign bits that pblendvb itself examines,
;; so the split drops the comparison and feeds x directly as the mask.
(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
	(unspec:VI1_AVX2
	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
	   (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
			(match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
	  UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:VI1_AVX2
	  [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
  ""
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,*,1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])

;; Variant where the compared mask is a bitwise NOT in another integer
;; vector mode seen through a lowpart subreg.  Note the data operands
;; are numbered swapped in the template (2 before 1); emitting them in
;; 1,2 order in the split swaps them back, which compensates for
;; dropping the NOT.  Must split before reload.
(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
  [(set (match_operand:VI1_AVX2 0 "register_operand")
	(unspec:VI1_AVX2
	  [(match_operand:VI1_AVX2 2 "vector_operand")
	   (match_operand:VI1_AVX2 1 "register_operand")
	   (lt:VI1_AVX2
	     (subreg:VI1_AVX2
	       (not (match_operand 3 "register_operand")) 0)
	     (match_operand:VI1_AVX2 4 "const0_operand"))]
	  UNSPEC_BLENDV))]
  "TARGET_SSE4_1
   && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
   && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:VI1_AVX2
	  [(match_dup 1) (match_dup 2)
	   (lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
  "operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
18931
;; SSE4.1 word blend with an 8-bit immediate mask: bit i of operand 3
;; set selects word i from operand 2, clear selects it from operand 1
;; (vec_merge takes elements from its first arg where the mask bit is
;; set).
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
	(vec_merge:V8HI
	  (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
	  (match_operand:V8HI 1 "register_operand" "0,0,x")
	  (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
  "TARGET_SSE4_1"
  "@
   pblendw\t{%3, %2, %0|%0, %2, %3}
   pblendw\t{%3, %2, %0|%0, %2, %3}
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])

;; The builtin uses an 8-bit immediate.  Expand that.
;; The 256-bit instruction applies the same 8-bit mask to both 128-bit
;; lanes, so duplicate it into a 16-bit vec_merge mask here.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand")
	  (match_operand:V16HI 1 "register_operand")
	  (match_operand:SI 3 "const_0_to_255_operand")))]
  "TARGET_AVX2"
{
  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
  operands[3] = GEN_INT (val << 8 | val);
})

;; Matches the duplicated 16-bit mask (predicate avx2_pblendw_operand)
;; and truncates it back to the instruction's 8-bit immediate.
(define_insn "*avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
	  (match_operand:V16HI 1 "register_operand" "x")
	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
  "TARGET_AVX2"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; AVX2 dword blend with an immediate mask (vpblendd), one mask bit
;; per 32-bit element.
(define_insn "avx2_pblendd<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
	(vec_merge:VI4_AVX2
	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
	  (match_operand:VI4_AVX2 1 "register_operand" "x")
	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX2"
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18993
;; SSE4.1 horizontal minimum of unsigned words (phminposuw); kept
;; opaque as an unspec since RTL has no direct representation for it.
;; %v emits the "v" prefix for the AVX alternative only.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
	(unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
		     UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])
19005
;; Sign/zero extension V16QI -> V16HI (vpmovsxbw / vpmovzxbw), with
;; optional AVX512 masking when the BW/VL conditions hold.
(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=Yw")
	(any_extend:V16HI
	  (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))]
  "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Recognize a byte shuffle of (v ++ 0) whose selector (validated by
;; pmovzx_parallel) interleaves source bytes with zero bytes; after
;; reload rewrite it as a plain zero_extend so vpmovzxbw is emitted.
(define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "nonimmediate_operand" "vm")
	    (match_operand:V32QI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX2"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
  operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
})

;; As above, but the 256-bit source was itself built by concatenating
;; a 128-bit value with zero (seen through a subreg), so only the low
;; 128-bit half is extended.
(define_insn_and_split "*avx2_zero_extendv16qiv16hi2_2"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (subreg:V32QI
	      (vec_concat:VI248_256
		(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
		(match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
	    (match_operand:V32QI 3 "const0_operand" "C"))
	  (match_parallel 4 "pmovzx_parallel"
	    [(match_operand 5 "const_int_operand" "n")])))]
  "TARGET_AVX2"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
  operands[1] = lowpart_subreg (V16QImode, operands[1], <ssehalfvecmode>mode);
})

;; Standard-named expander for the middle end.
(define_expand "<insn>v16qiv16hi2"
  [(set (match_operand:V16HI 0 "register_operand")
	(any_extend:V16HI
	  (match_operand:V16QI 1 "nonimmediate_operand")))]
  "TARGET_AVX2")
19059
;; Sign/zero extension V32QI -> V32HI (512-bit vpmov[sz]xbw), with
;; optional masking.
(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(any_extend:V32HI
	  (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Recognize a byte shuffle of (v ++ 0) interleaving source bytes with
;; zero bytes (selector checked by pmovzx_parallel); after reload
;; rewrite as a plain zero_extend so vpmovzxbw is emitted.
(define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
	(vec_select:V64QI
	  (vec_concat:V128QI
	    (match_operand:V64QI 1 "nonimmediate_operand" "vm")
	    (match_operand:V64QI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX512BW"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
  operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
})

;; As above when the 512-bit source is a 256-bit value concatenated
;; with zero, seen through a subreg.
(define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_2"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
	(vec_select:V64QI
	  (vec_concat:V128QI
	    (subreg:V64QI
	      (vec_concat:VI248_512
		(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
		(match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
	    (match_operand:V64QI 3 "const0_operand" "C"))
	  (match_parallel 4 "pmovzx_parallel"
	    [(match_operand 5 "const_int_operand" "n")])))]
  "TARGET_AVX512BW"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
  operands[1] = lowpart_subreg (V32QImode, operands[1], <ssehalfvecmode>mode);
})

;; Standard-named expander for the middle end.
(define_expand "<insn>v32qiv32hi2"
  [(set (match_operand:V32HI 0 "register_operand")
	(any_extend:V32HI
	  (match_operand:V32QI 1 "nonimmediate_operand")))]
  "TARGET_AVX512BW")
19113
;; Sign/zero extension of the low 8 bytes of an XMM register to
;; V8HI (pmov[sz]xbw).  The source is modeled as a vec_select of the
;; low half of a V16QI register.
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
	(any_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])

;; Same extension with a direct 64-bit memory source.
(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
	(any_extend:V8HI
	  (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])

;; Combine a DImode memory load zero-padded into a vector with the
;; low-half extension: narrow the memory reference to V8QI and use the
;; memory form directly.  Must split before reload.
(define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
  [(set (match_operand:V8HI 0 "register_operand")
	(any_extend:V8HI
	  (vec_select:V8QI
	    (subreg:V16QI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V8HI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
19162
;; Recognize a V16QI shuffle of (v ++ 0) whose selector (checked by
;; pmovzx_parallel) interleaves the low 8 source bytes with zeros;
;; after reload rewrite it as a zero_extend of the low half so
;; pmovzxbw is emitted.  A memory source is narrowed to V8QI and uses
;; the memory form.
(define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
  [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,Ywm")
	    (match_operand:V16QI 2 "const0_operand" "C,C,C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(zero_extend:V8HI
	  (vec_select:V8QI
	    (match_dup 1)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
{
  operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
  if (MEM_P (operands[1]))
    {
      operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
      operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
}
  [(set_attr "isa" "noavx,noavx,avx")])

;; As above when the V16QI source is itself a 64-bit value
;; concatenated with zero, seen through a subreg.
(define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_4"
  [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (subreg:V16QI
	      (vec_concat:VI248_128
		(match_operand:<ssehalfvecmode> 1 "vector_operand" "YrBm,*xBm,Ywm")
		(match_operand:<ssehalfvecmode> 2 "const0_operand" "C,C,C")) 0)
	    (match_operand:V16QI 3 "const0_operand" "C,C,C"))
	  (match_parallel 4 "pmovzx_parallel"
	    [(match_operand 5 "const_int_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(zero_extend:V8HI
	  (vec_select:V8QI
	    (match_dup 1)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
{
  operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
  if (MEM_P (operands[1]))
    {
      operands[1] = lowpart_subreg (V8QImode, operands[1], <ssehalfvecmode>mode);
      operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
  operands[1] = lowpart_subreg (V16QImode, operands[1], <ssehalfvecmode>mode);
}
  [(set_attr "isa" "noavx,noavx,avx")])
19228
;; Standard-named V8QI -> V8HI extension.  A register source must be
;; widened to the V16QI form the insn pattern expects; a memory source
;; matches the memory pattern directly.
(define_expand "<insn>v8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand")
	(any_extend:V8HI
	  (match_operand:V8QI 1 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = force_reg (V8QImode, operands[1]);
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
      emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
      DONE;
    }
})
19243
;; Sign/zero extension V16QI -> V16SI (512-bit vpmov[sz]xbd), with
;; optional masking.
(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Standard-named expander for the middle end.
(define_expand "<insn>v16qiv16si2"
  [(set (match_operand:V16SI 0 "register_operand")
	(any_extend:V16SI
	  (match_operand:V16QI 1 "nonimmediate_operand")))]
  "TARGET_AVX512F")
19259
;; Sign/zero extension of the low 8 bytes of an XMM register to V8SI
;; (vpmov[sz]xbd).
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Same extension with a direct 64-bit memory source.
(define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (match_operand:V8QI 1 "memory_operand" "m")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Combine a DImode load zero-padded into a vector with the extension:
;; narrow the memory reference to V8QI and use the memory form.
;; Must split before reload.
(define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
  [(set (match_operand:V8SI 0 "register_operand")
	(any_extend:V8SI
	  (vec_select:V8QI
	    (subreg:V16QI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V8SI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")

;; Standard-named V8QI -> V8SI extension; widen a register source to
;; the V16QI form the insn pattern expects.
(define_expand "<insn>v8qiv8si2"
  [(set (match_operand:V8SI 0 "register_operand")
	(any_extend:V8SI
	  (match_operand:V8QI 1 "nonimmediate_operand")))]
  "TARGET_AVX2"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = force_reg (V8QImode, operands[1]);
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
      emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
      DONE;
    }
})
19321
;; Sign/zero extension of the low 4 bytes of an XMM register to V4SI
;; (pmov[sz]xbd).
(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V4SI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])

;; Same extension with a direct 32-bit memory source.
(define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V4SI
	  (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])

;; Combine an SImode load merged into element 0 of a zero vector
;; (movd-style RTL) with the extension: narrow the memory reference to
;; V4QI and use the memory form.  Must split before reload.
(define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
  [(set (match_operand:V4SI 0 "register_operand")
	(any_extend:V4SI
	  (vec_select:V4QI
	    (subreg:V16QI
	      (vec_merge:V4SI
		(vec_duplicate:V4SI
		  (match_operand:SI 1 "memory_operand"))
		(const_vector:V4SI
		  [(const_int 0) (const_int 0)
		   (const_int 0) (const_int 0)])
		(const_int 1)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V4SI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")

;; Standard-named V4QI -> V4SI extension; widen a register source to
;; the V16QI form the insn pattern expects.
(define_expand "<insn>v4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand")
	(any_extend:V4SI
	  (match_operand:V4QI 1 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = force_reg (V4QImode, operands[1]);
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
      emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
      DONE;
    }
})
19385
;; Sign/zero extension V16HI -> V16SI (512-bit vpmov[sz]xwd), with
;; optional masking.
(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Standard-named expander for the middle end.
(define_expand "<insn>v16hiv16si2"
  [(set (match_operand:V16SI 0 "register_operand")
	(any_extend:V16SI
	  (match_operand:V16HI 1 "nonimmediate_operand")))]
  "TARGET_AVX512F")

;; Recognize a word shuffle of (v ++ 0) whose selector (checked by
;; pmovzx_parallel) interleaves source words with zero words; after
;; reload rewrite it as a zero_extend so vpmovzxwd is emitted.
(define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "nonimmediate_operand" "vm")
	    (match_operand:V32HI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
  operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
})

;; As above when the 512-bit source is a 256-bit value concatenated
;; with zero, seen through a subreg.
(define_insn_and_split "*avx512f_zero_extendv16hiv16si2_2"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (subreg:V32HI
	      (vec_concat:VI148_512
		(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
		(match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
	    (match_operand:V32HI 3 "const0_operand" "C"))
	  (match_parallel 4 "pmovzx_parallel"
	    [(match_operand 5 "const_int_operand" "n")])))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
  operands[1] = lowpart_subreg (V16HImode, operands[1], <ssehalfvecmode>mode);
})
19438
;; Sign/zero extension V8HI -> V8SI (vpmov[sz]xwd), with optional
;; masking under AVX512VL.
(define_insn "avx2_<code>v8hiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Standard-named expander for the middle end.
(define_expand "<insn>v8hiv8si2"
  [(set (match_operand:V8SI 0 "register_operand")
	(any_extend:V8SI
	  (match_operand:V8HI 1 "nonimmediate_operand")))]
  "TARGET_AVX2")

;; Recognize a word shuffle of (v ++ 0) interleaving source words with
;; zero words (selector checked by pmovzx_parallel); after reload
;; rewrite as a zero_extend so vpmovzxwd is emitted.
(define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "nonimmediate_operand" "vm")
	    (match_operand:V16HI 2 "const0_operand" "C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n")])))]
  "TARGET_AVX2"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
  operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
})

;; As above when the 256-bit source is a 128-bit value concatenated
;; with zero, seen through a subreg.
(define_insn_and_split "*avx2_zero_extendv8hiv8si2_2"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (subreg:V16HI
	      (vec_concat:VI148_256
		(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
		(match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
	    (match_operand:V16HI 3 "const0_operand" "C"))
	  (match_parallel 4 "pmovzx_parallel"
	    [(match_operand 5 "const_int_operand" "n")])))]
  "TARGET_AVX2"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
{
  operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
  operands[1] = lowpart_subreg (V8HImode, operands[1], <ssehalfvecmode>mode);
})
19492
19493
;; Sign/zero extension of the low 4 words of an XMM register to V4SI
;; (pmov[sz]xwd).
(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])

;; Same extension with a direct 64-bit memory source.
(define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V4SI
	  (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "TI")])

;; Combine a DImode load zero-padded into a vector with the extension:
;; narrow the memory reference to V4HI and use the memory form.
;; Must split before reload.
(define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
  [(set (match_operand:V4SI 0 "register_operand")
	(any_extend:V4SI
	  (vec_select:V4HI
	    (subreg:V8HI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V4SI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")

;; Standard-named V4HI -> V4SI extension; widen a register source to
;; the V8HI form the insn pattern expects.
(define_expand "<insn>v4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand")
	(any_extend:V4SI
	  (match_operand:V4HI 1 "nonimmediate_operand")))]
  "TARGET_SSE4_1"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = force_reg (V4HImode, operands[1]);
      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
      emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
      DONE;
    }
})
19553
;; Recognize a V8HI shuffle of (v ++ 0) whose selector (checked by
;; pmovzx_parallel) interleaves the low 4 source words with zeros;
;; after reload rewrite it as a zero_extend of the low half so
;; pmovzxwd is emitted.  A memory source is narrowed to V4HI and uses
;; the memory form.
(define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
	    (match_operand:V8HI 2 "const0_operand" "C,C,C"))
	  (match_parallel 3 "pmovzx_parallel"
	    [(match_operand 4 "const_int_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(zero_extend:V4SI
	  (vec_select:V4HI
	    (match_dup 1)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
{
  operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
  if (MEM_P (operands[1]))
    {
      operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
      operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
}
  [(set_attr "isa" "noavx,noavx,avx")])

;; As above when the V8HI source is itself a 64-bit value concatenated
;; with zero, seen through a subreg.
(define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_4"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (subreg:V8HI
	      (vec_concat:VI148_128
		(match_operand:<ssehalfvecmode> 1 "vector_operand" "YrBm,*xBm,vm")
		(match_operand:<ssehalfvecmode> 2 "const0_operand" "C,C,C")) 0)
	    (match_operand:V8HI 3 "const0_operand" "C,C,C"))
	  (match_parallel 4 "pmovzx_parallel"
	    [(match_operand 5 "const_int_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
	(zero_extend:V4SI
	  (vec_select:V4HI
	    (match_dup 1)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
{
  operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
  if (MEM_P (operands[1]))
    {
      operands[1] = lowpart_subreg (V4HImode, operands[1], <ssehalfvecmode>mode);
      operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
      emit_insn (gen_rtx_SET (operands[0], operands[1]));
      DONE;
    }
  operands[1] = lowpart_subreg (V8HImode, operands[1], <ssehalfvecmode>mode);
}
  [(set_attr "isa" "noavx,noavx,avx")])
19615
;; Sign/zero extension of the low 8 bytes of an XMM register to V8DI
;; (vpmov[sz]xbq).
(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Same extension with a direct 64-bit memory source.
(define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8QI 1 "memory_operand" "m")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Combine a DImode load zero-padded into a vector with the extension:
;; narrow the memory reference to V8QI and use the memory form.
;; Must split before reload.
(define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
  [(set (match_operand:V8DI 0 "register_operand")
	(any_extend:V8DI
	  (vec_select:V8QI
	    (subreg:V16QI
	      (vec_concat:V2DI
		(match_operand:DI 1 "memory_operand")
		(const_int 0)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V8DI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")

;; Standard-named V8QI -> V8DI extension; widen a register source to
;; the V16QI form the insn pattern expects.
(define_expand "<insn>v8qiv8di2"
  [(set (match_operand:V8DI 0 "register_operand")
	(any_extend:V8DI
	  (match_operand:V8QI 1 "nonimmediate_operand")))]
  "TARGET_AVX512F"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = force_reg (V8QImode, operands[1]);
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
      emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
      DONE;
    }
})
19674
;; Sign/zero extension of the low 4 bytes of an XMM register to V4DI
;; (vpmov[sz]xbq).
(define_insn "avx2_<code>v4qiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Same extension with a direct 32-bit memory source.
(define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (match_operand:V4QI 1 "memory_operand" "m")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Combine an SImode load merged into element 0 of a zero vector
;; (movd-style RTL) with the extension: narrow the memory reference to
;; V4QI and use the memory form.  Must split before reload.
(define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
  [(set (match_operand:V4DI 0 "register_operand")
	(any_extend:V4DI
	  (vec_select:V4QI
	    (subreg:V16QI
	      (vec_merge:V4SI
		(vec_duplicate:V4SI
		  (match_operand:SI 1 "memory_operand"))
		(const_vector:V4SI
		  [(const_int 0) (const_int 0)
		   (const_int 0) (const_int 0)])
		(const_int 1)) 0)
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(any_extend:V4DI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")

;; Standard-named V4QI -> V4DI extension; widen a register source to
;; the V16QI form the insn pattern expects.
(define_expand "<insn>v4qiv4di2"
  [(set (match_operand:V4DI 0 "register_operand")
	(any_extend:V4DI
	  (match_operand:V4QI 1 "nonimmediate_operand")))]
  "TARGET_AVX2"
{
  if (!MEM_P (operands[1]))
    {
      operands[1] = force_reg (V4QImode, operands[1]);
      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
      emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
      DONE;
    }
})
19736
;; vpmov{sx,zx}bq: extend the low 2 bytes of an XMM register to V2DI.
19737 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
19738 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19739 (any_extend:V2DI
19740 (vec_select:V2QI
19741 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
19742 (parallel [(const_int 0) (const_int 1)]))))]
19743 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19744 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19745 [(set_attr "isa" "noavx,noavx,avx")
19746 (set_attr "type" "ssemov")
19747 (set_attr "prefix_extra" "1")
19748 (set_attr "prefix" "orig,orig,maybe_evex")
19749 (set_attr "mode" "TI")])
19750
;; Expander: always widen the V2QI source to V16QI so the vec_select
;; insn above can match (no memory form here, hence register_operand).
19751 (define_expand "<insn>v2qiv2di2"
19752 [(set (match_operand:V2DI 0 "register_operand")
19753 (any_extend:V2DI
19754 (match_operand:V2QI 1 "register_operand")))]
19755 "TARGET_SSE4_1"
19756 {
19757 operands[1] = force_reg (V2QImode, operands[1]);
19758 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
19759 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
19760 DONE;
19761 })
19762
;; vpmov{sx,zx}wq: extend 8 words to 8 quadwords (full ZMM result).
19763 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
19764 [(set (match_operand:V8DI 0 "register_operand" "=v")
19765 (any_extend:V8DI
19766 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
19767 "TARGET_AVX512F"
19768 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19769 [(set_attr "type" "ssemov")
19770 (set_attr "prefix" "evex")
19771 (set_attr "mode" "XI")])
19772
;; Standard-name expander; the insn above matches the RTL as-is.
19773 (define_expand "<insn>v8hiv8di2"
19774 [(set (match_operand:V8DI 0 "register_operand")
19775 (any_extend:V8DI
19776 (match_operand:V8HI 1 "nonimmediate_operand")))]
19777 "TARGET_AVX512F")
19778
;; vpmov{sx,zx}wq: extend the low 4 words of an XMM register to V4DI.
19779 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
19780 [(set (match_operand:V4DI 0 "register_operand" "=v")
19781 (any_extend:V4DI
19782 (vec_select:V4HI
19783 (match_operand:V8HI 1 "register_operand" "v")
19784 (parallel [(const_int 0) (const_int 1)
19785 (const_int 2) (const_int 3)]))))]
19786 "TARGET_AVX2 && <mask_avx512vl_condition>"
19787 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19788 [(set_attr "type" "ssemov")
19789 (set_attr "prefix_extra" "1")
19790 (set_attr "prefix" "maybe_evex")
19791 (set_attr "mode" "OI")])
19792
;; Memory-source variant: vpmov{sx,zx}wq loads the V4HI directly.
19793 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
19794 [(set (match_operand:V4DI 0 "register_operand" "=v")
19795 (any_extend:V4DI
19796 (match_operand:V4HI 1 "memory_operand" "m")))]
19797 "TARGET_AVX2 && <mask_avx512vl_condition>"
19798 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19799 [(set_attr "type" "ssemov")
19800 (set_attr "prefix_extra" "1")
19801 (set_attr "prefix" "maybe_evex")
19802 (set_attr "mode" "OI")])
19803
;; Recognize a DI load concatenated with zero whose low 4 words are
;; then extended; split to an extension straight from the narrowed
;; (V4HI) memory reference before reload.
19804 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
19805 [(set (match_operand:V4DI 0 "register_operand")
19806 (any_extend:V4DI
19807 (vec_select:V4HI
19808 (subreg:V8HI
19809 (vec_concat:V2DI
19810 (match_operand:DI 1 "memory_operand")
19811 (const_int 0)) 0)
19812 (parallel [(const_int 0) (const_int 1)
19813 (const_int 2) (const_int 3)]))))]
19814 "TARGET_AVX2 && <mask_avx512vl_condition>
19815 && ix86_pre_reload_split ()"
19816 "#"
19817 "&& 1"
19818 [(set (match_dup 0)
19819 (any_extend:V4DI (match_dup 1)))]
19820 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
19821
;; Expander: memory sources match the patterns above directly;
;; register sources are widened to V8HI first.
19822 (define_expand "<insn>v4hiv4di2"
19823 [(set (match_operand:V4DI 0 "register_operand")
19824 (any_extend:V4DI
19825 (match_operand:V4HI 1 "nonimmediate_operand")))]
19826 "TARGET_AVX2"
19827 {
19828 if (!MEM_P (operands[1]))
19829 {
19830 operands[1] = force_reg (V4HImode, operands[1]);
19831 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
19832 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
19833 DONE;
19834 }
19835 })
19836
;; vpmov{sx,zx}wq: extend the low 2 words of an XMM register to V2DI.
19837 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
19838 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19839 (any_extend:V2DI
19840 (vec_select:V2HI
19841 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
19842 (parallel [(const_int 0) (const_int 1)]))))]
19843 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19844 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19845 [(set_attr "isa" "noavx,noavx,avx")
19846 (set_attr "type" "ssemov")
19847 (set_attr "prefix_extra" "1")
19848 (set_attr "prefix" "orig,orig,maybe_evex")
19849 (set_attr "mode" "TI")])
19850
;; Memory-source variant: the V2HI operand is loaded directly.
19851 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
19852 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19853 (any_extend:V2DI
19854 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
19855 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19856 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19857 [(set_attr "isa" "noavx,noavx,avx")
19858 (set_attr "type" "ssemov")
19859 (set_attr "prefix_extra" "1")
19860 (set_attr "prefix" "orig,orig,maybe_evex")
19861 (set_attr "mode" "TI")])
19862
;; Recognize a scalar SI load zero-merged into a V4SI vector whose low
;; 2 words are then extended; split to an extension straight from the
;; narrowed (V2HI) memory reference before reload.
19863 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
19864 [(set (match_operand:V2DI 0 "register_operand")
19865 (any_extend:V2DI
19866 (vec_select:V2HI
19867 (subreg:V8HI
19868 (vec_merge:V4SI
19869 (vec_duplicate:V4SI
19870 (match_operand:SI 1 "memory_operand"))
19871 (const_vector:V4SI
19872 [(const_int 0) (const_int 0)
19873 (const_int 0) (const_int 0)])
19874 (const_int 1)) 0)
19875 (parallel [(const_int 0) (const_int 1)]))))]
19876 "TARGET_SSE4_1 && <mask_avx512vl_condition>
19877 && ix86_pre_reload_split ()"
19878 "#"
19879 "&& 1"
19880 [(set (match_dup 0)
19881 (any_extend:V2DI (match_dup 1)))]
19882 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
19883
;; Expander: memory sources match the patterns above directly;
;; register sources are widened to V8HI first.
19884 (define_expand "<insn>v2hiv2di2"
19885 [(set (match_operand:V2DI 0 "register_operand")
19886 (any_extend:V2DI
19887 (match_operand:V2HI 1 "nonimmediate_operand")))]
19888 "TARGET_SSE4_1"
19889 {
19890 if (!MEM_P (operands[1]))
19891 {
19892 operands[1] = force_reg (V2HImode, operands[1]);
19893 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
19894 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
19895 DONE;
19896 }
19897 })
19898
;; vpmov{sx,zx}dq: extend 8 doublewords to 8 quadwords (ZMM result).
19899 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
19900 [(set (match_operand:V8DI 0 "register_operand" "=v")
19901 (any_extend:V8DI
19902 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
19903 "TARGET_AVX512F"
19904 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19905 [(set_attr "type" "ssemov")
19906 (set_attr "prefix" "evex")
19907 (set_attr "mode" "XI")])
19908
;; Recognize a zero-interleave permutation (pmovzx_parallel selection
;; of a V16SI concatenated with zeros) and turn it into vpmovzxdq
;; after reload, re-interpreting the operands in DI modes.
19909 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
19910 [(set (match_operand:V16SI 0 "register_operand" "=v")
19911 (vec_select:V16SI
19912 (vec_concat:V32SI
19913 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
19914 (match_operand:V16SI 2 "const0_operand" "C"))
19915 (match_parallel 3 "pmovzx_parallel"
19916 [(match_operand 4 "const_int_operand" "n")])))]
19917 "TARGET_AVX512F"
19918 "#"
19919 "&& reload_completed"
19920 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
19921 {
19922 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
19923 operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
19924 })
19925
;; Same as above, but the source is a V8SI already concatenated with
;; zeros, so only operand 0 needs re-interpreting.
19926 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_2"
19927 [(set (match_operand:V16SI 0 "register_operand" "=v")
19928 (vec_select:V16SI
19929 (vec_concat:V32SI
19930 (vec_concat:V16SI
19931 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
19932 (match_operand:V8SI 2 "const0_operand" "C"))
19933 (match_operand:V16SI 3 "const0_operand" "C"))
19934 (match_parallel 4 "pmovzx_parallel"
19935 [(match_operand 5 "const_int_operand" "n")])))]
19936 "TARGET_AVX512F"
19937 "#"
19938 "&& reload_completed"
19939 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
19940 {
19941 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
19942 })
19943
;; Standard-name expander; the named insn above matches the RTL as-is.
19944 (define_expand "<insn>v8siv8di2"
19945 [(set (match_operand:V8DI 0 "register_operand" "=v")
19946 (any_extend:V8DI
19947 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
19948 "TARGET_AVX512F")
19949
;; vpmov{sx,zx}dq: extend 4 doublewords to 4 quadwords (YMM result).
19950 (define_insn "avx2_<code>v4siv4di2<mask_name>"
19951 [(set (match_operand:V4DI 0 "register_operand" "=v")
19952 (any_extend:V4DI
19953 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
19954 "TARGET_AVX2 && <mask_avx512vl_condition>"
19955 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19956 [(set_attr "type" "ssemov")
19957 (set_attr "prefix" "maybe_evex")
19958 (set_attr "prefix_extra" "1")
19959 (set_attr "mode" "OI")])
19960
;; Recognize a zero-interleave permutation (pmovzx_parallel selection
;; of a V8SI concatenated with zeros) and turn it into vpmovzxdq after
;; reload, re-interpreting the operands in DI modes.
19961 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
19962 [(set (match_operand:V8SI 0 "register_operand" "=v")
19963 (vec_select:V8SI
19964 (vec_concat:V16SI
19965 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
19966 (match_operand:V8SI 2 "const0_operand" "C"))
19967 (match_parallel 3 "pmovzx_parallel"
19968 [(match_operand 4 "const_int_operand" "n")])))]
19969 "TARGET_AVX2"
19970 "#"
19971 "&& reload_completed"
19972 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
19973 {
19974 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
19975 operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
19976 })
19977
;; Same as above, but the source is a V4SI already concatenated with
;; zeros, so only operand 0 needs re-interpreting.
19978 (define_insn_and_split "*avx2_zero_extendv4siv4di2_2"
19979 [(set (match_operand:V8SI 0 "register_operand" "=v")
19980 (vec_select:V8SI
19981 (vec_concat:V16SI
19982 (vec_concat:V8SI
19983 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
19984 (match_operand:V4SI 2 "const0_operand" "C"))
19985 (match_operand:V8SI 3 "const0_operand" "C"))
19986 (match_parallel 4 "pmovzx_parallel"
19987 [(match_operand 5 "const_int_operand" "n")])))]
19988 "TARGET_AVX2"
19989 "#"
19990 "&& reload_completed"
19991 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
19992 {
19993 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
19994 })
19995
;; Standard-name expander; the named insn above matches the RTL as-is.
19996 (define_expand "<insn>v4siv4di2"
19997 [(set (match_operand:V4DI 0 "register_operand")
19998 (any_extend:V4DI
19999 (match_operand:V4SI 1 "nonimmediate_operand")))]
20000 "TARGET_AVX2")
20001
;; vpmov{sx,zx}dq: extend the low 2 doublewords of an XMM register
;; to V2DI.
20002 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
20003 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
20004 (any_extend:V2DI
20005 (vec_select:V2SI
20006 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
20007 (parallel [(const_int 0) (const_int 1)]))))]
20008 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
20009 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20010 [(set_attr "isa" "noavx,noavx,avx")
20011 (set_attr "type" "ssemov")
20012 (set_attr "prefix_extra" "1")
20013 (set_attr "prefix" "orig,orig,maybe_evex")
20014 (set_attr "mode" "TI")])
20015
;; Memory-source variant: the V2SI operand is loaded directly.
20016 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
20017 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
20018 (any_extend:V2DI
20019 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
20020 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
20021 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20022 [(set_attr "isa" "noavx,noavx,avx")
20023 (set_attr "type" "ssemov")
20024 (set_attr "prefix_extra" "1")
20025 (set_attr "prefix" "orig,orig,maybe_evex")
20026 (set_attr "mode" "TI")])
20027
;; Recognize a DI load concatenated with zero whose low 2 doublewords
;; are then extended; split to an extension straight from the narrowed
;; (V2SI) memory reference before reload.
20028 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
20029 [(set (match_operand:V2DI 0 "register_operand")
20030 (any_extend:V2DI
20031 (vec_select:V2SI
20032 (subreg:V4SI
20033 (vec_concat:V2DI
20034 (match_operand:DI 1 "memory_operand")
20035 (const_int 0)) 0)
20036 (parallel [(const_int 0) (const_int 1)]))))]
20037 "TARGET_SSE4_1 && <mask_avx512vl_condition>
20038 && ix86_pre_reload_split ()"
20039 "#"
20040 "&& 1"
20041 [(set (match_dup 0)
20042 (any_extend:V2DI (match_dup 1)))]
20043 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
20044
;; Recognize a zero-interleave permutation of a V4SI with zeros and
;; turn it into pmovzxdq after reload; memory sources are narrowed to
;; V2SI and emitted as a direct zero_extend instead.
20045 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
20046 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
20047 (vec_select:V4SI
20048 (vec_concat:V8SI
20049 (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
20050 (match_operand:V4SI 2 "const0_operand" "C,C,C"))
20051 (match_parallel 3 "pmovzx_parallel"
20052 [(match_operand 4 "const_int_operand" "n,n,n")])))]
20053 "TARGET_SSE4_1"
20054 "#"
20055 "&& reload_completed"
20056 [(set (match_dup 0)
20057 (zero_extend:V2DI
20058 (vec_select:V2SI (match_dup 1)
20059 (parallel [(const_int 0) (const_int 1)]))))]
20060 {
20061 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
20062 if (MEM_P (operands[1]))
20063 {
20064 operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
20065 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
20066 emit_insn (gen_rtx_SET (operands[0], operands[1]));
20067 DONE;
20068 }
20069 }
20070 [(set_attr "isa" "noavx,noavx,avx")])
20071
;; Same as above, but the source is a V2SI already concatenated with
;; zeros; register sources are widened back to V4SI for the emitted
;; vec_select form.
20072 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_4"
20073 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
20074 (vec_select:V4SI
20075 (vec_concat:V8SI
20076 (vec_concat:V4SI
20077 (match_operand:V2SI 1 "vector_operand" "YrBm, *xBm, vm")
20078 (match_operand:V2SI 2 "const0_operand" "C,C,C"))
20079 (match_operand:V4SI 3 "const0_operand" "C,C,C"))
20080 (match_parallel 4 "pmovzx_parallel"
20081 [(match_operand 5 "const_int_operand" "n,n,n")])))]
20082 "TARGET_SSE4_1"
20083 "#"
20084 "&& reload_completed"
20085 [(set (match_dup 0)
20086 (zero_extend:V2DI
20087 (vec_select:V2SI (match_dup 1)
20088 (parallel [(const_int 0) (const_int 1)]))))]
20089 {
20090 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
20091 if (MEM_P (operands[1]))
20092 {
20093 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
20094 emit_insn (gen_rtx_SET (operands[0], operands[1]));
20095 DONE;
20096 }
20097 operands[1] = lowpart_subreg (V4SImode, operands[1], V2SImode);
20098 }
20099 [(set_attr "isa" "noavx,noavx,avx")])
20100
;; Expander: memory sources match the patterns above directly;
;; register sources are widened to V4SI first.
20101 (define_expand "<insn>v2siv2di2"
20102 [(set (match_operand:V2DI 0 "register_operand")
20103 (any_extend:V2DI
20104 (match_operand:V2SI 1 "nonimmediate_operand")))]
20105 "TARGET_SSE4_1"
20106 {
20107 if (!MEM_P (operands[1]))
20108 {
20109 operands[1] = force_reg (V2SImode, operands[1]);
20110 operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
20111 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
20112 DONE;
20113 }
20114 })
20115
20116 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
20117 ;; set FLAGS_REG, but they are not real compare instructions.
20118 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
20119 [(set (reg:CC FLAGS_REG)
20120 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
20121 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
20122 UNSPEC_VTESTP))]
20123 "TARGET_AVX"
20124 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
20125 [(set_attr "type" "ssecomi")
20126 (set_attr "prefix_extra" "1")
20127 (set_attr "prefix" "vex")
20128 (set_attr "mode" "<MODE>")])
20129
20130 ;; ptest is very similar to comiss and ucomiss in that it sets
20131 ;; FLAGS_REG, but it is not a real compare instruction.
20132 (define_insn "<sse4_1>_ptest<mode>"
20133 [(set (reg:CC FLAGS_REG)
20134 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
20135 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
20136 UNSPEC_PTEST))]
20137 "TARGET_SSE4_1"
20138 "%vptest\t{%1, %0|%0, %1}"
20139 [(set_attr "isa" "noavx,noavx,avx")
20140 (set_attr "type" "ssecomi")
20141 (set_attr "prefix_extra" "1")
20142 (set_attr "prefix" "orig,orig,vex")
20143 (set (attr "btver2_decode")
20144 (if_then_else
20145 (match_test "<sseinsnmode>mode==OImode")
20146 (const_string "vector")
20147 (const_string "*")))
20148 (set_attr "mode" "<sseinsnmode>")])
20149
;; ptest on a TFmode (128-bit) value, used for __int128 bit tests.
20150 (define_insn "ptesttf2"
20151 [(set (reg:CC FLAGS_REG)
20152 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
20153 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
20154 UNSPEC_PTEST))]
20155 "TARGET_SSE4_1"
20156 "%vptest\t{%1, %0|%0, %1}"
20157 [(set_attr "isa" "noavx,noavx,avx")
20158 (set_attr "type" "ssecomi")
20159 (set_attr "prefix_extra" "1")
20160 (set_attr "prefix" "orig,orig,vex")
20161 (set_attr "mode" "TI")])
20162
;; nearbyint: round using the current MXCSR rounding mode without
;; raising the inexact exception (ROUND_NO_EXC).
20163 (define_expand "nearbyint<mode>2"
20164 [(set (match_operand:VF 0 "register_operand")
20165 (unspec:VF
20166 [(match_operand:VF 1 "vector_operand")
20167 (match_dup 2)]
20168 UNSPEC_ROUND))]
20169 "TARGET_SSE4_1"
20170 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
20171
;; rint: round using the current MXCSR rounding mode; inexact may be
;; raised, which is the only difference from nearbyint above.
20172 (define_expand "rint<mode>2"
20173 [(set (match_operand:VF 0 "register_operand")
20174 (unspec:VF
20175 [(match_operand:VF 1 "vector_operand")
20176 (match_dup 2)]
20177 UNSPEC_ROUND))]
20178 "TARGET_SSE4_1"
20179 "operands[2] = GEN_INT (ROUND_MXCSR);")
20180
;; roundps/roundpd with an explicit rounding-control immediate.
20181 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
20182 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
20183 (unspec:VF_128_256
20184 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
20185 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
20186 UNSPEC_ROUND))]
20187 "TARGET_SSE4_1"
20188 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20189 [(set_attr "isa" "noavx,noavx,avx")
20190 (set_attr "type" "ssecvt")
20191 (set_attr "prefix_data16" "1,1,*")
20192 (set_attr "prefix_extra" "1")
20193 (set_attr "length_immediate" "1")
20194 (set_attr "prefix" "orig,orig,vex")
20195 (set_attr "mode" "<MODE>")])
20196
;; Round, then truncate-convert the result to a signed integer vector.
20197 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
20198 [(match_operand:<sseintvecmode> 0 "register_operand")
20199 (match_operand:VF1_128_256 1 "vector_operand")
20200 (match_operand:SI 2 "const_0_to_15_operand")]
20201 "TARGET_SSE4_1"
20202 {
20203 rtx tmp = gen_reg_rtx (<MODE>mode);
20204
20205 emit_insn
20206 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
20207 operands[2]));
20208 emit_insn
20209 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
20210 DONE;
20211 })
20212
;; 512-bit round: implemented with vrndscale, which subsumes the
;; SSE4.1 round immediates for the 0..15 range.
20213 (define_expand "avx512f_round<castmode>512"
20214 [(match_operand:VF_512 0 "register_operand")
20215 (match_operand:VF_512 1 "nonimmediate_operand")
20216 (match_operand:SI 2 "const_0_to_15_operand")]
20217 "TARGET_AVX512F"
20218 {
20219 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
20220 DONE;
20221 })
20222
;; 512-bit round of V16SF followed by truncating conversion to V16SI.
20223 (define_expand "avx512f_roundps512_sfix"
20224 [(match_operand:V16SI 0 "register_operand")
20225 (match_operand:V16SF 1 "nonimmediate_operand")
20226 (match_operand:SI 2 "const_0_to_15_operand")]
20227 "TARGET_AVX512F"
20228 {
20229 rtx tmp = gen_reg_rtx (V16SFmode);
20230 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
20231 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
20232 DONE;
20233 })
20234
;; Round two DF vectors and pack the truncated integer results into a
;; single SI vector.  For V2DF on AVX (unless 128-bit is preferred),
;; concatenate both inputs into one V4DF and do a single 256-bit
;; round + convert instead of two 128-bit rounds and a pack.
20235 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
20236 [(match_operand:<ssepackfltmode> 0 "register_operand")
20237 (match_operand:VF2 1 "vector_operand")
20238 (match_operand:VF2 2 "vector_operand")
20239 (match_operand:SI 3 "const_0_to_15_operand")]
20240 "TARGET_SSE4_1"
20241 {
20242 rtx tmp0, tmp1;
20243
20244 if (<MODE>mode == V2DFmode
20245 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
20246 {
20247 rtx tmp2 = gen_reg_rtx (V4DFmode);
20248
20249 tmp0 = gen_reg_rtx (V4DFmode);
20250 tmp1 = force_reg (V2DFmode, operands[1]);
20251
20252 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
20253 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
20254 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
20255 }
20256 else
20257 {
20258 tmp0 = gen_reg_rtx (<MODE>mode);
20259 tmp1 = gen_reg_rtx (<MODE>mode);
20260
20261 emit_insn
20262 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
20263 operands[3]));
20264 emit_insn
20265 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
20266 operands[3]));
20267 emit_insn
20268 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
20269 }
20270 DONE;
20271 })
20272
;; Scalar roundss/roundsd: round element 0 of operand 2, take the
;; remaining elements from operand 1 (vec_merge with mask 1).  The
;; AVX512F alternative uses vrndscale, which covers the same 0..15
;; immediate range.
20273 (define_insn "sse4_1_round<ssescalarmodesuffix>"
20274 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
20275 (vec_merge:VF_128
20276 (unspec:VF_128
20277 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
20278 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
20279 UNSPEC_ROUND)
20280 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
20281 (const_int 1)))]
20282 "TARGET_SSE4_1"
20283 "@
20284 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
20285 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
20286 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
20287 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
20288 [(set_attr "isa" "noavx,noavx,avx,avx512f")
20289 (set_attr "type" "ssecvt")
20290 (set_attr "length_immediate" "1")
20291 (set_attr "prefix_data16" "1,1,*,*")
20292 (set_attr "prefix_extra" "1")
20293 (set_attr "prefix" "orig,orig,vex,evex")
20294 (set_attr "mode" "<MODE>")])
20295
;; Variant where the rounded value is a scalar duplicated into the
;; vector before the merge (combine-generated form of the same insn).
20296 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
20297 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
20298 (vec_merge:VF_128
20299 (vec_duplicate:VF_128
20300 (unspec:<ssescalarmode>
20301 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
20302 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
20303 UNSPEC_ROUND))
20304 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
20305 (const_int 1)))]
20306 "TARGET_SSE4_1"
20307 "@
20308 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
20309 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
20310 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
20311 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20312 [(set_attr "isa" "noavx,noavx,avx,avx512f")
20313 (set_attr "type" "ssecvt")
20314 (set_attr "length_immediate" "1")
20315 (set_attr "prefix_data16" "1,1,*,*")
20316 (set_attr "prefix_extra" "1")
20317 (set_attr "prefix" "orig,orig,vex,evex")
20318 (set_attr "mode" "<MODE>")])
20319
;; round (round-half-away-from-zero): no direct instruction, so add
;; copysign (nextafter (0.5, 0.0), x) to x and truncate.  Using the
;; value just below 0.5 avoids misrounding inputs like 0.49999...,
;; which plain x + 0.5 would round up.  Requires -fno-trapping-math
;; since the truncation may raise inexact differently than round().
20320 (define_expand "round<mode>2"
20321 [(set (match_dup 3)
20322 (plus:VF
20323 (match_operand:VF 1 "register_operand")
20324 (match_dup 2)))
20325 (set (match_operand:VF 0 "register_operand")
20326 (unspec:VF
20327 [(match_dup 3) (match_dup 4)]
20328 UNSPEC_ROUND))]
20329 "TARGET_SSE4_1 && !flag_trapping_math"
20330 {
20331 machine_mode scalar_mode;
20332 const struct real_format *fmt;
20333 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20334 rtx half, vec_half;
20335
20336 scalar_mode = GET_MODE_INNER (<MODE>mode);
20337
20338 /* load nextafter (0.5, 0.0) */
20339 fmt = REAL_MODE_FORMAT (scalar_mode);
20340 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
20341 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
20342 half = const_double_from_real_value (pred_half, scalar_mode);
20343
20344 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
20345 vec_half = force_reg (<MODE>mode, vec_half);
20346
20347 operands[2] = gen_reg_rtx (<MODE>mode);
20348 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]))
;
20349
20350 operands[3] = gen_reg_rtx (<MODE>mode);
20351 operands[4] = GEN_INT (ROUND_TRUNC);
20352 })
20353
;; round, then truncate-convert the result to a signed integer vector.
20354 (define_expand "round<mode>2_sfix"
20355 [(match_operand:<sseintvecmode> 0 "register_operand")
20356 (match_operand:VF1 1 "register_operand")]
20357 "TARGET_SSE4_1 && !flag_trapping_math"
20358 {
20359 rtx tmp = gen_reg_rtx (<MODE>mode);
20360
20361 emit_insn (gen_round<mode>2 (tmp, operands[1]));
20362
20363 emit_insn
20364 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
20365 DONE;
20366 })
20367
;; round two DF vectors and pack the truncated integer results.  As in
;; the _vec_pack_sfix expander above, prefer a single 256-bit
;; round + convert for V2DF when AVX is available and fast.
20368 (define_expand "round<mode>2_vec_pack_sfix"
20369 [(match_operand:<ssepackfltmode> 0 "register_operand")
20370 (match_operand:VF2 1 "register_operand")
20371 (match_operand:VF2 2 "register_operand")]
20372 "TARGET_SSE4_1 && !flag_trapping_math"
20373 {
20374 rtx tmp0, tmp1;
20375
20376 if (<MODE>mode == V2DFmode
20377 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
20378 {
20379 rtx tmp2 = gen_reg_rtx (V4DFmode);
20380
20381 tmp0 = gen_reg_rtx (V4DFmode);
20382 tmp1 = force_reg (V2DFmode, operands[1]);
20383
20384 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
20385 emit_insn (gen_roundv4df2 (tmp2, tmp0));
20386 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
20387 }
20388 else
20389 {
20390 tmp0 = gen_reg_rtx (<MODE>mode);
20391 tmp1 = gen_reg_rtx (<MODE>mode);
20392
20393 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
20394 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
20395
20396 emit_insn
20397 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
20398 }
20399 DONE;
20400 })
20401
20402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20403 ;;
20404 ;; Intel SSE4.2 string/text processing instructions
20405 ;;
20406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20407
;; Combined pcmpestr pattern producing all three results (ECX index,
;; XMM0 mask, EFLAGS).  Split before reload into only the variants
;; whose outputs are actually used, as determined by REG_UNUSED notes.
20408 (define_insn_and_split "sse4_2_pcmpestr"
20409 [(set (match_operand:SI 0 "register_operand" "=c,c")
20410 (unspec:SI
20411 [(match_operand:V16QI 2 "register_operand" "x,x")
20412 (match_operand:SI 3 "register_operand" "a,a")
20413 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
20414 (match_operand:SI 5 "register_operand" "d,d")
20415 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
20416 UNSPEC_PCMPESTR))
20417 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
20418 (unspec:V16QI
20419 [(match_dup 2)
20420 (match_dup 3)
20421 (match_dup 4)
20422 (match_dup 5)
20423 (match_dup 6)]
20424 UNSPEC_PCMPESTR))
20425 (set (reg:CC FLAGS_REG)
20426 (unspec:CC
20427 [(match_dup 2)
20428 (match_dup 3)
20429 (match_dup 4)
20430 (match_dup 5)
20431 (match_dup 6)]
20432 UNSPEC_PCMPESTR))]
20433 "TARGET_SSE4_2
20434 && ix86_pre_reload_split ()"
20435 "#"
20436 "&& 1"
20437 [(const_int 0)]
20438 {
20439 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
20440 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
20441 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
20442
20443 if (ecx)
20444 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
20445 operands[3], operands[4],
20446 operands[5], operands[6]));
20447 if (xmm0)
20448 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
20449 operands[3], operands[4],
20450 operands[5], operands[6]));
20451 if (flags && !(ecx || xmm0))
20452 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
20453 operands[2], operands[3],
20454 operands[4], operands[5],
20455 operands[6]));
20456 if (!(flags || ecx || xmm0))
20457 emit_note (NOTE_INSN_DELETED);
20458
20459 DONE;
20460 }
20461 [(set_attr "type" "sselog")
20462 (set_attr "prefix_data16" "1")
20463 (set_attr "prefix_extra" "1")
20464 (set_attr "length_immediate" "1")
20465 (set_attr "memory" "none,load")
20466 (set_attr "mode" "TI")])
20467
;; pcmpestri: explicit-length string compare returning an index in ECX
;; plus EFLAGS.
20468 (define_insn "sse4_2_pcmpestri"
20469 [(set (match_operand:SI 0 "register_operand" "=c,c")
20470 (unspec:SI
20471 [(match_operand:V16QI 1 "register_operand" "x,x")
20472 (match_operand:SI 2 "register_operand" "a,a")
20473 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
20474 (match_operand:SI 4 "register_operand" "d,d")
20475 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
20476 UNSPEC_PCMPESTR))
20477 (set (reg:CC FLAGS_REG)
20478 (unspec:CC
20479 [(match_dup 1)
20480 (match_dup 2)
20481 (match_dup 3)
20482 (match_dup 4)
20483 (match_dup 5)]
20484 UNSPEC_PCMPESTR))]
20485 "TARGET_SSE4_2"
20486 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
20487 [(set_attr "type" "sselog")
20488 (set_attr "prefix_data16" "1")
20489 (set_attr "prefix_extra" "1")
20490 (set_attr "prefix" "maybe_vex")
20491 (set_attr "length_immediate" "1")
20492 (set_attr "btver2_decode" "vector")
20493 (set_attr "memory" "none,load")
20494 (set_attr "mode" "TI")])
20495
;; pcmpestrm: explicit-length string compare returning a mask in XMM0
;; (constraint Yz) plus EFLAGS.
20496 (define_insn "sse4_2_pcmpestrm"
20497 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
20498 (unspec:V16QI
20499 [(match_operand:V16QI 1 "register_operand" "x,x")
20500 (match_operand:SI 2 "register_operand" "a,a")
20501 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
20502 (match_operand:SI 4 "register_operand" "d,d")
20503 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
20504 UNSPEC_PCMPESTR))
20505 (set (reg:CC FLAGS_REG)
20506 (unspec:CC
20507 [(match_dup 1)
20508 (match_dup 2)
20509 (match_dup 3)
20510 (match_dup 4)
20511 (match_dup 5)]
20512 UNSPEC_PCMPESTR))]
20513 "TARGET_SSE4_2"
20514 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
20515 [(set_attr "type" "sselog")
20516 (set_attr "prefix_data16" "1")
20517 (set_attr "prefix_extra" "1")
20518 (set_attr "length_immediate" "1")
20519 (set_attr "prefix" "maybe_vex")
20520 (set_attr "btver2_decode" "vector")
20521 (set_attr "memory" "none,load")
20522 (set_attr "mode" "TI")])
20523
;; pcmpestr when only EFLAGS is wanted: the index/mask destinations
;; are scratches, so either the ...i or the ...m form may be chosen.
20524 (define_insn "sse4_2_pcmpestr_cconly"
20525 [(set (reg:CC FLAGS_REG)
20526 (unspec:CC
20527 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
20528 (match_operand:SI 3 "register_operand" "a,a,a,a")
20529 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
20530 (match_operand:SI 5 "register_operand" "d,d,d,d")
20531 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
20532 UNSPEC_PCMPESTR))
20533 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
20534 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
20535 "TARGET_SSE4_2"
20536 "@
20537 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
20538 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
20539 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
20540 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
20541 [(set_attr "type" "sselog")
20542 (set_attr "prefix_data16" "1")
20543 (set_attr "prefix_extra" "1")
20544 (set_attr "length_immediate" "1")
20545 (set_attr "memory" "none,load,none,load")
20546 (set_attr "btver2_decode" "vector,vector,vector,vector")
20547 (set_attr "prefix" "maybe_vex")
20548 (set_attr "mode" "TI")])
20549
;; Combined pcmpistr (implicit-length) pattern producing all three
;; results (ECX index, XMM0 mask, EFLAGS).  Split before reload into
;; only the variants whose outputs are live, per REG_UNUSED notes.
20550 (define_insn_and_split "sse4_2_pcmpistr"
20551 [(set (match_operand:SI 0 "register_operand" "=c,c")
20552 (unspec:SI
20553 [(match_operand:V16QI 2 "register_operand" "x,x")
20554 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
20555 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
20556 UNSPEC_PCMPISTR))
20557 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
20558 (unspec:V16QI
20559 [(match_dup 2)
20560 (match_dup 3)
20561 (match_dup 4)]
20562 UNSPEC_PCMPISTR))
20563 (set (reg:CC FLAGS_REG)
20564 (unspec:CC
20565 [(match_dup 2)
20566 (match_dup 3)
20567 (match_dup 4)]
20568 UNSPEC_PCMPISTR))]
20569 "TARGET_SSE4_2
20570 && ix86_pre_reload_split ()"
20571 "#"
20572 "&& 1"
20573 [(const_int 0)]
20574 {
20575 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
20576 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
20577 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
20578
20579 if (ecx)
20580 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
20581 operands[3], operands[4]));
20582 if (xmm0)
20583 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
20584 operands[3], operands[4]));
20585 if (flags && !(ecx || xmm0))
20586 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
20587 operands[2], operands[3],
20588 operands[4]));
20589 if (!(flags || ecx || xmm0))
20590 emit_note (NOTE_INSN_DELETED);
20591
20592 DONE;
20593 }
20594 [(set_attr "type" "sselog")
20595 (set_attr "prefix_data16" "1")
20596 (set_attr "prefix_extra" "1")
20597 (set_attr "length_immediate" "1")
20598 (set_attr "memory" "none,load")
20599 (set_attr "mode" "TI")])
20600
;; pcmpistri: implicit-length string compare producing the index result in
;; ECX (hard reg via "c" constraint) plus EFLAGS.
20601 (define_insn "sse4_2_pcmpistri"
20602 [(set (match_operand:SI 0 "register_operand" "=c,c")
20603 (unspec:SI
20604 [(match_operand:V16QI 1 "register_operand" "x,x")
20605 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
20606 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20607 UNSPEC_PCMPISTR))
20608 (set (reg:CC FLAGS_REG)
20609 (unspec:CC
20610 [(match_dup 1)
20611 (match_dup 2)
20612 (match_dup 3)]
20613 UNSPEC_PCMPISTR))]
20614 "TARGET_SSE4_2"
20615 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
20616 [(set_attr "type" "sselog")
20617 (set_attr "prefix_data16" "1")
20618 (set_attr "prefix_extra" "1")
20619 (set_attr "length_immediate" "1")
20620 (set_attr "prefix" "maybe_vex")
20621 (set_attr "memory" "none,load")
20622 (set_attr "btver2_decode" "vector")
20623 (set_attr "mode" "TI")])
20624
;; pcmpistrm: implicit-length string compare producing the mask result in
;; XMM0 (hard reg via "Yz" constraint) plus EFLAGS.
20625 (define_insn "sse4_2_pcmpistrm"
20626 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
20627 (unspec:V16QI
20628 [(match_operand:V16QI 1 "register_operand" "x,x")
20629 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
20630 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20631 UNSPEC_PCMPISTR))
20632 (set (reg:CC FLAGS_REG)
20633 (unspec:CC
20634 [(match_dup 1)
20635 (match_dup 2)
20636 (match_dup 3)]
20637 UNSPEC_PCMPISTR))]
20638 "TARGET_SSE4_2"
20639 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
20640 [(set_attr "type" "sselog")
20641 (set_attr "prefix_data16" "1")
20642 (set_attr "prefix_extra" "1")
20643 (set_attr "length_immediate" "1")
20644 (set_attr "prefix" "maybe_vex")
20645 (set_attr "memory" "none,load")
20646 (set_attr "btver2_decode" "vector")
20647 (set_attr "mode" "TI")])
20648
;; Flags-only variant: only EFLAGS is consumed; the index/mask destinations
;; are clobber scratches, so either pcmpistrm or pcmpistri may be emitted
;; depending on which scratch register class the RA picked.
20649 (define_insn "sse4_2_pcmpistr_cconly"
20650 [(set (reg:CC FLAGS_REG)
20651 (unspec:CC
20652 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
20653 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
20654 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
20655 UNSPEC_PCMPISTR))
20656 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
20657 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
20658 "TARGET_SSE4_2"
20659 "@
20660 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
20661 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
20662 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
20663 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
20664 [(set_attr "type" "sselog")
20665 (set_attr "prefix_data16" "1")
20666 (set_attr "prefix_extra" "1")
20667 (set_attr "length_immediate" "1")
20668 (set_attr "memory" "none,load,none,load")
20669 (set_attr "prefix" "maybe_vex")
20670 (set_attr "btver2_decode" "vector,vector,vector,vector")
20671 (set_attr "mode" "TI")])
20672
20673 ;; Packed float variants
;; Memory mode of the SF-element gather/scatter prefetch for a given
;; index-vector mode (8 or 16 lanes of SFmode).
20674 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
20675 [(V8DI "V8SF") (V16SI "V16SF")])
20676
;; Expander for masked vgatherpf*ps: wraps the VSIB address components
;; (base, index vector, scale) into an UNSPEC_VSIBADDR for operand 5.
20677 (define_expand "avx512pf_gatherpf<mode>sf"
20678 [(unspec
20679 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20680 (mem:<GATHER_SCATTER_SF_MEM_MODE>
20681 (match_par_dup 5
20682 [(match_operand 2 "vsib_address_operand")
20683 (match_operand:VI48_512 1 "register_operand")
20684 (match_operand:SI 3 "const1248_operand")]))
20685 (match_operand:SI 4 "const_2_to_3_operand")]
20686 UNSPEC_GATHER_PREFETCH)]
20687 "TARGET_AVX512PF"
20688 {
20689 operands[5]
20690 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20691 operands[3]), UNSPEC_VSIBADDR);
20692 })
20693
;; Insn for masked gather prefetch, SF elements.  Operand 4 selects the
;; hint: 3 -> vgatherpf0*ps (T0), 2 -> vgatherpf1*ps (T1).
20694 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
20695 [(unspec
20696 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20697 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
20698 [(unspec:P
20699 [(match_operand:P 2 "vsib_address_operand" "Tv")
20700 (match_operand:VI48_512 1 "register_operand" "v")
20701 (match_operand:SI 3 "const1248_operand" "n")]
20702 UNSPEC_VSIBADDR)])
20703 (match_operand:SI 4 "const_2_to_3_operand" "n")]
20704 UNSPEC_GATHER_PREFETCH)]
20705 "TARGET_AVX512PF"
20706 {
20707 switch (INTVAL (operands[4]))
20708 {
20709 case 3:
20710 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20711 gas changed what it requires incompatibly. */
20712 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20713 case 2:
20714 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20715 default:
20716 gcc_unreachable ();
20717 }
20718 }
20719 [(set_attr "type" "sse")
20720 (set_attr "prefix" "evex")
20721 (set_attr "mode" "XI")])
20722
20723 ;; Packed double variants
;; Expander for masked vgatherpf*pd: same VSIB wrapping as the SF variant,
;; memory mode fixed to V8DF.
20724 (define_expand "avx512pf_gatherpf<mode>df"
20725 [(unspec
20726 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20727 (mem:V8DF
20728 (match_par_dup 5
20729 [(match_operand 2 "vsib_address_operand")
20730 (match_operand:VI4_256_8_512 1 "register_operand")
20731 (match_operand:SI 3 "const1248_operand")]))
20732 (match_operand:SI 4 "const_2_to_3_operand")]
20733 UNSPEC_GATHER_PREFETCH)]
20734 "TARGET_AVX512PF"
20735 {
20736 operands[5]
20737 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20738 operands[3]), UNSPEC_VSIBADDR);
20739 })
20740
;; Insn for masked gather prefetch, DF elements.  Operand 4 selects the
;; hint: 3 -> vgatherpf0*pd (T0), 2 -> vgatherpf1*pd (T1).
20741 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
20742 [(unspec
20743 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20744 (match_operator:V8DF 5 "vsib_mem_operator"
20745 [(unspec:P
20746 [(match_operand:P 2 "vsib_address_operand" "Tv")
20747 (match_operand:VI4_256_8_512 1 "register_operand" "v")
20748 (match_operand:SI 3 "const1248_operand" "n")]
20749 UNSPEC_VSIBADDR)])
20750 (match_operand:SI 4 "const_2_to_3_operand" "n")]
20751 UNSPEC_GATHER_PREFETCH)]
20752 "TARGET_AVX512PF"
20753 {
20754 switch (INTVAL (operands[4]))
20755 {
20756 case 3:
20757 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20758 gas changed what it requires incompatibly. */
20759 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20760 case 2:
20761 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20762 default:
20763 gcc_unreachable ();
20764 }
20765 }
20766 [(set_attr "type" "sse")
20767 (set_attr "prefix" "evex")
20768 (set_attr "mode" "XI")])
20769
20770 ;; Packed float variants
;; Expander for masked vscatterpf*ps: VSIB wrapping as above; the hint
;; operand accepts 2/3/6/7 (const2367) since scatter hints encode write
;; intent as well.
20771 (define_expand "avx512pf_scatterpf<mode>sf"
20772 [(unspec
20773 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20774 (mem:<GATHER_SCATTER_SF_MEM_MODE>
20775 (match_par_dup 5
20776 [(match_operand 2 "vsib_address_operand")
20777 (match_operand:VI48_512 1 "register_operand")
20778 (match_operand:SI 3 "const1248_operand")]))
20779 (match_operand:SI 4 "const2367_operand")]
20780 UNSPEC_SCATTER_PREFETCH)]
20781 "TARGET_AVX512PF"
20782 {
20783 operands[5]
20784 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20785 operands[3]), UNSPEC_VSIBADDR);
20786 })
20787
;; Insn for masked scatter prefetch, SF elements.  Operand 4: 3/7 ->
;; vscatterpf0*ps, 2/6 -> vscatterpf1*ps.
20788 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
20789 [(unspec
20790 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20791 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
20792 [(unspec:P
20793 [(match_operand:P 2 "vsib_address_operand" "Tv")
20794 (match_operand:VI48_512 1 "register_operand" "v")
20795 (match_operand:SI 3 "const1248_operand" "n")]
20796 UNSPEC_VSIBADDR)])
20797 (match_operand:SI 4 "const2367_operand" "n")]
20798 UNSPEC_SCATTER_PREFETCH)]
20799 "TARGET_AVX512PF"
20800 {
20801 switch (INTVAL (operands[4]))
20802 {
20803 case 3:
20804 case 7:
20805 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20806 gas changed what it requires incompatibly. */
20807 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20808 case 2:
20809 case 6:
20810 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20811 default:
20812 gcc_unreachable ();
20813 }
20814 }
20815 [(set_attr "type" "sse")
20816 (set_attr "prefix" "evex")
20817 (set_attr "mode" "XI")])
20818
20819 ;; Packed double variants
;; Expander for masked vscatterpf*pd: V8DF memory mode, hint 2/3/6/7.
20820 (define_expand "avx512pf_scatterpf<mode>df"
20821 [(unspec
20822 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20823 (mem:V8DF
20824 (match_par_dup 5
20825 [(match_operand 2 "vsib_address_operand")
20826 (match_operand:VI4_256_8_512 1 "register_operand")
20827 (match_operand:SI 3 "const1248_operand")]))
20828 (match_operand:SI 4 "const2367_operand")]
20829 UNSPEC_SCATTER_PREFETCH)]
20830 "TARGET_AVX512PF"
20831 {
20832 operands[5]
20833 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20834 operands[3]), UNSPEC_VSIBADDR);
20835 })
20836
;; Insn for masked scatter prefetch, DF elements.  Operand 4: 3/7 ->
;; vscatterpf0*pd, 2/6 -> vscatterpf1*pd.
20837 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
20838 [(unspec
20839 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20840 (match_operator:V8DF 5 "vsib_mem_operator"
20841 [(unspec:P
20842 [(match_operand:P 2 "vsib_address_operand" "Tv")
20843 (match_operand:VI4_256_8_512 1 "register_operand" "v")
20844 (match_operand:SI 3 "const1248_operand" "n")]
20845 UNSPEC_VSIBADDR)])
20846 (match_operand:SI 4 "const2367_operand" "n")]
20847 UNSPEC_SCATTER_PREFETCH)]
20848 "TARGET_AVX512PF"
20849 {
20850 switch (INTVAL (operands[4]))
20851 {
20852 case 3:
20853 case 7:
20854 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20855 gas changed what it requires incompatibly. */
20856 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20857 case 2:
20858 case 6:
20859 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20860 default:
20861 gcc_unreachable ();
20862 }
20863 }
20864 [(set_attr "type" "sse")
20865 (set_attr "prefix" "evex")
20866 (set_attr "mode" "XI")])
20867
;; AVX512ER vexp2ps/pd: approximate 2^x on a full 512-bit vector, with
;; optional masking and SAE (round_saeonly) subst variants.
20868 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
20869 [(set (match_operand:VF_512 0 "register_operand" "=v")
20870 (unspec:VF_512
20871 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20872 UNSPEC_EXP2))]
20873 "TARGET_AVX512ER"
20874 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20875 [(set_attr "prefix" "evex")
20876 (set_attr "type" "sse")
20877 (set_attr "mode" "<MODE>")])
20878
;; AVX512ER vrcp28ps/pd: reciprocal approximation with 2^-28 relative
;; error, full 512-bit vector, maskable, SAE-capable.
20879 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
20880 [(set (match_operand:VF_512 0 "register_operand" "=v")
20881 (unspec:VF_512
20882 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20883 UNSPEC_RCP28))]
20884 "TARGET_AVX512ER"
20885 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20886 [(set_attr "prefix" "evex")
20887 (set_attr "type" "sse")
20888 (set_attr "mode" "<MODE>")])
20889
;; AVX512ER vrcp28ss/sd: scalar reciprocal approximation merged into the
;; low element of operand 2 (vec_merge with mask 1), maskable, SAE-capable.
;; Fix: the Intel-syntax template used the misspelled attribute
;; <mask_opernad3>, which the subst machinery never defines; spell it
;; <mask_operand3>, matching the sibling avx512er_vmrsqrt28 pattern.
20890 (define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
20891 [(set (match_operand:VF_128 0 "register_operand" "=v")
20892 (vec_merge:VF_128
20893 (unspec:VF_128
20894 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
20895 UNSPEC_RCP28)
20896 (match_operand:VF_128 2 "register_operand" "v")
20897 (const_int 1)))]
20898 "TARGET_AVX512ER"
20899 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
20900 [(set_attr "length_immediate" "1")
20901 (set_attr "prefix" "evex")
20902 (set_attr "type" "sse")
20903 (set_attr "mode" "<MODE>")])
20904
;; AVX512ER vrsqrt28ps/pd: reciprocal square-root approximation with 2^-28
;; relative error, full 512-bit vector, maskable, SAE-capable.
20905 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
20906 [(set (match_operand:VF_512 0 "register_operand" "=v")
20907 (unspec:VF_512
20908 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20909 UNSPEC_RSQRT28))]
20910 "TARGET_AVX512ER"
20911 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20912 [(set_attr "prefix" "evex")
20913 (set_attr "type" "sse")
20914 (set_attr "mode" "<MODE>")])
20915
;; AVX512ER vrsqrt28ss/sd: scalar reciprocal-sqrt approximation merged
;; into the low element of operand 2, maskable, SAE-capable.
20916 (define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
20917 [(set (match_operand:VF_128 0 "register_operand" "=v")
20918 (vec_merge:VF_128
20919 (unspec:VF_128
20920 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
20921 UNSPEC_RSQRT28)
20922 (match_operand:VF_128 2 "register_operand" "v")
20923 (const_int 1)))]
20924 "TARGET_AVX512ER"
20925 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
20926 [(set_attr "length_immediate" "1")
20927 (set_attr "type" "sse")
20928 (set_attr "prefix" "evex")
20929 (set_attr "mode" "<MODE>")])
20930
20931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20932 ;;
20933 ;; XOP instructions
20934 ;;
20935 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20936
;; Iterator over wrapping vs. signed-saturating add, with matching
;; mnemonic fragments for the multiply-accumulate patterns below.
20937 (define_code_iterator xop_plus [plus ss_plus])
20938
20939 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
20940 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
20941
20942 ;; XOP parallel integer multiply/add instructions.
20943
;; vpmacs[s]ww/dd: full-width multiply-accumulate, dst = op1*op2 + op3
;; (plus or saturating plus per the xop_plus iterator).
20944 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
20945 [(set (match_operand:VI24_128 0 "register_operand" "=x")
20946 (xop_plus:VI24_128
20947 (mult:VI24_128
20948 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
20949 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
20950 (match_operand:VI24_128 3 "register_operand" "x")))]
20951 "TARGET_XOP"
20952 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20953 [(set_attr "type" "ssemuladd")
20954 (set_attr "mode" "TI")])
20955
;; vpmacs[s]dql: widen the even (low) SI lanes to DI, multiply and
;; accumulate into operand 3.
20956 (define_insn "xop_p<macs>dql"
20957 [(set (match_operand:V2DI 0 "register_operand" "=x")
20958 (xop_plus:V2DI
20959 (mult:V2DI
20960 (sign_extend:V2DI
20961 (vec_select:V2SI
20962 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
20963 (parallel [(const_int 0) (const_int 2)])))
20964 (sign_extend:V2DI
20965 (vec_select:V2SI
20966 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
20967 (parallel [(const_int 0) (const_int 2)]))))
20968 (match_operand:V2DI 3 "register_operand" "x")))]
20969 "TARGET_XOP"
20970 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20971 [(set_attr "type" "ssemuladd")
20972 (set_attr "mode" "TI")])
20973
;; vpmacs[s]dqh: same as dql but for the odd (high) SI lanes.
20974 (define_insn "xop_p<macs>dqh"
20975 [(set (match_operand:V2DI 0 "register_operand" "=x")
20976 (xop_plus:V2DI
20977 (mult:V2DI
20978 (sign_extend:V2DI
20979 (vec_select:V2SI
20980 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
20981 (parallel [(const_int 1) (const_int 3)])))
20982 (sign_extend:V2DI
20983 (vec_select:V2SI
20984 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
20985 (parallel [(const_int 1) (const_int 3)]))))
20986 (match_operand:V2DI 3 "register_operand" "x")))]
20987 "TARGET_XOP"
20988 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20989 [(set_attr "type" "ssemuladd")
20990 (set_attr "mode" "TI")])
20991
20992 ;; XOP parallel integer multiply/add instructions for the intrinisics
;; vpmacs[s]wd: sign-extend the odd HI lanes to SI, multiply and
;; accumulate into operand 3.
20993 (define_insn "xop_p<macs>wd"
20994 [(set (match_operand:V4SI 0 "register_operand" "=x")
20995 (xop_plus:V4SI
20996 (mult:V4SI
20997 (sign_extend:V4SI
20998 (vec_select:V4HI
20999 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
21000 (parallel [(const_int 1) (const_int 3)
21001 (const_int 5) (const_int 7)])))
21002 (sign_extend:V4SI
21003 (vec_select:V4HI
21004 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
21005 (parallel [(const_int 1) (const_int 3)
21006 (const_int 5) (const_int 7)]))))
21007 (match_operand:V4SI 3 "register_operand" "x")))]
21008 "TARGET_XOP"
21009 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21010 [(set_attr "type" "ssemuladd")
21011 (set_attr "mode" "TI")])
21012
;; vpmadcs[s]wd: multiply adjacent even/odd HI pairs, sum each pair, then
;; accumulate into operand 3 (plus or saturating plus).
21013 (define_insn "xop_p<madcs>wd"
21014 [(set (match_operand:V4SI 0 "register_operand" "=x")
21015 (xop_plus:V4SI
21016 (plus:V4SI
21017 (mult:V4SI
21018 (sign_extend:V4SI
21019 (vec_select:V4HI
21020 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
21021 (parallel [(const_int 0) (const_int 2)
21022 (const_int 4) (const_int 6)])))
21023 (sign_extend:V4SI
21024 (vec_select:V4HI
21025 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
21026 (parallel [(const_int 0) (const_int 2)
21027 (const_int 4) (const_int 6)]))))
21028 (mult:V4SI
21029 (sign_extend:V4SI
21030 (vec_select:V4HI
21031 (match_dup 1)
21032 (parallel [(const_int 1) (const_int 3)
21033 (const_int 5) (const_int 7)])))
21034 (sign_extend:V4SI
21035 (vec_select:V4HI
21036 (match_dup 2)
21037 (parallel [(const_int 1) (const_int 3)
21038 (const_int 5) (const_int 7)])))))
21039 (match_operand:V4SI 3 "register_operand" "x")))]
21040 "TARGET_XOP"
21041 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21042 [(set_attr "type" "ssemuladd")
21043 (set_attr "mode" "TI")])
21044
21045 ;; XOP parallel XMM conditional moves
;; vpcmov: bitwise select, dst = (op1 & op3) | (op2 & ~op3), expressed as
;; if_then_else on the condition vector operand 3.
21046 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
21047 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
21048 (if_then_else:V_128_256
21049 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
21050 (match_operand:V_128_256 1 "register_operand" "x,x")
21051 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
21052 "TARGET_XOP"
21053 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21054 [(set_attr "type" "sse4arg")])
21055
21056 ;; XOP horizontal add/subtract instructions
;; vphadd[u]bw: horizontally add adjacent byte pairs into HI lanes
;; (any_extend covers the signed and unsigned variants).
21057 (define_insn "xop_phadd<u>bw"
21058 [(set (match_operand:V8HI 0 "register_operand" "=x")
21059 (plus:V8HI
21060 (any_extend:V8HI
21061 (vec_select:V8QI
21062 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
21063 (parallel [(const_int 0) (const_int 2)
21064 (const_int 4) (const_int 6)
21065 (const_int 8) (const_int 10)
21066 (const_int 12) (const_int 14)])))
21067 (any_extend:V8HI
21068 (vec_select:V8QI
21069 (match_dup 1)
21070 (parallel [(const_int 1) (const_int 3)
21071 (const_int 5) (const_int 7)
21072 (const_int 9) (const_int 11)
21073 (const_int 13) (const_int 15)])))))]
21074 "TARGET_XOP"
21075 "vphadd<u>bw\t{%1, %0|%0, %1}"
21076 [(set_attr "type" "sseiadd1")])
21077
;; vphadd[u]bd: horizontally add groups of four bytes into SI lanes.
21078 (define_insn "xop_phadd<u>bd"
21079 [(set (match_operand:V4SI 0 "register_operand" "=x")
21080 (plus:V4SI
21081 (plus:V4SI
21082 (any_extend:V4SI
21083 (vec_select:V4QI
21084 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
21085 (parallel [(const_int 0) (const_int 4)
21086 (const_int 8) (const_int 12)])))
21087 (any_extend:V4SI
21088 (vec_select:V4QI
21089 (match_dup 1)
21090 (parallel [(const_int 1) (const_int 5)
21091 (const_int 9) (const_int 13)]))))
21092 (plus:V4SI
21093 (any_extend:V4SI
21094 (vec_select:V4QI
21095 (match_dup 1)
21096 (parallel [(const_int 2) (const_int 6)
21097 (const_int 10) (const_int 14)])))
21098 (any_extend:V4SI
21099 (vec_select:V4QI
21100 (match_dup 1)
21101 (parallel [(const_int 3) (const_int 7)
21102 (const_int 11) (const_int 15)]))))))]
21103 "TARGET_XOP"
21104 "vphadd<u>bd\t{%1, %0|%0, %1}"
21105 [(set_attr "type" "sseiadd1")])
21106
;; vphadd[u]bq: horizontally add groups of eight bytes into DI lanes
;; (tree of nested plus over byte selections).
21107 (define_insn "xop_phadd<u>bq"
21108 [(set (match_operand:V2DI 0 "register_operand" "=x")
21109 (plus:V2DI
21110 (plus:V2DI
21111 (plus:V2DI
21112 (any_extend:V2DI
21113 (vec_select:V2QI
21114 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
21115 (parallel [(const_int 0) (const_int 8)])))
21116 (any_extend:V2DI
21117 (vec_select:V2QI
21118 (match_dup 1)
21119 (parallel [(const_int 1) (const_int 9)]))))
21120 (plus:V2DI
21121 (any_extend:V2DI
21122 (vec_select:V2QI
21123 (match_dup 1)
21124 (parallel [(const_int 2) (const_int 10)])))
21125 (any_extend:V2DI
21126 (vec_select:V2QI
21127 (match_dup 1)
21128 (parallel [(const_int 3) (const_int 11)])))))
21129 (plus:V2DI
21130 (plus:V2DI
21131 (any_extend:V2DI
21132 (vec_select:V2QI
21133 (match_dup 1)
21134 (parallel [(const_int 4) (const_int 12)])))
21135 (any_extend:V2DI
21136 (vec_select:V2QI
21137 (match_dup 1)
21138 (parallel [(const_int 5) (const_int 13)]))))
21139 (plus:V2DI
21140 (any_extend:V2DI
21141 (vec_select:V2QI
21142 (match_dup 1)
21143 (parallel [(const_int 6) (const_int 14)])))
21144 (any_extend:V2DI
21145 (vec_select:V2QI
21146 (match_dup 1)
21147 (parallel [(const_int 7) (const_int 15)])))))))]
21148 "TARGET_XOP"
21149 "vphadd<u>bq\t{%1, %0|%0, %1}"
21150 [(set_attr "type" "sseiadd1")])
21151
;; vphadd[u]wd: horizontally add adjacent HI pairs into SI lanes.
21152 (define_insn "xop_phadd<u>wd"
21153 [(set (match_operand:V4SI 0 "register_operand" "=x")
21154 (plus:V4SI
21155 (any_extend:V4SI
21156 (vec_select:V4HI
21157 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
21158 (parallel [(const_int 0) (const_int 2)
21159 (const_int 4) (const_int 6)])))
21160 (any_extend:V4SI
21161 (vec_select:V4HI
21162 (match_dup 1)
21163 (parallel [(const_int 1) (const_int 3)
21164 (const_int 5) (const_int 7)])))))]
21165 "TARGET_XOP"
21166 "vphadd<u>wd\t{%1, %0|%0, %1}"
21167 [(set_attr "type" "sseiadd1")])
21168
;; vphadd[u]wq: horizontally add groups of four HI elements into DI lanes.
21169 (define_insn "xop_phadd<u>wq"
21170 [(set (match_operand:V2DI 0 "register_operand" "=x")
21171 (plus:V2DI
21172 (plus:V2DI
21173 (any_extend:V2DI
21174 (vec_select:V2HI
21175 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
21176 (parallel [(const_int 0) (const_int 4)])))
21177 (any_extend:V2DI
21178 (vec_select:V2HI
21179 (match_dup 1)
21180 (parallel [(const_int 1) (const_int 5)]))))
21181 (plus:V2DI
21182 (any_extend:V2DI
21183 (vec_select:V2HI
21184 (match_dup 1)
21185 (parallel [(const_int 2) (const_int 6)])))
21186 (any_extend:V2DI
21187 (vec_select:V2HI
21188 (match_dup 1)
21189 (parallel [(const_int 3) (const_int 7)]))))))]
21190 "TARGET_XOP"
21191 "vphadd<u>wq\t{%1, %0|%0, %1}"
21192 [(set_attr "type" "sseiadd1")])
21193
;; vphadd[u]dq: horizontally add adjacent SI pairs into DI lanes.
21194 (define_insn "xop_phadd<u>dq"
21195 [(set (match_operand:V2DI 0 "register_operand" "=x")
21196 (plus:V2DI
21197 (any_extend:V2DI
21198 (vec_select:V2SI
21199 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
21200 (parallel [(const_int 0) (const_int 2)])))
21201 (any_extend:V2DI
21202 (vec_select:V2SI
21203 (match_dup 1)
21204 (parallel [(const_int 1) (const_int 3)])))))]
21205 "TARGET_XOP"
21206 "vphadd<u>dq\t{%1, %0|%0, %1}"
21207 [(set_attr "type" "sseiadd1")])
21208
;; vphsubbw: horizontal subtract of adjacent byte pairs (even - odd) into
;; HI lanes; signed only, hence sign_extend rather than any_extend.
21209 (define_insn "xop_phsubbw"
21210 [(set (match_operand:V8HI 0 "register_operand" "=x")
21211 (minus:V8HI
21212 (sign_extend:V8HI
21213 (vec_select:V8QI
21214 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
21215 (parallel [(const_int 0) (const_int 2)
21216 (const_int 4) (const_int 6)
21217 (const_int 8) (const_int 10)
21218 (const_int 12) (const_int 14)])))
21219 (sign_extend:V8HI
21220 (vec_select:V8QI
21221 (match_dup 1)
21222 (parallel [(const_int 1) (const_int 3)
21223 (const_int 5) (const_int 7)
21224 (const_int 9) (const_int 11)
21225 (const_int 13) (const_int 15)])))))]
21226 "TARGET_XOP"
21227 "vphsubbw\t{%1, %0|%0, %1}"
21228 [(set_attr "type" "sseiadd1")])
21229
;; vphsubwd: horizontal subtract of adjacent HI pairs into SI lanes.
21230 (define_insn "xop_phsubwd"
21231 [(set (match_operand:V4SI 0 "register_operand" "=x")
21232 (minus:V4SI
21233 (sign_extend:V4SI
21234 (vec_select:V4HI
21235 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
21236 (parallel [(const_int 0) (const_int 2)
21237 (const_int 4) (const_int 6)])))
21238 (sign_extend:V4SI
21239 (vec_select:V4HI
21240 (match_dup 1)
21241 (parallel [(const_int 1) (const_int 3)
21242 (const_int 5) (const_int 7)])))))]
21243 "TARGET_XOP"
21244 "vphsubwd\t{%1, %0|%0, %1}"
21245 [(set_attr "type" "sseiadd1")])
21246
;; vphsubdq: horizontal subtract of adjacent SI pairs into DI lanes.
21247 (define_insn "xop_phsubdq"
21248 [(set (match_operand:V2DI 0 "register_operand" "=x")
21249 (minus:V2DI
21250 (sign_extend:V2DI
21251 (vec_select:V2SI
21252 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
21253 (parallel [(const_int 0) (const_int 2)])))
21254 (sign_extend:V2DI
21255 (vec_select:V2SI
21256 (match_dup 1)
21257 (parallel [(const_int 1) (const_int 3)])))))]
21258 "TARGET_XOP"
21259 "vphsubdq\t{%1, %0|%0, %1}"
21260 [(set_attr "type" "sseiadd1")])
21261
21262 ;; XOP permute instructions
;; vpperm: byte permute of the 32-byte concatenation of op1:op2 under the
;; selector vector op3; at most one of op2/op3 may be in memory.
21263 (define_insn "xop_pperm"
21264 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
21265 (unspec:V16QI
21266 [(match_operand:V16QI 1 "register_operand" "x,x")
21267 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
21268 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
21269 UNSPEC_XOP_PERMUTE))]
21270 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21271 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21272 [(set_attr "type" "sse4arg")
21273 (set_attr "mode" "TI")])
21274
21275 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a DI->SI truncating pack; op3 (used, not matched) holds
;; the selector that picks the low halves.
21276 (define_insn "xop_pperm_pack_v2di_v4si"
21277 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
21278 (vec_concat:V4SI
21279 (truncate:V2SI
21280 (match_operand:V2DI 1 "register_operand" "x,x"))
21281 (truncate:V2SI
21282 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
21283 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
21284 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21285 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21286 [(set_attr "type" "sse4arg")
21287 (set_attr "mode" "TI")])
21288
;; vpperm used as an SI->HI truncating pack (selector in operand 3).
21289 (define_insn "xop_pperm_pack_v4si_v8hi"
21290 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
21291 (vec_concat:V8HI
21292 (truncate:V4HI
21293 (match_operand:V4SI 1 "register_operand" "x,x"))
21294 (truncate:V4HI
21295 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
21296 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
21297 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21298 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21299 [(set_attr "type" "sse4arg")
21300 (set_attr "mode" "TI")])
21301
;; vpperm used as an HI->QI truncating pack (selector in operand 3).
21302 (define_insn "xop_pperm_pack_v8hi_v16qi"
21303 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
21304 (vec_concat:V16QI
21305 (truncate:V8QI
21306 (match_operand:V8HI 1 "register_operand" "x,x"))
21307 (truncate:V8QI
21308 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
21309 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
21310 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21311 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21312 [(set_attr "type" "sse4arg")
21313 (set_attr "mode" "TI")])
21314
21315 ;; XOP packed rotate instructions
;; Expand vector rotate-left.  An in-range constant count matches the
;; immediate-form insn directly; otherwise the scalar count is broadcast
;; into a vector register and the variable-count vprot* form is used.
21316 (define_expand "rotl<mode>3"
21317 [(set (match_operand:VI_128 0 "register_operand")
21318 (rotate:VI_128
21319 (match_operand:VI_128 1 "nonimmediate_operand")
21320 (match_operand:SI 2 "general_operand")))]
21321 "TARGET_XOP"
21322 {
21323 /* If we were given a scalar, convert it to parallel */
21324 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
21325 {
21326 rtvec vs = rtvec_alloc (<ssescalarnum>);
21327 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
21328 rtx reg = gen_reg_rtx (<MODE>mode);
21329 rtx op2 = operands[2];
21330 int i;
21331
21332 if (GET_MODE (op2) != <ssescalarmode>mode)
21333 {
21334 op2 = gen_reg_rtx (<ssescalarmode>mode);
21335 convert_move (op2, operands[2], false);
21336 }
21337
21338 for (i = 0; i < <ssescalarnum>; i++)
21339 RTVEC_ELT (vs, i) = op2;
21340
21341 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
21342 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
21343 DONE;
21344 }
21345 })
21346
;; Expand vector rotate-right.  Like rotl<mode>3 above, but a variable
;; count is broadcast and then negated, since XOP only has a rotate-left
;; with signed counts (vprot* rotates right for negative counts).
21347 (define_expand "rotr<mode>3"
21348 [(set (match_operand:VI_128 0 "register_operand")
21349 (rotatert:VI_128
21350 (match_operand:VI_128 1 "nonimmediate_operand")
21351 (match_operand:SI 2 "general_operand")))]
21352 "TARGET_XOP"
21353 {
21354 /* If we were given a scalar, convert it to parallel */
21355 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
21356 {
21357 rtvec vs = rtvec_alloc (<ssescalarnum>);
21358 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
21359 rtx neg = gen_reg_rtx (<MODE>mode);
21360 rtx reg = gen_reg_rtx (<MODE>mode);
21361 rtx op2 = operands[2];
21362 int i;
21363
21364 if (GET_MODE (op2) != <ssescalarmode>mode)
21365 {
21366 op2 = gen_reg_rtx (<ssescalarmode>mode);
21367 convert_move (op2, operands[2], false);
21368 }
21369
21370 for (i = 0; i < <ssescalarnum>; i++)
21371 RTVEC_ELT (vs, i) = op2;
21372
21373 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
21374 emit_insn (gen_neg<mode>2 (neg, reg));
21375 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
21376 DONE;
21377 }
21378 })
21379
;; vprotb/w/d/q with immediate count: rotate left each element.
21380 (define_insn "xop_rotl<mode>3"
21381 [(set (match_operand:VI_128 0 "register_operand" "=x")
21382 (rotate:VI_128
21383 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
21384 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
21385 "TARGET_XOP"
21386 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21387 [(set_attr "type" "sseishft")
21388 (set_attr "length_immediate" "1")
21389 (set_attr "mode" "TI")])
21390
;; Rotate right by immediate, implemented as vprot* left by
;; (element width - count), computed into operands[3] at output time.
21391 (define_insn "xop_rotr<mode>3"
21392 [(set (match_operand:VI_128 0 "register_operand" "=x")
21393 (rotatert:VI_128
21394 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
21395 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
21396 "TARGET_XOP"
21397 {
21398 operands[3]
21399 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
21400 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
21401 }
21402 [(set_attr "type" "sseishft")
21403 (set_attr "length_immediate" "1")
21404 (set_attr "mode" "TI")])
21405
;; Per-element variable rotate right: negate the count vector and reuse
;; the XOP variable rotate-left insn.
21406 (define_expand "vrotr<mode>3"
21407 [(match_operand:VI_128 0 "register_operand")
21408 (match_operand:VI_128 1 "register_operand")
21409 (match_operand:VI_128 2 "register_operand")]
21410 "TARGET_XOP"
21411 {
21412 rtx reg = gen_reg_rtx (<MODE>mode);
21413 emit_insn (gen_neg<mode>2 (reg, operands[2]));
21414 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
21415 DONE;
21416 })
21417
;; Per-element variable rotate left: maps directly onto xop_vrotl<mode>3.
21418 (define_expand "vrotl<mode>3"
21419 [(match_operand:VI_128 0 "register_operand")
21420 (match_operand:VI_128 1 "register_operand")
21421 (match_operand:VI_128 2 "register_operand")]
21422 "TARGET_XOP"
21423 {
21424 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
21425 DONE;
21426 })
21427
;; vprot* with a per-element count vector: non-negative counts rotate
;; left, negative counts rotate right by the negated amount.
21428 (define_insn "xop_vrotl<mode>3"
21429 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
21430 (if_then_else:VI_128
21431 (ge:VI_128
21432 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
21433 (const_int 0))
21434 (rotate:VI_128
21435 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
21436 (match_dup 2))
21437 (rotatert:VI_128
21438 (match_dup 1)
21439 (neg:VI_128 (match_dup 2)))))]
21440 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21441 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21442 [(set_attr "type" "sseishft")
21443 (set_attr "prefix_data16" "0")
21444 (set_attr "prefix_extra" "2")
21445 (set_attr "mode" "TI")])
21446
21447 ;; XOP packed shift instructions.
;; Variable logical right shift, QI/HI x 128 bits.  XOP: negate the count
;; and use its signed-count shift-left; AVX512BW+VL QI: lower through the
;; generic QI->HI widening helper; otherwise the pattern stands as-is.
21448 (define_expand "vlshr<mode>3"
21449 [(set (match_operand:VI12_128 0 "register_operand")
21450 (lshiftrt:VI12_128
21451 (match_operand:VI12_128 1 "register_operand")
21452 (match_operand:VI12_128 2 "nonimmediate_operand")))]
21453 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
21454 {
21455 if (TARGET_XOP)
21456 {
21457 rtx neg = gen_reg_rtx (<MODE>mode);
21458 emit_insn (gen_neg<mode>2 (neg, operands[2]));
21459 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
21460 DONE;
21461 }
21462 else if (<MODE>mode == V16QImode)
21463 {
21464 ix86_expand_vecop_qihi (LSHIFTRT, operands[0], operands[1], operands[2]);
21465 DONE;
21466 }
21467 })
21468
;; Variable logical right shift, SI/DI x 128 bits: AVX2 matches directly;
;; the XOP-only path negates the count and uses the signed-count shift.
21469 (define_expand "vlshr<mode>3"
21470 [(set (match_operand:VI48_128 0 "register_operand")
21471 (lshiftrt:VI48_128
21472 (match_operand:VI48_128 1 "register_operand")
21473 (match_operand:VI48_128 2 "nonimmediate_operand")))]
21474 "TARGET_AVX2 || TARGET_XOP"
21475 {
21476 if (!TARGET_AVX2)
21477 {
21478 rtx neg = gen_reg_rtx (<MODE>mode);
21479 emit_insn (gen_neg<mode>2 (neg, operands[2]));
21480 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
21481 DONE;
21482 }
21483 })
21484
;; Variable shifts (any_shift) for 256/512-bit QI/HI vectors under
;; AVX512BW; QI modes are lowered through the QI->HI widening helper.
21485 (define_expand "v<insn><mode>3"
21486 [(set (match_operand:VI12_256_512_AVX512VL 0 "register_operand")
21487 (any_shift:VI12_256_512_AVX512VL
21488 (match_operand:VI12_256_512_AVX512VL 1 "register_operand")
21489 (match_operand:VI12_256_512_AVX512VL 2 "nonimmediate_operand")))]
21490 "TARGET_AVX512BW"
21491 {
21492 if (<MODE>mode == V32QImode || <MODE>mode == V64QImode)
21493 {
21494 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
21495 DONE;
21496 }
21497 })
21498
;; Variable shifts for V8QI (64-bit MMX-sized vector), lowered through the
;; QI->HI widening helper; 64-bit only.
21499 (define_expand "v<insn>v8qi3"
21500 [(set (match_operand:V8QI 0 "register_operand")
21501 (any_shift:V8QI
21502 (match_operand:V8QI 1 "register_operand")
21503 (match_operand:V8QI 2 "nonimmediate_operand")))]
21504 "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_64BIT"
21505 {
21506 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
21507 DONE;
21508 })
21509
;; Variable logical right shift, SI/DI x 512 bits: direct AVX512F match.
21510 (define_expand "vlshr<mode>3"
21511 [(set (match_operand:VI48_512 0 "register_operand")
21512 (lshiftrt:VI48_512
21513 (match_operand:VI48_512 1 "register_operand")
21514 (match_operand:VI48_512 2 "nonimmediate_operand")))]
21515 "TARGET_AVX512F")
21516
21517 (define_expand "vlshr<mode>3"
21518 [(set (match_operand:VI48_256 0 "register_operand")
21519 (lshiftrt:VI48_256
21520 (match_operand:VI48_256 1 "register_operand")
21521 (match_operand:VI48_256 2 "nonimmediate_operand")))]
21522 "TARGET_AVX2")
21523
;; Per-element arithmetic right shift for V8DI; AVX512F provides this
;; directly, so no preparation statements are required.
21524 (define_expand "vashrv8di3"
21525 [(set (match_operand:V8DI 0 "register_operand")
21526 (ashiftrt:V8DI
21527 (match_operand:V8DI 1 "register_operand")
21528 (match_operand:V8DI 2 "nonimmediate_operand")))]
21529 "TARGET_AVX512F")
21530
;; Per-element arithmetic right shift for V4DI.  With AVX512VL an insn
;; matches directly.  Plain AVX2 has no variable 64-bit arithmetic
;; shift, so it is emulated using the identity
;;   x >>a n = ((x >>l n) ^ (m >>l n)) - (m >>l n)
;; where m is the sign-bit mask and >>l is a logical shift.
21531 (define_expand "vashrv4di3"
21532 [(set (match_operand:V4DI 0 "register_operand")
21533 (ashiftrt:V4DI
21534 (match_operand:V4DI 1 "register_operand")
21535 (match_operand:V4DI 2 "nonimmediate_operand")))]
21536 "TARGET_AVX2"
21537 {
21538 if (!TARGET_AVX512VL)
21539 {
21540 rtx mask = ix86_build_signbit_mask (V4DImode, 1, 0);
21541 rtx t1 = gen_reg_rtx (V4DImode);
21542 rtx t2 = gen_reg_rtx (V4DImode);
21543 rtx t3 = gen_reg_rtx (V4DImode);
21544 emit_insn (gen_vlshrv4di3 (t1, operands[1], operands[2]));
21545 emit_insn (gen_vlshrv4di3 (t2, mask, operands[2]));
21546 emit_insn (gen_xorv4di3 (t3, t1, t2));
21547 emit_insn (gen_subv4di3 (operands[0], t3, t2));
21548 DONE;
21549 }
21550 })
21551
;; Per-element arithmetic right shift for 128-bit byte/word vectors.
;; XOP negates the counts and reuses xop_sha (vpsha), which shifts
;; right arithmetically for negative per-element counts.  With
;; AVX512BW+VL, V16QI is widened through ix86_expand_vecop_qihi and
;; V8HI falls through to an insn pattern.
21552 (define_expand "vashr<mode>3"
21553 [(set (match_operand:VI12_128 0 "register_operand")
21554 (ashiftrt:VI12_128
21555 (match_operand:VI12_128 1 "register_operand")
21556 (match_operand:VI12_128 2 "nonimmediate_operand")))]
21557 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
21558 {
21559 if (TARGET_XOP)
21560 {
21561 rtx neg = gen_reg_rtx (<MODE>mode);
21562 emit_insn (gen_neg<mode>2 (neg, operands[2]));
21563 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
21564 DONE;
21565 }
21566 else if (<MODE>mode == V16QImode)
21567 {
21568 ix86_expand_vecop_qihi (ASHIFTRT, operands[0], operands[1], operands[2]);
21569 DONE;
21570 }
21571 })
21572
;; Per-element arithmetic right shift for V2DI.  XOP uses vpshaq with
;; negated counts.  Otherwise, without AVX512VL, emulate via
;;   x >>a n = ((x >>l n) ^ (m >>l n)) - (m >>l n)
;; with m the sign-bit mask; with AVX512VL fall through to an insn.
21573 (define_expand "vashrv2di3"
21574 [(set (match_operand:V2DI 0 "register_operand")
21575 (ashiftrt:V2DI
21576 (match_operand:V2DI 1 "register_operand")
21577 (match_operand:V2DI 2 "nonimmediate_operand")))]
21578 "TARGET_XOP || TARGET_AVX2"
21579 {
21580 if (TARGET_XOP)
21581 {
21582 rtx neg = gen_reg_rtx (V2DImode);
21583 emit_insn (gen_negv2di2 (neg, operands[2]));
21584 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
21585 DONE;
21586 }
21587 if (!TARGET_AVX512VL)
21588 {
21589 rtx mask = ix86_build_signbit_mask (V2DImode, 1, 0);
21590 rtx t1 = gen_reg_rtx (V2DImode);
21591 rtx t2 = gen_reg_rtx (V2DImode);
21592 rtx t3 = gen_reg_rtx (V2DImode);
21593 emit_insn (gen_vlshrv2di3 (t1, operands[1], operands[2]));
21594 emit_insn (gen_vlshrv2di3 (t2, mask, operands[2]));
21595 emit_insn (gen_xorv2di3 (t3, t1, t2));
21596 emit_insn (gen_subv2di3 (operands[0], t3, t2));
21597 DONE;
21598 }
21599 })
21600
;; Per-element arithmetic right shift for V4SI: direct insn on AVX2,
;; otherwise XOP vpshad with negated counts.
21601 (define_expand "vashrv4si3"
21602 [(set (match_operand:V4SI 0 "register_operand")
21603 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
21604 (match_operand:V4SI 2 "nonimmediate_operand")))]
21605 "TARGET_AVX2 || TARGET_XOP"
21606 {
21607 if (!TARGET_AVX2)
21608 {
21609 rtx neg = gen_reg_rtx (V4SImode);
21610 emit_insn (gen_negv4si2 (neg, operands[2]));
21611 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
21612 DONE;
21613 }
21614 })
21615
;; Per-element arithmetic right shift for V16SI (AVX512F); expander
;; only exposes the standard named pattern.
21616 (define_expand "vashrv16si3"
21617 [(set (match_operand:V16SI 0 "register_operand")
21618 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
21619 (match_operand:V16SI 2 "nonimmediate_operand")))]
21620 "TARGET_AVX512F")
21621
;; Same, for V8SI under AVX2.
21622 (define_expand "vashrv8si3"
21623 [(set (match_operand:V8SI 0 "register_operand")
21624 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
21625 (match_operand:V8SI 2 "nonimmediate_operand")))]
21626 "TARGET_AVX2")
21627
;; Per-element left shift for 128-bit byte/word vectors.  xop_sha
;; (vpsha) shifts left for non-negative per-element counts, so the
;; counts are used unmodified; AVX512BW+VL widens V16QI through
;; ix86_expand_vecop_qihi, and V8HI falls through to an insn.
21628 (define_expand "vashl<mode>3"
21629 [(set (match_operand:VI12_128 0 "register_operand")
21630 (ashift:VI12_128
21631 (match_operand:VI12_128 1 "register_operand")
21632 (match_operand:VI12_128 2 "nonimmediate_operand")))]
21633 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
21634 {
21635 if (TARGET_XOP)
21636 {
21637 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
21638 DONE;
21639 }
21640 else if (<MODE>mode == V16QImode)
21641 {
21642 ix86_expand_vecop_qihi (ASHIFT, operands[0], operands[1], operands[2]);
21643 DONE;
21644 }
21645 })
21646
;; Per-element left shift for 128-bit dword/qword vectors.  On the
;; XOP-only path the count is forced to a register: xop_sha rejects
;; both operands in memory, and operand 1 may already be a MEM.
21647 (define_expand "vashl<mode>3"
21648 [(set (match_operand:VI48_128 0 "register_operand")
21649 (ashift:VI48_128
21650 (match_operand:VI48_128 1 "register_operand")
21651 (match_operand:VI48_128 2 "nonimmediate_operand")))]
21652 "TARGET_AVX2 || TARGET_XOP"
21653 {
21654 if (!TARGET_AVX2)
21655 {
21656 operands[2] = force_reg (<MODE>mode, operands[2]);
21657 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
21658 DONE;
21659 }
21660 })
21661
;; Per-element left shift, 512-bit dword/qword vectors (AVX512F);
;; expander only exposes the standard named pattern.
21662 (define_expand "vashl<mode>3"
21663 [(set (match_operand:VI48_512 0 "register_operand")
21664 (ashift:VI48_512
21665 (match_operand:VI48_512 1 "register_operand")
21666 (match_operand:VI48_512 2 "nonimmediate_operand")))]
21667 "TARGET_AVX512F")
21668
;; Same, for 256-bit vectors under AVX2.
21669 (define_expand "vashl<mode>3"
21670 [(set (match_operand:VI48_256 0 "register_operand")
21671 (ashift:VI48_256
21672 (match_operand:VI48_256 1 "register_operand")
21673 (match_operand:VI48_256 2 "nonimmediate_operand")))]
21674 "TARGET_AVX2")
21675
;; XOP vpsha: per-element arithmetic shift with signed counts.
;; Each element is shifted left by its count when the count is
;; non-negative, otherwise arithmetic-shifted right by the negated
;; count.  At most one of operands 1/2 may be a memory operand.
21676 (define_insn "xop_sha<mode>3"
21677 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
21678 (if_then_else:VI_128
21679 (ge:VI_128
21680 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
21681 (const_int 0))
21682 (ashift:VI_128
21683 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
21684 (match_dup 2))
21685 (ashiftrt:VI_128
21686 (match_dup 1)
21687 (neg:VI_128 (match_dup 2)))))]
21688 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21689 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21690 [(set_attr "type" "sseishft")
21691 (set_attr "prefix_data16" "0")
21692 (set_attr "prefix_extra" "2")
21693 (set_attr "mode" "TI")])
21694
;; XOP vpshl: like xop_sha above but the negative-count direction is a
;; logical (zero-filling) right shift instead of an arithmetic one.
21695 (define_insn "xop_shl<mode>3"
21696 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
21697 (if_then_else:VI_128
21698 (ge:VI_128
21699 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
21700 (const_int 0))
21701 (ashift:VI_128
21702 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
21703 (match_dup 2))
21704 (lshiftrt:VI_128
21705 (match_dup 1)
21706 (neg:VI_128 (match_dup 2)))))]
21707 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21708 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21709 [(set_attr "type" "sseishft")
21710 (set_attr "prefix_data16" "0")
21711 (set_attr "prefix_extra" "2")
21712 (set_attr "mode" "TI")])
21713
21714 (define_expand "<insn><mode>3"
21715 [(set (match_operand:VI1_AVX512 0 "register_operand")
21716 (any_shift:VI1_AVX512
21717 (match_operand:VI1_AVX512 1 "register_operand")
21718 (match_operand:SI 2 "nonmemory_operand")))]
21719 "TARGET_SSE2"
21720 {
21721 if (TARGET_XOP && <MODE>mode == V16QImode)
21722 {
21723 bool negate = false;
21724 rtx (*gen) (rtx, rtx, rtx);
21725 rtx tmp, par;
21726 int i;
21727
21728 if (<CODE> != ASHIFT)
21729 {
21730 if (CONST_INT_P (operands[2]))
21731 operands[2] = GEN_INT (-INTVAL (operands[2]));
21732 else
21733 negate = true;
21734 }
21735 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
21736 for (i = 0; i < 16; i++)
21737 XVECEXP (par, 0, i) = operands[2];
21738
21739 tmp = gen_reg_rtx (V16QImode);
21740 emit_insn (gen_vec_initv16qiqi (tmp, par));
21741
21742 if (negate)
21743 emit_insn (gen_negv16qi2 (tmp, tmp));
21744
21745 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
21746 emit_insn (gen (operands[0], operands[1], tmp));
21747 }
21748 else
21749 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
21750 DONE;
21751 })
21752
;; Arithmetic right shift of V2DI by a single scalar DImode count.
;; SSE2 has no 64-bit arithmetic shift, so several emulation
;; strategies are used depending on the available ISA and on whether
;; the count is a compile-time constant; with AVX512VL the expander
;; falls through and an insn matches directly.
21753 (define_expand "ashrv2di3"
21754 [(set (match_operand:V2DI 0 "register_operand")
21755 (ashiftrt:V2DI
21756 (match_operand:V2DI 1 "register_operand")
21757 (match_operand:DI 2 "nonmemory_operand")))]
21758 "TARGET_SSE2"
21759 {
21760 if (!TARGET_AVX512VL)
21761 {
/* Count >= 63: every element becomes 0 or all-ones depending on its
sign; compute that as (0 > x) with SSE4.2 pcmpgtq.  */
21762 if (TARGET_SSE4_2
21763 && CONST_INT_P (operands[2])
21764 && UINTVAL (operands[2]) >= 63)
21765 {
21766 rtx zero = force_reg (V2DImode, CONST0_RTX (V2DImode));
21767 emit_insn (gen_sse4_2_gtv2di3 (operands[0], zero, operands[1]));
21768 DONE;
21769 }
/* Shift by zero is a plain copy.  */
21770 if (operands[2] == const0_rtx)
21771 {
21772 emit_move_insn (operands[0], operands[1]);
21773 DONE;
21774 }
/* Constant count: build the result from 32-bit arithmetic/logical
shifts of the V4SI view plus one shuffle that recombines the
halves; the selector depends on which count range we are in.  */
21775 if (CONST_INT_P (operands[2])
21776 && (!TARGET_XOP || UINTVAL (operands[2]) >= 63))
21777 {
21778 vec_perm_builder sel (4, 4, 1);
21779 sel.quick_grow (4);
21780 rtx arg0, arg1;
21781 rtx op1 = lowpart_subreg (V4SImode, operands[1], V2DImode);
21782 rtx target = gen_reg_rtx (V4SImode);
21783 if (UINTVAL (operands[2]) >= 63)
21784 {
21785 arg0 = arg1 = gen_reg_rtx (V4SImode);
21786 emit_insn (gen_ashrv4si3 (arg0, op1, GEN_INT (31)));
21787 sel[0] = 1;
21788 sel[1] = 1;
21789 sel[2] = 3;
21790 sel[3] = 3;
21791 }
21792 else if (INTVAL (operands[2]) > 32)
21793 {
21794 arg0 = gen_reg_rtx (V4SImode);
21795 arg1 = gen_reg_rtx (V4SImode);
21796 emit_insn (gen_ashrv4si3 (arg1, op1, GEN_INT (31)));
21797 emit_insn (gen_ashrv4si3 (arg0, op1,
21798 GEN_INT (INTVAL (operands[2]) - 32)));
21799 sel[0] = 1;
21800 sel[1] = 5;
21801 sel[2] = 3;
21802 sel[3] = 7;
21803 }
21804 else if (INTVAL (operands[2]) == 32)
21805 {
21806 arg0 = op1;
21807 arg1 = gen_reg_rtx (V4SImode);
21808 emit_insn (gen_ashrv4si3 (arg1, op1, GEN_INT (31)));
21809 sel[0] = 1;
21810 sel[1] = 5;
21811 sel[2] = 3;
21812 sel[3] = 7;
21813 }
21814 else
21815 {
21816 arg0 = gen_reg_rtx (V2DImode);
21817 arg1 = gen_reg_rtx (V4SImode);
21818 emit_insn (gen_lshrv2di3 (arg0, operands[1], operands[2]));
21819 emit_insn (gen_ashrv4si3 (arg1, op1, operands[2]));
21820 arg0 = lowpart_subreg (V4SImode, arg0, V2DImode);
21821 sel[0] = 0;
21822 sel[1] = 5;
21823 sel[2] = 2;
21824 sel[3] = 7;
21825 }
21826 vec_perm_indices indices (sel, arg0 != arg1 ? 2 : 1, 4);
21827 bool ok = targetm.vectorize.vec_perm_const (V4SImode, target,
21828 arg0, arg1, indices);
21829 gcc_assert (ok);
21830 emit_move_insn (operands[0],
21831 lowpart_subreg (V2DImode, target, V4SImode));
21832 DONE;
21833 }
/* Variable count without XOP: result = (x >>l n) | (signs << (64-n))
where "signs" is 0 or all-ones per element (pcmpgtq on SSE4.2,
otherwise psrad + pshufd to replicate the high-dword sign bits).  */
21834 if (!TARGET_XOP)
21835 {
21836 rtx zero = force_reg (V2DImode, CONST0_RTX (V2DImode));
21837 rtx zero_or_all_ones;
21838 if (TARGET_SSE4_2)
21839 {
21840 zero_or_all_ones = gen_reg_rtx (V2DImode);
21841 emit_insn (gen_sse4_2_gtv2di3 (zero_or_all_ones, zero,
21842 operands[1]));
21843 }
21844 else
21845 {
21846 rtx temp = gen_reg_rtx (V4SImode);
21847 emit_insn (gen_ashrv4si3 (temp, lowpart_subreg (V4SImode,
21848 operands[1],
21849 V2DImode),
21850 GEN_INT (31)));
21851 zero_or_all_ones = gen_reg_rtx (V4SImode);
21852 emit_insn (gen_sse2_pshufd_1 (zero_or_all_ones, temp,
21853 const1_rtx, const1_rtx,
21854 GEN_INT (3), GEN_INT (3)));
21855 zero_or_all_ones = lowpart_subreg (V2DImode, zero_or_all_ones,
21856 V4SImode);
21857 }
21858 rtx lshr_res = gen_reg_rtx (V2DImode);
21859 emit_insn (gen_lshrv2di3 (lshr_res, operands[1], operands[2]));
21860 rtx ashl_res = gen_reg_rtx (V2DImode);
21861 rtx amount;
/* Compute the complementary shift amount 64 - n as a scalar.  */
21862 if (TARGET_64BIT)
21863 {
21864 amount = gen_reg_rtx (DImode);
21865 emit_insn (gen_subdi3 (amount, force_reg (DImode, GEN_INT (64)),
21866 operands[2]));
21867 }
21868 else
21869 {
21870 rtx temp = gen_reg_rtx (SImode);
21871 emit_insn (gen_subsi3 (temp, force_reg (SImode, GEN_INT (64)),
21872 lowpart_subreg (SImode, operands[2],
21873 DImode)));
21874 amount = gen_reg_rtx (V4SImode);
21875 emit_insn (gen_vec_setv4si_0 (amount, CONST0_RTX (V4SImode),
21876 temp));
21877 }
21878 amount = lowpart_subreg (DImode, amount, GET_MODE (amount));
21879 emit_insn (gen_ashlv2di3 (ashl_res, zero_or_all_ones, amount));
21880 emit_insn (gen_iorv2di3 (operands[0], lshr_res, ashl_res));
21881 DONE;
21882 }
21883
/* XOP: splat the negated count into a V2DI vector and use vpshaq,
which arithmetic-shifts right for negative per-element counts.  */
21884 rtx reg = gen_reg_rtx (V2DImode);
21885 rtx par;
21886 bool negate = false;
21887 int i;
21888
21889 if (CONST_INT_P (operands[2]))
21890 operands[2] = GEN_INT (-INTVAL (operands[2]));
21891 else
21892 negate = true;
21893
21894 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
21895 for (i = 0; i < 2; i++)
21896 XVECEXP (par, 0, i) = operands[2];
21897
21898 emit_insn (gen_vec_initv2didi (reg, par));
21899
21900 if (negate)
21901 emit_insn (gen_negv2di2 (reg, reg));
21902
21903 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
21904 DONE;
21905 }
21906 })
21907
21908 ;; XOP FRCZ support
;; XOP vfrcz on the FMA-capable FP modes; the operation is modeled as
;; an opaque unspec (the instruction extracts the fractional part of
;; each element — see the XOP ISA reference).
21909 (define_insn "xop_frcz<mode>2"
21910 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
21911 (unspec:FMAMODE
21912 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
21913 UNSPEC_FRCZ))]
21914 "TARGET_XOP"
21915 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
21916 [(set_attr "type" "ssecvt1")
21917 (set_attr "mode" "<MODE>")])
21918
;; Scalar (vm) form of vfrcz: the FRCZ result is merged into element 0
;; over a zero vector supplied as operand 2 by the preparation
;; statement.
21919 (define_expand "xop_vmfrcz<mode>2"
21920 [(set (match_operand:VF_128 0 "register_operand")
21921 (vec_merge:VF_128
21922 (unspec:VF_128
21923 [(match_operand:VF_128 1 "nonimmediate_operand")]
21924 UNSPEC_FRCZ)
21925 (match_dup 2)
21926 (const_int 1)))]
21927 "TARGET_XOP"
21928 "operands[2] = CONST0_RTX (<MODE>mode);")
21929
;; Matching insn for the expander above: scalar vfrczss/vfrczsd with
;; the upper elements coming from a zero vector (operand 2 must be
;; const0).
21930 (define_insn "*xop_vmfrcz<mode>2"
21931 [(set (match_operand:VF_128 0 "register_operand" "=x")
21932 (vec_merge:VF_128
21933 (unspec:VF_128
21934 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
21935 UNSPEC_FRCZ)
21936 (match_operand:VF_128 2 "const0_operand")
21937 (const_int 1)))]
21938 "TARGET_XOP"
21939 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
21940 [(set_attr "type" "ssecvt1")
21941 (set_attr "mode" "<MODE>")])
21942
;; XOP signed integer comparison producing an element mask; the vpcom
;; condition-code suffix is derived from the comparison operator via
;; the %Y output modifier.
21943 (define_insn "xop_maskcmp<mode>3"
21944 [(set (match_operand:VI_128 0 "register_operand" "=x")
21945 (match_operator:VI_128 1 "ix86_comparison_int_operator"
21946 [(match_operand:VI_128 2 "register_operand" "x")
21947 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
21948 "TARGET_XOP"
21949 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
21950 [(set_attr "type" "sse4arg")
21951 (set_attr "prefix_data16" "0")
21952 (set_attr "prefix_rep" "0")
21953 (set_attr "prefix_extra" "2")
21954 (set_attr "length_immediate" "1")
21955 (set_attr "mode" "TI")])
21956
;; Unsigned variant of the XOP comparison above (vpcom*u*).
21957 (define_insn "xop_maskcmp_uns<mode>3"
21958 [(set (match_operand:VI_128 0 "register_operand" "=x")
21959 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
21960 [(match_operand:VI_128 2 "register_operand" "x")
21961 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
21962 "TARGET_XOP"
21963 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
21964 [(set_attr "type" "ssecmp")
21965 (set_attr "prefix_data16" "0")
21966 (set_attr "prefix_rep" "0")
21967 (set_attr "prefix_extra" "2")
21968 (set_attr "length_immediate" "1")
21969 (set_attr "mode" "TI")])
21970
21971 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
21972 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
21973 ;; the exact instruction generated for the intrinsic.
;; The UNSPEC wrapper keeps the comparison opaque so combine/simplify
;; cannot canonicalize it into the signed form.
21974 (define_insn "xop_maskcmp_uns2<mode>3"
21975 [(set (match_operand:VI_128 0 "register_operand" "=x")
21976 (unspec:VI_128
21977 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
21978 [(match_operand:VI_128 2 "register_operand" "x")
21979 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
21980 UNSPEC_XOP_UNSIGNED_CMP))]
21981 "TARGET_XOP"
21982 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
21983 [(set_attr "type" "ssecmp")
21984 (set_attr "prefix_data16" "0")
21985 (set_attr "prefix_extra" "2")
21986 (set_attr "length_immediate" "1")
21987 (set_attr "mode" "TI")])
21988
21989 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
21990 ;; being added here to be complete.
;; Operand 3 selects the constant condition: nonzero emits PCOMTRUE
;; (all-ones result), zero emits PCOMFALSE (all-zeros result).
21991 (define_insn "xop_pcom_tf<mode>3"
21992 [(set (match_operand:VI_128 0 "register_operand" "=x")
21993 (unspec:VI_128
21994 [(match_operand:VI_128 1 "register_operand" "x")
21995 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
21996 (match_operand:SI 3 "const_int_operand" "n")]
21997 UNSPEC_XOP_TRUEFALSE))]
21998 "TARGET_XOP"
21999 {
22000 return ((INTVAL (operands[3]) != 0)
22001 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22002 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
22003 }
22004 [(set_attr "type" "ssecmp")
22005 (set_attr "prefix_data16" "0")
22006 (set_attr "prefix_extra" "2")
22007 (set_attr "length_immediate" "1")
22008 (set_attr "mode" "TI")])
22009
;; XOP vpermil2ps/vpermil2pd: two-source permute; operand 3 supplies
;; per-element selectors, operand 4 is the 2-bit immediate mode field.
;; The two alternatives allow the memory operand to be either source 2
;; or the selector (never both).
22010 (define_insn "xop_vpermil2<mode>3"
22011 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
22012 (unspec:VF_128_256
22013 [(match_operand:VF_128_256 1 "register_operand" "x,x")
22014 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
22015 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
22016 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
22017 UNSPEC_VPERMIL2))]
22018 "TARGET_XOP"
22019 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
22020 [(set_attr "type" "sse4arg")
22021 (set_attr "length_immediate" "1")
22022 (set_attr "mode" "<MODE>")])
22023
22024 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22025
;; AES-NI single encryption round.  Alternative 0 is the legacy
;; two-operand SSE encoding (output tied to operand 1), alternative 1
;; the VEX three-operand form.
22026 (define_insn "aesenc"
22027 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
22028 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
22029 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
22030 UNSPEC_AESENC))]
22031 "TARGET_AES"
22032 "@
22033 aesenc\t{%2, %0|%0, %2}
22034 vaesenc\t{%2, %1, %0|%0, %1, %2}"
22035 [(set_attr "isa" "noavx,avx")
22036 (set_attr "type" "sselog1")
22037 (set_attr "prefix_extra" "1")
22038 (set_attr "prefix" "orig,vex")
22039 (set_attr "btver2_decode" "double,double")
22040 (set_attr "mode" "TI")])
22041
;; AES-NI final encryption round; same alternatives as aesenc.
22042 (define_insn "aesenclast"
22043 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
22044 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
22045 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
22046 UNSPEC_AESENCLAST))]
22047 "TARGET_AES"
22048 "@
22049 aesenclast\t{%2, %0|%0, %2}
22050 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
22051 [(set_attr "isa" "noavx,avx")
22052 (set_attr "type" "sselog1")
22053 (set_attr "prefix_extra" "1")
22054 (set_attr "prefix" "orig,vex")
22055 (set_attr "btver2_decode" "double,double")
22056 (set_attr "mode" "TI")])
22057
;; AES-NI single decryption round (legacy SSE and VEX alternatives,
;; mirroring aesenc above).
22058 (define_insn "aesdec"
22059 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
22060 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
22061 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
22062 UNSPEC_AESDEC))]
22063 "TARGET_AES"
22064 "@
22065 aesdec\t{%2, %0|%0, %2}
22066 vaesdec\t{%2, %1, %0|%0, %1, %2}"
22067 [(set_attr "isa" "noavx,avx")
22068 (set_attr "type" "sselog1")
22069 (set_attr "prefix_extra" "1")
22070 (set_attr "prefix" "orig,vex")
22071 (set_attr "btver2_decode" "double,double")
22072 (set_attr "mode" "TI")])
22073
;; AES-NI final decryption round.
22074 (define_insn "aesdeclast"
22075 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
22076 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
22077 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
22078 UNSPEC_AESDECLAST))]
22079 "TARGET_AES"
22080 "@
22081 aesdeclast\t{%2, %0|%0, %2}
22082 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
22083 [(set_attr "isa" "noavx,avx")
22084 (set_attr "type" "sselog1")
22085 (set_attr "prefix_extra" "1")
22086 (set_attr "prefix" "orig,vex")
22087 (set_attr "btver2_decode" "double,double")
22088 (set_attr "mode" "TI")])
22089
;; AESIMC (InverseMixColumns, used for decryption key schedule);
;; %v emits the VEX "v" prefix when AVX is enabled.
22090 (define_insn "aesimc"
22091 [(set (match_operand:V2DI 0 "register_operand" "=x")
22092 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
22093 UNSPEC_AESIMC))]
22094 "TARGET_AES"
22095 "%vaesimc\t{%1, %0|%0, %1}"
22096 [(set_attr "type" "sselog1")
22097 (set_attr "prefix_extra" "1")
22098 (set_attr "prefix" "maybe_vex")
22099 (set_attr "mode" "TI")])
22100
;; AESKEYGENASSIST; operand 2 is the 8-bit round constant immediate.
22101 (define_insn "aeskeygenassist"
22102 [(set (match_operand:V2DI 0 "register_operand" "=x")
22103 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
22104 (match_operand:SI 2 "const_0_to_255_operand" "n")]
22105 UNSPEC_AESKEYGENASSIST))]
22106 "TARGET_AES"
22107 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
22108 [(set_attr "type" "sselog1")
22109 (set_attr "prefix_extra" "1")
22110 (set_attr "length_immediate" "1")
22111 (set_attr "prefix" "maybe_vex")
22112 (set_attr "mode" "TI")])
22113
;; Carry-less 64x64->128 multiply; the immediate (operand 3) selects
;; which qword of each source participates.  Legacy SSE and VEX
;; alternatives as with the AES patterns above.
22114 (define_insn "pclmulqdq"
22115 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
22116 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
22117 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
22118 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
22119 UNSPEC_PCLMUL))]
22120 "TARGET_PCLMUL"
22121 "@
22122 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
22123 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
22124 [(set_attr "isa" "noavx,avx")
22125 (set_attr "type" "sselog1")
22126 (set_attr "prefix_extra" "1")
22127 (set_attr "length_immediate" "1")
22128 (set_attr "prefix" "orig,vex")
22129 (set_attr "mode" "TI")])
22130
;; Build the PARALLEL for vzeroall: one volatile unspec followed by an
;; explicit (set reg 0) for each of the 8 (ia32) or 16 (64-bit) SSE
;; registers, so the register allocator sees all of them clobbered.
22131 (define_expand "avx_vzeroall"
22132 [(match_par_dup 0 [(const_int 0)])]
22133 "TARGET_AVX"
22134 {
22135 int nregs = TARGET_64BIT ? 16 : 8;
22136 int regno;
22137
22138 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
22139
22140 XVECEXP (operands[0], 0, 0)
22141 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
22142 UNSPECV_VZEROALL);
22143
22144 for (regno = 0; regno < nregs; regno++)
22145 XVECEXP (operands[0], 0, regno + 1)
22146 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
22147 CONST0_RTX (V8SImode));
22148 })
22149
;; Insn matching the PARALLEL constructed by the expander above.
22150 (define_insn "*avx_vzeroall"
22151 [(match_parallel 0 "vzeroall_operation"
22152 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
22153 "TARGET_AVX"
22154 "vzeroall"
22155 [(set_attr "type" "sse")
22156 (set_attr "modrm" "0")
22157 (set_attr "memory" "none")
22158 (set_attr "prefix" "vex")
22159 (set_attr "btver2_decode" "vector")
22160 (set_attr "mode" "OI")])
22161
22162 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
22163 ;; if the upper 128bits are unused. Initially we expand the instructions
22164 ;; as though they had no effect on the SSE registers, but later add SETs and
22165 ;; CLOBBERs to the PARALLEL to model the real effect.
22166
;; vzeroupper is modeled as a call with its own callee ABI
;; (ABI_VZEROUPPER) so the register effects are attached later; the
;; real emission happens in ix86_expand_avx_vzeroupper.
22167 (define_expand "avx_vzeroupper"
22168 [(parallel [(call (mem:QI (const_int 0))
22169 (const_int 0))
22170 (unspec [(const_int ABI_VZEROUPPER)] UNSPEC_CALLEE_ABI)])]
22171 "TARGET_AVX"
22172 {
22173 ix86_expand_avx_vzeroupper ();
22174 DONE;
22175 })
22176
;; The actual vzeroupper insn, still wearing the call-with-special-ABI
;; representation established by the expander above.
22177 (define_insn "avx_vzeroupper_callee_abi"
22178 [(call (mem:QI (const_int 0))
22179 (const_int 0))
22180 (unspec [(const_int ABI_VZEROUPPER)] UNSPEC_CALLEE_ABI)]
22181 "TARGET_AVX"
22182 "vzeroupper"
22183 [(set_attr "type" "sse")
22184 (set_attr "modrm" "0")
22185 (set_attr "memory" "none")
22186 (set_attr "prefix" "vex")
22187 (set_attr "btver2_decode" "vector")
22188 (set_attr "mode" "OI")])
22189
;; Maps a vector mode to the ISA attribute value required for the
;; EVEX-encoded vpbroadcast alternative: AVX512BW for byte/word
;; elements, AVX512F for dword/qword elements.
22190 (define_mode_attr pbroadcast_evex_isa
22191 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
22192 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
22193 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
22194 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
22195
;; Broadcast element 0 of an xmm-sized source to every element of the
;; destination.  Alternative 1 allows the extended (xmm16+) registers
;; and carries the EVEX isa/prefix requirements.
22196 (define_insn "avx2_pbroadcast<mode>"
22197 [(set (match_operand:VI 0 "register_operand" "=x,v")
22198 (vec_duplicate:VI
22199 (vec_select:<ssescalarmode>
22200 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
22201 (parallel [(const_int 0)]))))]
22202 "TARGET_AVX2"
22203 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
22204 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
22205 (set_attr "type" "ssemov")
22206 (set_attr "prefix_extra" "1")
22207 (set_attr "prefix" "vex,evex")
22208 (set_attr "mode" "<sseinsnmode>")])
22209
;; Broadcast element 0 of a 256-bit source.  Memory alternatives
;; broadcast straight from memory; register alternatives use the
;; low xmm half of the source (%x1 output modifier).
22210 (define_insn "avx2_pbroadcast<mode>_1"
22211 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
22212 (vec_duplicate:VI_256
22213 (vec_select:<ssescalarmode>
22214 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
22215 (parallel [(const_int 0)]))))]
22216 "TARGET_AVX2"
22217 "@
22218 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
22219 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
22220 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
22221 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
22222 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
22223 (set_attr "type" "ssemov")
22224 (set_attr "prefix_extra" "1")
22225 (set_attr "prefix" "vex")
22226 (set_attr "mode" "<sseinsnmode>")])
22227
;; Variable full-width permute (vperm[dq]/vperm[ps][pd]) for 256/512-bit
;; dword/qword element vectors; the index vector is operand 2 and comes
;; FIRST in the emitted instruction (note the swapped template order).
22228 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
22229 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
22230 (unspec:VI48F_256_512
22231 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
22232 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
22233 UNSPEC_VPERMVAR))]
22234 "TARGET_AVX2 && <mask_mode512bit_condition>"
22235 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
22236 [(set_attr "type" "sselog")
22237 (set_attr "prefix" "<mask_prefix2>")
22238 (set_attr "mode" "<sseinsnmode>")])
22239
;; Variable permute for byte-element vectors (vpermb, AVX512VBMI).
22240 (define_insn "<avx512>_permvar<mode><mask_name>"
22241 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
22242 (unspec:VI1_AVX512VL
22243 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
22244 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
22245 UNSPEC_VPERMVAR))]
22246 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
22247 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
22248 [(set_attr "type" "sselog")
22249 (set_attr "prefix" "<mask_prefix2>")
22250 (set_attr "mode" "<sseinsnmode>")])
22251
;; Variable permute for word-element vectors (vpermw, AVX512BW).
22252 (define_insn "<avx512>_permvar<mode><mask_name>"
22253 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22254 (unspec:VI2_AVX512VL
22255 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
22256 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
22257 UNSPEC_VPERMVAR))]
22258 "TARGET_AVX512BW && <mask_mode512bit_condition>"
22259 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
22260 [(set_attr "type" "sselog")
22261 (set_attr "prefix" "<mask_prefix2>")
22262 (set_attr "mode" "<sseinsnmode>")])
22263
22264 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
22265 ;; If it so happens that the input is in memory, use vbroadcast.
22266 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Alternatives 0/1 (memory source): load-and-broadcast a single SF
;; element with vbroadcastss, adjusting the address to the selected
;; element.  Alternative 2 (register source): replicate the element
;; in-register with vpermilps (selector = elt in every 2-bit field).
22267 (define_insn "*avx_vperm_broadcast_v4sf"
22268 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
22269 (vec_select:V4SF
22270 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
22271 (match_parallel 2 "avx_vbroadcast_operand"
22272 [(match_operand 3 "const_int_operand" "C,n,n")])))]
22273 "TARGET_AVX"
22274 {
22275 int elt = INTVAL (operands[3]);
22276 switch (which_alternative)
22277 {
22278 case 0:
22279 case 1:
22280 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
22281 return "vbroadcastss\t{%1, %0|%0, %k1}";
22282 case 2:
22283 operands[2] = GEN_INT (elt * 0x55);
22284 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
22285 default:
22286 gcc_unreachable ();
22287 }
22288 }
22289 [(set_attr "type" "ssemov,ssemov,sselog1")
22290 (set_attr "prefix_extra" "1")
22291 (set_attr "length_immediate" "0,0,1")
22292 (set_attr "prefix" "maybe_evex")
22293 (set_attr "mode" "SF,SF,V4SF")])
22294
;; 256-bit FP broadcast expressed as a vec_select.  Split after reload:
;; a memory source becomes a scalar vec_duplicate (vbroadcast); a
;; register source is rewritten to an in-lane vpermil followed by a
;; cross-lane duplicate (vperm2f128, or AVX512VL shuffles for xmm16+).
22295 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
22296 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
22297 (vec_select:VF_256
22298 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
22299 (match_parallel 2 "avx_vbroadcast_operand"
22300 [(match_operand 3 "const_int_operand" "C,n,n")])))]
22301 "TARGET_AVX
22302 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
22303 "#"
22304 "&& reload_completed"
22305 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
22306 {
22307 rtx op0 = operands[0], op1 = operands[1];
22308 int elt = INTVAL (operands[3]);
22309
22310 if (REG_P (op1))
22311 {
22312 int mask;
22313
/* AVX2 can broadcast element 0 from a register directly.  */
22314 if (TARGET_AVX2 && elt == 0)
22315 {
22316 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
22317 op1)));
22318 DONE;
22319 }
22320
22321 /* Shuffle element we care about into all elements of the 128-bit lane.
22322 The other lane gets shuffled too, but we don't care. */
22323 if (<MODE>mode == V4DFmode)
22324 mask = (elt & 1 ? 15 : 0);
22325 else
22326 mask = (elt & 3) * 0x55;
22327 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
22328
22329 /* Shuffle the lane we care about into both lanes of the dest. */
22330 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
22331 if (EXT_REX_SSE_REG_P (op0))
22332 {
22333 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
22334 or VSHUFF128. */
22335 gcc_assert (<MODE>mode == V8SFmode);
22336 if ((mask & 1) == 0)
22337 emit_insn (gen_avx2_vec_dupv8sf (op0,
22338 gen_lowpart (V4SFmode, op0)));
22339 else
22340 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
22341 GEN_INT (4), GEN_INT (5),
22342 GEN_INT (6), GEN_INT (7),
22343 GEN_INT (12), GEN_INT (13),
22344 GEN_INT (14), GEN_INT (15)));
22345 DONE;
22346 }
22347
22348 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
22349 DONE;
22350 }
22351
/* Memory source: narrow the address to the selected scalar element and
let the vec_duplicate in the split pattern do the broadcast.  */
22352 operands[1] = adjust_address (op1, <ssescalarmode>mode,
22353 elt * GET_MODE_SIZE (<ssescalarmode>mode));
22354 })
22355
;; vpermilpd with an 8-bit immediate: rewrite the immediate into an
;; explicit vec_select PARALLEL.  DF elements pair up per 128-bit
;; lane, so bit i of the mask picks element (i&~1)+bit within the pair.
22356 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
22357 [(set (match_operand:VF2 0 "register_operand")
22358 (vec_select:VF2
22359 (match_operand:VF2 1 "nonimmediate_operand")
22360 (match_operand:SI 2 "const_0_to_255_operand")))]
22361 "TARGET_AVX && <mask_mode512bit_condition>"
22362 {
22363 int mask = INTVAL (operands[2]);
22364 rtx perm[<ssescalarnum>];
22365
22366 int i;
22367 for (i = 0; i < <ssescalarnum>; i = i + 2)
22368 {
22369 perm[i] = GEN_INT (((mask >> i) & 1) + i);
22370 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
22371 }
22372
22373 operands[2]
22374 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
22375 })
22376
;; vpermilps with an 8-bit immediate: the same four 2-bit selectors
;; apply to every group of four SF elements (one 128-bit lane), so the
;; PARALLEL repeats the decoded mask with an offset of i per group.
22377 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
22378 [(set (match_operand:VF1 0 "register_operand")
22379 (vec_select:VF1
22380 (match_operand:VF1 1 "nonimmediate_operand")
22381 (match_operand:SI 2 "const_0_to_255_operand")))]
22382 "TARGET_AVX && <mask_mode512bit_condition>"
22383 {
22384 int mask = INTVAL (operands[2]);
22385 rtx perm[<ssescalarnum>];
22386
22387 int i;
22388 for (i = 0; i < <ssescalarnum>; i = i + 4)
22389 {
22390 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
22391 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
22392 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
22393 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
22394 }
22395
22396 operands[2]
22397 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
22398 })
22399
22400 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
22401 ;; patterns, as they have the same RTL representation (vpermilp*
22402 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
22403 ;; latency as it never crosses lanes.
;; Matches the vec_select PARALLELs built by the expanders above and
;; re-encodes them into the vpermilp immediate; avx_vpermilp_parallel
;; returns mask+1 (nonzero = valid), hence the -1 below.
22404 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
22405 [(set (match_operand:VF 0 "register_operand" "=v")
22406 (vec_select:VF
22407 (match_operand:VF 1 "nonimmediate_operand" "vm")
22408 (match_parallel 2 ""
22409 [(match_operand 3 "const_int_operand")])))]
22410 "TARGET_AVX && <mask_mode512bit_condition>
22411 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
22412 {
22413 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
22414 operands[2] = GEN_INT (mask);
22415 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
22416 }
22417 [(set_attr "type" "sselog")
22418 (set_attr "prefix_extra" "1")
22419 (set_attr "length_immediate" "1")
22420 (set_attr "prefix" "<mask_prefix>")
22421 (set_attr "mode" "<sseinsnmode>")])
22422
;; vpermq/vpermpd (256-bit, 4 x 64-bit elements): split the 8-bit
;; immediate into four 2-bit element selectors and hand them to the
;; avx2_perm<mode>_1 insn pattern below.
22423 (define_expand "avx2_perm<mode>"
22424 [(match_operand:VI8F_256 0 "register_operand")
22425 (match_operand:VI8F_256 1 "nonimmediate_operand")
22426 (match_operand:SI 2 "const_0_to_255_operand")]
22427 "TARGET_AVX2"
22428 {
22429 int mask = INTVAL (operands[2]);
22430 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
22431 GEN_INT ((mask >> 0) & 3),
22432 GEN_INT ((mask >> 2) & 3),
22433 GEN_INT ((mask >> 4) & 3),
22434 GEN_INT ((mask >> 6) & 3)));
22435 DONE;
22436 })
22437
;; Masked (AVX512VL) variant of the expander above; forwards the
;; merge operand and the mask register to the _1_mask pattern.
22438 (define_expand "avx512vl_perm<mode>_mask"
22439 [(match_operand:VI8F_256 0 "register_operand")
22440 (match_operand:VI8F_256 1 "nonimmediate_operand")
22441 (match_operand:SI 2 "const_0_to_255_operand")
22442 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
22443 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22444 "TARGET_AVX512VL"
22445 {
22446 int mask = INTVAL (operands[2]);
22447 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
22448 GEN_INT ((mask >> 0) & 3),
22449 GEN_INT ((mask >> 2) & 3),
22450 GEN_INT ((mask >> 4) & 3),
22451 GEN_INT ((mask >> 6) & 3),
22452 operands[3], operands[4]));
22453 DONE;
22454 })
22455
;; Insn: re-pack the four 2-bit selectors back into the 8-bit
;; immediate and emit vpermq/vpermpd.
22456 (define_insn "avx2_perm<mode>_1<mask_name>"
22457 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
22458 (vec_select:VI8F_256
22459 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
22460 (parallel [(match_operand 2 "const_0_to_3_operand")
22461 (match_operand 3 "const_0_to_3_operand")
22462 (match_operand 4 "const_0_to_3_operand")
22463 (match_operand 5 "const_0_to_3_operand")])))]
22464 "TARGET_AVX2 && <mask_mode512bit_condition>"
22465 {
22466 int mask = 0;
22467 mask |= INTVAL (operands[2]) << 0;
22468 mask |= INTVAL (operands[3]) << 2;
22469 mask |= INTVAL (operands[4]) << 4;
22470 mask |= INTVAL (operands[5]) << 6;
22471 operands[2] = GEN_INT (mask);
22472 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
22473 }
22474 [(set_attr "type" "sselog")
22475 (set_attr "prefix" "<mask_prefix2>")
22476 (set_attr "mode" "<sseinsnmode>")])
22477
;; 512-bit vpermq/vpermpd with immediate: the 8-bit immediate selects
;; within each 256-bit half, so the low four indices get the raw 2-bit
;; fields and the high four get the same fields biased by +4.
22478 (define_expand "avx512f_perm<mode>"
22479 [(match_operand:V8FI 0 "register_operand")
22480 (match_operand:V8FI 1 "nonimmediate_operand")
22481 (match_operand:SI 2 "const_0_to_255_operand")]
22482 "TARGET_AVX512F"
22483 {
22484 int mask = INTVAL (operands[2]);
22485 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
22486 GEN_INT ((mask >> 0) & 3),
22487 GEN_INT ((mask >> 2) & 3),
22488 GEN_INT ((mask >> 4) & 3),
22489 GEN_INT ((mask >> 6) & 3),
22490 GEN_INT (((mask >> 0) & 3) + 4),
22491 GEN_INT (((mask >> 2) & 3) + 4),
22492 GEN_INT (((mask >> 4) & 3) + 4),
22493 GEN_INT (((mask >> 6) & 3) + 4)));
22494 DONE;
22495 })
22496
;; Masked variant of the expander above; same immediate decoding,
;; plus merge operand 3 and mask register operand 4.
22497 (define_expand "avx512f_perm<mode>_mask"
22498 [(match_operand:V8FI 0 "register_operand")
22499 (match_operand:V8FI 1 "nonimmediate_operand")
22500 (match_operand:SI 2 "const_0_to_255_operand")
22501 (match_operand:V8FI 3 "nonimm_or_0_operand")
22502 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22503 "TARGET_AVX512F"
22504 {
22505 int mask = INTVAL (operands[2]);
22506 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
22507 GEN_INT ((mask >> 0) & 3),
22508 GEN_INT ((mask >> 2) & 3),
22509 GEN_INT ((mask >> 4) & 3),
22510 GEN_INT ((mask >> 6) & 3),
22511 GEN_INT (((mask >> 0) & 3) + 4),
22512 GEN_INT (((mask >> 2) & 3) + 4),
22513 GEN_INT (((mask >> 4) & 3) + 4),
22514 GEN_INT (((mask >> 6) & 3) + 4),
22515 operands[3], operands[4]));
22516 DONE;
22517 })
22518
;; Insn: only matches when the upper-half indices (operands 6-9) are
;; exactly the lower-half ones (operands 2-5) plus 4, i.e. both halves
;; use the same selectors, which is what the instruction encodes.
22519 (define_insn "avx512f_perm<mode>_1<mask_name>"
22520 [(set (match_operand:V8FI 0 "register_operand" "=v")
22521 (vec_select:V8FI
22522 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
22523 (parallel [(match_operand 2 "const_0_to_3_operand")
22524 (match_operand 3 "const_0_to_3_operand")
22525 (match_operand 4 "const_0_to_3_operand")
22526 (match_operand 5 "const_0_to_3_operand")
22527 (match_operand 6 "const_4_to_7_operand")
22528 (match_operand 7 "const_4_to_7_operand")
22529 (match_operand 8 "const_4_to_7_operand")
22530 (match_operand 9 "const_4_to_7_operand")])))]
22531 "TARGET_AVX512F && <mask_mode512bit_condition>
22532 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
22533 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
22534 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
22535 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
22536 {
22537 int mask = 0;
22538 mask |= INTVAL (operands[2]) << 0;
22539 mask |= INTVAL (operands[3]) << 2;
22540 mask |= INTVAL (operands[4]) << 4;
22541 mask |= INTVAL (operands[5]) << 6;
22542 operands[2] = GEN_INT (mask);
22543 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
22544 }
22545 [(set_attr "type" "sselog")
22546 (set_attr "prefix" "<mask_prefix2>")
22547 (set_attr "mode" "<sseinsnmode>")])
22548
;; vperm2i128: 128-bit-lane permute of two 256-bit integer sources.
;; Kept as an unspec rather than explicit vec_select RTL (the imm8 is
;; passed through unchanged as operand 3).
22549 (define_insn "avx2_permv2ti"
22550 [(set (match_operand:V4DI 0 "register_operand" "=x")
22551 (unspec:V4DI
22552 [(match_operand:V4DI 1 "register_operand" "x")
22553 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
22554 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22555 UNSPEC_VPERMTI))]
22556 "TARGET_AVX2"
22557 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
22558 [(set_attr "type" "sselog")
22559 (set_attr "prefix" "vex")
22560 (set_attr "mode" "OI")])
22561
;; Broadcast element 0 of a V2DF register to all four V4DF elements
;; (vbroadcastsd, register-source form, hence TARGET_AVX2).
22562 (define_insn "avx2_vec_dupv4df"
22563 [(set (match_operand:V4DF 0 "register_operand" "=v")
22564 (vec_duplicate:V4DF
22565 (vec_select:DF
22566 (match_operand:V2DF 1 "register_operand" "v")
22567 (parallel [(const_int 0)]))))]
22568 "TARGET_AVX2"
22569 "vbroadcastsd\t{%1, %0|%0, %1}"
22570 [(set_attr "type" "sselog1")
22571 (set_attr "prefix" "maybe_evex")
22572 (set_attr "mode" "V4DF")])
22573
;; Broadcast element 0 of a full-width vector operand (register or
;; memory) to the whole destination.  %x1 narrows the source register
;; to its xmm name; the memory alternative loads a scalar (<iptr>).
22574 (define_insn "<avx512>_vec_dup<mode>_1"
22575 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
22576 (vec_duplicate:VI_AVX512BW
22577 (vec_select:<ssescalarmode>
22578 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
22579 (parallel [(const_int 0)]))))]
22580 "TARGET_AVX512F"
22581 "@
22582 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
22583 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
22584 [(set_attr "type" "ssemov")
22585 (set_attr "prefix" "evex")
22586 (set_attr "mode" "<sseinsnmode>")])
22587
;; Maskable broadcast of element 0 of a 128-bit source into a 32/64-bit
;; element vector.  V2DF is special-cased to vpbroadcastq (see comment).
22588 (define_insn "<avx512>_vec_dup<mode><mask_name>"
22589 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
22590 (vec_duplicate:V48_AVX512VL
22591 (vec_select:<ssescalarmode>
22592 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
22593 (parallel [(const_int 0)]))))]
22594 "TARGET_AVX512F"
22595 {
22596 /* There is no DF broadcast (in AVX-512*) to 128b register.
22597 Mimic it with integer variant. */
22598 if (<MODE>mode == V2DFmode)
22599 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
22600
22601 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
22602 }
22603 [(set_attr "type" "ssemov")
22604 (set_attr "prefix" "evex")
22605 (set_attr "mode" "<sseinsnmode>")])
22606
;; Same maskable broadcast, but for 8/16-bit element vectors, which
;; require the AVX512BW forms of vpbroadcastb/vpbroadcastw.
22607 (define_insn "<avx512>_vec_dup<mode><mask_name>"
22608 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
22609 (vec_duplicate:VI12_AVX512VL
22610 (vec_select:<ssescalarmode>
22611 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
22612 (parallel [(const_int 0)]))))]
22613 "TARGET_AVX512BW"
22614 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
22615 [(set_attr "type" "ssemov")
22616 (set_attr "prefix" "evex")
22617 (set_attr "mode" "<sseinsnmode>")])
22618
;; Broadcast a 128-bit source into a 512-bit 32-bit-element vector.
;; Register source: vshuf*32x4 with immediate 0 replicating lane 0 of
;; the zmm view (%g1); memory source: vbroadcast*32x4.
22619 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
22620 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
22621 (vec_duplicate:V16FI
22622 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
22623 "TARGET_AVX512F"
22624 "@
22625 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
22626 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22627 [(set_attr "type" "ssemov")
22628 (set_attr "prefix" "evex")
22629 (set_attr "mode" "<sseinsnmode>")])
22630
;; Broadcast a 256-bit source into a 512-bit 64-bit-element vector.
;; Immediate 0x44 replicates the two low 128-bit lanes into both halves.
22631 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
22632 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
22633 (vec_duplicate:V8FI
22634 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
22635 "TARGET_AVX512F"
22636 "@
22637 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
22638 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22639 [(set_attr "type" "ssemov")
22640 (set_attr "prefix" "evex")
22641 (set_attr "mode" "<sseinsnmode>")])
22642
;; Broadcast a byte/word scalar from a vector/memory operand or from a
;; general register (%k1 selects the 32-bit GPR name) with AVX512BW.
22643 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
22644 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
22645 (vec_duplicate:VI12_AVX512VL
22646 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
22647 "TARGET_AVX512BW"
22648 "@
22649 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
22650 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
22651 [(set_attr "type" "ssemov")
22652 (set_attr "prefix" "evex")
22653 (set_attr "mode" "<sseinsnmode>")])
22654
;; Broadcast a 32/64-bit scalar.  Alternative 1 (GPR source) is only
;; enabled for integer scalar modes, and for DImode only on 64-bit
;; targets, via the computed "enabled" attribute below.
22655 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
22656 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
22657 (vec_duplicate:V48_AVX512VL
22658 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
22659 "TARGET_AVX512F"
22660 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22661 [(set_attr "type" "ssemov")
22662 (set_attr "prefix" "evex")
22663 (set_attr "mode" "<sseinsnmode>")
22664 (set (attr "enabled")
22665 (if_then_else (eq_attr "alternative" "1")
22666 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
22667 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
22668 (const_int 1)))])
22669
;; Broadcast an SF scalar into V4SF: AVX register source via vshufps,
;; AVX memory source via vbroadcastss, plain SSE via in-place shufps.
22670 (define_insn "vec_dupv4sf"
22671 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
22672 (vec_duplicate:V4SF
22673 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
22674 "TARGET_SSE"
22675 "@
22676 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
22677 vbroadcastss\t{%1, %0|%0, %1}
22678 shufps\t{$0, %0, %0|%0, %0, 0}"
22679 [(set_attr "isa" "avx,avx,noavx")
22680 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
22681 (set_attr "length_immediate" "1,0,1")
22682 (set_attr "prefix_extra" "0,1,*")
22683 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
22684 (set_attr "mode" "V4SF")])
22685
;; Broadcast an SI scalar into V4SI; the memory alternative reuses
;; vbroadcastss since a 32-bit load broadcast is bit-identical.
22686 (define_insn "*vec_dupv4si"
22687 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
22688 (vec_duplicate:V4SI
22689 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
22690 "TARGET_SSE"
22691 "@
22692 %vpshufd\t{$0, %1, %0|%0, %1, 0}
22693 vbroadcastss\t{%1, %0|%0, %1}
22694 shufps\t{$0, %0, %0|%0, %0, 0}"
22695 [(set_attr "isa" "sse2,avx,noavx")
22696 (set_attr "type" "sselog1,ssemov,sselog1")
22697 (set_attr "length_immediate" "1,0,1")
22698 (set_attr "prefix_extra" "0,1,*")
22699 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
22700 (set_attr "mode" "TI,V4SF,V4SF")])
22701
;; Broadcast a DI scalar into V2DI: punpcklqdq/movlhps for legacy SSE,
;; vpunpcklqdq for AVX registers, movddup (SSE3) for reg-or-mem.
22702 (define_insn "*vec_dupv2di"
22703 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
22704 (vec_duplicate:V2DI
22705 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
22706 "TARGET_SSE"
22707 "@
22708 punpcklqdq\t%0, %0
22709 vpunpcklqdq\t{%d1, %0|%0, %d1}
22710 %vmovddup\t{%1, %0|%0, %1}
22711 movlhps\t%0, %0"
22712 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
22713 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
22714 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
22715 (set_attr "mode" "TI,TI,DF,V4SF")])
22716
;; Broadcast a 128-bit memory operand into both halves of a 256-bit
;; integer vector (modelled as vec_concat of the operand with itself);
;; AVX512DQ/AVX512VL provide the EVEX-encoded equivalents.
22717 (define_insn "avx2_vbroadcasti128_<mode>"
22718 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
22719 (vec_concat:VI_256
22720 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
22721 (match_dup 1)))]
22722 "TARGET_AVX2"
22723 "@
22724 vbroadcasti128\t{%1, %0|%0, %1}
22725 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
22726 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
22727 [(set_attr "isa" "*,avx512dq,avx512vl")
22728 (set_attr "type" "ssemov")
22729 (set_attr "prefix_extra" "1")
22730 (set_attr "prefix" "vex,evex,evex")
22731 (set_attr "mode" "OI")])
22732
22733 ;; Modes handled by AVX vec_dup patterns.
22734 (define_mode_iterator AVX_VEC_DUP_MODE
22735 [V8SI V8SF V4DI V4DF])
;; Scalar-broadcast mnemonic suffix for each AVX_VEC_DUP_MODE entry
;; (integer modes reuse the FP ss/sd broadcast forms).
22736 (define_mode_attr vecdupssescalarmodesuffix
22737 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
22738 ;; Modes handled by AVX2 vec_dup patterns.
22739 (define_mode_iterator AVX2_VEC_DUP_MODE
22740 [V32QI V16QI V16HI V8HI V8SI V4SI])
22741
;; AVX2 integer-element broadcast from memory, xmm register, or ("#")
;; a general register; the GPR alternative is split after reload (see
;; the define_split below) and is disabled when AVX512VL can broadcast
;; from GPRs directly.
22742 (define_insn "*vec_dup<mode>"
22743 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
22744 (vec_duplicate:AVX2_VEC_DUP_MODE
22745 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
22746 "TARGET_AVX2"
22747 "@
22748 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
22749 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
22750 #"
22751 [(set_attr "isa" "*,*,noavx512vl")
22752 (set_attr "type" "ssemov")
22753 (set_attr "prefix_extra" "1")
22754 (set_attr "prefix" "maybe_evex")
22755 (set_attr "mode" "<sseinsnmode>")
;; The GPR alternative costs an inter-unit move; prefer it for speed
;; only when such moves are cheap on the target.
22756 (set (attr "preferred_for_speed")
22757 (cond [(eq_attr "alternative" "2")
22758 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
22759 ]
22760 (symbol_ref "true")))])
22761
;; 256-bit scalar broadcast for AVX/AVX2/AVX512F: memory via
;; v*broadcast, register via xmm-source broadcast (AVX2) or via the
;; zmm destination form (%g0, AVX512F); "#" alternatives are split.
22762 (define_insn "vec_dup<mode>"
22763 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
22764 (vec_duplicate:AVX_VEC_DUP_MODE
22765 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
22766 "TARGET_AVX"
22767 "@
22768 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
22769 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
22770 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
22771 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
22772 #"
22773 [(set_attr "type" "ssemov")
22774 (set_attr "prefix_extra" "1")
22775 (set_attr "prefix" "maybe_evex")
22776 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
22777 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
22778
;; Split the GPR alternative of *vec_dup<mode>: move the scalar into
;; element 0 of an xmm register, then vpbroadcast from that register.
22779 (define_split
22780 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
22781 (vec_duplicate:AVX2_VEC_DUP_MODE
22782 (match_operand:<ssescalarmode> 1 "register_operand")))]
22783 "TARGET_AVX2
22784 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
22785 available, because then we can broadcast from GPRs directly.
22786 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
22787 for V*SI mode it requires just -mavx512vl. */
22788 && !(TARGET_AVX512VL
22789 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
22790 && reload_completed && GENERAL_REG_P (operands[1])"
22791 [(const_int 0)]
22792 {
22793 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
22794 CONST0_RTX (V4SImode),
22795 gen_lowpart (SImode, operands[1])));
22796 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
22797 gen_lowpart (<ssexmmmode>mode,
22798 operands[0])));
22799 DONE;
22800 })
22801
;; Without AVX2 there is no register-source 256-bit broadcast: split
;; into a 128-bit duplicate followed by vec_concat of the two halves.
22802 (define_split
22803 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
22804 (vec_duplicate:AVX_VEC_DUP_MODE
22805 (match_operand:<ssescalarmode> 1 "register_operand")))]
22806 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
22807 [(set (match_dup 2)
22808 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
22809 (set (match_dup 0)
22810 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
22811 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
22812
;; Duplicate a 128-bit half into both halves of a 256-bit vector.
;; Memory sources use vbroadcast; a source equal to the destination
;; ("0") uses vinsert of its own low half; other registers go through
;; vperm2f128.  EVEX alternatives cover AVX512DQ/AVX512VL encodings.
22813 (define_insn "avx_vbroadcastf128_<mode>"
22814 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
22815 (vec_concat:V_256
22816 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
22817 (match_dup 1)))]
22818 "TARGET_AVX"
22819 "@
22820 vbroadcast<i128>\t{%1, %0|%0, %1}
22821 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
22822 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
22823 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
22824 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
22825 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
22826 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
22827 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
22828 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
22829 (set_attr "prefix_extra" "1")
22830 (set_attr "length_immediate" "0,1,1,0,1,0,1")
22831 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
22832 (set_attr "mode" "<sseinsnmode>")])
22833
22834 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
22835 (define_mode_iterator VI4F_BRCST32x2
22836 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
22837 V16SF (V8SF "TARGET_AVX512VL")])
22838
;; 128-bit element mode of the 64x2 broadcast source for each mode.
22839 (define_mode_attr 64x2mode
22840 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
22841
;; 64-bit (2-element) mode of the 32x2 broadcast source for each mode.
22842 (define_mode_attr 32x2mode
22843 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
22844 (V8SF "V2SF") (V4SI "V2SI")])
22845
;; vbroadcast{i,f}32x2: duplicate the low two 32-bit elements of a
;; 128-bit operand across the destination (%q1 names the 64-bit view
;; of the source).
22846 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
22847 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
22848 (vec_duplicate:VI4F_BRCST32x2
22849 (vec_select:<32x2mode>
22850 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
22851 (parallel [(const_int 0) (const_int 1)]))))]
22852 "TARGET_AVX512DQ"
22853 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
22854 [(set_attr "type" "ssemov")
22855 (set_attr "prefix_extra" "1")
22856 (set_attr "prefix" "evex")
22857 (set_attr "mode" "<sseinsnmode>")])
22858
;; Duplicate a 128-bit operand into a 256-bit 32-bit-element vector:
;; register source via vshuf*32x4 on the ymm view (%t1), memory source
;; via vbroadcast*32x4.
22859 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
22860 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
22861 (vec_duplicate:VI4F_256
22862 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
22863 "TARGET_AVX512VL"
22864 "@
22865 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
22866 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22867 [(set_attr "type" "ssemov")
22868 (set_attr "prefix_extra" "1")
22869 (set_attr "prefix" "evex")
22870 (set_attr "mode" "<sseinsnmode>")])
22871
;; Duplicate a 256-bit operand into a 512-bit 32-bit-element vector
;; (vbroadcast*32x8); register form via vshuf*32x4 with imm 0x44.
22872 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
22873 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
22874 (vec_duplicate:V16FI
22875 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
22876 "TARGET_AVX512DQ"
22877 "@
22878 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
22879 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22880 [(set_attr "type" "ssemov")
22881 (set_attr "prefix_extra" "1")
22882 (set_attr "prefix" "evex")
22883 (set_attr "mode" "<sseinsnmode>")])
22884
22885 ;; For broadcast[i|f]64x2
22886 (define_mode_iterator VI8F_BRCST64x2
22887 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
22888
;; vbroadcast{i,f}64x2: duplicate a 128-bit (2 x 64-bit) operand;
;; register form uses vshuf*64x2 at the destination width (<xtg_mode>).
22889 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
22890 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
22891 (vec_duplicate:VI8F_BRCST64x2
22892 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
22893 "TARGET_AVX512DQ"
22894 "@
22895 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
22896 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22897 [(set_attr "type" "ssemov")
22898 (set_attr "prefix_extra" "1")
22899 (set_attr "prefix" "evex")
22900 (set_attr "mode" "<sseinsnmode>")])
22901
;; vpbroadcastmb2q: broadcast a zero-extended 8-bit mask register into
;; every 64-bit element of the destination.
22902 (define_insn "avx512cd_maskb_vec_dup<mode>"
22903 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22904 (vec_duplicate:VI8_AVX512VL
22905 (zero_extend:DI
22906 (match_operand:QI 1 "register_operand" "k"))))]
22907 "TARGET_AVX512CD"
22908 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
22909 [(set_attr "type" "mskmov")
22910 (set_attr "prefix" "evex")
22911 (set_attr "mode" "XI")])
22912
;; vpbroadcastmw2d: broadcast a zero-extended 16-bit mask register into
;; every 32-bit element of the destination.
22913 (define_insn "avx512cd_maskw_vec_dup<mode>"
22914 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22915 (vec_duplicate:VI4_AVX512VL
22916 (zero_extend:SI
22917 (match_operand:HI 1 "register_operand" "k"))))]
22918 "TARGET_AVX512CD"
22919 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
22920 [(set_attr "type" "mskmov")
22921 (set_attr "prefix" "evex")
22922 (set_attr "mode" "XI")])
22923
;; Variable-control vpermilps/vpermilpd: per-element selectors come
;; from an integer vector operand rather than an immediate, so this
;; stays an unspec.
22924 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
22925 [(set (match_operand:VF 0 "register_operand" "=v")
22926 (unspec:VF
22927 [(match_operand:VF 1 "register_operand" "v")
22928 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
22929 UNSPEC_VPERMIL))]
22930 "TARGET_AVX && <mask_mode512bit_condition>"
22931 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22932 [(set_attr "type" "sselog")
22933 (set_attr "prefix_extra" "1")
22934 (set_attr "btver2_decode" "vector")
22935 (set_attr "prefix" "<mask_prefix>")
22936 (set_attr "mode" "<sseinsnmode>")])
22937
;; All modes supported by vpermi2/vpermt2; sub-512-bit forms need
;; AVX512VL, byte forms need AVX512VBMI, word forms need AVX512BW.
22938 (define_mode_iterator VPERMI2
22939 [V16SI V16SF V8DI V8DF
22940 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
22941 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
22942 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
22943 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
22944 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
22945 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
22946 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
22947 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
22948
;; Integer-only subset of VPERMI2, used where the float modes have a
;; separate pattern (see the two *_vpermi2var_mask insns below).
22949 (define_mode_iterator VPERMI2I
22950 [V16SI V8DI
22951 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
22952 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
22953 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
22954 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
22955 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
22956 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
22957
;; Masked vpermi2: masked-off destination elements keep the index
;; operand's value, so the vec_merge fallback (operand 5) is the index
;; vector reinterpreted in the data mode.
22958 (define_expand "<avx512>_vpermi2var<mode>3_mask"
22959 [(set (match_operand:VPERMI2 0 "register_operand")
22960 (vec_merge:VPERMI2
22961 (unspec:VPERMI2
22962 [(match_operand:<sseintvecmode> 2 "register_operand")
22963 (match_operand:VPERMI2 1 "register_operand")
22964 (match_operand:VPERMI2 3 "nonimmediate_operand")]
22965 UNSPEC_VPERMT2)
22966 (match_dup 5)
22967 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
22968 "TARGET_AVX512F"
22969 {
22970 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
22971 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
22972 })
22973
;; Integer-mode matcher: the index vector is tied to the output
;; register ("0" constraint), matching vpermi2's overwrite of the
;; index operand.
22974 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
22975 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
22976 (vec_merge:VPERMI2I
22977 (unspec:VPERMI2I
22978 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
22979 (match_operand:VPERMI2I 1 "register_operand" "v")
22980 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
22981 UNSPEC_VPERMT2)
22982 (match_dup 2)
22983 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22984 "TARGET_AVX512F"
22985 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
22986 [(set_attr "type" "sselog")
22987 (set_attr "prefix" "evex")
22988 (set_attr "mode" "<sseinsnmode>")])
22989
;; Float-mode matcher: same structure, but the merge fallback is the
;; integer index vector viewed through a subreg in the FP mode.
22990 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
22991 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22992 (vec_merge:VF_AVX512VL
22993 (unspec:VF_AVX512VL
22994 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
22995 (match_operand:VF_AVX512VL 1 "register_operand" "v")
22996 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
22997 UNSPEC_VPERMT2)
22998 (subreg:VF_AVX512VL (match_dup 2) 0)
22999 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23000 "TARGET_AVX512F"
23001 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
23002 [(set_attr "type" "sselog")
23003 (set_attr "prefix" "evex")
23004 (set_attr "mode" "<sseinsnmode>")])
23005
;; Zero-masked vpermt2: delegate to the _maskz_1 pattern with a zero
;; vector as the merge fallback.
23006 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
23007 [(match_operand:VPERMI2 0 "register_operand")
23008 (match_operand:<sseintvecmode> 1 "register_operand")
23009 (match_operand:VPERMI2 2 "register_operand")
23010 (match_operand:VPERMI2 3 "nonimmediate_operand")
23011 (match_operand:<avx512fmaskmode> 4 "register_operand")]
23012 "TARGET_AVX512F"
23013 {
23014 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
23015 operands[0], operands[1], operands[2], operands[3],
23016 CONST0_RTX (<MODE>mode), operands[4]));
23017 DONE;
23018 })
23019
;; Unmasked/zero-masked two-table permute.  Register allocation picks
;; which source is tied to the output: tie the first data table and
;; emit vpermt2, or tie the index vector and emit vpermi2.
23020 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
23021 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
23022 (unspec:VPERMI2
23023 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
23024 (match_operand:VPERMI2 2 "register_operand" "0,v")
23025 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
23026 UNSPEC_VPERMT2))]
23027 "TARGET_AVX512F"
23028 "@
23029 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
23030 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
23031 [(set_attr "type" "sselog")
23032 (set_attr "prefix" "evex")
23033 (set_attr "mode" "<sseinsnmode>")])
23034
;; Merge-masked vpermt2: masked-off elements keep the first data
;; table's values (operand 2, tied to the output).
23035 (define_insn "<avx512>_vpermt2var<mode>3_mask"
23036 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
23037 (vec_merge:VPERMI2
23038 (unspec:VPERMI2
23039 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
23040 (match_operand:VPERMI2 2 "register_operand" "0")
23041 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
23042 UNSPEC_VPERMT2)
23043 (match_dup 2)
23044 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
23045 "TARGET_AVX512F"
23046 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
23047 [(set_attr "type" "sselog")
23048 (set_attr "prefix" "evex")
23049 (set_attr "mode" "<sseinsnmode>")])
23050
;; vperm2f128 expander.  When neither zeroing bit (0x88) is set, the
;; operation is expressible as a vec_select over the concatenation of
;; the two sources, which lets the generic optimizers see through it;
;; otherwise the pattern falls through to the unspec insn below.
23051 (define_expand "avx_vperm2f128<mode>3"
23052 [(set (match_operand:AVX256MODE2P 0 "register_operand")
23053 (unspec:AVX256MODE2P
23054 [(match_operand:AVX256MODE2P 1 "register_operand")
23055 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
23056 (match_operand:SI 3 "const_0_to_255_operand")]
23057 UNSPEC_VPERMIL2F128))]
23058 "TARGET_AVX"
23059 {
23060 int mask = INTVAL (operands[3]);
23061 if ((mask & 0x88) == 0)
23062 {
23063 rtx perm[<ssescalarnum>], t1, t2;
23064 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
23065
/* Bits 0-1 select which 128-bit lane of the concatenated sources
   lands in the low half; bits 4-5 select the high half.  */
23066 base = (mask & 3) * nelt2;
23067 for (i = 0; i < nelt2; ++i)
23068 perm[i] = GEN_INT (base + i);
23069
23070 base = ((mask >> 4) & 3) * nelt2;
23071 for (i = 0; i < nelt2; ++i)
23072 perm[i + nelt2] = GEN_INT (base + i);
23073
23074 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
23075 operands[1], operands[2]);
23076 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
23077 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
23078 t2 = gen_rtx_SET (operands[0], t2);
23079 emit_insn (t2);
23080 DONE;
23081 }
23082 })
23083
23084 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
23085 ;; means that in order to represent this properly in rtl we'd have to
23086 ;; nest *another* vec_concat with a zero operand and do the select from
23087 ;; a 4x wide vector. That doesn't seem very nice.
23088 (define_insn "*avx_vperm2f128<mode>_full"
23089 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
23090 (unspec:AVX256MODE2P
23091 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
23092 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
23093 (match_operand:SI 3 "const_0_to_255_operand" "n")]
23094 UNSPEC_VPERMIL2F128))]
23095 "TARGET_AVX"
23096 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
23097 [(set_attr "type" "sselog")
23098 (set_attr "prefix_extra" "1")
23099 (set_attr "length_immediate" "1")
23100 (set_attr "prefix" "vex")
23101 (set_attr "mode" "<sseinsnmode>")])
23102
;; Matcher for the vec_select form produced by the expander above.
;; avx_vperm2f128_parallel returns the imm8 + 1 (or 0 if the PARALLEL
;; is not representable); masks 0x12 and 0x20 are cheaper as vinsert.
23103 (define_insn "*avx_vperm2f128<mode>_nozero"
23104 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
23105 (vec_select:AVX256MODE2P
23106 (vec_concat:<ssedoublevecmode>
23107 (match_operand:AVX256MODE2P 1 "register_operand" "x")
23108 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
23109 (match_parallel 3 ""
23110 [(match_operand 4 "const_int_operand")])))]
23111 "TARGET_AVX
23112 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
23113 {
23114 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
23115 if (mask == 0x12)
23116 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
23117 if (mask == 0x20)
23118 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
23119 operands[3] = GEN_INT (mask);
23120 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
23121 }
23122 [(set_attr "type" "sselog")
23123 (set_attr "prefix_extra" "1")
23124 (set_attr "length_immediate" "1")
23125 (set_attr "prefix" "vex")
23126 (set_attr "mode" "<sseinsnmode>")])
23127
;; Recognize a single-operand vec_select whose PARALLEL is a rotation
;; (palignr_operand) and emit palignr.  Operand 3 is the element shift
;; count; it is scaled by the element size to get the byte immediate.
23128 (define_insn "*ssse3_palignr<mode>_perm"
23129 [(set (match_operand:V_128 0 "register_operand" "=x,Yw")
23130 (vec_select:V_128
23131 (match_operand:V_128 1 "register_operand" "0,Yw")
23132 (match_parallel 2 "palignr_operand"
23133 [(match_operand 3 "const_int_operand" "n,n")])))]
23134 "TARGET_SSSE3"
23135 {
/* Convert the element-count rotation into palignr's byte count.  */
23136 operands[2] = (GEN_INT (INTVAL (operands[3])
23137 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
23138
23139 switch (which_alternative)
23140 {
23141 case 0:
23142 return "palignr\t{%2, %1, %0|%0, %1, %2}";
23143 case 1:
/* AVX form: same register supplied for both sources.  */
23144 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
23145 default:
23146 gcc_unreachable ();
23147 }
23148 }
23149 [(set_attr "isa" "noavx,avx")
23150 (set_attr "type" "sseishft")
23151 (set_attr "atom_unit" "sishuf")
23152 (set_attr "prefix_data16" "1,*")
23153 (set_attr "prefix_extra" "1")
23154 (set_attr "length_immediate" "1")
23155 (set_attr "prefix" "orig,maybe_evex")])
23156
;; Masked 128-bit insert into a 256-bit vector: dispatch on the
;; immediate to the low- or high-half vec_set pattern.
23157 (define_expand "avx512vl_vinsert<mode>"
23158 [(match_operand:VI48F_256 0 "register_operand")
23159 (match_operand:VI48F_256 1 "register_operand")
23160 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
23161 (match_operand:SI 3 "const_0_to_1_operand")
23162 (match_operand:VI48F_256 4 "register_operand")
23163 (match_operand:<avx512fmaskmode> 5 "register_operand")]
23164 "TARGET_AVX512VL"
23165 {
23166 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
23167
23168 switch (INTVAL (operands[3]))
23169 {
23170 case 0:
23171 insn = gen_vec_set_lo_<mode>_mask;
23172 break;
23173 case 1:
23174 insn = gen_vec_set_hi_<mode>_mask;
23175 break;
23176 default:
23177 gcc_unreachable ();
23178 }
23179
23180 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
23181 operands[5]));
23182 DONE;
23183 })
23184
;; vinsertf128/vinserti128: unmasked variant of the dispatch above.
23185 (define_expand "avx_vinsertf128<mode>"
23186 [(match_operand:V_256 0 "register_operand")
23187 (match_operand:V_256 1 "register_operand")
23188 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
23189 (match_operand:SI 3 "const_0_to_1_operand")]
23190 "TARGET_AVX"
23191 {
23192 rtx (*insn)(rtx, rtx, rtx);
23193
23194 switch (INTVAL (operands[3]))
23195 {
23196 case 0:
23197 insn = gen_vec_set_lo_<mode>;
23198 break;
23199 case 1:
23200 insn = gen_vec_set_hi_<mode>;
23201 break;
23202 default:
23203 gcc_unreachable ();
23204 }
23205
23206 emit_insn (insn (operands[0], operands[1], operands[2]));
23207 DONE;
23208 })
23209
;; Replace the low 128 bits of a 256-bit 64-bit-element vector
;; (vec_concat of the new half with the old high half).  The EVEX
;; 64x2/32x4 forms support masking; the VEX form does not.
23210 (define_insn "vec_set_lo_<mode><mask_name>"
23211 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
23212 (vec_concat:VI8F_256
23213 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
23214 (vec_select:<ssehalfvecmode>
23215 (match_operand:VI8F_256 1 "register_operand" "v")
23216 (parallel [(const_int 2) (const_int 3)]))))]
23217 "TARGET_AVX && <mask_avx512dq_condition>"
23218 {
23219 if (TARGET_AVX512DQ)
23220 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
23221 else if (TARGET_AVX512VL)
23222 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
23223 else
23224 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
23225 }
23226 [(set_attr "type" "sselog")
23227 (set_attr "prefix_extra" "1")
23228 (set_attr "length_immediate" "1")
23229 (set_attr "prefix" "vex")
23230 (set_attr "mode" "<sseinsnmode>")])
23231
;; Replace the high 128 bits: keep elements 0-1 of operand 1 and
;; concatenate the new half on top (immediate 0x1 selects the upper
;; insert position).
23232 (define_insn "vec_set_hi_<mode><mask_name>"
23233 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
23234 (vec_concat:VI8F_256
23235 (vec_select:<ssehalfvecmode>
23236 (match_operand:VI8F_256 1 "register_operand" "v")
23237 (parallel [(const_int 0) (const_int 1)]))
23238 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
23239 "TARGET_AVX && <mask_avx512dq_condition>"
23240 {
23241 if (TARGET_AVX512DQ)
23242 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
23243 else if (TARGET_AVX512VL)
23244 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
23245 else
23246 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
23247 }
23248 [(set_attr "type" "sselog")
23249 (set_attr "prefix_extra" "1")
23250 (set_attr "length_immediate" "1")
23251 (set_attr "prefix" "vex")
23252 (set_attr "mode" "<sseinsnmode>")])
23253
;; Insert a 128-bit half into a 256-bit vector of 32-bit elements
;; (VI4F_256).  vec_set_lo replaces elements 0-3 with operand 2 and
;; keeps elements 4-7 of operand 1; vec_set_hi is the mirror image.
;; AVX512VL selects the EVEX vinsert*32x4 form (needed for masking),
;; otherwise the plain VEX vinsert[fi]128 is emitted.
23254 (define_insn "vec_set_lo_<mode><mask_name>"
23255 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
23256 (vec_concat:VI4F_256
23257 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
23258 (vec_select:<ssehalfvecmode>
23259 (match_operand:VI4F_256 1 "register_operand" "v")
23260 (parallel [(const_int 4) (const_int 5)
23261 (const_int 6) (const_int 7)]))))]
23262 "TARGET_AVX"
23263 {
23264 if (TARGET_AVX512VL)
23265 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
23266 else
23267 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
23268 }
23269 [(set_attr "type" "sselog")
23270 (set_attr "prefix_extra" "1")
23271 (set_attr "length_immediate" "1")
23272 (set_attr "prefix" "vex")
23273 (set_attr "mode" "<sseinsnmode>")])
23274
;; As above, but operand 2 becomes the high half (elements 4-7) and the
;; low half (elements 0-3) of operand 1 is preserved; immediate 0x1.
23275 (define_insn "vec_set_hi_<mode><mask_name>"
23276 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
23277 (vec_concat:VI4F_256
23278 (vec_select:<ssehalfvecmode>
23279 (match_operand:VI4F_256 1 "register_operand" "v")
23280 (parallel [(const_int 0) (const_int 1)
23281 (const_int 2) (const_int 3)]))
23282 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
23283 "TARGET_AVX"
23284 {
23285 if (TARGET_AVX512VL)
23286 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
23287 else
23288 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
23289 }
23290 [(set_attr "type" "sselog")
23291 (set_attr "prefix_extra" "1")
23292 (set_attr "length_immediate" "1")
23293 (set_attr "prefix" "vex")
23294 (set_attr "mode" "<sseinsnmode>")])
23295
;; Insert a V8HI half into a V16HI vector.  Two alternatives: the first
;; (x registers) uses VEX vinsert[fi]128, the second (v, i.e. including
;; the EVEX-only xmm16+/ymm16+ registers) uses EVEX vinserti32x4.
;; vec_set_lo replaces elements 0-7; vec_set_hi replaces elements 8-15.
23296 (define_insn "vec_set_lo_v16hi"
23297 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
23298 (vec_concat:V16HI
23299 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
23300 (vec_select:V8HI
23301 (match_operand:V16HI 1 "register_operand" "x,v")
23302 (parallel [(const_int 8) (const_int 9)
23303 (const_int 10) (const_int 11)
23304 (const_int 12) (const_int 13)
23305 (const_int 14) (const_int 15)]))))]
23306 "TARGET_AVX"
23307 "@
23308 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
23309 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
23310 [(set_attr "type" "sselog")
23311 (set_attr "prefix_extra" "1")
23312 (set_attr "length_immediate" "1")
23313 (set_attr "prefix" "vex,evex")
23314 (set_attr "mode" "OI")])
23315
;; High-half counterpart: elements 0-7 of operand 1 are preserved and
;; operand 2 becomes elements 8-15; immediate 0x1.
23316 (define_insn "vec_set_hi_v16hi"
23317 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
23318 (vec_concat:V16HI
23319 (vec_select:V8HI
23320 (match_operand:V16HI 1 "register_operand" "x,v")
23321 (parallel [(const_int 0) (const_int 1)
23322 (const_int 2) (const_int 3)
23323 (const_int 4) (const_int 5)
23324 (const_int 6) (const_int 7)]))
23325 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
23326 "TARGET_AVX"
23327 "@
23328 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
23329 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
23330 [(set_attr "type" "sselog")
23331 (set_attr "prefix_extra" "1")
23332 (set_attr "length_immediate" "1")
23333 (set_attr "prefix" "vex,evex")
23334 (set_attr "mode" "OI")])
23335
;; Insert a V16QI half into the low half (elements 0-15) of a V32QI
;; vector, preserving elements 16-31 of operand 1.  Two alternatives:
;; VEX vinsert[fi]128 for x registers, EVEX vinserti32x4 for v
;; registers (including xmm16+/ymm16+).
;;
;; Fix: operand 2's second-alternative constraint was "v" (register
;; only), inconsistent with vec_set_hi_v32qi and the v16hi patterns,
;; which use "vm"; vinserti32x4 accepts a memory source, so allow it
;; here as well.
23336 (define_insn "vec_set_lo_v32qi"
23337 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
23338 (vec_concat:V32QI
23339 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
23340 (vec_select:V16QI
23341 (match_operand:V32QI 1 "register_operand" "x,v")
23342 (parallel [(const_int 16) (const_int 17)
23343 (const_int 18) (const_int 19)
23344 (const_int 20) (const_int 21)
23345 (const_int 22) (const_int 23)
23346 (const_int 24) (const_int 25)
23347 (const_int 26) (const_int 27)
23348 (const_int 28) (const_int 29)
23349 (const_int 30) (const_int 31)]))))]
23350 "TARGET_AVX"
23351 "@
23352 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
23353 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
23354 [(set_attr "type" "sselog")
23355 (set_attr "prefix_extra" "1")
23356 (set_attr "length_immediate" "1")
23357 (set_attr "prefix" "vex,evex")
23358 (set_attr "mode" "OI")])
23359
;; Insert a V16QI half into the high half (elements 16-31) of a V32QI
;; vector, preserving elements 0-15 of operand 1.  Alternatives as in
;; vec_set_lo_v32qi: VEX vinsert[fi]128 vs. EVEX vinserti32x4.
23360 (define_insn "vec_set_hi_v32qi"
23361 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
23362 (vec_concat:V32QI
23363 (vec_select:V16QI
23364 (match_operand:V32QI 1 "register_operand" "x,v")
23365 (parallel [(const_int 0) (const_int 1)
23366 (const_int 2) (const_int 3)
23367 (const_int 4) (const_int 5)
23368 (const_int 6) (const_int 7)
23369 (const_int 8) (const_int 9)
23370 (const_int 10) (const_int 11)
23371 (const_int 12) (const_int 13)
23372 (const_int 14) (const_int 15)]))
23373 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
23374 "TARGET_AVX"
23375 "@
23376 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
23377 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
23378 [(set_attr "type" "sselog")
23379 (set_attr "prefix_extra" "1")
23380 (set_attr "length_immediate" "1")
23381 (set_attr "prefix" "vex,evex")
23382 (set_attr "mode" "OI")])
23383
;; AVX/AVX2 conditional load: vmaskmovps/pd or vpmaskmovd/q.  Elements
;; whose mask bit (operand 2) is clear are zeroed; masked-off elements
;; of the memory source are not accessed.  Modelled as an unspec since
;; the faulting behavior cannot be expressed in plain RTL.
23384 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
23385 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
23386 (unspec:V48_AVX2
23387 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
23388 (match_operand:V48_AVX2 1 "memory_operand" "m")]
23389 UNSPEC_MASKMOV))]
23390 "TARGET_AVX"
23391 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
23392 [(set_attr "type" "sselog1")
23393 (set_attr "prefix_extra" "1")
23394 (set_attr "prefix" "vex")
23395 (set_attr "btver2_decode" "vector")
23396 (set_attr "mode" "<sseinsnmode>")])
23397
;; AVX/AVX2 conditional store, the mirror of the pattern above.  The
;; (match_dup 0) input records that unmasked destination memory is
;; preserved, making this a read-modify-write of operand 0.
23398 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
23399 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
23400 (unspec:V48_AVX2
23401 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
23402 (match_operand:V48_AVX2 2 "register_operand" "x")
23403 (match_dup 0)]
23404 UNSPEC_MASKMOV))]
23405 "TARGET_AVX"
23406 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
23407 [(set_attr "type" "sselog1")
23408 (set_attr "prefix_extra" "1")
23409 (set_attr "prefix" "vex")
23410 (set_attr "btver2_decode" "vector")
23411 (set_attr "mode" "<sseinsnmode>")])
23412
;; Standard-named maskload/maskstore expanders used by the vectorizer.
;; The AVX/AVX2 variants (vector mask, UNSPEC_MASKMOV) map directly to
;; the insns above; the AVX512 variants use a k-register mask and are
;; expressed as a vec_merge with the destination, matching the masked
;; move insn patterns elsewhere in this file.
23413 (define_expand "maskload<mode><sseintvecmodelower>"
23414 [(set (match_operand:V48_AVX2 0 "register_operand")
23415 (unspec:V48_AVX2
23416 [(match_operand:<sseintvecmode> 2 "register_operand")
23417 (match_operand:V48_AVX2 1 "memory_operand")]
23418 UNSPEC_MASKMOV))]
23419 "TARGET_AVX")
23420
;; AVX512 maskload for 32/64-bit element modes (V48_AVX512VL).
23421 (define_expand "maskload<mode><avx512fmaskmodelower>"
23422 [(set (match_operand:V48_AVX512VL 0 "register_operand")
23423 (vec_merge:V48_AVX512VL
23424 (match_operand:V48_AVX512VL 1 "memory_operand")
23425 (match_dup 0)
23426 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
23427 "TARGET_AVX512F")
23428
;; AVX512BW maskload for 8/16-bit element modes (VI12_AVX512VL).
23429 (define_expand "maskload<mode><avx512fmaskmodelower>"
23430 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
23431 (vec_merge:VI12_AVX512VL
23432 (match_operand:VI12_AVX512VL 1 "memory_operand")
23433 (match_dup 0)
23434 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
23435 "TARGET_AVX512BW")
23436
;; AVX/AVX2 maskstore (vector mask).
23437 (define_expand "maskstore<mode><sseintvecmodelower>"
23438 [(set (match_operand:V48_AVX2 0 "memory_operand")
23439 (unspec:V48_AVX2
23440 [(match_operand:<sseintvecmode> 2 "register_operand")
23441 (match_operand:V48_AVX2 1 "register_operand")
23442 (match_dup 0)]
23443 UNSPEC_MASKMOV))]
23444 "TARGET_AVX")
23445
;; AVX512 maskstore for 32/64-bit element modes.
23446 (define_expand "maskstore<mode><avx512fmaskmodelower>"
23447 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
23448 (vec_merge:V48_AVX512VL
23449 (match_operand:V48_AVX512VL 1 "register_operand")
23450 (match_dup 0)
23451 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
23452 "TARGET_AVX512F")
23453
;; AVX512BW maskstore for 8/16-bit element modes.
23454 (define_expand "maskstore<mode><avx512fmaskmodelower>"
23455 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
23456 (vec_merge:VI12_AVX512VL
23457 (match_operand:VI12_AVX512VL 1 "register_operand")
23458 (match_dup 0)
23459 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
23460 "TARGET_AVX512BW")
23461
;; Conditional branch on a whole-vector comparison (SSE4.1 ptest-style),
;; expanded through ix86_expand_branch.  Operand 0 is restricted to
;; eq/ne (bt_comparison_operator); operand 3 is the branch target label.
23462 (define_expand "cbranch<mode>4"
23463 [(set (reg:CC FLAGS_REG)
23464 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
23465 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
23466 (set (pc) (if_then_else
23467 (match_operator 0 "bt_comparison_operator"
23468 [(reg:CC FLAGS_REG) (const_int 0)])
23469 (label_ref (match_operand 3))
23470 (pc)))]
23471 "TARGET_SSE4_1"
23472 {
23473 ix86_expand_branch (GET_CODE (operands[0]),
23474 operands[1], operands[2], operands[3]);
23475 DONE;
23476 })
23477
23478
;; vcast* with undefined upper half (UNSPEC_CAST): a 128->256-bit cast
;; whose upper lanes are don't-care.  Split after reload into a plain
;; half-width move: for a register destination, move into its low part;
;; for a memory destination, store the full-width subreg of operand 1.
23479 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
23480 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
23481 (vec_concat:AVX256MODE2P
23482 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
23483 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
23484 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23485 "#"
23486 "&& reload_completed"
23487 [(set (match_dup 0) (match_dup 1))]
23488 {
23489 if (REG_P (operands[0]))
23490 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
23491 else
23492 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
23493 <ssehalfvecmode>mode);
23494 })
23495
23496 ;; Modes handled by vec_init expanders.
23497 (define_mode_iterator VEC_INIT_MODE
23498 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
23499 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
23500 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
23501 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
23502 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
23503 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
23504 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
23505
23506 ;; Likewise, but for initialization from half sized vectors.
23507 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
23508 (define_mode_iterator VEC_INIT_HALF_MODE
23509 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
23510 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
23511 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
23512 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
23513 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
23514 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
23515 (V4TI "TARGET_AVX512F")])
23516
;; Standard vec_init expander: build a vector from scalar elements.
;; Operand 1 is a parallel of the element values; all the work is done
;; in ix86_expand_vector_init.
23517 (define_expand "vec_init<mode><ssescalarmodelower>"
23518 [(match_operand:VEC_INIT_MODE 0 "register_operand")
23519 (match_operand 1)]
23520 "TARGET_SSE"
23521 {
23522 ix86_expand_vector_init (false, operands[0], operands[1]);
23523 DONE;
23524 })
23525
;; Same entry point, but initializing from two half-sized vectors.
23526 (define_expand "vec_init<mode><ssehalfvecmodelower>"
23527 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
23528 (match_operand 1)]
23529 "TARGET_SSE"
23530 {
23531 ix86_expand_vector_init (false, operands[0], operands[1]);
23532 DONE;
23533 })
23534
;; Conditional (masked) vector shift: operand 0 = mask ? (op2 SHIFT op3)
;; : op4.  A constant-duplicate shift count is unwrapped to a DImode
;; scalar and routed to the immediate/scalar-count masked shift insn;
;; otherwise the per-element variable-shift masked insn is used.
23535 (define_expand "cond_<insn><mode>"
23536 [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
23537 (vec_merge:VI248_AVX512VLBW
23538 (any_shift:VI248_AVX512VLBW
23539 (match_operand:VI248_AVX512VLBW 2 "register_operand")
23540 (match_operand:VI248_AVX512VLBW 3 "nonimmediate_or_const_vec_dup_operand"))
23541 (match_operand:VI248_AVX512VLBW 4 "nonimm_or_0_operand")
23542 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
23543 "TARGET_AVX512F"
23544 {
23545 if (const_vec_duplicate_p (operands[3]))
23546 {
23547 operands[3] = unwrap_const_vec_duplicate (operands[3]);
23548 operands[3] = lowpart_subreg (DImode, operands[3], <ssescalarmode>mode);
23549 emit_insn (gen_<insn><mode>3_mask (operands[0],
23550 operands[2],
23551 operands[3],
23552 operands[4],
23553 operands[1]));
23554 }
23555 else
23556 emit_insn (gen_<avx2_avx512>_<insn>v<mode>_mask (operands[0],
23557 operands[2],
23558 operands[3],
23559 operands[4],
23560 operands[1]));
23561 DONE;
23562 })
23563
;; Per-element variable shifts.  Four patterns: arithmetic right shift
;; (vpsrav*) and left/logical-right shifts (any_lshift -> vpsllv*/
;; vpsrlv*), each for 32/64-bit elements (AVX2/AVX512) and for 16-bit
;; elements (AVX512BW only).
23564 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
23565 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
23566 (ashiftrt:VI48_AVX512F_AVX512VL
23567 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
23568 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
23569 "TARGET_AVX2 && <mask_mode512bit_condition>"
23570 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23571 [(set_attr "type" "sseishft")
23572 (set_attr "prefix" "maybe_evex")
23573 (set_attr "mode" "<sseinsnmode>")])
23574
;; vpsravw: 16-bit element variant, AVX512BW required.
23575 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
23576 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
23577 (ashiftrt:VI2_AVX512VL
23578 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
23579 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
23580 "TARGET_AVX512BW"
23581 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23582 [(set_attr "type" "sseishft")
23583 (set_attr "prefix" "maybe_evex")
23584 (set_attr "mode" "<sseinsnmode>")])
23585
;; vpsllv*/vpsrlv* for 32/64-bit elements.
23586 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
23587 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
23588 (any_lshift:VI48_AVX512F
23589 (match_operand:VI48_AVX512F 1 "register_operand" "v")
23590 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
23591 "TARGET_AVX2 && <mask_mode512bit_condition>"
23592 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23593 [(set_attr "type" "sseishft")
23594 (set_attr "prefix" "maybe_evex")
23595 (set_attr "mode" "<sseinsnmode>")])
23596
;; vpsllvw/vpsrlvw: 16-bit element variant, AVX512BW required.
23597 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
23598 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
23599 (any_lshift:VI2_AVX512VL
23600 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
23601 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
23602 "TARGET_AVX512BW"
23603 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23604 [(set_attr "type" "sseishft")
23605 (set_attr "prefix" "maybe_evex")
23606 (set_attr "mode" "<sseinsnmode>")])
23607
;; Concatenate two half-width vectors into a 256/512-bit result.
;; Alternatives 0/1: operand 2 is a real value -> emit a vinsert of
;; operand 2 into the high half (choosing the narrowest insert form the
;; ISA level allows).  Alternatives 2/3: operand 2 is zero -> a plain
;; half-width move into the low part zero-extends, picked per the mode
;; attribute and operand alignment.  The condition forbids a memory
;; operand 1 unless operand 2 is zero, since vinsert only takes memory
;; in operand 2's position.
23608 (define_insn "avx_vec_concat<mode>"
23609 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
23610 (vec_concat:V_256_512
23611 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
23612 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
23613 "TARGET_AVX
23614 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
23615 || !MEM_P (operands[1]))"
23616 {
23617 switch (which_alternative)
23618 {
23619 case 0:
;; VEX: vinsert[fi]128 of the high half.
23620 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23621 case 1:
;; EVEX: pick the insert granularity by element size and ISA.
23622 if (<MODE_SIZE> == 64)
23623 {
23624 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
23625 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23626 else
23627 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23628 }
23629 else
23630 {
23631 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23632 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23633 else
23634 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23635 }
23636 case 2:
23637 case 3:
;; Zero upper half: emit a half-width move (%t0 = ymm part of a zmm
;; destination, %x0 = xmm part of a ymm destination), aligned or
;; unaligned according to operand 1.
23638 switch (get_attr_mode (insn))
23639 {
23640 case MODE_V16SF:
23641 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23642 return "vmovups\t{%1, %t0|%t0, %1}";
23643 else
23644 return "vmovaps\t{%1, %t0|%t0, %1}";
23645 case MODE_V8DF:
23646 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23647 return "vmovupd\t{%1, %t0|%t0, %1}";
23648 else
23649 return "vmovapd\t{%1, %t0|%t0, %1}";
23650 case MODE_V8SF:
23651 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23652 return "vmovups\t{%1, %x0|%x0, %1}";
23653 else
23654 return "vmovaps\t{%1, %x0|%x0, %1}";
23655 case MODE_V4DF:
23656 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23657 return "vmovupd\t{%1, %x0|%x0, %1}";
23658 else
23659 return "vmovapd\t{%1, %x0|%x0, %1}";
23660 case MODE_XI:
;; Integer 512-bit: alternative 2 may use the VEX form; the EVEX
;; alternative needs the element-size-suffixed vmovdq[au]32/64.
23661 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23662 {
23663 if (which_alternative == 2)
23664 return "vmovdqu\t{%1, %t0|%t0, %1}";
23665 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23666 return "vmovdqu64\t{%1, %t0|%t0, %1}";
23667 else
23668 return "vmovdqu32\t{%1, %t0|%t0, %1}";
23669 }
23670 else
23671 {
23672 if (which_alternative == 2)
23673 return "vmovdqa\t{%1, %t0|%t0, %1}";
23674 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23675 return "vmovdqa64\t{%1, %t0|%t0, %1}";
23676 else
23677 return "vmovdqa32\t{%1, %t0|%t0, %1}";
23678 }
23679 case MODE_OI:
;; Integer 256-bit: as MODE_XI but writing the xmm part.
23680 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23681 {
23682 if (which_alternative == 2)
23683 return "vmovdqu\t{%1, %x0|%x0, %1}";
23684 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23685 return "vmovdqu64\t{%1, %x0|%x0, %1}";
23686 else
23687 return "vmovdqu32\t{%1, %x0|%x0, %1}";
23688 }
23689 else
23690 {
23691 if (which_alternative == 2)
23692 return "vmovdqa\t{%1, %x0|%x0, %1}";
23693 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23694 return "vmovdqa64\t{%1, %x0|%x0, %1}";
23695 else
23696 return "vmovdqa32\t{%1, %x0|%x0, %1}";
23697 }
23698 default:
23699 gcc_unreachable ();
23700 }
23701 default:
23702 gcc_unreachable ();
23703 }
23704 }
23705 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
23706 (set_attr "prefix_extra" "1,1,*,*")
23707 (set_attr "length_immediate" "1,1,*,*")
23708 (set_attr "prefix" "maybe_evex")
23709 (set_attr "mode" "<sseinsnmode>")])
23710
;; Recognize a movq-style select from (vec_concat x 0) before reload
;; and split it to a simple (vec_concat lowpart-of-x 0), i.e. a
;; zero-extending low-half move.  movq_parallel checks that the
;; selection indices pick the low half followed by zeros.
23711 (define_insn_and_split "*vec_concat<mode>_0_1"
23712 [(set (match_operand:V 0 "register_operand")
23713 (vec_select:V
23714 (vec_concat:<ssedoublevecmode>
23715 (match_operand:V 1 "nonimmediate_operand")
23716 (match_operand:V 2 "const0_operand"))
23717 (match_parallel 3 "movq_parallel"
23718 [(match_operand 4 "const_int_operand")])))]
23719 "TARGET_SSE2 && ix86_pre_reload_split ()"
23720 "#"
23721 "&& 1"
23722 [(set (match_dup 0)
23723 (vec_concat:V (match_dup 1) (match_dup 5)))]
23724 {
23725 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
23726 operands[5] = CONST0_RTX (<ssehalfvecmode>mode);
23727 })
23728
;; Half-precision -> single-precision conversions (F16C / AVX512).
;; 128-bit form: converts the low 4 HImode halves of operand 1; the
;; vec_select models taking the low V4SF of the notional V8SF result.
23729 (define_insn "vcvtph2ps<mask_name>"
23730 [(set (match_operand:V4SF 0 "register_operand" "=v")
23731 (vec_select:V4SF
23732 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
23733 UNSPEC_VCVTPH2PS)
23734 (parallel [(const_int 0) (const_int 1)
23735 (const_int 2) (const_int 3)])))]
23736 "TARGET_F16C || TARGET_AVX512VL"
23737 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
23738 [(set_attr "type" "ssecvt")
23739 (set_attr "prefix" "maybe_evex")
23740 (set_attr "mode" "V4SF")])
23741
;; Same conversion with a V4HI memory source (64-bit load form).
23742 (define_insn "*vcvtph2ps_load<mask_name>"
23743 [(set (match_operand:V4SF 0 "register_operand" "=v")
23744 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
23745 UNSPEC_VCVTPH2PS))]
23746 "TARGET_F16C || TARGET_AVX512VL"
23747 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
23748 [(set_attr "type" "ssecvt")
23749 (set_attr "prefix" "vex")
23750 (set_attr "mode" "V8SF")])
23751
;; 256-bit form: 8 halves -> V8SF.
23752 (define_insn "vcvtph2ps256<mask_name>"
23753 [(set (match_operand:V8SF 0 "register_operand" "=v")
23754 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
23755 UNSPEC_VCVTPH2PS))]
23756 "TARGET_F16C || TARGET_AVX512VL"
23757 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
23758 [(set_attr "type" "ssecvt")
23759 (set_attr "prefix" "vex")
23760 (set_attr "btver2_decode" "double")
23761 (set_attr "mode" "V8SF")])
23762
;; 512-bit AVX512F form, with optional masking and SAE (round_saeonly).
23763 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
23764 [(set (match_operand:V16SF 0 "register_operand" "=v")
23765 (unspec:V16SF
23766 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
23767 UNSPEC_VCVTPH2PS))]
23768 "TARGET_AVX512F"
23769 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
23770 [(set_attr "type" "ssecvt")
23771 (set_attr "prefix" "evex")
23772 (set_attr "mode" "V16SF")])
23773
;; Single-precision -> half-precision conversions (vcvtps2ph).  The
;; result always fills a V8HI with the converted V4HI followed by
;; zeros; operand 2 is the rounding-control immediate.
;;
;; Masked 128-bit expander (AVX512VL builtin entry point).
23774 (define_expand "vcvtps2ph_mask"
23775 [(set (match_operand:V8HI 0 "register_operand")
23776 (vec_merge:V8HI
23777 (vec_concat:V8HI
23778 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
23779 (match_operand:SI 2 "const_0_to_255_operand")]
23780 UNSPEC_VCVTPS2PH)
23781 (match_dup 5))
23782 (match_operand:V8HI 3 "nonimm_or_0_operand")
23783 (match_operand:QI 4 "register_operand")))]
23784 "TARGET_AVX512VL"
23785 "operands[5] = CONST0_RTX (V4HImode);")
23786
;; Unmasked 128-bit expander (F16C builtin entry point).
23787 (define_expand "vcvtps2ph"
23788 [(set (match_operand:V8HI 0 "register_operand")
23789 (vec_concat:V8HI
23790 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
23791 (match_operand:SI 2 "const_0_to_255_operand")]
23792 UNSPEC_VCVTPS2PH)
23793 (match_dup 3)))]
23794 "TARGET_F16C"
23795 "operands[3] = CONST0_RTX (V4HImode);")
23796
;; 128-bit register-destination insn matched by the expanders above.
23797 (define_insn "*vcvtps2ph<mask_name>"
23798 [(set (match_operand:V8HI 0 "register_operand" "=v")
23799 (vec_concat:V8HI
23800 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
23801 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23802 UNSPEC_VCVTPS2PH)
23803 (match_operand:V4HI 3 "const0_operand")))]
23804 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
23805 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
23806 [(set_attr "type" "ssecvt")
23807 (set_attr "prefix" "maybe_evex")
23808 (set_attr "mode" "V4SF")])
23809
;; 128-bit memory-destination (store) form; merge-masking variant via
;; <merge_mask_name>.
23810 (define_insn "*vcvtps2ph_store<merge_mask_name>"
23811 [(set (match_operand:V4HI 0 "memory_operand" "=m")
23812 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
23813 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23814 UNSPEC_VCVTPS2PH))]
23815 "TARGET_F16C || TARGET_AVX512VL"
23816 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
23817 [(set_attr "type" "ssecvt")
23818 (set_attr "prefix" "maybe_evex")
23819 (set_attr "mode" "V4SF")])
23820
;; 256-bit register-destination form: V8SF -> V8HI.
23821 (define_insn "vcvtps2ph256<mask_name>"
23822 [(set (match_operand:V8HI 0 "register_operand" "=v")
23823 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
23824 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23825 UNSPEC_VCVTPS2PH))]
23826 "TARGET_F16C || TARGET_AVX512VL"
23827 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23828 [(set_attr "type" "ssecvt")
23829 (set_attr "prefix" "maybe_evex")
23830 (set_attr "btver2_decode" "vector")
23831 (set_attr "mode" "V8SF")])
23832
;; 256-bit memory-destination (store) form.
23833 (define_insn "*vcvtps2ph256<merge_mask_name>"
23834 [(set (match_operand:V8HI 0 "memory_operand" "=m")
23835 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
23836 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23837 UNSPEC_VCVTPS2PH))]
23838 "TARGET_F16C || TARGET_AVX512VL"
23839 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
23840 [(set_attr "type" "ssecvt")
23841 (set_attr "prefix" "maybe_evex")
23842 (set_attr "btver2_decode" "vector")
23843 (set_attr "mode" "V8SF")])
23844
;; 512-bit AVX512F register-destination form: V16SF -> V16HI.
23845 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
23846 [(set (match_operand:V16HI 0 "register_operand" "=v")
23847 (unspec:V16HI
23848 [(match_operand:V16SF 1 "register_operand" "v")
23849 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23850 UNSPEC_VCVTPS2PH))]
23851 "TARGET_AVX512F"
23852 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23853 [(set_attr "type" "ssecvt")
23854 (set_attr "prefix" "evex")
23855 (set_attr "mode" "V16SF")])
23856
;; 512-bit memory-destination (store) form.
23857 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
23858 [(set (match_operand:V16HI 0 "memory_operand" "=m")
23859 (unspec:V16HI
23860 [(match_operand:V16SF 1 "register_operand" "v")
23861 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23862 UNSPEC_VCVTPS2PH))]
23863 "TARGET_AVX512F"
23864 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
23865 [(set_attr "type" "ssecvt")
23866 (set_attr "prefix" "evex")
23867 (set_attr "mode" "V16SF")])
23868
23869 ;; For gather* insn patterns
23870 (define_mode_iterator VEC_GATHER_MODE
23871 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when the index elements are 32-bit (SImode).
23872 (define_mode_attr VEC_GATHER_IDXSI
23873 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
23874 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
23875 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
23876 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
23877
;; Index-vector mode when the index elements are 64-bit (DImode).
23878 (define_mode_attr VEC_GATHER_IDXDI
23879 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
23880 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
23881 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
23882 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
23883
;; Source/mask vector mode for the DImode-index gathers: with 64-bit
;; indices only half as many 32-bit elements can be gathered, so the
;; 32-bit-element destinations map to their half-width modes.
23884 (define_mode_attr VEC_GATHER_SRCDI
23885 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
23886 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
23887 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
23888 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
23889
;; AVX2 gather with SImode indices: operand 0 gathers elements from
;; memory at base operand 2 + index vector operand 3 * scale operand 5,
;; merged under mask operand 4 with pass-through operand 1.  The VSIB
;; address is wrapped in an UNSPEC_VSIBADDR built here; mem:BLK records
;; the arbitrary memory read.
23890 (define_expand "avx2_gathersi<mode>"
23891 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
23892 (unspec:VEC_GATHER_MODE
23893 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
23894 (mem:<ssescalarmode>
23895 (match_par_dup 6
23896 [(match_operand 2 "vsib_address_operand")
23897 (match_operand:<VEC_GATHER_IDXSI>
23898 3 "register_operand")
;; NOTE(review): stray trailing space inside the predicate string
;; below; appears tolerated by the md machinery — worth cleaning up
;; upstream.
23899 (match_operand:SI 5 "const1248_operand ")]))
23900 (mem:BLK (scratch))
23901 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
23902 UNSPEC_GATHER))
23903 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
23904 "TARGET_AVX2"
23905 {
23906 operands[6]
23907 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
23908 operands[5]), UNSPEC_VSIBADDR);
23909 })
23910
;; Matching insn.  The mask (operands 1/5) is both input and clobbered
;; scratch: the hardware zeroes the mask as elements complete, hence
;; the earlyclobber "=&x" tying and the "1" match.
23911 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
23912 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23913 (unspec:VEC_GATHER_MODE
23914 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
23915 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
23916 [(unspec:P
23917 [(match_operand:P 3 "vsib_address_operand" "Tv")
23918 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
23919 (match_operand:SI 6 "const1248_operand" "n")]
23920 UNSPEC_VSIBADDR)])
23921 (mem:BLK (scratch))
23922 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
23923 UNSPEC_GATHER))
23924 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23925 "TARGET_AVX2"
23926 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
23927 [(set_attr "type" "ssemov")
23928 (set_attr "prefix" "vex")
23929 (set_attr "mode" "<sseinsnmode>")])
23930
;; Variant with an unused pass-through value ((pc) placeholder), used
;; when the previous destination contents are dead.
23931 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
23932 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23933 (unspec:VEC_GATHER_MODE
23934 [(pc)
23935 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23936 [(unspec:P
23937 [(match_operand:P 2 "vsib_address_operand" "Tv")
23938 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
23939 (match_operand:SI 5 "const1248_operand" "n")]
23940 UNSPEC_VSIBADDR)])
23941 (mem:BLK (scratch))
23942 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
23943 UNSPEC_GATHER))
23944 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23945 "TARGET_AVX2"
23946 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
23947 [(set_attr "type" "ssemov")
23948 (set_attr "prefix" "vex")
23949 (set_attr "mode" "<sseinsnmode>")])
23950
;; AVX2 gather with DImode indices.  Source/mask operands use
;; VEC_GATHER_SRCDI, which is half-width for 32-bit-element
;; destinations (only <VEC_GATHER_IDXDI> many elements are gathered).
23951 (define_expand "avx2_gatherdi<mode>"
23952 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
23953 (unspec:VEC_GATHER_MODE
23954 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
23955 (mem:<ssescalarmode>
23956 (match_par_dup 6
23957 [(match_operand 2 "vsib_address_operand")
23958 (match_operand:<VEC_GATHER_IDXDI>
23959 3 "register_operand")
;; NOTE(review): stray trailing space in the predicate string below,
;; same as in avx2_gathersi<mode>.
23960 (match_operand:SI 5 "const1248_operand ")]))
23961 (mem:BLK (scratch))
23962 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
23963 UNSPEC_GATHER))
23964 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
23965 "TARGET_AVX2"
23966 {
23967 operands[6]
23968 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
23969 operands[5]), UNSPEC_VSIBADDR);
23970 })
23971
;; Matching insn; mask is consumed and clobbered as in the SI-index
;; patterns.
23972 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
23973 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23974 (unspec:VEC_GATHER_MODE
23975 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
23976 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
23977 [(unspec:P
23978 [(match_operand:P 3 "vsib_address_operand" "Tv")
23979 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
23980 (match_operand:SI 6 "const1248_operand" "n")]
23981 UNSPEC_VSIBADDR)])
23982 (mem:BLK (scratch))
23983 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
23984 UNSPEC_GATHER))
23985 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23986 "TARGET_AVX2"
23987 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
23988 [(set_attr "type" "ssemov")
23989 (set_attr "prefix" "vex")
23990 (set_attr "mode" "<sseinsnmode>")])
23991
;; Pass-through-dead variant; when the source mode is narrower than
;; the destination mode, only the low (xmm) part of operand 0 is
;; written, hence the %x0 form.
23992 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
23993 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23994 (unspec:VEC_GATHER_MODE
23995 [(pc)
23996 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23997 [(unspec:P
23998 [(match_operand:P 2 "vsib_address_operand" "Tv")
23999 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
24000 (match_operand:SI 5 "const1248_operand" "n")]
24001 UNSPEC_VSIBADDR)])
24002 (mem:BLK (scratch))
24003 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
24004 UNSPEC_GATHER))
24005 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
24006 "TARGET_AVX2"
24007 {
24008 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
24009 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
24010 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
24011 }
24012 [(set_attr "type" "ssemov")
24013 (set_attr "prefix" "vex")
24014 (set_attr "mode" "<sseinsnmode>")])
24015
;; _3/_4: combiner patterns matching the half-width vec_select of a
;; 256-bit DImode-index gather (with and without a live pass-through).
24016 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
24017 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
24018 (vec_select:<VEC_GATHER_SRCDI>
24019 (unspec:VI4F_256
24020 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
24021 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
24022 [(unspec:P
24023 [(match_operand:P 3 "vsib_address_operand" "Tv")
24024 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
24025 (match_operand:SI 6 "const1248_operand" "n")]
24026 UNSPEC_VSIBADDR)])
24027 (mem:BLK (scratch))
24028 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
24029 UNSPEC_GATHER)
24030 (parallel [(const_int 0) (const_int 1)
24031 (const_int 2) (const_int 3)])))
24032 (clobber (match_scratch:VI4F_256 1 "=&x"))]
24033 "TARGET_AVX2"
24034 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
24035 [(set_attr "type" "ssemov")
24036 (set_attr "prefix" "vex")
24037 (set_attr "mode" "<sseinsnmode>")])
24038
24039 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
24040 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
24041 (vec_select:<VEC_GATHER_SRCDI>
24042 (unspec:VI4F_256
24043 [(pc)
24044 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
24045 [(unspec:P
24046 [(match_operand:P 2 "vsib_address_operand" "Tv")
24047 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
24048 (match_operand:SI 5 "const1248_operand" "n")]
24049 UNSPEC_VSIBADDR)])
24050 (mem:BLK (scratch))
24051 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
24052 UNSPEC_GATHER)
24053 (parallel [(const_int 0) (const_int 1)
24054 (const_int 2) (const_int 3)])))
24055 (clobber (match_scratch:VI4F_256 1 "=&x"))]
24056 "TARGET_AVX2"
24057 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
24058 [(set_attr "type" "ssemov")
24059 (set_attr "prefix" "vex")
24060 (set_attr "mode" "<sseinsnmode>")])
24061
;; AVX512 dword-index gather.  The expander builds the VSIB address
;; unspec (base, index vector, scale) that the insn patterns below match
;; via vsib_mem_operator; operand 4 is the write mask.
(define_expand "<avx512>_gathersi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
		     [(match_operand:VI48F 1 "register_operand")
		      (match_operand:<avx512fmaskmode> 4 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
  "TARGET_AVX512F"
{
  /* Wrap base/index/scale into the UNSPEC_VSIBADDR the insn expects.  */
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})

;; Operand 1 is the merge source (tied to the destination).  Operand 7 is
;; the mask; it is tied ("2") to the clobbered mask scratch because the
;; instruction zeroes the mask register as elements are gathered.
(define_insn "*avx512f_gathersi<VI48F:mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "0")
	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 4 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
  "TARGET_AVX512F"
;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
;; gas changed what it requires incompatibly.
  "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; As above, but with the merge source unused ((pc) placeholder): the
;; destination is fully overwritten.  Operand 6 is the mask, tied to the
;; clobbered scratch 1.
(define_insn "*avx512f_gathersi<VI48F:mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(pc)
	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
		 (match_operand:SI 4 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
;; gas changed what it requires incompatibly.
  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
24122
24123
;; AVX512 qword-index gather.  The source/destination element vector can
;; be half-width relative to the index vector (VEC_GATHER_SRCDI); the
;; mask is QImode here.  The expander builds the UNSPEC_VSIBADDR address.
(define_expand "<avx512>_gatherdi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
		      (match_operand:QI 4 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:QI 7))])]
  "TARGET_AVX512F"
{
  /* Wrap base/index/scale into the UNSPEC_VSIBADDR the insn expects.  */
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})

;; Operand 1 is the merge source tied to the destination; since it has
;; the (possibly narrower) source-data mode, the template prints %1
;; rather than %0 to get the right register width.  Operand 7 is the
;; mask, tied ("2") to the clobbered mask scratch.
(define_insn "*avx512f_gatherdi<VI48F:mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
	   (match_operand:QI 7 "register_operand" "2")
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 4 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:QI 2 "=&Yk"))]
  "TARGET_AVX512F"
;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
;; gas changed what it requires incompatibly.
  "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; As above, but with the merge source unused ((pc) placeholder).
;; Operand 6 is the mask tied to the clobbered scratch 1.
(define_insn "*avx512f_gatherdi<VI48F:mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(pc)
	   (match_operand:QI 6 "register_operand" "1")
	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
		 (match_operand:SI 4 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
{
  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
     gas changed what it requires incompatibly.  */
  /* When the destination mode is wider than the gathered data, print
     only the written part: %x0 (XMM) for sub-512-bit modes, %t0 (YMM)
     for 512-bit modes.  */
  if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
    {
      if (<VI48F:MODE_SIZE> != 64)
	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
      else
	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
    }
  return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
24193
;; AVX512 dword-index scatter: store vector operand 3 under mask
;; operand 1 to the VSIB-addressed memory.  The expander builds the
;; UNSPEC_VSIBADDR address from base/index/scale.
(define_expand "<avx512>_scattersi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
		   (unspec:VI48F
		     [(match_operand:<avx512fmaskmode> 1 "register_operand")
		      (match_operand:VI48F 3 "register_operand")]
		     UNSPEC_SCATTER))
	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
  "TARGET_AVX512F"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
					operands[4]), UNSPEC_VSIBADDR);
})

;; Operand 6 is the mask, tied ("1") to the clobbered mask scratch since
;; the instruction zeroes the mask register as elements are stored.
(define_insn "*avx512f_scattersi<VI48F:mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operand:VI48F 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
;; gas changed what it requires incompatibly.
  "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; AVX512 qword-index scatter; the stored vector may be half-width
;; relative to the index vector (VEC_GATHER_SRCDI); QImode mask.
(define_expand "<avx512>_scatterdi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
		   (unspec:VI48F
		     [(match_operand:QI 1 "register_operand")
		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
		     UNSPEC_SCATTER))
	      (clobber (match_scratch:QI 6))])]
  "TARGET_AVX512F"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
					operands[4]), UNSPEC_VSIBADDR);
})

;; Operand 6 is the mask, tied ("1") to the clobbered mask scratch.
(define_insn "*avx512f_scatterdi<VI48F:mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:QI 6 "register_operand" "1")
	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
;; gas changed what it requires incompatibly.
  "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
24269
;; Masked compress: pack the mask-selected elements of operand 1 into
;; the low part of the destination.  Operand 2 is the merge source:
;; either tied to the destination ("0") or const0 ("C"); %N2 presumably
;; prints the {z} zero-masking suffix for the const0 alternative —
;; confirm in ix86_print_operand.
(define_insn "<avx512>_compress<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "v")
	   (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
	  UNSPEC_COMPRESS))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Byte/word element compress, available with AVX512-VBMI2.
(define_insn "compress<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
	(unspec:VI12_AVX512VLBW
	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
	  UNSPEC_COMPRESS))]
  "TARGET_AVX512VBMI2"
  "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Compress directly to memory; (match_dup 0) records that untouched
;; destination memory keeps its previous contents.
(define_insn "<avx512>_compressstore<mode>_mask"
  [(set (match_operand:VI48F 0 "memory_operand" "=m")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "x")
	   (match_dup 0)
	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
	  UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])

;; Byte/word compress-to-memory, available with AVX512-VBMI2.
(define_insn "compressstore<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
	(unspec:VI12_AVX512VLBW
	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
	   (match_dup 0)
	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
	  UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512VBMI2"
  "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
24323
;; Zero-masked expand: rewrite the merge source to const0 and fall
;; through to the masked pattern below.
(define_expand "<avx512>_expand<mode>_maskz"
  [(set (match_operand:VI48F 0 "register_operand")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand")
	   (match_operand:VI48F 2 "nonimm_or_0_operand")
	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "operands[2] = CONST0_RTX (<MODE>mode);")

;; Masked expand: scatter the low elements of operand 1 into the
;; mask-selected destination lanes; two alternatives for the source in
;; a register or in memory.
(define_insn "expand<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
	   (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])

;; Byte/word element expand, available with AVX512-VBMI2.
(define_insn "expand<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
	(unspec:VI12_AVX512VLBW
	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512VBMI2"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])

;; Expand with a constant mask, split before reload: when the mask's set
;; bits are all contiguous from bit 0, the expand degenerates into a
;; masked load/move; otherwise force the mask into a register and use
;; the generic expand pattern.
(define_insn_and_split "*expand<mode>_mask"
  [(set (match_operand:VI12_VI48F_AVX512VLBW 0 "register_operand")
	(unspec:VI12_VI48F_AVX512VLBW
	  [(match_operand:VI12_VI48F_AVX512VLBW 1 "nonimmediate_operand")
	   (match_operand:VI12_VI48F_AVX512VLBW 2 "nonimm_or_0_operand")
	   (match_operand 3 "const_int_operand")]
	  UNSPEC_EXPAND))]
  "ix86_pre_reload_split ()
   && (TARGET_AVX512VBMI2 || GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
  bool has_zero = false;
  unsigned n = GET_MODE_NUNITS (<MODE>mode), i;
  unsigned ones = 0;

  /* If all one bits are in the mask's lower part,
     get the number of ones and assign it to ONES.  */
  for (i = 0; i != n; i++)
    {
      /* A set bit above a zero bit: mask is not contiguous-low.  */
      if ((mask & HOST_WIDE_INT_1U << i) && has_zero)
	break;

      /* Record first zero bit. */
      if (!(mask & HOST_WIDE_INT_1U << i) && !has_zero)
	{
	  has_zero = true;
	  ones = i;
	}
    }

  if (!has_zero)
    ones = n;

  if (i != n || (ones != 0 && ones != n))
    {
      rtx reg = gen_reg_rtx (<avx512fmaskmode>mode);
      emit_move_insn (reg, operands[3]);
      enum insn_code icode;
      if (i == n)
	/* For masks with all one bits in its lower part,
	   we can transform v{,p}expand* to vmovdq* with
	   mask operand.  */
	icode = CODE_FOR_<avx512>_load<mode>_mask;
      else
	icode = CODE_FOR_expand<mode>_mask;
      emit_insn (GEN_FCN (icode) (operands[0], operands[1], operands[2], reg));
    }
  else
    /* For ALL_MASK_ONES or CONST0_RTX mask, transform it to simple mov.  */
    emit_move_insn (operands[0], ones ? operands[1] : operands[2]);
  DONE;
})

;; Zero-masked byte/word expand (AVX512-VBMI2); rewrites the merge
;; source to const0 and reuses the masked pattern.
(define_expand "expand<mode>_maskz"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
	(unspec:VI12_AVX512VLBW
	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512VBMI2"
  "operands[2] = CONST0_RTX (<MODE>mode);")
24427
;; AVX512DQ VRANGEPS/PD: min/max/abs-select per immediate operand 3.
(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	   (match_operand:SI 3 "const_0_to_15_operand")]
	  UNSPEC_RANGE))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Scalar VRANGESS/SD: element 0 from the range operation, the remaining
;; elements copied from operand 1 (vec_merge with mask 1).
(define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_RANGE)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
24456
;; AVX512DQ VFPCLASSPS/PD: classify each element against the category
;; set in immediate operand 2, producing a mask-register result.
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
	   (match_operand 2 "const_0_to_255_operand" "n")]
	  UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
  "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Scalar VFPCLASSSS/SD: only bit 0 of the result is meaningful, hence
;; the (and ... (const_int 1)).
(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(and:<avx512fmaskmode>
	  (unspec:<avx512fmaskmode>
	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
	     (match_operand 2 "const_0_to_255_operand" "n")]
	    UNSPEC_FPCLASS)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
24484
;; VGETMANTPS/PD: extract the normalized mantissa of each element;
;; immediate operand 2 selects the normalization interval and sign
;; control.
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
	   (match_operand:SI 2 "const_0_to_15_operand")]
	  UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Scalar VGETMANTSS/SD: element 0 from the getmant of operand 2, the
;; remaining elements copied from operand 1 (vec_merge with mask 1).
(define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_GETMANT)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
24510
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; VDBPSADBW: double-block packed sum of absolute differences on
;; unsigned bytes, modeled as an opaque unspec for the reason above.
(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
	   (match_operand:SI 3 "const_0_to_255_operand")]
	  UNSPEC_DBPSADBW))]
  "TARGET_AVX512BW"
  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; VPLZCNTD/Q: per-element count of leading zero bits (AVX512CD).
(define_insn "clz<mode>2<mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(clz:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512CD"
  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; VPCONFLICTD/Q: per-element duplicate-detection bitmap (AVX512CD).
(define_insn "<mask_codefor>conflict<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(unspec:VI48_AVX512VL
	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_CONFLICT))]
  "TARGET_AVX512CD"
  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
24547
;; SHA-NI instructions.  All operate on V4SI state/message blocks, with
;; operand 1 tied to the destination ("0"); the exact hash-round
;; semantics are opaque to the compiler (unspecs).
(define_insn "sha1msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA1MSG1))]
  "TARGET_SHA"
  "sha1msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha1msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA1MSG2))]
  "TARGET_SHA"
  "sha1msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha1nexte"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA1NEXTE))]
  "TARGET_SHA"
  "sha1nexte\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

;; SHA1RNDS4 additionally takes the round-function selector as
;; immediate operand 3 (0..3).
(define_insn "sha1rnds4"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")
	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
	  UNSPEC_SHA1RNDS4))]
  "TARGET_SHA"
  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

(define_insn "sha256msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA256MSG1))]
  "TARGET_SHA"
  "sha256msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha256msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA256MSG2))]
  "TARGET_SHA"
  "sha256msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

;; SHA256RNDS2's third source is the implicit XMM0 register, hence the
;; "Yz" constraint on operand 3.
(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")
	   (match_operand:V4SI 3 "register_operand" "Yz")]
	  UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
24628
;; Cast a 128-bit vector up to a 512-bit one: the upper parts are
;; undefined (UNSPEC_CAST), so after reload this degenerates into a
;; plain narrow move through a lowpart subreg.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(vec_concat:AVX512MODE2P
	  (vec_concat:<ssehalfvecmode>
	    (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
	    (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
	  (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Narrow whichever side is the register so the emitted move is in
     the 128-bit quarter mode.  */
  if (REG_P (operands[0]))
    operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
  else
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
				  <ssequartermode>mode);
})

;; Likewise for casting a 256-bit vector up to a 512-bit one.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(vec_concat:AVX512MODE2P
	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
	  (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  if (REG_P (operands[0]))
    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
  else
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
				  <ssehalfvecmode>mode);
})
24664
;; AVX512IFMA VPMADD52{L,H}UQ: 52-bit integer fused multiply-add.
;; NOTE(review): the pattern names spell "vpamdd52" — a historical typo
;; for "vpmadd52" — kept as-is because the generated gen_*/CODE_FOR_*
;; names are referenced from outside this file.
(define_int_iterator VPMADD52
	[UNSPEC_VPMADD52LUQ
	 UNSPEC_VPMADD52HUQ])

(define_int_attr vpmadd52type
  [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])

;; Zero-masked high-half form: forward to the *_maskz_1 insn with a
;; const0 merge operand.
(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

;; Zero-masked low-half form.
(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

;; Accumulator operand 1 is tied to the destination ("0").
(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(unspec:VI8_AVX512VL
	  [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	  VPMADD52))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked form: lanes cleared in mask operand 4 keep the
;; accumulator value (vec_merge with match_dup 1).
(define_insn "vpamdd52<vpmadd52type><mode>_mask"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI8_AVX512VL
	  (unspec:VI8_AVX512VL
	    [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	    VPMADD52)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
24728
;; AVX512VBMI VPMULTISHIFTQB: per-qword multi-bit-field extraction,
;; modeled as an opaque unspec.
(define_insn "vpmultishiftqb<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMULTISHIFT))]
  "TARGET_AVX512VBMI"
  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

;; 2048-bit pseudo-modes used by the AVX512_4FMAPS/4VNNIW multi-register
;; operand groups (four consecutive 512-bit registers).
(define_mode_iterator IMOD4
  [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])

;; The 512-bit mode each IMOD4 quarter is moved in.
(define_mode_attr imod4_narrow
  [(V64SF "V16SF") (V64SI "V16SI")])
24746
;; Moves of the 2048-bit IMOD4 pseudo-modes.
(define_expand "mov<mode>"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand")
	(match_operand:IMOD4 1 "nonimm_or_0_operand"))]
  "TARGET_AVX512F"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; After reload, split an IMOD4 move into four 512-bit moves, one per
;; 64-byte quarter.
(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
	(match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
  "TARGET_AVX512F
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1;
  int i;

  for (i = 0; i < 4; i++)
    {
      /* Offset i * 64 bytes selects the i-th 512-bit quarter.  */
      op0 = simplify_subreg
	     (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
      op1 = simplify_subreg
	     (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
      emit_move_insn (op0, op1);
    }
  DONE;
})
24779
24780 (define_insn "avx5124fmaddps_4fmaddps"
24781 [(set (match_operand:V16SF 0 "register_operand" "=v")
24782 (unspec:V16SF
24783 [(match_operand:V16SF 1 "register_operand" "0")
24784 (match_operand:V64SF 2 "register_operand" "v")
24785 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
24786 "TARGET_AVX5124FMAPS"
24787 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
24788 [(set_attr ("type") ("ssemuladd"))
24789 (set_attr ("prefix") ("evex"))
24790 (set_attr ("mode") ("V16SF"))])
24791
;; AVX512_4FMAPS packed/scalar 4-iteration FMA (v4fmaddps / v4fmaddss).
;; The V64SF register operand names a group of four consecutive vector
;; registers that supply the four multiplicands; the V4SF memory operand
;; holds the four scalars.  NOTE(review): the %g / %x operand modifiers
;; appear to print the zmm-group / xmm form of the register -- confirm
;; against ix86_print_operand.

;; Merge-masking form: the UNSPEC result is merged into operand 3 (tied
;; to the destination by constraint "0") under mask operand 4 (HImode =
;; one bit per SF element of the V16SF result, mask-register class "Yk").
24792 (define_insn "avx5124fmaddps_4fmaddps_mask"
24793 [(set (match_operand:V16SF 0 "register_operand" "=v")
24794 (vec_merge:V16SF
24795 (unspec:V16SF
24796 [(match_operand:V64SF 1 "register_operand" "v")
24797 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
24798 (match_operand:V16SF 3 "register_operand" "0")
24799 (match_operand:HI 4 "register_operand" "Yk")))]
24800 "TARGET_AVX5124FMAPS"
24801 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24802 [(set_attr ("type") ("ssemuladd"))
24803 (set_attr ("prefix") ("evex"))
24804 (set_attr ("mode") ("V16SF"))])
24805
;; Zero-masking form: elements with a clear mask bit (operand 5) are
;; forced to zero via vec_merge against the const0 operand 4.
24806 (define_insn "avx5124fmaddps_4fmaddps_maskz"
24807 [(set (match_operand:V16SF 0 "register_operand" "=v")
24808 (vec_merge:V16SF
24809 (unspec:V16SF
24810 [(match_operand:V16SF 1 "register_operand" "0")
24811 (match_operand:V64SF 2 "register_operand" "v")
24812 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
24813 (match_operand:V16SF 4 "const0_operand" "C")
24814 (match_operand:HI 5 "register_operand" "Yk")))]
24815 "TARGET_AVX5124FMAPS"
24816 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24817 [(set_attr ("type") ("ssemuladd"))
24818 (set_attr ("prefix") ("evex"))
24819 (set_attr ("mode") ("V16SF"))])
24820
;; Scalar form: V4SF accumulator tied to the destination; only the low
;; element is computed (mode attribute SF), mask is QImode below.
24821 (define_insn "avx5124fmaddps_4fmaddss"
24822 [(set (match_operand:V4SF 0 "register_operand" "=v")
24823 (unspec:V4SF
24824 [(match_operand:V4SF 1 "register_operand" "0")
24825 (match_operand:V64SF 2 "register_operand" "v")
24826 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
24827 "TARGET_AVX5124FMAPS"
24828 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
24829 [(set_attr ("type") ("ssemuladd"))
24830 (set_attr ("prefix") ("evex"))
24831 (set_attr ("mode") ("SF"))])
24832
;; Scalar merge-masking form (QImode mask, class "Yk").
24833 (define_insn "avx5124fmaddps_4fmaddss_mask"
24834 [(set (match_operand:V4SF 0 "register_operand" "=v")
24835 (vec_merge:V4SF
24836 (unspec:V4SF
24837 [(match_operand:V64SF 1 "register_operand" "v")
24838 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
24839 (match_operand:V4SF 3 "register_operand" "0")
24840 (match_operand:QI 4 "register_operand" "Yk")))]
24841 "TARGET_AVX5124FMAPS"
24842 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
24843 [(set_attr ("type") ("ssemuladd"))
24844 (set_attr ("prefix") ("evex"))
24845 (set_attr ("mode") ("SF"))])
24846
;; Scalar zero-masking form.
24847 (define_insn "avx5124fmaddps_4fmaddss_maskz"
24848 [(set (match_operand:V4SF 0 "register_operand" "=v")
24849 (vec_merge:V4SF
24850 (unspec:V4SF
24851 [(match_operand:V4SF 1 "register_operand" "0")
24852 (match_operand:V64SF 2 "register_operand" "v")
24853 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
24854 (match_operand:V4SF 4 "const0_operand" "C")
24855 (match_operand:QI 5 "register_operand" "Yk")))]
24856 "TARGET_AVX5124FMAPS"
24857 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
24858 [(set_attr ("type") ("ssemuladd"))
24859 (set_attr ("prefix") ("evex"))
24860 (set_attr ("mode") ("SF"))])
24861
;; AVX512_4FMAPS negated 4-iteration FMA (v4fnmaddps / v4fnmaddss).
;; Structurally identical to the v4fmadd patterns above, using
;; UNSPEC_VP4FNMADD instead of UNSPEC_VP4FMADD.

;; Packed form, no masking; accumulator (operand 1) tied to destination.
24862 (define_insn "avx5124fmaddps_4fnmaddps"
24863 [(set (match_operand:V16SF 0 "register_operand" "=v")
24864 (unspec:V16SF
24865 [(match_operand:V16SF 1 "register_operand" "0")
24866 (match_operand:V64SF 2 "register_operand" "v")
24867 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
24868 "TARGET_AVX5124FMAPS"
24869 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
24870 [(set_attr ("type") ("ssemuladd"))
24871 (set_attr ("prefix") ("evex"))
24872 (set_attr ("mode") ("V16SF"))])
24873
;; Packed merge-masking form (HImode mask, one bit per element).
24874 (define_insn "avx5124fmaddps_4fnmaddps_mask"
24875 [(set (match_operand:V16SF 0 "register_operand" "=v")
24876 (vec_merge:V16SF
24877 (unspec:V16SF
24878 [(match_operand:V64SF 1 "register_operand" "v")
24879 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24880 (match_operand:V16SF 3 "register_operand" "0")
24881 (match_operand:HI 4 "register_operand" "Yk")))]
24882 "TARGET_AVX5124FMAPS"
24883 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24884 [(set_attr ("type") ("ssemuladd"))
24885 (set_attr ("prefix") ("evex"))
24886 (set_attr ("mode") ("V16SF"))])
24887
;; Packed zero-masking form (merge against const0).
24888 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
24889 [(set (match_operand:V16SF 0 "register_operand" "=v")
24890 (vec_merge:V16SF
24891 (unspec:V16SF
24892 [(match_operand:V16SF 1 "register_operand" "0")
24893 (match_operand:V64SF 2 "register_operand" "v")
24894 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24895 (match_operand:V16SF 4 "const0_operand" "C")
24896 (match_operand:HI 5 "register_operand" "Yk")))]
24897 "TARGET_AVX5124FMAPS"
24898 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24899 [(set_attr ("type") ("ssemuladd"))
24900 (set_attr ("prefix") ("evex"))
24901 (set_attr ("mode") ("V16SF"))])
24902
;; Scalar form (low element only, mode SF).
24903 (define_insn "avx5124fmaddps_4fnmaddss"
24904 [(set (match_operand:V4SF 0 "register_operand" "=v")
24905 (unspec:V4SF
24906 [(match_operand:V4SF 1 "register_operand" "0")
24907 (match_operand:V64SF 2 "register_operand" "v")
24908 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
24909 "TARGET_AVX5124FMAPS"
24910 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
24911 [(set_attr ("type") ("ssemuladd"))
24912 (set_attr ("prefix") ("evex"))
24913 (set_attr ("mode") ("SF"))])
24914
;; Scalar merge-masking form (QImode mask).
24915 (define_insn "avx5124fmaddps_4fnmaddss_mask"
24916 [(set (match_operand:V4SF 0 "register_operand" "=v")
24917 (vec_merge:V4SF
24918 (unspec:V4SF
24919 [(match_operand:V64SF 1 "register_operand" "v")
24920 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24921 (match_operand:V4SF 3 "register_operand" "0")
24922 (match_operand:QI 4 "register_operand" "Yk")))]
24923 "TARGET_AVX5124FMAPS"
24924 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
24925 [(set_attr ("type") ("ssemuladd"))
24926 (set_attr ("prefix") ("evex"))
24927 (set_attr ("mode") ("SF"))])
24928
;; Scalar zero-masking form.
24929 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
24930 [(set (match_operand:V4SF 0 "register_operand" "=v")
24931 (vec_merge:V4SF
24932 (unspec:V4SF
24933 [(match_operand:V4SF 1 "register_operand" "0")
24934 (match_operand:V64SF 2 "register_operand" "v")
24935 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24936 (match_operand:V4SF 4 "const0_operand" "C")
24937 (match_operand:QI 5 "register_operand" "Yk")))]
24938 "TARGET_AVX5124FMAPS"
24939 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
24940 [(set_attr ("type") ("ssemuladd"))
24941 (set_attr ("prefix") ("evex"))
24942 (set_attr ("mode") ("SF"))])
24943
;; AVX512_4VNNIW 4-iteration word dot-product accumulate
;; (vp4dpwssd / saturating vp4dpwssds).  Same register-group scheme as
;; the 4FMAPS patterns: the V64SI operand names four consecutive source
;; registers, the V4SI memory operand the per-iteration data.

;; Non-masked form; V16SI accumulator tied to the destination.
24944 (define_insn "avx5124vnniw_vp4dpwssd"
24945 [(set (match_operand:V16SI 0 "register_operand" "=v")
24946 (unspec:V16SI
24947 [(match_operand:V16SI 1 "register_operand" "0")
24948 (match_operand:V64SI 2 "register_operand" "v")
24949 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
24950 "TARGET_AVX5124VNNIW"
24951 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
24952 [(set_attr ("type") ("ssemuladd"))
24953 (set_attr ("prefix") ("evex"))
24954 (set_attr ("mode") ("TI"))])
24955
;; Merge-masking form (HImode mask, one bit per SI element).
24956 (define_insn "avx5124vnniw_vp4dpwssd_mask"
24957 [(set (match_operand:V16SI 0 "register_operand" "=v")
24958 (vec_merge:V16SI
24959 (unspec:V16SI
24960 [(match_operand:V64SI 1 "register_operand" "v")
24961 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
24962 (match_operand:V16SI 3 "register_operand" "0")
24963 (match_operand:HI 4 "register_operand" "Yk")))]
24964 "TARGET_AVX5124VNNIW"
24965 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24966 [(set_attr ("type") ("ssemuladd"))
24967 (set_attr ("prefix") ("evex"))
24968 (set_attr ("mode") ("TI"))])
24969
;; Zero-masking form (merge against const0).
24970 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
24971 [(set (match_operand:V16SI 0 "register_operand" "=v")
24972 (vec_merge:V16SI
24973 (unspec:V16SI
24974 [(match_operand:V16SI 1 "register_operand" "0")
24975 (match_operand:V64SI 2 "register_operand" "v")
24976 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
24977 (match_operand:V16SI 4 "const0_operand" "C")
24978 (match_operand:HI 5 "register_operand" "Yk")))]
24979 "TARGET_AVX5124VNNIW"
24980 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24981 [(set_attr ("type") ("ssemuladd"))
24982 (set_attr ("prefix") ("evex"))
24983 (set_attr ("mode") ("TI"))])
24984
;; Saturating variant, non-masked.
24985 (define_insn "avx5124vnniw_vp4dpwssds"
24986 [(set (match_operand:V16SI 0 "register_operand" "=v")
24987 (unspec:V16SI
24988 [(match_operand:V16SI 1 "register_operand" "0")
24989 (match_operand:V64SI 2 "register_operand" "v")
24990 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
24991 "TARGET_AVX5124VNNIW"
24992 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
24993 [(set_attr ("type") ("ssemuladd"))
24994 (set_attr ("prefix") ("evex"))
24995 (set_attr ("mode") ("TI"))])
24996
;; Saturating variant, merge-masking.
24997 (define_insn "avx5124vnniw_vp4dpwssds_mask"
24998 [(set (match_operand:V16SI 0 "register_operand" "=v")
24999 (vec_merge:V16SI
25000 (unspec:V16SI
25001 [(match_operand:V64SI 1 "register_operand" "v")
25002 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
25003 (match_operand:V16SI 3 "register_operand" "0")
25004 (match_operand:HI 4 "register_operand" "Yk")))]
25005 "TARGET_AVX5124VNNIW"
25006 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
25007 [(set_attr ("type") ("ssemuladd"))
25008 (set_attr ("prefix") ("evex"))
25009 (set_attr ("mode") ("TI"))])
25010
;; Saturating variant, zero-masking.
25011 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
25012 [(set (match_operand:V16SI 0 "register_operand" "=v")
25013 (vec_merge:V16SI
25014 (unspec:V16SI
25015 [(match_operand:V16SI 1 "register_operand" "0")
25016 (match_operand:V64SI 2 "register_operand" "v")
25017 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
25018 (match_operand:V16SI 4 "const0_operand" "C")
25019 (match_operand:HI 5 "register_operand" "Yk")))]
25020 "TARGET_AVX5124VNNIW"
25021 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
25022 [(set_attr ("type") ("ssemuladd"))
25023 (set_attr ("prefix") ("evex"))
25024 (set_attr ("mode") ("TI"))])
25025
;; Vector population count for D/Q element modes (AVX512VPOPCNTDQ).
;; The expand exposes the standard "popcount<mode>2" name to the
;; middle end; it needs no preparation code, so its body doubles as
;; the matched RTL.
25026 (define_expand "popcount<mode>2"
25027 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
25028 (popcount:VI48_AVX512VL
25029 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
25030 "TARGET_AVX512VPOPCNTDQ")
25031
;; Matching insn; <mask_name>/<mask_operand2> come from the mask
;; subst machinery, so this also provides the masked variants.
25032 (define_insn "vpopcount<mode><mask_name>"
25033 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
25034 (popcount:VI48_AVX512VL
25035 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
25036 "TARGET_AVX512VPOPCNTDQ"
25037 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
25038
25039 ;; Save multiple registers out-of-line.
;; Calls a library stub named by operand 1; the match_parallel
;; predicate "save_multiple" validates the full set of register
;; saves carried in the parallel.  NOTE(review): these appear to be
;; the -mcall-ms2sysv-xlogues out-of-line SSE-register save/restore
;; stubs -- confirm against i386.c/predicates.md.
25040 (define_insn "*save_multiple<mode>"
25041 [(match_parallel 0 "save_multiple"
25042 [(use (match_operand:P 1 "symbol_operand"))])]
25043 "TARGET_SSE && TARGET_64BIT"
25044 "call\t%P1")
25045
25046 ;; Restore multiple registers out-of-line.
25047 (define_insn "*restore_multiple<mode>"
25048 [(match_parallel 0 "restore_multiple"
25049 [(use (match_operand:P 1 "symbol_operand"))])]
25050 "TARGET_SSE && TARGET_64BIT"
25051 "call\t%P1")
25052
25053 ;; Restore multiple registers out-of-line and return.
;; Tail-jumps to the stub (jmp, not call): the stub itself returns to
;; our caller.  The parallel also moves R10 into RSP before the jump.
25054 (define_insn "*restore_multiple_and_return<mode>"
25055 [(match_parallel 0 "restore_multiple"
25056 [(return)
25057 (use (match_operand:P 1 "symbol_operand"))
25058 (set (reg:DI SP_REG) (reg:DI R10_REG))
25059 ])]
25060 "TARGET_SSE && TARGET_64BIT"
25061 "jmp\t%P1")
25062
25063 ;; Restore multiple registers out-of-line when hard frame pointer is used,
25064 ;; perform the leave operation prior to returning (from the function).
;; The RTL encodes the `leave` effect explicitly: RSP = RBP + 8 and
;; RBP reloaded from the old frame slot; the BLK clobber blocks stack
;; accesses from being scheduled across the frame teardown.
25065 (define_insn "*restore_multiple_leave_return<mode>"
25066 [(match_parallel 0 "restore_multiple"
25067 [(return)
25068 (use (match_operand:P 1 "symbol_operand"))
25069 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
25070 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
25071 (clobber (mem:BLK (scratch)))
25072 ])]
25073 "TARGET_SSE && TARGET_64BIT"
25074 "jmp\t%P1")
25075
;; Vector population count for B/W element modes (AVX512BITALG).
;; Same standard-name expand/insn pairing as the VPOPCNTDQ patterns;
;; the VI12 iterator makes the expanded names disjoint from those.
;; NOTE(review): the constraints on this define_expand's operands are
;; ignored by genemit -- harmless but unnecessary.
25076 (define_expand "popcount<mode>2"
25077 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
25078 (popcount:VI12_AVX512VL
25079 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
25080 "TARGET_AVX512BITALG")
25081
;; Matching insn; mask substs provide the masked forms.
25082 (define_insn "vpopcount<mode><mask_name>"
25083 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
25084 (popcount:VI12_AVX512VL
25085 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
25086 "TARGET_AVX512BITALG"
25087 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
25088
;; GFNI Galois-field instructions.  Each pattern has two alternatives:
;; a legacy-SSE form (isa noavx, operands tied via "0") and a VEX/EVEX
;; form (isa avx, three-operand); the mask substs apply only to the
;; second alternative.

;; Affine inverse transform with 8-bit immediate matrix selector.
25089 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
25090 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
25091 (unspec:VI1_AVX512F
25092 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
25093 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
25094 (match_operand 3 "const_0_to_255_operand" "n,n")]
25095 UNSPEC_GF2P8AFFINEINV))]
25096 "TARGET_GFNI"
25097 "@
25098 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
25099 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
25100 [(set_attr "isa" "noavx,avx")
25101 (set_attr "prefix_data16" "1,*")
25102 (set_attr "prefix_extra" "1")
25103 (set_attr "prefix" "orig,maybe_evex")
25104 (set_attr "mode" "<sseinsnmode>")])
25105
;; Affine transform (no inverse), same shape as above.
25106 (define_insn "vgf2p8affineqb_<mode><mask_name>"
25107 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
25108 (unspec:VI1_AVX512F
25109 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
25110 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
25111 (match_operand 3 "const_0_to_255_operand" "n,n")]
25112 UNSPEC_GF2P8AFFINE))]
25113 "TARGET_GFNI"
25114 "@
25115 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
25116 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
25117 [(set_attr "isa" "noavx,avx")
25118 (set_attr "prefix_data16" "1,*")
25119 (set_attr "prefix_extra" "1")
25120 (set_attr "prefix" "orig,maybe_evex")
25121 (set_attr "mode" "<sseinsnmode>")])
25122
;; GF(2^8) multiply; "%0" in the constraint marks it commutative.
25123 (define_insn "vgf2p8mulb_<mode><mask_name>"
25124 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
25125 (unspec:VI1_AVX512F
25126 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
25127 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
25128 UNSPEC_GF2P8MUL))]
25129 "TARGET_GFNI"
25130 "@
25131 gf2p8mulb\t{%2, %0| %0, %2}
25132 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
25133 [(set_attr "isa" "noavx,avx")
25134 (set_attr "prefix_data16" "1,*")
25135 (set_attr "prefix_extra" "1")
25136 (set_attr "prefix" "orig,maybe_evex")
25137 (set_attr "mode" "<sseinsnmode>")])
25138
;; AVX512VBMI2 concatenate-and-shift by immediate (vpshrd/vpshld):
;; shift the double-width concatenation of operands 1 and 2 right/left
;; by the 8-bit immediate in operand 3.  Masked forms via mask substs.

25139 (define_insn "vpshrd_<mode><mask_name>"
25140 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25141 (unspec:VI248_AVX512VL
25142 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
25143 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
25144 (match_operand:SI 3 "const_0_to_255_operand" "n")]
25145 UNSPEC_VPSHRD))]
25146 "TARGET_AVX512VBMI2"
25147 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
25148 [(set_attr ("prefix") ("evex"))])
25149
;; Left-shift counterpart of vpshrd above.
25150 (define_insn "vpshld_<mode><mask_name>"
25151 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25152 (unspec:VI248_AVX512VL
25153 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
25154 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
25155 (match_operand:SI 3 "const_0_to_255_operand" "n")]
25156 UNSPEC_VPSHLD))]
25157 "TARGET_AVX512VBMI2"
25158 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
25159 [(set_attr ("prefix") ("evex"))])
25160
;; AVX512VBMI2 variable concatenate-and-shift (vpshrdv/vpshldv).
;; Destructive three-operand form: operand 1 (tied to the destination)
;; is shifted together with operand 2 by the per-element counts in
;; operand 3.  Each op comes as plain / merge-mask / zero-mask; the
;; zero-mask define_expand exists only to inject the const0 merge
;; operand before matching the _maskz_1 insn.

25161 (define_insn "vpshrdv_<mode>"
25162 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25163 (unspec:VI248_AVX512VL
25164 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
25165 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
25166 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
25167 UNSPEC_VPSHRDV))]
25168 "TARGET_AVX512VBMI2"
25169 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
25170 [(set_attr ("prefix") ("evex"))
25171 (set_attr "mode" "<sseinsnmode>")])
25172
;; Merge-masking: masked-off elements keep the accumulator (match_dup 1).
25173 (define_insn "vpshrdv_<mode>_mask"
25174 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25175 (vec_merge:VI248_AVX512VL
25176 (unspec:VI248_AVX512VL
25177 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
25178 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
25179 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
25180 UNSPEC_VPSHRDV)
25181 (match_dup 1)
25182 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25183 "TARGET_AVX512VBMI2"
25184 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25185 [(set_attr ("prefix") ("evex"))
25186 (set_attr "mode" "<sseinsnmode>")])
25187
;; Zero-mask entry point: forwards to _maskz_1 with a const0 merge value.
25188 (define_expand "vpshrdv_<mode>_maskz"
25189 [(match_operand:VI248_AVX512VL 0 "register_operand")
25190 (match_operand:VI248_AVX512VL 1 "register_operand")
25191 (match_operand:VI248_AVX512VL 2 "register_operand")
25192 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
25193 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25194 "TARGET_AVX512VBMI2"
25195 {
25196 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
25197 operands[2], operands[3],
25198 CONST0_RTX (<MODE>mode),
25199 operands[4]));
25200 DONE;
25201 })
25202
;; Zero-masking insn: masked-off elements become zero.
25203 (define_insn "vpshrdv_<mode>_maskz_1"
25204 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25205 (vec_merge:VI248_AVX512VL
25206 (unspec:VI248_AVX512VL
25207 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
25208 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
25209 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
25210 UNSPEC_VPSHRDV)
25211 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
25212 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
25213 "TARGET_AVX512VBMI2"
25214 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
25215 [(set_attr ("prefix") ("evex"))
25216 (set_attr "mode" "<sseinsnmode>")])
25217
;; Left-shift counterpart of vpshrdv.
25218 (define_insn "vpshldv_<mode>"
25219 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25220 (unspec:VI248_AVX512VL
25221 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
25222 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
25223 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
25224 UNSPEC_VPSHLDV))]
25225 "TARGET_AVX512VBMI2"
25226 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
25227 [(set_attr ("prefix") ("evex"))
25228 (set_attr "mode" "<sseinsnmode>")])
25229
;; Merge-masking form of vpshldv.
25230 (define_insn "vpshldv_<mode>_mask"
25231 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25232 (vec_merge:VI248_AVX512VL
25233 (unspec:VI248_AVX512VL
25234 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
25235 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
25236 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
25237 UNSPEC_VPSHLDV)
25238 (match_dup 1)
25239 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25240 "TARGET_AVX512VBMI2"
25241 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25242 [(set_attr ("prefix") ("evex"))
25243 (set_attr "mode" "<sseinsnmode>")])
25244
;; Zero-mask entry point for vpshldv; mirrors vpshrdv_<mode>_maskz.
25245 (define_expand "vpshldv_<mode>_maskz"
25246 [(match_operand:VI248_AVX512VL 0 "register_operand")
25247 (match_operand:VI248_AVX512VL 1 "register_operand")
25248 (match_operand:VI248_AVX512VL 2 "register_operand")
25249 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
25250 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25251 "TARGET_AVX512VBMI2"
25252 {
25253 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
25254 operands[2], operands[3],
25255 CONST0_RTX (<MODE>mode),
25256 operands[4]));
25257 DONE;
25258 })
25259
;; Zero-masking insn for vpshldv.
25260 (define_insn "vpshldv_<mode>_maskz_1"
25261 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
25262 (vec_merge:VI248_AVX512VL
25263 (unspec:VI248_AVX512VL
25264 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
25265 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
25266 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
25267 UNSPEC_VPSHLDV)
25268 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
25269 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
25270 "TARGET_AVX512VBMI2"
25271 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
25272 [(set_attr ("prefix") ("evex"))
25273 (set_attr "mode" "<sseinsnmode>")])
25274
;; VNNI dot-product accumulate, unsigned-byte x signed-byte -> dword
;; (vpdpbusd).  Layout repeated for each VNNI op below: a V16SI-only
;; EVEX insn (AVX512VNNI), a VL/AVX-VNNI dual-alternative insn where
;; the first alternative forces the VEX encoding via %{vex%}, and
;; merge-/zero-mask forms with the usual const0 trampoline expand.

25275 (define_insn "vpdpbusd_v16si"
25276 [(set (match_operand:V16SI 0 "register_operand" "=v")
25277 (unspec:V16SI
25278 [(match_operand:V16SI 1 "register_operand" "0")
25279 (match_operand:V16SI 2 "register_operand" "v")
25280 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
25281 UNSPEC_VPMADDUBSWACCD))]
25282 "TARGET_AVX512VNNI"
25283 "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
25284 [(set_attr ("prefix") ("evex"))])
25285
;; 128/256-bit form, available from either AVX-VNNI or AVX512VNNI+VL.
25286 (define_insn "vpdpbusd_<mode>"
25287 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
25288 (unspec:VI4_AVX2
25289 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
25290 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
25291 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
25292 UNSPEC_VPMADDUBSWACCD))]
25293 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
25294 "@
25295 %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
25296 vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
25297 [(set_attr ("prefix") ("vex,evex"))
25298 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
25299
;; Merge-masking: masked-off lanes keep the accumulator (match_dup 1).
25300 (define_insn "vpdpbusd_<mode>_mask"
25301 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25302 (vec_merge:VI4_AVX512VL
25303 (unspec:VI4_AVX512VL
25304 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25305 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25306 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25307 UNSPEC_VPMADDUBSWACCD)
25308 (match_dup 1)
25309 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25310 "TARGET_AVX512VNNI"
25311 "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25312 [(set_attr ("prefix") ("evex"))])
25313
;; Zero-mask entry point: injects the const0 merge operand.
25314 (define_expand "vpdpbusd_<mode>_maskz"
25315 [(match_operand:VI4_AVX512VL 0 "register_operand")
25316 (match_operand:VI4_AVX512VL 1 "register_operand")
25317 (match_operand:VI4_AVX512VL 2 "register_operand")
25318 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
25319 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25320 "TARGET_AVX512VNNI"
25321 {
25322 emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
25323 operands[2], operands[3],
25324 CONST0_RTX (<MODE>mode),
25325 operands[4]));
25326 DONE;
25327 })
25328
;; Zero-masking insn.
25329 (define_insn "vpdpbusd_<mode>_maskz_1"
25330 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25331 (vec_merge:VI4_AVX512VL
25332 (unspec:VI4_AVX512VL
25333 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25334 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25335 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
25336 ] UNSPEC_VPMADDUBSWACCD)
25337 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
25338 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
25339 "TARGET_AVX512VNNI"
25340 "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
25341 [(set_attr ("prefix") ("evex"))])
25342
;; Saturating variant of vpdpbusd (vpdpbusds); same five-pattern layout.

25343 (define_insn "vpdpbusds_v16si"
25344 [(set (match_operand:V16SI 0 "register_operand" "=v")
25345 (unspec:V16SI
25346 [(match_operand:V16SI 1 "register_operand" "0")
25347 (match_operand:V16SI 2 "register_operand" "v")
25348 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
25349 UNSPEC_VPMADDUBSWACCSSD))]
25350 "TARGET_AVX512VNNI"
25351 "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
25352 [(set_attr ("prefix") ("evex"))])
25353
;; 128/256-bit form (AVX-VNNI or AVX512VNNI+VL); first alternative
;; forces VEX encoding.
25354 (define_insn "vpdpbusds_<mode>"
25355 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
25356 (unspec:VI4_AVX2
25357 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
25358 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
25359 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
25360 UNSPEC_VPMADDUBSWACCSSD))]
25361 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
25362 "@
25363 %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
25364 vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
25365 [(set_attr ("prefix") ("vex,evex"))
25366 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
25367
;; Merge-masking form.
25368 (define_insn "vpdpbusds_<mode>_mask"
25369 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25370 (vec_merge:VI4_AVX512VL
25371 (unspec:VI4_AVX512VL
25372 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25373 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25374 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25375 UNSPEC_VPMADDUBSWACCSSD)
25376 (match_dup 1)
25377 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25378 "TARGET_AVX512VNNI"
25379 "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25380 [(set_attr ("prefix") ("evex"))])
25381
;; Zero-mask entry point (const0 trampoline).
25382 (define_expand "vpdpbusds_<mode>_maskz"
25383 [(match_operand:VI4_AVX512VL 0 "register_operand")
25384 (match_operand:VI4_AVX512VL 1 "register_operand")
25385 (match_operand:VI4_AVX512VL 2 "register_operand")
25386 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
25387 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25388 "TARGET_AVX512VNNI"
25389 {
25390 emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
25391 operands[2], operands[3],
25392 CONST0_RTX (<MODE>mode),
25393 operands[4]));
25394 DONE;
25395 })
25396
;; Zero-masking insn.
25397 (define_insn "vpdpbusds_<mode>_maskz_1"
25398 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25399 (vec_merge:VI4_AVX512VL
25400 (unspec:VI4_AVX512VL
25401 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25402 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25403 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25404 UNSPEC_VPMADDUBSWACCSSD)
25405 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
25406 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
25407 "TARGET_AVX512VNNI"
25408 "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
25409 [(set_attr ("prefix") ("evex"))])
25410
;; VNNI word dot-product accumulate (vpdpwssd); same five-pattern
;; layout as vpdpbusd above.

25411 (define_insn "vpdpwssd_v16si"
25412 [(set (match_operand:V16SI 0 "register_operand" "=v")
25413 (unspec:V16SI
25414 [(match_operand:V16SI 1 "register_operand" "0")
25415 (match_operand:V16SI 2 "register_operand" "v")
25416 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
25417 UNSPEC_VPMADDWDACCD))]
25418 "TARGET_AVX512VNNI"
25419 "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
25420 [(set_attr ("prefix") ("evex"))])
25421
;; 128/256-bit form (AVX-VNNI or AVX512VNNI+VL).
25422 (define_insn "vpdpwssd_<mode>"
25423 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
25424 (unspec:VI4_AVX2
25425 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
25426 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
25427 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
25428 UNSPEC_VPMADDWDACCD))]
25429 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
25430 "@
25431 %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
25432 vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
25433 [(set_attr ("prefix") ("vex,evex"))
25434 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
25435
;; Merge-masking form.
25436 (define_insn "vpdpwssd_<mode>_mask"
25437 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25438 (vec_merge:VI4_AVX512VL
25439 (unspec:VI4_AVX512VL
25440 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25441 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25442 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25443 UNSPEC_VPMADDWDACCD)
25444 (match_dup 1)
25445 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25446 "TARGET_AVX512VNNI"
25447 "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25448 [(set_attr ("prefix") ("evex"))])
25449
;; Zero-mask entry point (const0 trampoline).
25450 (define_expand "vpdpwssd_<mode>_maskz"
25451 [(match_operand:VI4_AVX512VL 0 "register_operand")
25452 (match_operand:VI4_AVX512VL 1 "register_operand")
25453 (match_operand:VI4_AVX512VL 2 "register_operand")
25454 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
25455 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25456 "TARGET_AVX512VNNI"
25457 {
25458 emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
25459 operands[2], operands[3],
25460 CONST0_RTX (<MODE>mode),
25461 operands[4]));
25462 DONE;
25463 })
25464
;; Zero-masking insn.
25465 (define_insn "vpdpwssd_<mode>_maskz_1"
25466 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25467 (vec_merge:VI4_AVX512VL
25468 (unspec:VI4_AVX512VL
25469 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25470 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25471 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25472 UNSPEC_VPMADDWDACCD)
25473 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
25474 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
25475 "TARGET_AVX512VNNI"
25476 "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
25477 [(set_attr ("prefix") ("evex"))])
25478
;; Saturating variant of vpdpwssd (vpdpwssds); same five-pattern layout.

25479 (define_insn "vpdpwssds_v16si"
25480 [(set (match_operand:V16SI 0 "register_operand" "=v")
25481 (unspec:V16SI
25482 [(match_operand:V16SI 1 "register_operand" "0")
25483 (match_operand:V16SI 2 "register_operand" "v")
25484 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
25485 UNSPEC_VPMADDWDACCSSD))]
25486 "TARGET_AVX512VNNI"
25487 "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
25488 [(set_attr ("prefix") ("evex"))])
25489
;; 128/256-bit form (AVX-VNNI or AVX512VNNI+VL).
25490 (define_insn "vpdpwssds_<mode>"
25491 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
25492 (unspec:VI4_AVX2
25493 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
25494 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
25495 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
25496 UNSPEC_VPMADDWDACCSSD))]
25497 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
25498 "@
25499 %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
25500 vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
25501 [(set_attr ("prefix") ("vex,evex"))
25502 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
25503
;; Merge-masking form.
25504 (define_insn "vpdpwssds_<mode>_mask"
25505 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25506 (vec_merge:VI4_AVX512VL
25507 (unspec:VI4_AVX512VL
25508 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25509 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25510 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25511 UNSPEC_VPMADDWDACCSSD)
25512 (match_dup 1)
25513 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25514 "TARGET_AVX512VNNI"
25515 "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25516 [(set_attr ("prefix") ("evex"))])
25517
;; Zero-mask entry point (const0 trampoline).
25518 (define_expand "vpdpwssds_<mode>_maskz"
25519 [(match_operand:VI4_AVX512VL 0 "register_operand")
25520 (match_operand:VI4_AVX512VL 1 "register_operand")
25521 (match_operand:VI4_AVX512VL 2 "register_operand")
25522 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
25523 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25524 "TARGET_AVX512VNNI"
25525 {
25526 emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
25527 operands[2], operands[3],
25528 CONST0_RTX (<MODE>mode),
25529 operands[4]));
25530 DONE;
25531 })
25532
;; Zero-masking insn.
25533 (define_insn "vpdpwssds_<mode>_maskz_1"
25534 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25535 (vec_merge:VI4_AVX512VL
25536 (unspec:VI4_AVX512VL
25537 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25538 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25539 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25540 UNSPEC_VPMADDWDACCSSD)
25541 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
25542 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
25543 "TARGET_AVX512VNNI"
25544 "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
25545 [(set_attr ("prefix") ("evex"))])
25546
;; VAES wide AES round instructions: the VI1_AVX512VL_F iterator lifts
;; the four AES round ops to 256/512-bit vectors (multiple 128-bit
;; lanes per instruction).  All four patterns are identical apart from
;; the unspec/mnemonic.

25547 (define_insn "vaesdec_<mode>"
25548 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25549 (unspec:VI1_AVX512VL_F
25550 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25551 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25552 UNSPEC_VAESDEC))]
25553 "TARGET_VAES"
25554 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
25555 )
25556
25557 (define_insn "vaesdeclast_<mode>"
25558 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25559 (unspec:VI1_AVX512VL_F
25560 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25561 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25562 UNSPEC_VAESDECLAST))]
25563 "TARGET_VAES"
25564 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
25565 )
25566
25567 (define_insn "vaesenc_<mode>"
25568 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25569 (unspec:VI1_AVX512VL_F
25570 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25571 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25572 UNSPEC_VAESENC))]
25573 "TARGET_VAES"
25574 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
25575 )
25576
25577 (define_insn "vaesenclast_<mode>"
25578 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25579 (unspec:VI1_AVX512VL_F
25580 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25581 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25582 UNSPEC_VAESENCLAST))]
25583 "TARGET_VAES"
25584 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
25585 )
25586
;; VPCLMULQDQ: wide carry-less multiply; the 8-bit immediate selects
;; which quadword of each source lane participates.
25587 (define_insn "vpclmulqdq_<mode>"
25588 [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
25589 (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
25590 (match_operand:VI8_FVL 2 "vector_operand" "vm")
25591 (match_operand:SI 3 "const_0_to_255_operand" "n")]
25592 UNSPEC_VPCLMULQDQ))]
25593 "TARGET_VPCLMULQDQ"
25594 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
25595 [(set_attr "mode" "DI")])
25596
;; BITALG vpshufbitqmb: gathers bits of operand 1 selected by operand 2
;; into a mask register; <mask_scalar_merge_*> substs supply the
;; mask-ANDed variant of the k-register result.
25597 (define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
25598 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
25599 (unspec:<avx512fmaskmode>
25600 [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
25601 (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
25602 UNSPEC_VPSHUFBIT))]
25603 "TARGET_AVX512BITALG"
25604 "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
25605 [(set_attr "prefix" "evex")
25606 (set_attr "mode" "<sseinsnmode>")])
25607
;; Dword/qword integer vector modes accepted by the VP2INTERSECT
;; patterns; the 128/256-bit variants additionally require AVX512VL.
25608 (define_mode_iterator VI48_AVX512VP2VL
25609 [V8DI
25610 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
25611 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
25612
;; Paired mask-register modes: vp2intersect writes a pair of mask
;; registers, modeled as a single double-width mask mode.
25613 (define_mode_iterator MASK_DWI [P2QI P2HI])
25614
;; Move expander for the paired mask modes.  Standard mov semantics:
;; legitimize mem-to-mem moves by forcing the source into a register.
25615 (define_expand "mov<mode>"
25616 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
25617 (match_operand:MASK_DWI 1 "nonimmediate_operand"))]
25618 "TARGET_AVX512VP2INTERSECT"
25619 {
25620 if (MEM_P (operands[0]) && MEM_P (operands[1]))
25621 operands[1] = force_reg (<MODE>mode, operands[1]);
25622 })
25623
;; Internal move for paired mask modes.  No single instruction moves a
;; mask pair, so after reload the move is split into two single-mask
;; moves via split_double_mode (operands 2/3 are created by the split).
25624 (define_insn_and_split "*mov<mode>_internal"
25625 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
25626 (match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
25627 "TARGET_AVX512VP2INTERSECT
25628 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
25629 "#"
25630 "&& reload_completed"
25631 [(set (match_dup 0) (match_dup 1))
25632 (set (match_dup 2) (match_dup 3))]
25633 {
25634 split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
25635 })
25636
;; AVX512VP2INTERSECT: compute intersection indicators between the
;; elements of operands 1 and 2.  The destination P2QI models the
;; mask-register pair the instruction writes.
;; NB: use the canonical (set_attr "name" "value") form; the previous
;; (set_attr ("prefix") ("evex")) spelling was inconsistent with every
;; other pattern in this file.
(define_insn "avx512vp2intersect_2intersect<mode>"
  [(set (match_operand:P2QI 0 "register_operand" "=k")
	(unspec:P2QI
	  [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
	   (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
	  UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])
25646
;; 512-bit VP2INTERSECT on V16SI; destination P2HI models the pair of
;; 16-bit mask registers written.  set_attr normalized to the canonical
;; (set_attr "name" "value") form used throughout this file.
(define_insn "avx512vp2intersect_2intersectv16si"
  [(set (match_operand:P2HI 0 "register_operand" "=k")
	(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
		      (match_operand:V16SI 2 "vector_operand" "vm")]
	  UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])
25655
;; BF16 vector modes (HImode elements hold bfloat16 values); 128/256-bit
;; variants require AVX512VL.
25656 (define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
25657 ;; BF16 mode -> SF mode with half as many elements (two BF16 inputs
25658 ;; per SF source pair in vcvtne2ps2bf16).
25658 (define_mode_attr bf16_cvt_2sf
25659 [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
25660 ;; SF mode -> BF16 result mode of vcvtneps2bf16 (V4SF produces only
25661 ;; the low half of a V8HI).
25661 (define_mode_attr sf_cvt_bf16
25662 [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
25663 ;; SF mode -> BF16 mode with twice as many elements (the packed-pair
25664 ;; source operands of vdpbf16ps).
25664 (define_mode_attr sf_bf16
25665 [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
25666
;; Zero-masked vcvtne2ps2bf16 builtin expander: delegates to the _mask
;; pattern with an all-zero merge operand (maskz == merge with zeros).
25667 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
25668 [(match_operand:BF16 0 "register_operand")
25669 (match_operand:<bf16_cvt_2sf> 1 "register_operand")
25670 (match_operand:<bf16_cvt_2sf> 2 "register_operand")
25671 (match_operand:<avx512fmaskmode> 3 "register_operand")]
25672 "TARGET_AVX512BF16"
25673 {
25674 emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
25675 operands[2], CONST0_RTX(<MODE>mode), operands[3]));
25676 DONE;
25677 })
25678
;; vcvtne2ps2bf16: convert two SF vectors (operands 1 and 2) into one
;; packed BF16 vector.  <mask_name> subst adds the masked variants.
25679 (define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
25680 [(set (match_operand:BF16 0 "register_operand" "=v")
25681 (unspec:BF16
25682 [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
25683 (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
25684 UNSPEC_VCVTNE2PS2BF16))]
25685 "TARGET_AVX512BF16"
25686 "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
25687
;; Zero-masked vcvtneps2bf16 builtin expander: delegates to the _mask
;; pattern with an all-zero merge operand.
25688 (define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
25689 [(match_operand:<sf_cvt_bf16> 0 "register_operand")
25690 (match_operand:VF1_AVX512VL 1 "register_operand")
25691 (match_operand:<avx512fmaskmode> 2 "register_operand")]
25692 "TARGET_AVX512BF16"
25693 {
25694 emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
25695 CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
25696 DONE;
25697 })
25698
;; vcvtneps2bf16: narrow one SF vector to BF16 (round-to-nearest-even
;; per the ISA name).  <mask_name> subst adds the masked variants.
25699 (define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
25700 [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
25701 (unspec:<sf_cvt_bf16>
25702 [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
25703 UNSPEC_VCVTNEPS2BF16))]
25704 "TARGET_AVX512BF16"
25705 "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
25706
;; Zero-masked vdpbf16ps builtin expander: delegates to the _maskz_1
;; pattern with a zero merge operand.  Operand 1 is the SF accumulator,
;; operands 2/3 the packed BF16 sources.
25707 (define_expand "avx512f_dpbf16ps_<mode>_maskz"
25708 [(match_operand:VF1_AVX512VL 0 "register_operand")
25709 (match_operand:VF1_AVX512VL 1 "register_operand")
25710 (match_operand:<sf_bf16> 2 "register_operand")
25711 (match_operand:<sf_bf16> 3 "register_operand")
25712 (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
25713 "TARGET_AVX512BF16"
25714 {
25715 emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
25716 operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
25717 DONE;
25718 })
25719
;; vdpbf16ps: BF16 pair dot-product accumulated into SF.  Operand 1 is
;; the accumulator and must match the destination (constraint "0");
;; <maskz_half_name> subst adds the zero-masking variant.
25720 (define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
25721 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
25722 (unspec:VF1_AVX512VL
25723 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
25724 (match_operand:<sf_bf16> 2 "register_operand" "v")
25725 (match_operand:<sf_bf16> 3 "register_operand" "v")]
25726 UNSPEC_VDPBF16PS))]
25727 "TARGET_AVX512BF16"
25728 "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
25729
;; Merge-masked vdpbf16ps: lanes where mask operand 4 is clear keep the
;; accumulator value (vec_merge with match_dup 1), written explicitly
;; since the generic mask subst cannot express merging with operand 1.
25730 (define_insn "avx512f_dpbf16ps_<mode>_mask"
25731 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
25732 (vec_merge:VF1_AVX512VL
25733 (unspec:VF1_AVX512VL
25734 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
25735 (match_operand:<sf_bf16> 2 "register_operand" "v")
25736 (match_operand:<sf_bf16> 3 "register_operand" "v")]
25737 UNSPEC_VDPBF16PS)
25738 (match_dup 1)
25739 (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
25740 "TARGET_AVX512BF16"
25741 "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
25742
25743 ;; KEYLOCKER
;; loadiwkey: load an internal wrapping key from xmm operands.  Pure
;; side effect (no RTL-visible result), hence unspec_volatile with no
;; SET.  Operand 2 is pinned to xmm0 ("Yz") and operand 3 to eax ("a")
;; as the ISA requires; flags are clobbered.
25744 (define_insn "loadiwkey"
25745 [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
25746 (match_operand:V2DI 1 "register_operand" "v")
25747 (match_operand:V2DI 2 "register_operand" "Yz")
25748 (match_operand:SI 3 "register_operand" "a")]
25749 UNSPECV_LOADIWKEY)
25750 (clobber (reg:CC FLAGS_REG))]
25751 "TARGET_KL"
25752 "loadiwkey\t{%0, %1|%1, %0}"
25753 [(set_attr "type" "other")])
25754
;; encodekey128 builtin expander.  Builds an 8-element PARALLEL matched
;; by the encodekey128_operation predicate:
;;   elt 0    : SET of operand 0 from the volatile unspec (key in xmm0)
;;   elts 1-3 : xmm0-xmm2 written by the instruction (opaque unspec)
;;   elts 4-6 : xmm4-xmm6 set to zero
;;   elt 7    : clobber of the flags register
25755 (define_expand "encodekey128u32"
25756 [(match_par_dup 2
25757 [(set (match_operand:SI 0 "register_operand")
25758 (unspec_volatile:SI
25759 [(match_operand:SI 1 "register_operand")
25760 (reg:V2DI XMM0_REG)]
25761 UNSPECV_ENCODEKEY128U32))])]
25762 "TARGET_KL"
25763 {
25764 rtx xmm_regs[7];
25765 rtx tmp_unspec;
25766 unsigned i;
25767
25768 /* parallel rtx for encodekey128 predicate */
25769 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));
25770
25771 for (i = 0; i < 7; i++)
25772 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
25773
25774 tmp_unspec
25775 = gen_rtx_UNSPEC_VOLATILE (SImode,
25776 gen_rtvec (2, operands[1], xmm_regs[0]),
25777 UNSPECV_ENCODEKEY128U32);
25778
25779 XVECEXP (operands[2], 0, 0)
25780 = gen_rtx_SET (operands[0], tmp_unspec);
25781
25782 /* Opaque value for the xmm outputs; the same unspec rtx is shared
25783 by all of them.  */
25782 tmp_unspec
25783 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
25784 gen_rtvec (1, const0_rtx),
25785 UNSPECV_ENCODEKEY128U32);
25786
25787 for (i = 0; i < 3; i++)
25788 XVECEXP (operands[2], 0, i + 1)
25789 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
25790
25791 for (i = 4; i < 7; i++)
25792 XVECEXP (operands[2], 0, i)
25793 = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
25794
25795 XVECEXP (operands[2], 0, 7)
25796 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
25797 })
25798
;; encodekey128 instruction; the rest of the PARALLEL built by the
;; expander above is matched by the encodekey128_operation predicate.
25799 (define_insn "*encodekey128u32"
25800 [(match_parallel 2 "encodekey128_operation"
25801 [(set (match_operand:SI 0 "register_operand" "=r")
25802 (unspec_volatile:SI
25803 [(match_operand:SI 1 "register_operand" "r")
25804 (reg:V2DI XMM0_REG)]
25805 UNSPECV_ENCODEKEY128U32))])]
25806 "TARGET_KL"
25807 "encodekey128\t{%1, %0|%0, %1}"
25808 [(set_attr "type" "other")])
25809
;; encodekey256 builtin expander.  Like encodekey128u32 above but the
;; input key spans xmm0/xmm1, so the PARALLEL has 9 elements:
;;   elt 0    : SET of operand 0 from the volatile unspec
;;   elts 1-4 : xmm0-xmm3 written by the instruction (opaque unspec)
;;   elts 5-7 : xmm4-xmm6 set to zero
;;   elt 8    : clobber of the flags register
25810 (define_expand "encodekey256u32"
25811 [(match_par_dup 2
25812 [(set (match_operand:SI 0 "register_operand")
25813 (unspec_volatile:SI
25814 [(match_operand:SI 1 "register_operand")
25815 (reg:V2DI XMM0_REG)
25816 (reg:V2DI XMM1_REG)]
25817 UNSPECV_ENCODEKEY256U32))])]
25818 "TARGET_KL"
25819 {
25820 rtx xmm_regs[7];
25821 rtx tmp_unspec;
25822 unsigned i;
25823
25824 /* parallel rtx for encodekey256 predicate */
25825 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
25826
25827 for (i = 0; i < 7; i++)
25828 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
25829
25830 tmp_unspec
25831 = gen_rtx_UNSPEC_VOLATILE (SImode,
25832 gen_rtvec (3, operands[1],
25833 xmm_regs[0], xmm_regs[1]),
25834 UNSPECV_ENCODEKEY256U32);
25835
25836 XVECEXP (operands[2], 0, 0)
25837 = gen_rtx_SET (operands[0], tmp_unspec);
25838
25839 /* Opaque value for the xmm outputs, shared by all of them.  */
25839 tmp_unspec
25840 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
25841 gen_rtvec (1, const0_rtx),
25842 UNSPECV_ENCODEKEY256U32);
25843
25844 for (i = 0; i < 4; i++)
25845 XVECEXP (operands[2], 0, i + 1)
25846 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
25847
25848 for (i = 4; i < 7; i++)
25849 XVECEXP (operands[2], 0, i + 1)
25850 = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
25851
25852 XVECEXP (operands[2], 0, 8)
25853 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
25854 })
25855
;; encodekey256 instruction; the rest of the PARALLEL built by the
;; expander above is matched by the encodekey256_operation predicate.
25856 (define_insn "*encodekey256u32"
25857 [(match_parallel 2 "encodekey256_operation"
25858 [(set (match_operand:SI 0 "register_operand" "=r")
25859 (unspec_volatile:SI
25860 [(match_operand:SI 1 "register_operand" "r")
25861 (reg:V2DI XMM0_REG)
25862 (reg:V2DI XMM1_REG)]
25863 UNSPECV_ENCODEKEY256U32))])]
25864 "TARGET_KL"
25865 "encodekey256\t{%1, %0|%0, %1}"
25866 [(set_attr "type" "other")])
25867
;; Key Locker AES enc/dec unspec codes and the corresponding mnemonic
;; fragments ("aes" + variant gives the instruction name).
25868 (define_int_iterator AESDECENCKL
25869 [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
25870 UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])
25871
25872 (define_int_attr aesklvariant
25873 [(UNSPECV_AESDEC128KLU8 "dec128kl")
25874 (UNSPECV_AESDEC256KLU8 "dec256kl")
25875 (UNSPECV_AESENC128KLU8 "enc128kl")
25876 (UNSPECV_AESENC256KLU8 "enc256kl")])
25877
;; Key Locker single-block AES encrypt/decrypt.  Operand 1 is the data
;; block (in/out, constraint "0"), operand 2 the key handle in memory
;; (BLK).  The instruction also sets ZF to report handle validity,
;; modeled by the second unspec_volatile on the flags register.
25878 (define_insn "aes<aesklvariant>u8"
25879 [(set (match_operand:V2DI 0 "register_operand" "=v")
25880 (unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
25881 (match_operand:BLK 2 "memory_operand" "m")]
25882 AESDECENCKL))
25883 (set (reg:CCZ FLAGS_REG)
25884 (unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
25885 "TARGET_KL"
25886 "aes<aesklvariant>\t{%2, %0|%0, %2}"
25887 [(set_attr "type" "other")])
25888
;; WIDEKL (eight-block) Key Locker unspec codes, with both the
;; lower-case mnemonic fragment and the upper-case unspec-name fragment
;; (the latter is pasted into UNSPECV_<...> inside the expander below).
25889 (define_int_iterator AESDECENCWIDEKL
25890 [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
25891 UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])
25892
25893 (define_int_attr aeswideklvariant
25894 [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
25895 (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
25896 (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
25897 (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])
25898
25899 (define_int_attr AESWIDEKLVARIANT
25900 [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
25901 (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
25902 (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
25903 (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
25904
;; WIDEKL builtin expander.  Builds a 9-element PARALLEL matched by the
;; aeswidekl_operation predicate:
;;   elt 0    : ZF result of the handle check (CCZ unspec on FLAGS_REG)
;;   elts 1-8 : xmm0-xmm7 each updated in place (opaque unspec over the
;;              register itself, as the instruction processes all eight
;;              blocks)
25905 (define_expand "aes<aeswideklvariant>u8"
25906 [(match_par_dup 1
25907 [(set (reg:CCZ FLAGS_REG)
25908 (unspec_volatile:CCZ
25909 [(match_operand:BLK 0 "memory_operand")]
25910 AESDECENCWIDEKL))])]
25911 "TARGET_WIDEKL"
25912 {
25913 rtx tmp_unspec;
25914 unsigned i;
25915
25916 /* parallel rtx for widekl predicate */
25917 operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
25918
25919 tmp_unspec
25920 = gen_rtx_UNSPEC_VOLATILE (CCZmode,
25921 gen_rtvec (1, operands[0]),
25922 UNSPECV_<AESWIDEKLVARIANT>);
25923
25924 XVECEXP (operands[1], 0, 0)
25925 = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
25926 tmp_unspec);
25927
25928 for (i = 0; i < 8; i++)
25929 {
25930 rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
25931
25932 tmp_unspec
25933 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
25934 gen_rtvec (1, xmm_reg),
25935 UNSPECV_<AESWIDEKLVARIANT>);
25936 XVECEXP (operands[1], 0, i + 1)
25937 = gen_rtx_SET (xmm_reg, tmp_unspec);
25938 }
25939 })
25940
;; WIDEKL instruction; the xmm0-xmm7 SETs built by the expander above
;; are matched by the aeswidekl_operation predicate.  Operand 0 is the
;; key handle in memory.
25941 (define_insn "*aes<aeswideklvariant>u8"
25942 [(match_parallel 1 "aeswidekl_operation"
25943 [(set (reg:CCZ FLAGS_REG)
25944 (unspec_volatile:CCZ
25945 [(match_operand:BLK 0 "memory_operand" "m")]
25946 AESDECENCWIDEKL))])]
25947 "TARGET_WIDEKL"
25948 "aes<aeswideklvariant>\t{%0}"
25949 [(set_attr "type" "other")])
25950
25951 ;; Modes handled by broadcast patterns. NB: Allow V64QI and V32HI with
25952 ;; TARGET_AVX512F since ix86_expand_vector_init_duplicate can expand
25953 ;; without TARGET_AVX512BW which is used by memset vector broadcast
25954 ;; expander to XI with:
25955 ;; vmovd %edi, %xmm15
25956 ;; vpbroadcastb %xmm15, %ymm15
25957 ;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15
25958
;; DImode-element modes additionally require TARGET_64BIT since the
;; scalar source comes from a 64-bit GPR.
25959 (define_mode_iterator INT_BROADCAST_MODE
25960 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
25961 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
25962 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
25963 (V8DI "TARGET_AVX512F && TARGET_64BIT")
25964 (V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
25965
25966 ;; Broadcast from an integer. NB: Enable broadcast only if we can move
25967 ;; from GPR to SSE register directly.
;; The expansion is done entirely in ix86_expand_vector_init_duplicate;
;; it must succeed for every mode the iterator admits, hence the
;; gcc_unreachable on failure.
25968 (define_expand "vec_duplicate<mode>"
25969 [(set (match_operand:INT_BROADCAST_MODE 0 "register_operand")
25970 (vec_duplicate:INT_BROADCAST_MODE
25971 (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))]
25972 "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
25973 {
25974 if (!ix86_expand_vector_init_duplicate (false,
25975 GET_MODE (operands[0]),
25976 operands[0],
25977 operands[1]))
25978 gcc_unreachable ();
25979 DONE;
25980 })
This page took 1.198735 seconds and 5 git commands to generate.