]> gcc.gnu.org Git - gcc.git/blob - gcc/config/i386/i386.md
AVR: Update weblinks to AVR-LibC.
[gcc.git] / gcc / config / i386 / i386.md
1 ;; GCC machine description for IA-32 and x86-64.
2 ;; Copyright (C) 1988-2024 Free Software Foundation, Inc.
3 ;; Mostly by William Schelter.
4 ;; x86_64 support added by Jan Hubicka
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>. */
21 ;;
22 ;; The original PO technology requires these to be ordered by speed,
23 ;; so that assigner will pick the fastest.
24 ;;
25 ;; See file "rtl.def" for documentation on define_insn, match_*, et al.
26 ;;
27 ;; The special asm out single letter directives following a '%' are:
28 ;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
29 ;; C -- print opcode suffix for set/cmov insn.
30 ;; c -- like C, but print reversed condition
31 ;; F,f -- likewise, but for floating-point.
32 ;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
33 ;; otherwise nothing
34 ;; R -- print the prefix for register names.
35 ;; z -- print the opcode suffix for the size of the current operand.
36 ;; Z -- likewise, with special suffixes for x87 instructions.
37 ;; * -- print a star (in certain assembler syntax)
38 ;; A -- print an absolute memory reference.
39 ;; E -- print address with DImode register names if TARGET_64BIT.
40 ;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
41 ;; s -- print a shift double count, followed by the assemblers argument
42 ;; delimiter.
43 ;; b -- print the QImode name of the register for the indicated operand.
44 ;; %b0 would print %al if operands[0] is reg 0.
45 ;; w -- likewise, print the HImode name of the register.
46 ;; k -- likewise, print the SImode name of the register.
47 ;; q -- likewise, print the DImode name of the register.
48 ;; x -- likewise, print the V4SFmode name of the register.
49 ;; t -- likewise, print the V8SFmode name of the register.
50 ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
51 ;; y -- print "st(0)" instead of "st" as a register.
52 ;; d -- print duplicated register operand for AVX instruction.
53 ;; D -- print condition for SSE cmp instruction.
54 ;; P -- if PIC, print an @PLT suffix.
55 ;; p -- print raw symbol name.
56 ;; X -- don't print any sort of PIC '@' suffix for a symbol.
57 ;; & -- print some in-use local-dynamic symbol name.
58 ;; H -- print a memory address offset by 8; used for sse high-parts
59 ;; K -- print HLE lock prefix
60 ;; Y -- print condition for XOP pcom* instruction.
61 ;; + -- print a branch hint as 'cs' or 'ds' prefix
62 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
63 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
64 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
65 ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
66
;; Codes of the "unspec" enumeration.  The entries are grouped by the
;; feature they support; each group is introduced by its own comment.
67 (define_c_enum "unspec" [
68 ;; Relocation specifiers
69 UNSPEC_GOT
70 UNSPEC_GOTOFF
71 UNSPEC_GOTPCREL
72 UNSPEC_GOTTPOFF
73 UNSPEC_TPOFF
74 UNSPEC_NTPOFF
75 UNSPEC_DTPOFF
76 UNSPEC_GOTNTPOFF
77 UNSPEC_INDNTPOFF
78 UNSPEC_PLTOFF
79 UNSPEC_MACHOPIC_OFFSET
80 UNSPEC_PCREL
81 UNSPEC_SIZEOF
82
83 ;; Prologue support
84 UNSPEC_STACK_ALLOC
85 UNSPEC_SET_GOT
86 UNSPEC_SET_RIP
87 UNSPEC_SET_GOT_OFFSET
88 UNSPEC_MEMORY_BLOCKAGE
89 UNSPEC_PROBE_STACK
90
91 ;; TLS support
92 UNSPEC_TP
93 UNSPEC_TLS_GD
94 UNSPEC_TLS_LD_BASE
95 UNSPEC_TLSDESC
96 UNSPEC_TLS_IE_SUN
97
98 ;; Other random patterns
99 UNSPEC_SCAS
100 UNSPEC_FNSTSW
101 UNSPEC_SAHF
102 UNSPEC_NOTRAP
103 UNSPEC_PARITY
104 UNSPEC_FSTCW
105 UNSPEC_REP
106 UNSPEC_LD_MPIC ; load_macho_picbase
107 UNSPEC_TRUNC_NOOP
108 UNSPEC_DIV_ALREADY_SPLIT
109 UNSPEC_PAUSE
110 UNSPEC_LEA_ADDR
111 UNSPEC_XBEGIN_ABORT
112 UNSPEC_STOS
113 UNSPEC_PEEPSIB
114 UNSPEC_INSN_FALSE_DEP
115 UNSPEC_SBB
116 UNSPEC_CC_NE
117 UNSPEC_STC
118 UNSPEC_PUSHFL
119 UNSPEC_POPFL
120 UNSPEC_OPTCOMX
121
122 ;; For SSE/MMX support:
123 UNSPEC_FIX_NOTRUNC
124 UNSPEC_MASKMOV
125 UNSPEC_MOVCC_MASK
126 UNSPEC_MOVMSK
127 UNSPEC_INSERTPS
128 UNSPEC_BLENDV
129 UNSPEC_PSHUFB
130 UNSPEC_XOP_PERMUTE
131 UNSPEC_RCP
132 UNSPEC_RSQRT
133 UNSPEC_PSADBW
134
135 ;; Different from generic us_truncate RTX
136 ;; as it does unsigned saturation of signed source.
137 UNSPEC_US_TRUNCATE
138
139 ;; For AVX/AVX512F support
140 UNSPEC_SCALEF
141 UNSPEC_PCMP
142 UNSPEC_CVTBFSF
143 UNSPEC_COMX
144
145 ;; Generic math support
146 UNSPEC_IEEE_MIN ; not commutative
147 UNSPEC_IEEE_MAX ; not commutative
148
149 ;; x87 Floating point
150 UNSPEC_SIN
151 UNSPEC_COS
152 UNSPEC_FPATAN
153 UNSPEC_FYL2X
154 UNSPEC_FYL2XP1
155 UNSPEC_FRNDINT
156 UNSPEC_FIST
157 UNSPEC_F2XM1
158 UNSPEC_TAN
159 UNSPEC_FXAM
160
161 ;; x87 Rounding
162 UNSPEC_FRNDINT_ROUNDEVEN
163 UNSPEC_FRNDINT_FLOOR
164 UNSPEC_FRNDINT_CEIL
165 UNSPEC_FRNDINT_TRUNC
166 UNSPEC_FIST_FLOOR
167 UNSPEC_FIST_CEIL
168
169 ;; x87 Double output FP
170 UNSPEC_SINCOS_COS
171 UNSPEC_SINCOS_SIN
172 UNSPEC_XTRACT_FRACT
173 UNSPEC_XTRACT_EXP
174 UNSPEC_FSCALE_FRACT
175 UNSPEC_FSCALE_EXP
176 UNSPEC_FPREM_F
177 UNSPEC_FPREM_U
178 UNSPEC_FPREM1_F
179 UNSPEC_FPREM1_U
180
181 UNSPEC_C2_FLAG
182 UNSPEC_FXAM_MEM
183
184 ;; SSP patterns
185 UNSPEC_SP_SET
186 UNSPEC_SP_TEST
187
188 ;; For ROUND support
189 UNSPEC_ROUND
190
191 ;; For CRC32 support
192 UNSPEC_CRC32
193
194 ;; For LZCNT support
195 UNSPEC_LZCNT
196
197 ;; For BMI support
198 UNSPEC_TZCNT
199 UNSPEC_BEXTR
200
201 ;; For BMI2 support
202 UNSPEC_PDEP
203 UNSPEC_PEXT
204
205 ;; IRET support
206 UNSPEC_INTERRUPT_RETURN
207
208 ;; For MOVDIRI and MOVDIR64B support
209 UNSPEC_MOVDIRI
210 UNSPEC_MOVDIR64B
211
212 ;; For insn_callee_abi:
213 UNSPEC_CALLEE_ABI
214
215 ;; For APX PUSH2/POP2 support
216 UNSPEC_APXPUSH2
217 UNSPEC_APXPOP2_LOW
218 UNSPEC_APXPOP2_HIGH
219
220 ;; For APX PPX support
221 UNSPEC_APX_PPX
222
223 ;; For APX CCMP support
224 ;; DFV = default flag value
225 UNSPEC_APX_DFV
226 ])
227
;; Codes of the "unspecv" enumeration.  The first group carries no group
;; comment; later groups are introduced by a comment naming the feature
;; they support.
228 (define_c_enum "unspecv" [
229 UNSPECV_UD2
230 UNSPECV_BLOCKAGE
231 UNSPECV_STACK_PROBE
232 UNSPECV_PROBE_STACK_RANGE
233 UNSPECV_ALIGN
234 UNSPECV_PROLOGUE_USE
235 UNSPECV_SPLIT_STACK_RETURN
236 UNSPECV_CLD
237 UNSPECV_NOPS
238 UNSPECV_RDTSC
239 UNSPECV_RDTSCP
240 UNSPECV_RDPMC
241 UNSPECV_LLWP_INTRINSIC
242 UNSPECV_SLWP_INTRINSIC
243 UNSPECV_LWPVAL_INTRINSIC
244 UNSPECV_LWPINS_INTRINSIC
245 UNSPECV_RDFSBASE
246 UNSPECV_RDGSBASE
247 UNSPECV_WRFSBASE
248 UNSPECV_WRGSBASE
249 UNSPECV_FXSAVE
250 UNSPECV_FXRSTOR
251 UNSPECV_FXSAVE64
252 UNSPECV_FXRSTOR64
253 UNSPECV_XSAVE
254 UNSPECV_XRSTOR
255 UNSPECV_XSAVE64
256 UNSPECV_XRSTOR64
257 UNSPECV_XSAVEOPT
258 UNSPECV_XSAVEOPT64
259 UNSPECV_XSAVES
260 UNSPECV_XRSTORS
261 UNSPECV_XSAVES64
262 UNSPECV_XRSTORS64
263 UNSPECV_XSAVEC
264 UNSPECV_XSAVEC64
265 UNSPECV_XGETBV
266 UNSPECV_XSETBV
267 UNSPECV_WBINVD
268 UNSPECV_WBNOINVD
269
270 ;; For atomic compound assignments.
271 UNSPECV_FNSTENV
272 UNSPECV_FLDENV
273 UNSPECV_FNSTSW
274 UNSPECV_FNCLEX
275
276 ;; For RDRAND support
277 UNSPECV_RDRAND
278
279 ;; For RDSEED support
280 UNSPECV_RDSEED
281
282 ;; For RTM support
283 UNSPECV_XBEGIN
284 UNSPECV_XEND
285 UNSPECV_XABORT
286 UNSPECV_XTEST
287
288 UNSPECV_NLGR
289
290 ;; For CLWB support
291 UNSPECV_CLWB
292
293 ;; For CLFLUSHOPT support
294 UNSPECV_CLFLUSHOPT
295
296 ;; For MONITORX and MWAITX support
297 UNSPECV_MONITORX
298 UNSPECV_MWAITX
299
300 ;; For CLZERO support
301 UNSPECV_CLZERO
302
303 ;; For RDPKRU and WRPKRU support
304 UNSPECV_PKU
305
306 ;; For RDPID support
307 UNSPECV_RDPID
308
309 ;; For CET support
310 UNSPECV_NOP_ENDBR
311 UNSPECV_NOP_RDSSP
312 UNSPECV_INCSSP
313 UNSPECV_SAVEPREVSSP
314 UNSPECV_RSTORSSP
315 UNSPECV_WRSS
316 UNSPECV_WRUSS
317 UNSPECV_SETSSBSY
318 UNSPECV_CLRSSBSY
319
320 ;; For TSXLDTRK support
321 UNSPECV_XSUSLDTRK
322 UNSPECV_XRESLDTRK
323
324 ;; For WAITPKG support
325 UNSPECV_UMWAIT
326 UNSPECV_UMONITOR
327 UNSPECV_TPAUSE
328
329 ;; For UINTR support
330 UNSPECV_CLUI
331 UNSPECV_STUI
332 UNSPECV_TESTUI
333 UNSPECV_SENDUIPI
334
335 ;; For CLDEMOTE support
336 UNSPECV_CLDEMOTE
337
338 ;; For Speculation Barrier support
339 UNSPECV_SPECULATION_BARRIER
340
341 UNSPECV_PTWRITE
342
343 ;; For ENQCMD and ENQCMDS support
344 UNSPECV_ENQCMD
345 UNSPECV_ENQCMDS
346
347 ;; For SERIALIZE support
348 UNSPECV_SERIALIZE
349
350 ;; For patchable area support
351 UNSPECV_PATCHABLE_AREA
352
353 ;; For HRESET support
354 UNSPECV_HRESET
355
356 ;; For PREFETCHI support
357 UNSPECV_PREFETCHI
358
359 ;; For USER_MSR support
360 UNSPECV_URDMSR
361 UNSPECV_UWRMSR
362
363 ;; For AMX-TILE
364 UNSPECV_LDTILECFG
365 UNSPECV_STTILECFG
366 ])
367
368 ;; Constants to represent rounding modes in the ROUND instruction.
;; Values 0x0-0x3 name the four rounding modes; ROUND_MXCSR (0x4) and
;; ROUND_NO_EXC (0x8) are distinct single-bit values.
;; NOTE(review): presumably the latter two are ORed into the mode -- confirm.
369 (define_constants
370 [(ROUND_ROUNDEVEN 0x0)
371 (ROUND_FLOOR 0x1)
372 (ROUND_CEIL 0x2)
373 (ROUND_TRUNC 0x3)
374 (ROUND_MXCSR 0x4)
375 (ROUND_NO_EXC 0x8)
376 ])
377
378 ;; Constants to represent AVX512F embedded rounding.
;; ROUND_NEAREST_INT..ROUND_ZERO take the values 0..3; NO_ROUND is 4 and
;; ROUND_SAE is 8.
379 (define_constants
380 [(ROUND_NEAREST_INT 0)
381 (ROUND_NEG_INF 1)
382 (ROUND_POS_INF 2)
383 (ROUND_ZERO 3)
384 (NO_ROUND 4)
385 (ROUND_SAE 8)
386 ])
387
388 ;; Constants to represent pcomtrue/pcomfalse variants.
;; Six consecutive values 0..5: PCOM_{FALSE,TRUE} followed by the
;; COM_{FALSE,TRUE}_{S,P} forms.
389 (define_constants
390 [(PCOM_FALSE 0)
391 (PCOM_TRUE 1)
392 (COM_FALSE_S 2)
393 (COM_FALSE_P 3)
394 (COM_TRUE_S 4)
395 (COM_TRUE_P 5)
396 ])
397
398 ;; Constants used in the XOP pperm instruction.
;; PPERM_SRC..PPERM_INV_SIGN step by 0x20 (0x00..0xe0); PPERM_SRC1 and
;; PPERM_SRC2 are 0x00 and 0x10.  Note that PPERM_SRC and PPERM_SRC1 share
;; the value 0x00.
399 (define_constants
400 [(PPERM_SRC 0x00) /* copy source */
401 (PPERM_INVERT 0x20) /* invert source */
402 (PPERM_REVERSE 0x40) /* bit reverse source */
403 (PPERM_REV_INV 0x60) /* bit reverse & invert src */
404 (PPERM_ZERO 0x80) /* all 0's */
405 (PPERM_ONES 0xa0) /* all 1's */
406 (PPERM_SIGN 0xc0) /* propagate sign bit */
407 (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
408 (PPERM_SRC1 0x00) /* use first source byte */
409 (PPERM_SRC2 0x10) /* use second source byte */
410 ])
411
412 ;; Registers by name.
;; Numbers run contiguously from AX_REG (0) to R31_REG (91);
;; FIRST_PSEUDO_REG (92) is one past the last named register.
413 (define_constants
414 [(AX_REG 0)
415 (DX_REG 1)
416 (CX_REG 2)
417 (BX_REG 3)
418 (SI_REG 4)
419 (DI_REG 5)
420 (BP_REG 6)
421 (SP_REG 7)
422 (ST0_REG 8)
423 (ST1_REG 9)
424 (ST2_REG 10)
425 (ST3_REG 11)
426 (ST4_REG 12)
427 (ST5_REG 13)
428 (ST6_REG 14)
429 (ST7_REG 15)
430 (ARGP_REG 16)
431 (FLAGS_REG 17)
432 (FPSR_REG 18)
433 (FRAME_REG 19)
434 (XMM0_REG 20)
435 (XMM1_REG 21)
436 (XMM2_REG 22)
437 (XMM3_REG 23)
438 (XMM4_REG 24)
439 (XMM5_REG 25)
440 (XMM6_REG 26)
441 (XMM7_REG 27)
442 (MM0_REG 28)
443 (MM1_REG 29)
444 (MM2_REG 30)
445 (MM3_REG 31)
446 (MM4_REG 32)
447 (MM5_REG 33)
448 (MM6_REG 34)
449 (MM7_REG 35)
450 (R8_REG 36)
451 (R9_REG 37)
452 (R10_REG 38)
453 (R11_REG 39)
454 (R12_REG 40)
455 (R13_REG 41)
456 (R14_REG 42)
457 (R15_REG 43)
458 (XMM8_REG 44)
459 (XMM9_REG 45)
460 (XMM10_REG 46)
461 (XMM11_REG 47)
462 (XMM12_REG 48)
463 (XMM13_REG 49)
464 (XMM14_REG 50)
465 (XMM15_REG 51)
466 (XMM16_REG 52)
467 (XMM17_REG 53)
468 (XMM18_REG 54)
469 (XMM19_REG 55)
470 (XMM20_REG 56)
471 (XMM21_REG 57)
472 (XMM22_REG 58)
473 (XMM23_REG 59)
474 (XMM24_REG 60)
475 (XMM25_REG 61)
476 (XMM26_REG 62)
477 (XMM27_REG 63)
478 (XMM28_REG 64)
479 (XMM29_REG 65)
480 (XMM30_REG 66)
481 (XMM31_REG 67)
482 (MASK0_REG 68)
483 (MASK1_REG 69)
484 (MASK2_REG 70)
485 (MASK3_REG 71)
486 (MASK4_REG 72)
487 (MASK5_REG 73)
488 (MASK6_REG 74)
489 (MASK7_REG 75)
490 (R16_REG 76)
491 (R17_REG 77)
492 (R18_REG 78)
493 (R19_REG 79)
494 (R20_REG 80)
495 (R21_REG 81)
496 (R22_REG 82)
497 (R23_REG 83)
498 (R24_REG 84)
499 (R25_REG 85)
500 (R26_REG 86)
501 (R27_REG 87)
502 (R28_REG 88)
503 (R29_REG 89)
504 (R30_REG 90)
505 (R31_REG 91)
506 (FIRST_PSEUDO_REG 92)
507 ])
508
509 ;; Insn callee ABI index: ABI_DEFAULT (0), ABI_VZEROUPPER (1), ABI_UNKNOWN (2).
510 (define_constants
511 [(ABI_DEFAULT 0)
512 (ABI_VZEROUPPER 1)
513 (ABI_UNKNOWN 2)])
514
515 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
516 ;; from i386.cc.
517
518 ;; In C guard expressions, put expressions which may be compile-time
519 ;; constants first. This allows for better optimization. For
520 ;; example, write "TARGET_64BIT && reload_completed", not
521 ;; "reload_completed && TARGET_64BIT".
522
523 \f
524 ;; Processor type.  The value is not computed per insn: it is read from
;; the C symbol ix86_schedule, wrapped in (const ...).
525 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
526 atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
527 bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4,
528 znver5"
529 (const (symbol_ref "ix86_schedule")))
530
531 ;; A basic instruction type. Refinements due to arguments to be
532 ;; provided in other attributes.  Defaults to "other".
533 (define_attr "type"
534 "other,multi,
535 alu,alu1,negnot,imov,imovx,lea,
536 incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,
537 imul,imulx,idiv,icmp,test,ibr,setcc,icmov,
538 push,pop,call,callv,leave,
539 str,bitmanip,
540 fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
541 fxch,fistp,fisttp,frndint,
542 sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
543 ssemul,sseimul,ssediv,sselog,sselog1,
544 sseishft,sseishft1,ssecmp,ssecomi,
545 ssecvt,ssecvt1,sseicvt,sseins,
546 sseshuf,sseshuf1,ssemuladd,sse4arg,
547 lwp,mskmov,msklog,
548 mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
549 (const_string "other"))
550
551 ;; Main data type used by the insn.  Defaults to "unknown".
552 (define_attr "mode"
553 "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,
554 V32HF,V16HF,V8HF,V4HF,V2HF,V32BF,V16BF,V8BF,V4BF,V2BF,
555 V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF"
556 (const_string "unknown"))
557
558 ;; The CPU unit the operation uses.  Derived from the "type" attribute:
;; type "other" maps to "unknown", and any type not listed in an arm below
;; falls through to "integer".
559 (define_attr "unit" "integer,i387,sse,mmx,unknown"
560 (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
561 fxch,fistp,fisttp,frndint")
562 (const_string "i387")
563 (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
564 ssemul,sseimul,ssediv,sselog,sselog1,
565 sseishft,sseishft1,ssecmp,ssecomi,
566 ssecvt,ssecvt1,sseicvt,sseins,
567 sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
568 (const_string "sse")
569 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
570 (const_string "mmx")
571 (eq_attr "type" "other")
572 (const_string "unknown")]
573 (const_string "integer")))
574
575 ;; Used to control the "enabled" attribute on a per-instruction basis.
;; Defaults to "base"; the "enabled" attribute maps the other values to
;; target conditions.
576 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
577 x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64,
578 sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
579 avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
580 noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
581 noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
582 avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
583 avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
584 vaes_avx512vl,noapx_nf,avx10_2"
585 (const_string "base"))
586
587 ;; The (bounding maximum) length of an instruction immediate.
;; Arms are tried in order, so the sse4arg/fma4 arm (1) takes precedence
;; over the i387/sse/mmx unit arm (0).  A type that matches no arm reaches
;; the default, which calls gcc_unreachable ().
588 (define_attr "length_immediate" ""
589 (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
590 bitmanip,imulx,msklog,mskmov")
591 (const_int 0)
592 (ior (eq_attr "type" "sse4arg")
593 (eq_attr "isa" "fma4"))
594 (const_int 1)
595 (eq_attr "unit" "i387,sse,mmx")
596 (const_int 0)
597 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
598 rotate,rotatex,rotate1,imul,icmp,push,pop")
599 (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
600 (eq_attr "type" "imov,test")
601 (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
602 (eq_attr "type" "call")
603 (if_then_else (match_operand 0 "constant_call_address_operand")
604 (const_int 4)
605 (const_int 0))
606 (eq_attr "type" "callv")
607 (if_then_else (match_operand 1 "constant_call_address_operand")
608 (const_int 4)
609 (const_int 0))
610 ;; We don't know the size before shorten_branches. Expect
611 ;; the instruction to fit for better scheduling.
612 (eq_attr "type" "ibr")
613 (const_int 1)
614 ]
615 (symbol_ref "/* Update immediate_length and other attributes! */
616 gcc_unreachable (),1")))
617
618 ;; The (bounding maximum) length of an instruction address.
;; Zero for types str, other, multi and fxch, and for call/callv insns whose
;; target is a constant call address; otherwise computed by
;; ix86_attr_length_address_default.
619 (define_attr "length_address" ""
620 (cond [(eq_attr "type" "str,other,multi,fxch")
621 (const_int 0)
622 (and (eq_attr "type" "call")
623 (match_operand 0 "constant_call_address_operand"))
624 (const_int 0)
625 (and (eq_attr "type" "callv")
626 (match_operand 1 "constant_call_address_operand"))
627 (const_int 0)
628 ]
629 (symbol_ref "ix86_attr_length_address_default (insn)")))
630
631 ;; Set when the data16 prefix is used: for HImode insns and for SSE-unit
;; insns in V2DF or TI mode.  The types in the first arm are always 0.
632 (define_attr "prefix_data16" ""
633 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
634 (const_int 0)
635 (eq_attr "mode" "HI")
636 (const_int 1)
637 (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
638 (const_int 1)
639 ]
640 (const_int 0)))
641
642 ;; Set when string REP prefix is used: for SSE-unit insns in SF or DF
;; mode.  The types in the first arm are always 0.
643 (define_attr "prefix_rep" ""
644 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
645 (const_int 0)
646 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
647 (const_int 1)
648 ]
649 (const_int 0)))
650
651 ;; Set when 0f opcode prefix is used: for the types listed below and for
;; every insn whose unit is sse or mmx.
652 (define_attr "prefix_0f" ""
653 (if_then_else
654 (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
655 (eq_attr "unit" "sse,mmx"))
656 (const_int 1)
657 (const_int 0)))
658
659 ;; Set when REX opcode prefix is used.  Always 0 unless TARGET_64BIT;
;; otherwise 1 when any of the later arms matches.
660 (define_attr "prefix_rex" ""
661 (cond [(not (match_test "TARGET_64BIT"))
662 (const_int 0)
663 (and (eq_attr "mode" "DI")
664 (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
665 (eq_attr "unit" "!mmx")))
666 (const_int 1)
667 (and (eq_attr "mode" "QI")
668 (match_test "x86_extended_QIreg_mentioned_p (insn)"))
669 (const_int 1)
670 (match_test "x86_extended_reg_mentioned_p (insn)")
671 (const_int 1)
672 (and (eq_attr "type" "imovx")
673 (match_operand:QI 1 "ext_QIreg_operand"))
674 (const_int 1)
675 ]
676 (const_int 0)))
677
678 ;; There are also additional prefixes in 3DNOW, SSSE3.
679 ;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
680 ;; While generally inapplicable to VEX/XOP/EVEX encodings, "length_vex" uses
681 ;; the attribute evaluating to zero to know that VEX2 encoding may be usable.
;; By default only types ssemuladd, sse4arg, sseiadd1 and ssecvt1 yield 1.
682 (define_attr "prefix_extra" ""
683 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
684 (const_int 1)
685 ]
686 (const_int 0)))
687
688 ;; Prefix used: original, VEX, maybe VEX, EVEX or maybe EVEX.
;; The mode arms are tested before the type arms; the default is "orig".
689 (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
690 (cond [(eq_attr "mode" "OI,V8SF,V4DF")
691 (const_string "vex")
692 (eq_attr "mode" "XI,V16SF,V8DF")
693 (const_string "evex")
694 (eq_attr "type" "ssemuladd")
695 (if_then_else (eq_attr "isa" "fma4")
696 (const_string "vex")
697 (const_string "maybe_evex"))
698 (eq_attr "type" "sse4arg")
699 (const_string "vex")
700 ]
701 (const_string "orig")))
702
703 ;; Set when the VEX W bit is used.  Defaults to 0.
704 (define_attr "prefix_vex_w" "" (const_int 0))
705
706 ;; The length of VEX prefix
707 ;; Only instructions with 0f prefix can have 2 byte VEX prefix,
708 ;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is
709 ;; still prefix_0f 1, with prefix_extra 1.
;; The second argument to ix86_attr_length_vex_default is true when
;; prefix_0f is 1 and prefix_extra is 0; the third is true when
;; prefix_vex_w is 1.
710 (define_attr "length_vex" ""
711 (if_then_else (and (eq_attr "prefix_0f" "1")
712 (eq_attr "prefix_extra" "0"))
713 (if_then_else (eq_attr "prefix_vex_w" "1")
714 (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
715 (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
716 (if_then_else (eq_attr "prefix_vex_w" "1")
717 (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
718 (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
719
720 ;; 4-byte EVEX prefix plus 1 opcode byte, hence the default of 5.
721 (define_attr "length_evex" "" (const_int 5))
722
723 ;; Set when modrm byte is used.  Defaults to 1; the arms below list the
;; cases that evaluate to 0 (or, for the last arm, to a computed value).
724 (define_attr "modrm" ""
725 (cond [(eq_attr "type" "str,leave")
726 (const_int 0)
727 (eq_attr "unit" "i387")
728 (const_int 0)
729 (and (eq_attr "type" "incdec")
730 (and (not (match_test "TARGET_64BIT"))
731 (ior (match_operand:SI 1 "register_operand")
732 (match_operand:HI 1 "register_operand"))))
733 (const_int 0)
734 (and (eq_attr "type" "push")
735 (not (match_operand 1 "memory_operand")))
736 (const_int 0)
737 (and (eq_attr "type" "pop")
738 (not (match_operand 0 "memory_operand")))
739 (const_int 0)
740 (and (eq_attr "type" "imov")
741 (and (not (eq_attr "mode" "DI"))
742 (ior (and (match_operand 0 "register_operand")
743 (match_operand 1 "immediate_operand"))
744 (ior (and (match_operand 0 "ax_reg_operand")
745 (match_operand 1 "memory_displacement_only_operand"))
746 (and (match_operand 0 "memory_displacement_only_operand")
747 (match_operand 1 "ax_reg_operand"))))))
748 (const_int 0)
749 (and (eq_attr "type" "call")
750 (match_operand 0 "constant_call_address_operand"))
751 (const_int 0)
752 (and (eq_attr "type" "callv")
753 (match_operand 1 "constant_call_address_operand"))
754 (const_int 0)
;; With operand 0 matching ax_reg_operand the value is the C comparison
;; below: 1 when length_immediate is at most 1 (at most 0 for QImode),
;; otherwise 0.
755 (and (eq_attr "type" "alu,alu1,icmp,test")
756 (match_operand 0 "ax_reg_operand"))
757 (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
758 ]
759 (const_int 1)))
760
761 ;; The (bounding maximum) length of an instruction in bytes.
762 ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
763 ;; Later we may want to split them and compute proper length as for
764 ;; other insns.
;; Arms are tried in order: fixed sizes first, then i387, then the EVEX
;; and VEX encodings.  The default arm sums modrm, prefix_0f, prefix_rex,
;; prefix_extra, prefix_rep, prefix_data16, length_immediate,
;; length_address and a constant 1.
;; NOTE(review): the constant 1 is presumably the opcode byte -- confirm.
765 (define_attr "length" ""
766 (cond [(eq_attr "type" "other,multi,fistp,frndint")
767 (const_int 16)
768 (eq_attr "type" "fcmp")
769 (const_int 4)
770 (eq_attr "unit" "i387")
771 (plus (const_int 2)
772 (plus (attr "prefix_data16")
773 (attr "length_address")))
774 (ior (eq_attr "prefix" "evex")
775 (and (ior (eq_attr "prefix" "maybe_evex")
776 (eq_attr "prefix" "maybe_vex"))
777 (match_test "TARGET_AVX512F")))
778 (plus (attr "length_evex")
779 (plus (attr "length_immediate")
780 (plus (attr "modrm")
781 (attr "length_address"))))
782 (ior (eq_attr "prefix" "vex")
783 (and (ior (eq_attr "prefix" "maybe_vex")
784 (eq_attr "prefix" "maybe_evex"))
785 (match_test "TARGET_AVX")))
786 (plus (attr "length_vex")
787 (plus (attr "length_immediate")
788 (plus (attr "modrm")
789 (attr "length_address"))))]
790 (plus (plus (attr "modrm")
791 (plus (attr "prefix_0f")
792 (plus (attr "prefix_rex")
793 (plus (attr "prefix_extra")
794 (const_int 1)))))
795 (plus (attr "prefix_rep")
796 (plus (attr "prefix_data16")
797 (plus (attr "length_immediate")
798 (attr "length_address")))))))
799
800 ;; The `memory' attribute is `none' if no memory is referenced, `load' or
801 ;; `store' if there is a simple memory reference therein, `both' if the
802 ;; instruction both loads and stores, or `unknown' if the instruction is complex.
803
804 (define_attr "memory" "none,load,store,both,unknown"
805 (cond [(eq_attr "type" "other,multi,str,lwp")
806 (const_string "unknown")
807 (eq_attr "type" "lea,fcmov,fpspc")
808 (const_string "none")
809 (eq_attr "type" "fistp,leave")
810 (const_string "both")
811 (eq_attr "type" "frndint")
812 (const_string "load")
813 (eq_attr "type" "push")
814 (if_then_else (match_operand 1 "memory_operand")
815 (const_string "both")
816 (const_string "store"))
817 (eq_attr "type" "pop")
818 (if_then_else (match_operand 0 "memory_operand")
819 (const_string "both")
820 (const_string "load"))
821 (eq_attr "type" "setcc")
822 (if_then_else (match_operand 0 "memory_operand")
823 (const_string "store")
824 (const_string "none"))
825 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
826 (if_then_else (ior (match_operand 0 "memory_operand")
827 (match_operand 1 "memory_operand"))
828 (const_string "load")
829 (const_string "none"))
830 (eq_attr "type" "ibr")
831 (if_then_else (match_operand 0 "memory_operand")
832 (const_string "load")
833 (const_string "none"))
834 (eq_attr "type" "call")
835 (if_then_else (match_operand 0 "constant_call_address_operand")
836 (const_string "none")
837 (const_string "load"))
838 (eq_attr "type" "callv")
839 (if_then_else (match_operand 1 "constant_call_address_operand")
840 (const_string "none")
841 (const_string "load"))
842 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
843 (match_operand 1 "memory_operand"))
844 (const_string "both")
;; Generic fallbacks keyed on operands 0 and 1: both in memory gives
;; "both", operand 0 alone "store", operand 1 alone "load".
845 (and (match_operand 0 "memory_operand")
846 (match_operand 1 "memory_operand"))
847 (const_string "both")
848 (match_operand 0 "memory_operand")
849 (const_string "store")
850 (match_operand 1 "memory_operand")
851 (const_string "load")
;; Operand 2 counts as a load for every type except those named after
;; the "!" below.
852 (and (eq_attr "type"
853 "!alu1,negnot,ishift1,rotate1,
854 imov,imovx,icmp,test,bitmanip,
855 fmov,fcmp,fsgn,
856 sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
857 sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
858 mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
859 (match_operand 2 "memory_operand"))
860 (const_string "load")
861 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
862 (match_operand 3 "memory_operand"))
863 (const_string "load")
864 ]
865 (const_string "none")))
866
867 ;; Indicates if an instruction has both an immediate and a displacement.
;; "unknown" for types other and multi.  Otherwise "true" when operand 0 is
;; a memory_displacement_operand and the immediate is operand 1 (first type
;; list) or operand 2 (second type list); "false" in all other cases.
868
869 (define_attr "imm_disp" "false,true,unknown"
870 (cond [(eq_attr "type" "other,multi")
871 (const_string "unknown")
872 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
873 (and (match_operand 0 "memory_displacement_operand")
874 (match_operand 1 "immediate_operand")))
875 (const_string "true")
876 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
877 (and (match_operand 0 "memory_displacement_operand")
878 (match_operand 2 "immediate_operand")))
879 (const_string "true")
880 ]
881 (const_string "false")))
882
883 ;; Indicates if an FP operation has an integer source.  Defaults to "false".
884
885 (define_attr "fp_int_src" "false,true"
886 (const_string "false"))
887
888 ;; Defines rounding mode of an FP operation.  Defaults to "any".
889
890 (define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
891 (const_string "any"))
892
893 ;; Define attribute to indicate AVX insns with partial XMM register update.
;; Defaults to "false".
894 (define_attr "avx_partial_xmm_update" "false,true"
895 (const_string "false"))
896
897 ;; Define attribute to classify add/sub insns that consume the carry flag (CF).
;; Defaults to "0".
898 (define_attr "use_carry" "0,1" (const_string "0"))
899
900 ;; Define attribute to indicate unaligned ssemov insns.  Defaults to "0".
901 (define_attr "movu" "0,1" (const_string "0"))
902
903 ;; Define attribute to limit memory address register set.
;; Defaults to "gpr32".
904 (define_attr "addr" "gpr8,gpr16,gpr32" (const_string "gpr32"))
905
906 ;; Define instruction set of MMX instructions.  Defaults to "base"; the
;; other values are tested by the "enabled" attribute.
907 (define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
908 (const_string "base"))
909
;; Evaluates to the C condition paired with the first arm whose "isa" or
;; "mmx_isa" value matches, and to 1 when no arm matches (e.g. isa "base"
;; with mmx_isa "base").  Arms are tried in order, so the "noapx_nf" arm,
;; placed after the mmx_isa arms, is only reached when no mmx_isa arm matched.
910 (define_attr "enabled" ""
911 (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
912 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
913 (eq_attr "isa" "x64_sse2")
914 (symbol_ref "TARGET_64BIT && TARGET_SSE2")
915 (eq_attr "isa" "x64_sse4")
916 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
917 (eq_attr "isa" "x64_sse4_noavx")
918 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
919 (eq_attr "isa" "x64_avx")
920 (symbol_ref "TARGET_64BIT && TARGET_AVX")
921 (eq_attr "isa" "x64_avx512bw")
922 (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
923 (eq_attr "isa" "x64_avx512dq")
924 (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
925 (eq_attr "isa" "sse_noavx")
926 (symbol_ref "TARGET_SSE && !TARGET_AVX")
927 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
928 (eq_attr "isa" "sse2_noavx")
929 (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
930 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
931 (eq_attr "isa" "sse3_noavx")
932 (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
933 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
934 (eq_attr "isa" "sse4_noavx")
935 (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
936 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
937 (eq_attr "isa" "avx_noavx512f")
938 (symbol_ref "TARGET_AVX && !TARGET_AVX512F")
939 (eq_attr "isa" "avx_noavx512vl")
940 (symbol_ref "TARGET_AVX && !TARGET_AVX512VL")
941 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
942 (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
943 (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
944 (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
945 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
946 (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
947 (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
948 (eq_attr "isa" "fma_or_avx512vl")
949 (symbol_ref "TARGET_FMA || TARGET_AVX512VL")
950 (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
951 (eq_attr "isa" "avx512f_512")
952 (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
953 (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
954 (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
955 (eq_attr "isa" "avx512bw_512")
956 (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
957 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
958 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
959 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
960 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
961 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
962 (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
963 (eq_attr "isa" "avx512vnnivl")
964 (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
965 (eq_attr "isa" "avx512fp16")
966 (symbol_ref "TARGET_AVX512FP16")
967 (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
968 (eq_attr "isa" "avx512ifmavl")
969 (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
970 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
971 (eq_attr "isa" "avx512bf16vl")
972 (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
973 (eq_attr "isa" "vpclmulqdqvl")
974 (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
975 (eq_attr "isa" "apx_ndd")
976 (symbol_ref "TARGET_APX_NDD")
977 (eq_attr "isa" "apx_ndd_64")
978 (symbol_ref "TARGET_APX_NDD && Pmode == DImode")
979 (eq_attr "isa" "vaes_avx512vl")
980 (symbol_ref "TARGET_VAES && TARGET_AVX512VL")
981 (eq_attr "isa" "avx10_2") (symbol_ref "TARGET_AVX10_2_256")
982
983 (eq_attr "mmx_isa" "native")
984 (symbol_ref "!TARGET_MMX_WITH_SSE")
985 (eq_attr "mmx_isa" "sse")
986 (symbol_ref "TARGET_MMX_WITH_SSE")
987 (eq_attr "mmx_isa" "sse_noavx")
988 (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
989 (eq_attr "mmx_isa" "avx")
990 (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
991 (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
992 ]
993 (const_int 1)))
994
;; Both preference attributes default to 1.
995 (define_attr "preferred_for_size" "" (const_int 1))
996 (define_attr "preferred_for_speed" "" (const_int 1))
997
998 ;; Define attribute to mark that the insn has an nf variant.  Defaults to "0".
999 (define_attr "has_nf" "0,1" (const_string "0"))
1000
1001 ;; Describe a user's asm statement: length 128 and type "multi".
1002 (define_asm_attributes
1003 [(set_attr "length" "128")
1004 (set_attr "type" "multi")])
1005
;; Mapping of add/subtract operators, optionally extended with multiply
;; and divide.
1006 (define_code_iterator plusminus [plus minus])
1007 (define_code_iterator plusminusmult [plus minus mult])
1008 (define_code_iterator plusminusmultdiv [plus minus mult div])
1009
;; Mapping of saturating add/subtract operators.
1010 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
1011
1012 ;; Base name for insn mnemonic (plain and saturating add/subtract).
1013 (define_code_attr plusminus_mnemonic
1014 [(plus "add") (ss_plus "adds") (us_plus "addus")
1015 (minus "sub") (ss_minus "subs") (us_minus "subus")])
1016
;; Mapping of multiply and divide operators.
1017 (define_code_iterator multdiv [mult div])
1018
;; Base name for multiply/divide insn mnemonic.
1019 (define_code_attr multdiv_mnemonic
1020 [(mult "mul") (div "div")])
1021
1022 ;; Mark commutative operators as such in constraints: "%" for plus,
;; ss_plus, us_plus and mult; the empty string for the others.
1023 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
1024 (minus "") (ss_minus "") (us_minus "")
1025 (mult "%") (div "")])
1026
1027 ;; Mapping of max and min
1028 (define_code_iterator maxmin [smax smin umax umin])
1029
1030 ;; Mapping of signed max and min
1031 (define_code_iterator smaxmin [smax smin])
1032
1033 ;; Mapping of unsigned max and min
1034 (define_code_iterator umaxmin [umax umin])
1035
1036 ;; Base name for integer and FP insn mnemonic
1037 (define_code_attr maxmin_int [(smax "maxs") (smin "mins")
1038 (umax "maxu") (umin "minu")])
1039 (define_code_attr maxmin_float [(smax "max") (smin "min")])
1040
;; Int iterator over the UNSPEC_IEEE_MAX and UNSPEC_IEEE_MIN unspecs.
1041 (define_int_iterator IEEE_MAXMIN
1042 [UNSPEC_IEEE_MAX
1043 UNSPEC_IEEE_MIN])
1044
;; Name fragment (max or min) selected by each IEEE_MAXMIN unspec.
1045 (define_int_attr ieee_maxmin
1046 [(UNSPEC_IEEE_MAX "max")
1047 (UNSPEC_IEEE_MIN "min")])
1048
1049 ;; Mapping of logic operators
1050 (define_code_iterator any_logic [and ior xor])
1051 (define_code_iterator any_or [ior xor])
1052 (define_code_iterator fpint_logic [and xor])
1053
1054 ;; Base name for insn mnemonic.
1055 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1056
1057 ;; Mapping of logic-shift operators
1058 (define_code_iterator any_lshift [ashift lshiftrt])
1059
1060 ;; Mapping of shift-right operators
1061 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
1062
1063 ;; Mapping of all shift operators
1064 (define_code_iterator any_shift [ashift lshiftrt ashiftrt])
1065
1066 ;; Base name for insn mnemonic.
1067 (define_code_attr shift [(ashift "sal") (lshiftrt "shr") (ashiftrt "sar")])
1068 (define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])
1069
1070 ;; Mapping of rotate operators
1071 (define_code_iterator any_rotate [rotate rotatert])
1072
1073 ;; Base name for insn mnemonic.
1074 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
1075
1076 ;; Mapping of abs neg operators
1077 (define_code_iterator absneg [abs neg])
1078
1079 ;; Mapping of abs neg operators to logic operation
1080 (define_code_attr absneg_op [(abs "and") (neg "xor")])
1081
1082 ;; Base name for x87 insn mnemonic.
1083 (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
1084
1085 ;; Mapping of extend operators
1086 (define_code_iterator any_extend [sign_extend zero_extend])
1087
1088 ;; Mapping of highpart multiply operators
1089 (define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
1090
1091 ;; Prefix for insn mnemonic.
1092 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
1093 (smul_highpart "i") (umul_highpart "")
1094 (div "i") (udiv "")])
1095 ;; Prefix for define_insn
1096 (define_code_attr s [(sign_extend "s") (zero_extend "u")
1097 (smul_highpart "s") (umul_highpart "u")])
1098 (define_code_attr u [(sign_extend "") (zero_extend "u")
1099 (div "") (udiv "u")])
1100 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
1101 (div "false") (udiv "true")])
1102
1103 ;; Used in signed and unsigned truncations.
1104 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
1105 ;; Instruction suffix for truncations.
1106 (define_code_attr trunsuffix
1107 [(ss_truncate "s") (truncate "") (us_truncate "us")])
1108
1109 ;; Instruction suffix for SSE sign and zero extensions.
1110 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
1111
1112 ;; Used in signed and unsigned fix.
1113 (define_code_iterator any_fix [fix unsigned_fix])
1114 (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
1115 (define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
1116 (define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
1117
1118 ;; Used in signed and unsigned float.
1119 (define_code_iterator any_float [float unsigned_float])
1120 (define_code_attr floatsuffix [(float "") (unsigned_float "u")])
1121 (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
1122 (define_code_attr floatprefix [(float "s") (unsigned_float "u")])
1123
1124 ;; Base name for expression
1125 (define_code_attr insn
1126 [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
1127 (minus "sub") (ss_minus "sssub") (us_minus "ussub")
1128 (sign_extend "extend") (zero_extend "zero_extend")
1129 (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
1130 (rotate "rotl") (rotatert "rotr")
1131 (mult "mul") (div "div")])
1132
1133 ;; All integer modes.
1134 (define_mode_iterator SWI1248x [QI HI SI DI])
1135
1136 ;; All integer modes without QImode.
1137 (define_mode_iterator SWI248x [HI SI DI])
1138
1139 ;; All integer modes without QImode and HImode.
1140 (define_mode_iterator SWI48x [SI DI])
1141
1142 ;; All integer modes without SImode and DImode.
1143 (define_mode_iterator SWI12 [QI HI])
1144
1145 ;; All integer modes without DImode.
1146 (define_mode_iterator SWI124 [QI HI SI])
1147
1148 ;; All integer modes without QImode and DImode.
1149 (define_mode_iterator SWI24 [HI SI])
1150
1151 ;; Single word integer modes.
1152 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
1153
1154 ;; Single word integer modes without QImode.
1155 (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
1156
1157 ;; Single word integer modes without QImode and HImode.
1158 (define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
1159
1160 ;; All math-dependent single and double word integer modes.
1161 (define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
1162 (HI "TARGET_HIMODE_MATH")
1163 SI DI (TI "TARGET_64BIT")])
1164
1165 ;; Math-dependent single word integer modes.
1166 (define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
1167 (HI "TARGET_HIMODE_MATH")
1168 SI (DI "TARGET_64BIT")])
1169
1170 ;; Math-dependent integer modes without DImode.
1171 (define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
1172 (HI "TARGET_HIMODE_MATH")
1173 SI])
1174
1175 ;; Math-dependent integer modes with DImode.
1176 (define_mode_iterator SWIM1248x
1177 [(QI "TARGET_QIMODE_MATH")
1178 (HI "TARGET_HIMODE_MATH")
1179 SI DI])
1180
1181 ;; Math-dependent single word integer modes without QImode.
1182 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
1183 SI (DI "TARGET_64BIT")])
1184
1185 ;; Double word integer modes.
1186 (define_mode_iterator DWI [(DI "!TARGET_64BIT")
1187 (TI "TARGET_64BIT")])
1188
1189 ;; SWI and DWI together.
1190 (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
1191
1192 ;; SWI48 and DWI together.
1193 (define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
1194
1195 ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not
1196 ;; compile time constant, it is faster to use <MODE_SIZE> than
1197 ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on
1198 ;; command line options just use GET_MODE_SIZE macro.
1199 (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
1200 (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
1201 (XF "GET_MODE_SIZE (XFmode)")
1202 (V16QI "16") (V32QI "32") (V64QI "64")
1203 (V8HI "16") (V16HI "32") (V32HI "64")
1204 (V4SI "16") (V8SI "32") (V16SI "64")
1205 (V2DI "16") (V4DI "32") (V8DI "64")
1206 (V1TI "16") (V2TI "32") (V4TI "64")
1207 (V2DF "16") (V4DF "32") (V8DF "64")
1208 (V4SF "16") (V8SF "32") (V16SF "64")
1209 (V8HF "16") (V16HF "32") (V32HF "64")
1210 (V4HF "8") (V2HF "4")
1211 (V8BF "16") (V16BF "32") (V32BF "64")
1212 (V4BF "8") (V2BF "4")])
1213
1214 ;; Double word integer modes as mode attribute.
1215 (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
1216 (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])
1217
1218 ;; Half sized integer modes.
1219 (define_mode_attr HALF [(TI "DI") (DI "SI")])
1220 (define_mode_attr half [(TI "di") (DI "si")])
1221
1222 ;; LEA mode corresponding to an integer mode
1223 (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
1224
1225 ;; Half mode for double word integer modes.
1226 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
1227 (DI "TARGET_64BIT")])
1228
1229 ;; Instruction suffix for integer modes.
1230 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
1231
1232 ;; Instruction suffix for masks.
1233 (define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
1234
1235 ;; Pointer size prefix for integer modes (Intel asm dialect)
1236 (define_mode_attr iptrsize [(QI "BYTE")
1237 (HI "WORD")
1238 (SI "DWORD")
1239 (DI "QWORD")])
1240
1241 ;; Register class for integer modes.
1242 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
1243
1244 ;; Immediate operand constraint for integer modes.
1245 (define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])
1246
1247 ;; General operand constraint for word modes.
1248 (define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])
1249
1250 ;; Memory operand constraint for word modes.
1251 (define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])
1252
1253 ;; Immediate operand constraint for double integer modes.
1254 (define_mode_attr di [(SI "nF") (DI "Wd")])
1255
1256 ;; Immediate operand constraint for shifts.
1257 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
1258 (define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
1259
1260 ;; Print register name in the specified mode.
1261 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
1262
1263 ;; General operand predicate for integer modes.
1264 (define_mode_attr general_operand
1265 [(QI "general_operand")
1266 (HI "general_operand")
1267 (SI "x86_64_general_operand")
1268 (DI "x86_64_general_operand")
1269 (TI "x86_64_general_operand")])
1270
1271 ;; General operand predicate for integer modes, where for TImode
1272 ;; we need both words of the operand to be general operands.
1273 (define_mode_attr general_hilo_operand
1274 [(QI "general_operand")
1275 (HI "general_operand")
1276 (SI "x86_64_general_operand")
1277 (DI "x86_64_general_operand")
1278 (TI "x86_64_hilo_general_operand")])
1279
1280 ;; General sign extend operand predicate for integer modes,
1281 ;; which disallows VOIDmode operands and thus it is suitable
1282 ;; for use inside sign_extend.
1283 (define_mode_attr general_sext_operand
1284 [(QI "sext_operand")
1285 (HI "sext_operand")
1286 (SI "x86_64_sext_operand")
1287 (DI "x86_64_sext_operand")])
1288
1289 ;; General sign/zero extend operand predicate for integer modes.
1290 (define_mode_attr general_szext_operand
1291 [(QI "general_operand")
1292 (HI "general_operand")
1293 (SI "x86_64_szext_general_operand")
1294 (DI "x86_64_szext_general_operand")
1295 (TI "x86_64_hilo_general_operand")])
1296
;; Nonmemory sign/zero extend operand predicate for integer modes.
;; QImode and HImode use the generic nonmemory_operand; SImode and DImode
;; use x86_64_szext_nonmemory_operand.
1297 (define_mode_attr nonmemory_szext_operand
1298 [(QI "nonmemory_operand")
1299 (HI "nonmemory_operand")
1300 (SI "x86_64_szext_nonmemory_operand")
1301 (DI "x86_64_szext_nonmemory_operand")])
1302
1303 ;; Immediate operand predicate for integer modes.
1304 (define_mode_attr immediate_operand
1305 [(QI "immediate_operand")
1306 (HI "immediate_operand")
1307 (SI "x86_64_immediate_operand")
1308 (DI "x86_64_immediate_operand")])
1309
1310 ;; Nonmemory operand predicate for integer modes.
1311 (define_mode_attr nonmemory_operand
1312 [(QI "nonmemory_operand")
1313 (HI "nonmemory_operand")
1314 (SI "x86_64_nonmemory_operand")
1315 (DI "x86_64_nonmemory_operand")])
1316
1317 ;; Operand predicate for shifts.
1318 (define_mode_attr shift_operand
1319 [(QI "nonimmediate_operand")
1320 (HI "nonimmediate_operand")
1321 (SI "nonimmediate_operand")
1322 (DI "shiftdi_operand")
1323 (TI "register_operand")])
1324
1325 ;; Operand predicate for shift argument.
1326 (define_mode_attr shift_immediate_operand
1327 [(QI "const_1_to_31_operand")
1328 (HI "const_1_to_31_operand")
1329 (SI "const_1_to_31_operand")
1330 (DI "const_1_to_63_operand")])
1331
1332 ;; Input operand predicate for arithmetic left shifts.
1333 (define_mode_attr ashl_input_operand
1334 [(QI "nonimmediate_operand")
1335 (HI "nonimmediate_operand")
1336 (SI "nonimmediate_operand")
1337 (DI "ashldi_input_operand")
1338 (TI "reg_or_pm1_operand")])
1339
1340 ;; SSE and x87 SFmode and DFmode floating point modes
1341 (define_mode_iterator MODEF [SF DF])
1342
;; BFmode, HFmode and SFmode, plus DFmode when TARGET_SSE2 is enabled.
1343 (define_mode_iterator MODEF248 [BF HF SF (DF "TARGET_SSE2")])
1344
1345 ;; SSE floating point modes
1346 (define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])
1347
1348 ;; All x87 floating point modes
1349 (define_mode_iterator X87MODEF [SF DF XF])
1350
1351 ;; All x87 floating point modes plus HFmode and BFmode
1352 (define_mode_iterator X87MODEFH [HF SF DF XF BF])
1353
1354 ;; All SSE floating point modes
1355 (define_mode_iterator SSEMODEF [HF SF DF TF])
1356 (define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")])
1357
1358 ;; SSE instruction suffix for various modes
1359 (define_mode_attr ssemodesuffix
1360 [(HF "sh") (SF "ss") (DF "sd")
1361 (V32HF "ph") (V16SF "ps") (V8DF "pd")
1362 (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
1363 (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
1364 (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
1365 (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
1366 (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
1367
1368 ;; SSE vector suffix for floating point modes
1369 ;; BF HF use same suffix as SF for logic operations.
1370 (define_mode_attr ssevecmodesuffix [(BF "ps") (HF "ps") (SF "ps") (DF "pd")])
1371
1372 ;; SSE vector mode corresponding to a scalar mode
1373 (define_mode_attr ssevecmode
1374 [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
1375 (define_mode_attr ssevecmodelower
1376 [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
1377
1378 ;; AVX512F vector mode corresponding to a scalar mode
1379 (define_mode_attr avx512fvecmode
1380 [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI")
1381 (HF "V32HF") (BF "V32BF") (SF "V16SF") (DF "V8DF")])
1382
1383 ;; Instruction suffix for REX 64bit operators.
1384 (define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
1385 (define_mode_attr rex64namesuffix [(SI "") (DI "q")])
1386
1387 ;; This mode iterator allows :P to be used for patterns that operate on
1388 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
1389 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
1390
1391 ;; This mode iterator allows :W to be used for patterns that operate on
1392 ;; word_mode sized quantities.
1393 (define_mode_iterator W
1394 [(SI "word_mode == SImode") (DI "word_mode == DImode")])
1395
1396 ;; This mode iterator allows :PTR to be used for patterns that operate on
1397 ;; ptr_mode sized quantities.
1398 (define_mode_iterator PTR
1399 [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
1400 \f
1401 ;; Scheduling descriptions
1402
1403 (include "pentium.md")
1404 (include "ppro.md")
1405 (include "k6.md")
1406 (include "athlon.md")
1407 (include "bdver1.md")
1408 (include "bdver3.md")
1409 (include "btver2.md")
1410 (include "znver.md")
1411 (include "zn4zn5.md")
1412 (include "geode.md")
1413 (include "atom.md")
1414 (include "slm.md")
1415 (include "glm.md")
1416 (include "core2.md")
1417 (include "haswell.md")
1418 (include "lujiazui.md")
1419 (include "yongfeng.md")
1420
1421 \f
1422 ;; Operand and operator predicates and constraints
1423
1424 (include "predicates.md")
1425 (include "constraints.md")
1426
1427 \f
1428 ;; Compare and branch/compare and store instructions.
1429
;; Integer conditional branch for the SWIM1248x modes.  Operand 0 is the
;; comparison operator, operands 1 and 2 are the values compared and
;; operand 3 is the target label.  When both compared operands are in
;; memory, operand 1 is first forced into a register.  The branch itself
;; is emitted by ix86_expand_branch and the preparation code ends in DONE.
1430 (define_expand "cbranch<mode>4"
1431 [(set (reg:CC FLAGS_REG)
1432 (compare:CC (match_operand:SWIM1248x 1 "nonimmediate_operand")
1433 (match_operand:SWIM1248x 2 "<general_operand>")))
1434 (set (pc) (if_then_else
1435 (match_operator 0 "ordered_comparison_operator"
1436 [(reg:CC FLAGS_REG) (const_int 0)])
1437 (label_ref (match_operand 3))
1438 (pc)))]
1439 ""
1440 {
1441 if (MEM_P (operands[1]) && MEM_P (operands[2]))
1442 operands[1] = force_reg (<MODE>mode, operands[1]);
1443 ix86_expand_branch (GET_CODE (operands[0]),
1444 operands[1], operands[2], operands[3]);
1445 DONE;
1446 })
1447
;; TImode conditional branch, enabled when TARGET_64BIT || TARGET_SSE4_1.
;; The comparison (operand 0) must satisfy ix86_timode_comparison_operator
;; and the second compared value ix86_timode_comparison_operand.
;; Expansion is delegated unchanged to ix86_expand_branch.
1448 (define_expand "cbranchti4"
1449 [(set (reg:CC FLAGS_REG)
1450 (compare:CC (match_operand:TI 1 "nonimmediate_operand")
1451 (match_operand:TI 2 "ix86_timode_comparison_operand")))
1452 (set (pc) (if_then_else
1453 (match_operator 0 "ix86_timode_comparison_operator"
1454 [(reg:CC FLAGS_REG) (const_int 0)])
1455 (label_ref (match_operand 3))
1456 (pc)))]
1457 "TARGET_64BIT || TARGET_SSE4_1"
1458 {
1459 ix86_expand_branch (GET_CODE (operands[0]),
1460 operands[1], operands[2], operands[3]);
1461 DONE;
1462 })
1463
;; OImode conditional branch, enabled for TARGET_AVX.  Both compared
;; values must be nonimmediate operands and the comparison (operand 0)
;; must satisfy bt_comparison_operator.  Expansion is delegated to
;; ix86_expand_branch.
1464 (define_expand "cbranchoi4"
1465 [(set (reg:CC FLAGS_REG)
1466 (compare:CC (match_operand:OI 1 "nonimmediate_operand")
1467 (match_operand:OI 2 "nonimmediate_operand")))
1468 (set (pc) (if_then_else
1469 (match_operator 0 "bt_comparison_operator"
1470 [(reg:CC FLAGS_REG) (const_int 0)])
1471 (label_ref (match_operand 3))
1472 (pc)))]
1473 "TARGET_AVX"
1474 {
1475 ix86_expand_branch (GET_CODE (operands[0]),
1476 operands[1], operands[2], operands[3]);
1477 DONE;
1478 })
1479
;; XImode conditional branch, enabled when
;; TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256.
;; Both compared values must be nonimmediate operands and the comparison
;; (operand 0) must satisfy bt_comparison_operator.  Expansion is
;; delegated to ix86_expand_branch.
1480 (define_expand "cbranchxi4"
1481 [(set (reg:CC FLAGS_REG)
1482 (compare:CC (match_operand:XI 1 "nonimmediate_operand")
1483 (match_operand:XI 2 "nonimmediate_operand")))
1484 (set (pc) (if_then_else
1485 (match_operator 0 "bt_comparison_operator"
1486 [(reg:CC FLAGS_REG) (const_int 0)])
1487 (label_ref (match_operand 3))
1488 (pc)))]
1489 "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
1490 {
1491 ix86_expand_branch (GET_CODE (operands[0]),
1492 operands[1], operands[2], operands[3]);
1493 DONE;
1494 })
1495
;; Integer store-flag expander for the SDWIM modes: QImode operand 0
;; receives the result of comparison operator 1 applied to operands 2
;; and 3.  For the double-word mode (TImode when TARGET_64BIT, DImode
;; otherwise) only EQ and NE are expanded; any other code FAILs.  For the
;; other modes, when both compared operands are in memory, operand 2 is
;; first forced into a register.  ix86_expand_setcc emits the sequence.
1496 (define_expand "cstore<mode>4"
1497 [(set (reg:CC FLAGS_REG)
1498 (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
1499 (match_operand:SDWIM 3 "<general_operand>")))
1500 (set (match_operand:QI 0 "register_operand")
1501 (match_operator 1 "ordered_comparison_operator"
1502 [(reg:CC FLAGS_REG) (const_int 0)]))]
1503 ""
1504 {
1505 if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
1506 {
1507 if (GET_CODE (operands[1]) != EQ
1508 && GET_CODE (operands[1]) != NE)
1509 FAIL;
1510 }
1511 else if (MEM_P (operands[2]) && MEM_P (operands[3]))
1512 operands[2] = force_reg (<MODE>mode, operands[2]);
1513 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1514 operands[2], operands[3]);
1515 DONE;
1516 })
1517
;; Conditional compare, enabled for TARGET_APX_CCMP.  The flags register
;; (operand 0) is set by an if_then_else on comparison operator 1 of the
;; current flags: one arm is the compare of (operand 2 - operand 3)
;; against zero, the other arm is an UNSPEC_APX_DFV wrapping operand 4,
;; a constant in the range 0..15.  The first alternative is output as
;; ctest of operand 2 with itself; the other two are output as ccmp.
;; NOTE(review): the C constraint of the first alternative presumably
;; means constant zero -- confirm in constraints.md.
1518 (define_insn "@ccmp<mode>"
1519 [(set (match_operand:CC 0 "flags_reg_operand")
1520 (if_then_else:CC
1521 (match_operator 1 "comparison_operator"
1522 [(reg:CC FLAGS_REG) (const_int 0)])
1523 (compare:CC
1524 (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>,<r>m,<r>")
1525 (match_operand:SWI 3 "<general_operand>" "C,<r><i>,<m>"))
1526 (const_int 0))
1527 (unspec:CC
1528 [(match_operand:SI 4 "const_0_to_15_operand")]
1529 UNSPEC_APX_DFV)))]
1530 "TARGET_APX_CCMP"
1531 "@
1532 ctest%C1{<imodesuffix>}\t%G4 %2, %2
1533 ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}
1534 ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
1535 [(set_attr "type" "icmp")
1536 (set_attr "mode" "<MODE>")
1537 (set_attr "length_immediate" "1")
1538 (set_attr "prefix" "evex")])
1539
;; Named expander producing a CCmode compare of two SWI operands.  It has
;; no condition and no preparation code.  It is invoked with an explicit
;; mode argument as gen_cmp_1 (<MODE>mode, ...) by *cmp<dwi>_doubleword.
1540 (define_expand "@cmp<mode>_1"
1541 [(set (reg:CC FLAGS_REG)
1542 (compare:CC (match_operand:SWI 0 "nonimmediate_operand")
1543 (match_operand:SWI 1 "<general_operand>")))])
1544
;; Mode iterator used by *cmp<mode>_ccz_1.  HImode is unconditional,
;; QImode requires TARGET_AVX512DQ, SImode requires TARGET_AVX512BW and
;; DImode requires TARGET_AVX512BW && TARGET_64BIT.
1545 (define_mode_iterator SWI1248_AVX512BWDQ_64
1546 [(QI "TARGET_AVX512DQ") HI
1547 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
1548
;; Compare against zero, matched when TARGET_AVX512F is enabled and
;; ix86_match_ccmode accepts CCZmode.  The three alternatives are output
;; as test of operand 0 with itself, cmp of operand 0 against operand 1,
;; and kortest of operand 0 with itself (type msklog, vex prefix).
1549 (define_insn "*cmp<mode>_ccz_1"
1550 [(set (reg FLAGS_REG)
1551 (compare (match_operand:SWI1248_AVX512BWDQ_64 0
1552 "nonimmediate_operand" "<r>,?m<r>,$k")
1553 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
1554 "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
1555 "@
1556 test{<imodesuffix>}\t%0, %0
1557 cmp{<imodesuffix>}\t{%1, %0|%0, %1}
1558 kortest<mskmodesuffix>\t%0, %0"
1559 [(set_attr "type" "test,icmp,msklog")
1560 (set_attr "length_immediate" "0,1,*")
1561 (set_attr "prefix" "*,*,vex")
1562 (set_attr "mode" "<MODE>")])
1563
;; Compare of an SWI operand against zero, matched when ix86_match_ccmode
;; accepts CCNOmode.  The first alternative is output as test of
;; operand 0 with itself, the second as cmp against operand 1.
1564 (define_insn "*cmp<mode>_ccno_1"
1565 [(set (reg FLAGS_REG)
1566 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
1567 (match_operand:SWI 1 "const0_operand")))]
1568 "ix86_match_ccmode (insn, CCNOmode)"
1569 "@
1570 test{<imodesuffix>}\t%0, %0
1571 cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1572 [(set_attr "type" "test,icmp")
1573 (set_attr "length_immediate" "0,1")
1574 (set_attr "mode" "<MODE>")])
1575
;; General SWI compare, matched when ix86_match_ccmode accepts CCmode.
;; Both alternatives are output as cmp.
1576 (define_insn "*cmp<mode>_1"
1577 [(set (reg FLAGS_REG)
1578 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1579 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>")))]
1580 "ix86_match_ccmode (insn, CCmode)"
1581 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1582 [(set_attr "type" "icmp")
1583 (set_attr "mode" "<MODE>")])
1584
;; Compare written as (operand 0 - operand 1) against zero, matched when
;; ix86_match_ccmode accepts CCGOCmode.  Output as a plain cmp of the two
;; operands.
1585 (define_insn "*cmp<mode>_minus_1"
1586 [(set (reg FLAGS_REG)
1587 (compare
1588 (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1589 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>"))
1590 (const_int 0)))]
1591 "ix86_match_ccmode (insn, CCGOCmode)"
1592 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1593 [(set_attr "type" "icmp")
1594 (set_attr "mode" "<MODE>")])
1595
;; Compare QImode operand 0 with the QImode subreg of an extract_operator
;; expression on register operand 1 whose two constant fields are both 8.
;; The extracted byte is printed with the %h operand modifier.
;; Matched when ix86_match_ccmode accepts CCmode.
1596 (define_insn "*cmpqi_ext<mode>_1"
1597 [(set (reg FLAGS_REG)
1598 (compare
1599 (match_operand:QI 0 "nonimmediate_operand" "QBn")
1600 (subreg:QI
1601 (match_operator:SWI248 2 "extract_operator"
1602 [(match_operand 1 "int248_register_operand" "Q")
1603 (const_int 8)
1604 (const_int 8)]) 0)))]
1605 "ix86_match_ccmode (insn, CCmode)"
1606 "cmp{b}\t{%h1, %0|%0, %h1}"
1607 [(set_attr "addr" "gpr8")
1608 (set_attr "type" "icmp")
1609 (set_attr "mode" "QI")])
1610
;; Compare the QImode subreg of an extract_operator expression on
;; register operand 0 (both constant fields are 8) against zero.
;; Matched when ix86_match_ccmode accepts CCNOmode and output as test of
;; the %h0 register with itself.
1611 (define_insn "*cmpqi_ext<mode>_2"
1612 [(set (reg FLAGS_REG)
1613 (compare
1614 (subreg:QI
1615 (match_operator:SWI248 2 "extract_operator"
1616 [(match_operand 0 "int248_register_operand" "Q")
1617 (const_int 8)
1618 (const_int 8)]) 0)
1619 (match_operand:QI 1 "const0_operand")))]
1620 "ix86_match_ccmode (insn, CCNOmode)"
1621 "test{b}\t%h0, %h0"
1622 [(set_attr "type" "test")
1623 (set_attr "length_immediate" "0")
1624 (set_attr "mode" "QI")])
1625
;; Named expander producing a CCmode compare of the QImode subreg of
;; (zero_extract:HI operand 0, 8, 8) with constant integer operand 1.
;; It has no condition and no preparation code.
1626 (define_expand "cmpqi_ext_3"
1627 [(set (reg:CC FLAGS_REG)
1628 (compare:CC
1629 (subreg:QI
1630 (zero_extract:HI
1631 (match_operand:HI 0 "register_operand")
1632 (const_int 8)
1633 (const_int 8)) 0)
1634 (match_operand:QI 1 "const_int_operand")))])
1635
;; Compare the QImode subreg of an extract_operator expression on
;; register operand 0 (both constant fields are 8) with general QImode
;; operand 1.  Matched when ix86_match_ccmode accepts CCmode and output
;; as cmp using the %h0 register.
1636 (define_insn "*cmpqi_ext<mode>_3"
1637 [(set (reg FLAGS_REG)
1638 (compare
1639 (subreg:QI
1640 (match_operator:SWI248 2 "extract_operator"
1641 [(match_operand 0 "int248_register_operand" "Q")
1642 (const_int 8)
1643 (const_int 8)]) 0)
1644 (match_operand:QI 1 "general_operand" "QnBn")))]
1645 "ix86_match_ccmode (insn, CCmode)"
1646 "cmp{b}\t{%1, %h0|%h0, %1}"
1647 [(set_attr "addr" "gpr8")
1648 (set_attr "type" "icmp")
1649 (set_attr "mode" "QI")])
1650
;; Compare two extracted bytes: both sides are QImode subregs of
;; extract_operator expressions (constant fields 8 and 8) on register
;; operands 0 and 1.  Matched when ix86_match_ccmode accepts CCmode and
;; output as cmp of the %h1 and %h0 registers.
1651 (define_insn "*cmpqi_ext<mode>_4"
1652 [(set (reg FLAGS_REG)
1653 (compare
1654 (subreg:QI
1655 (match_operator:SWI248 2 "extract_operator"
1656 [(match_operand 0 "int248_register_operand" "Q")
1657 (const_int 8)
1658 (const_int 8)]) 0)
1659 (subreg:QI
1660 (match_operator:SWI248 3 "extract_operator"
1661 [(match_operand 1 "int248_register_operand" "Q")
1662 (const_int 8)
1663 (const_int 8)]) 0)))]
1664 "ix86_match_ccmode (insn, CCmode)"
1665 "cmp{b}\t{%h1, %h0|%h0, %h1}"
1666 [(set_attr "type" "icmp")
1667 (set_attr "mode" "QI")])
1668
;; Double-word compare producing CCZmode flags, always split while
;; ix86_pre_reload_split () holds.  split_double_mode rewrites the two
;; double-word operands into halves held in operands 0..3: operands 0
;; and 2 come from the first value, operands 1 and 3 from the second.
;; Operand 4 is a new pseudo combining operands 0 and 1: a move when one
;; of them is const0_rtx, a one's complement when one of them is
;; constm1_rtx, an xor otherwise.  Operand 5 combines operands 2 and 3
;; the same way, except that when one of them is const0_rtx the other is
;; used directly without a copy.  Before an xor, a constant that is not
;; an x86_64_immediate_operand is forced into a register.  The emitted
;; split pattern iors operands 4 and 5 and sets the flags from the result.
;; When operands 1 and 3 are both constm1_rtx, operands 0 and 2 are
;; and-ed into operand 4, which is compared against -1 via gen_cmp_1,
;; and the split is DONE.
1669 (define_insn_and_split "*cmp<dwi>_doubleword"
1670 [(set (reg:CCZ FLAGS_REG)
1671 (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
1672 (match_operand:<DWI> 1 "general_operand")))]
1673 "ix86_pre_reload_split ()"
1674 "#"
1675 "&& 1"
1676 [(parallel [(set (reg:CCZ FLAGS_REG)
1677 (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
1678 (const_int 0)))
1679 (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
1680 {
1681 split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
1682
1683 operands[4] = gen_reg_rtx (<MODE>mode);
1684
1685 /* Special case comparisons against -1. */
1686 if (operands[1] == constm1_rtx && operands[3] == constm1_rtx)
1687 {
1688 emit_insn (gen_and<mode>3 (operands[4], operands[0], operands[2]));
1689 emit_insn (gen_cmp_1 (<MODE>mode, operands[4], constm1_rtx));
1690 DONE;
1691 }
1692
1693 if (operands[1] == const0_rtx)
1694 emit_move_insn (operands[4], operands[0]);
1695 else if (operands[0] == const0_rtx)
1696 emit_move_insn (operands[4], operands[1]);
1697 else if (operands[1] == constm1_rtx)
1698 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
1699 else if (operands[0] == constm1_rtx)
1700 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
1701 else
1702 {
1703 if (CONST_SCALAR_INT_P (operands[1])
1704 && !x86_64_immediate_operand (operands[1], <MODE>mode))
1705 operands[1] = force_reg (<MODE>mode, operands[1]);
1706 emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
1707 }
1708
1709 if (operands[3] == const0_rtx)
1710 operands[5] = operands[2];
1711 else if (operands[2] == const0_rtx)
1712 operands[5] = operands[3];
1713 else
1714 {
1715 operands[5] = gen_reg_rtx (<MODE>mode);
1716 if (operands[3] == constm1_rtx)
1717 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
1718 else if (operands[2] == constm1_rtx)
1719 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
1720 else
1721 {
1722 if (CONST_SCALAR_INT_P (operands[3])
1723 && !x86_64_immediate_operand (operands[3], <MODE>mode))
1724 operands[3] = force_reg (<MODE>mode, operands[3]);
1725 emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
1726 }
1727 }
1728 })
1729
1730 ;; These implement floating point compares.
1731 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
1732 ;; which would allow mix and match FP modes on the compares. Which is what
1733 ;; the old patterns did, but with many more of them.
1734
;; XFmode conditional branch, enabled for TARGET_80387.  Both compared
;; values must be nonmemory operands and the comparison (operand 0) must
;; satisfy ix86_fp_comparison_operator_xf.  Operand 3 is the target
;; label.  Expansion is delegated to ix86_expand_branch.
1735 (define_expand "cbranchxf4"
1736 [(set (reg:CC FLAGS_REG)
1737 (compare:CC (match_operand:XF 1 "nonmemory_operand")
1738 (match_operand:XF 2 "nonmemory_operand")))
1739 (set (pc) (if_then_else
1740 (match_operator 0 "ix86_fp_comparison_operator_xf"
1741 [(reg:CC FLAGS_REG)
1742 (const_int 0)])
1743 (label_ref (match_operand 3))
1744 (pc)))]
1745 "TARGET_80387"
1746 {
1747 ix86_expand_branch (GET_CODE (operands[0]),
1748 operands[1], operands[2], operands[3]);
1749 DONE;
1750 })
1751
;; XFmode store-flag expander, enabled for TARGET_80387.  QImode
;; operand 0 receives the result of comparison operator 1 (which must
;; satisfy ix86_fp_comparison_operator_xf) applied to nonmemory operands
;; 2 and 3.  Expansion is delegated to ix86_expand_setcc.
1752 (define_expand "cstorexf4"
1753 [(set (reg:CC FLAGS_REG)
1754 (compare:CC (match_operand:XF 2 "nonmemory_operand")
1755 (match_operand:XF 3 "nonmemory_operand")))
1756 (set (match_operand:QI 0 "register_operand")
1757 (match_operator 1 "ix86_fp_comparison_operator_xf"
1758 [(reg:CC FLAGS_REG)
1759 (const_int 0)]))]
1760 "TARGET_80387"
1761 {
1762 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1763 operands[2], operands[3]);
1764 DONE;
1765 })
1766
;; HFmode conditional branch, enabled for TARGET_AVX512FP16.  The
;; compared values must satisfy cmp_fp_expander_operand and the
;; comparison (operand 0) ix86_fp_comparison_operator.  Expansion is
;; delegated to ix86_expand_branch.
1767 (define_expand "cbranchhf4"
1768 [(set (reg:CC FLAGS_REG)
1769 (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
1770 (match_operand:HF 2 "cmp_fp_expander_operand")))
1771 (set (pc) (if_then_else
1772 (match_operator 0 "ix86_fp_comparison_operator"
1773 [(reg:CC FLAGS_REG)
1774 (const_int 0)])
1775 (label_ref (match_operand 3))
1776 (pc)))]
1777 "TARGET_AVX512FP16"
1778 {
1779 ix86_expand_branch (GET_CODE (operands[0]),
1780 operands[1], operands[2], operands[3]);
1781 DONE;
1782 })
1783
;; SFmode/DFmode (MODEF) conditional branch.  Enabled when TARGET_80387
;; is set, or when the mode satisfies SSE_FLOAT_MODE_P and TARGET_SSE_MATH
;; is set.  The comparison (operand 0) must satisfy
;; ix86_fp_comparison_operator.  Expansion is delegated to
;; ix86_expand_branch.
1784 (define_expand "cbranch<mode>4"
1785 [(set (reg:CC FLAGS_REG)
1786 (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
1787 (match_operand:MODEF 2 "cmp_fp_expander_operand")))
1788 (set (pc) (if_then_else
1789 (match_operator 0 "ix86_fp_comparison_operator"
1790 [(reg:CC FLAGS_REG)
1791 (const_int 0)])
1792 (label_ref (match_operand 3))
1793 (pc)))]
1794 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1795 {
1796 ix86_expand_branch (GET_CODE (operands[0]),
1797 operands[1], operands[2], operands[3]);
1798 DONE;
1799 })
1800
;; BFmode conditional branch.  Both compared values are first converted
;; with ix86_expand_fast_convert_bf_to_sf, then the compare and jump to
;; the label in operand 3 is emitted in SFmode by do_compare_rtx_and_jump
;; with an even branch probability.  Any comparison_operator is accepted
;; as operand 0.  The enabling condition is the SFmode one: TARGET_80387,
;; or SSE_FLOAT_MODE_P (SFmode) together with TARGET_SSE_MATH.
1801 (define_expand "cbranchbf4"
1802 [(set (reg:CC FLAGS_REG)
1803 (compare:CC (match_operand:BF 1 "cmp_fp_expander_operand")
1804 (match_operand:BF 2 "cmp_fp_expander_operand")))
1805 (set (pc) (if_then_else
1806 (match_operator 0 "comparison_operator"
1807 [(reg:CC FLAGS_REG)
1808 (const_int 0)])
1809 (label_ref (match_operand 3))
1810 (pc)))]
1811 "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
1812 {
1813 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
1814 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1815 do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
1816 SFmode, NULL_RTX, NULL,
1817 as_a <rtx_code_label *> (operands[3]),
1818 /* Unfortunately this isn't propagated. */
1819 profile_probability::even ());
1820 DONE;
1821 })
1822
;; HFmode store-flag expander, enabled for TARGET_AVX512FP16.  QImode
;; operand 0 receives the result of comparison operator 1 (which must
;; satisfy ix86_fp_comparison_operator) applied to operands 2 and 3.
;; Expansion is delegated to ix86_expand_setcc.
1823 (define_expand "cstorehf4"
1824 [(set (reg:CC FLAGS_REG)
1825 (compare:CC (match_operand:HF 2 "cmp_fp_expander_operand")
1826 (match_operand:HF 3 "cmp_fp_expander_operand")))
1827 (set (match_operand:QI 0 "register_operand")
1828 (match_operator 1 "ix86_fp_comparison_operator"
1829 [(reg:CC FLAGS_REG)
1830 (const_int 0)]))]
1831 "TARGET_AVX512FP16"
1832 {
1833 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1834 operands[2], operands[3]);
1835 DONE;
1836 })
1837
;; BFmode store-flag expander.  Operands 2 and 3 are converted with
;; ix86_expand_fast_convert_bf_to_sf and the flag is computed in SFmode
;; by emit_store_flag_force.  If the rtx it returns is not operand 0, the
;; result is copied into operand 0.  Any comparison_operator is accepted
;; as operand 1.  The enabling condition is the SFmode one: TARGET_80387,
;; or SSE_FLOAT_MODE_P (SFmode) together with TARGET_SSE_MATH.
1838 (define_expand "cstorebf4"
1839 [(set (reg:CC FLAGS_REG)
1840 (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
1841 (match_operand:BF 3 "cmp_fp_expander_operand")))
1842 (set (match_operand:QI 0 "register_operand")
1843 (match_operator 1 "comparison_operator"
1844 [(reg:CC FLAGS_REG)
1845 (const_int 0)]))]
1846 "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
1847 {
1848 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1849 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
1850 rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
1851 op1, op2, SFmode, 0, 1);
1852 if (!rtx_equal_p (res, operands[0]))
1853 emit_move_insn (operands[0], res);
1854 DONE;
1855 })
1856
;; SFmode/DFmode (MODEF) store-flag expander.  Enabled when TARGET_80387
;; is set, or when the mode satisfies SSE_FLOAT_MODE_P and TARGET_SSE_MATH
;; is set.  QImode operand 0 receives the result of comparison operator 1
;; applied to operands 2 and 3.  Expansion is delegated to
;; ix86_expand_setcc.
1857 (define_expand "cstore<mode>4"
1858 [(set (reg:CC FLAGS_REG)
1859 (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
1860 (match_operand:MODEF 3 "cmp_fp_expander_operand")))
1861 (set (match_operand:QI 0 "register_operand")
1862 (match_operator 1 "ix86_fp_comparison_operator"
1863 [(reg:CC FLAGS_REG)
1864 (const_int 0)]))]
1865 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1866 {
1867 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1868 operands[2], operands[3]);
1869 DONE;
1870 })
1871
;; Conditional branch on an already computed flags value: operand 1 must
;; be the flags register and operand 2 a zero constant.  Any
;; comparison_operator is accepted as operand 0 and operand 3 is the
;; target label.  Unconditionally enabled; expansion is delegated to
;; ix86_expand_branch.
1872 (define_expand "cbranchcc4"
1873 [(set (pc) (if_then_else
1874 (match_operator 0 "comparison_operator"
1875 [(match_operand 1 "flags_reg_operand")
1876 (match_operand 2 "const0_operand")])
1877 (label_ref (match_operand 3))
1878 (pc)))]
1879 ""
1880 {
1881 ix86_expand_branch (GET_CODE (operands[0]),
1882 operands[1], operands[2], operands[3]);
1883 DONE;
1884 })
1885
1886 ;; For conditional compare, the middle-end hook will convert
1887 ;; CCmode to sub-CCmode using SELECT_CC_MODE macro and try
1888 ;; to find cstore<submodes> in optab. Add ALL_CC to support
1889 ;; the cstore after ccmp sequence.
1890
;; Mode iterator over the flags modes for which a cstore<mode>4 expander
;; taking the flags register as input is provided.
1891 (define_mode_iterator ALL_CC
1892 [CCGC CCGOC CCNO CCGZ CCA CCC CCO CCP CCS CCZ CC])
1893
;; Store-flag expander taking an already computed flags value: operand 2
;; is the flags register in one of the ALL_CC modes and operand 3 is a
;; zero constant.  QImode operand 0 receives the result of comparison
;; operator 1.  Unconditionally enabled; expansion is delegated to
;; ix86_expand_setcc.
1894 (define_expand "cstore<mode>4"
1895 [(set (match_operand:QI 0 "register_operand")
1896 (match_operator 1 "comparison_operator"
1897 [(match_operand:ALL_CC 2 "flags_reg_operand")
1898 (match_operand 3 "const0_operand")]))]
1899 ""
1900 {
1901 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1902 operands[2], operands[3]);
1903 DONE;
1904 })
1905
1906 ;; FP compares, step 1:
1907 ;; Set the FP condition codes and move fpsr to ax.
1908
1909 ;; We may not use "#" to split and emit these
1910 ;; due to reg-stack pops killing fpsr.
1911
;; XFmode x87 compare.  Register operand 1 is compared (CCFPmode) with
;; operand 2, which is a register or zero (reg_or_0_operand).  The
;; UNSPEC_FNSTSW result is placed in HImode operand 0, constrained to
;; the a register.  Assembly text comes from output_fp_compare.
1912 (define_insn "*cmpxf_i387"
1913 [(set (match_operand:HI 0 "register_operand" "=a")
1914 (unspec:HI
1915 [(compare:CCFP
1916 (match_operand:XF 1 "register_operand" "f")
1917 (match_operand:XF 2 "reg_or_0_operand" "fC"))]
1918 UNSPEC_FNSTSW))]
1919 "TARGET_80387"
1920 "* return output_fp_compare (insn, operands, false, false);"
1921 [(set_attr "type" "multi")
1922 (set_attr "unit" "i387")
1923 (set_attr "mode" "XF")])
1924
;; SFmode/DFmode x87 compare.  Register operand 1 is compared (CCFPmode)
;; with operand 2, which may be a register, a memory operand or zero
;; (nonimm_or_0_operand).  The UNSPEC_FNSTSW result is placed in HImode
;; operand 0, constrained to the a register.  Assembly text comes from
;; output_fp_compare.
1925 (define_insn "*cmp<mode>_i387"
1926 [(set (match_operand:HI 0 "register_operand" "=a")
1927 (unspec:HI
1928 [(compare:CCFP
1929 (match_operand:MODEF 1 "register_operand" "f")
1930 (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
1931 UNSPEC_FNSTSW))]
1932 "TARGET_80387"
1933 "* return output_fp_compare (insn, operands, false, false);"
1934 [(set_attr "type" "multi")
1935 (set_attr "unit" "i387")
1936 (set_attr "mode" "<MODE>")])
1937
;; x87 compare of register operand 1 with an HImode/SImode (SWI24)
;; memory operand 2 converted by float to the X87MODEF mode.  Enabled
;; for TARGET_80387 when the matching TARGET_USE_<mode>MODE_FIOP flag is
;; set or the function is optimized for size.  The UNSPEC_FNSTSW result
;; is placed in HImode operand 0, constrained to the a register.  The
;; insn is marked with fp_int_src set to true.
1938 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
1939 [(set (match_operand:HI 0 "register_operand" "=a")
1940 (unspec:HI
1941 [(compare:CCFP
1942 (match_operand:X87MODEF 1 "register_operand" "f")
1943 (float:X87MODEF
1944 (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
1945 UNSPEC_FNSTSW))]
1946 "TARGET_80387
1947 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
1948 || optimize_function_for_size_p (cfun))"
1949 "* return output_fp_compare (insn, operands, false, false);"
1950 [(set_attr "type" "multi")
1951 (set_attr "unit" "i387")
1952 (set_attr "fp_int_src" "true")
1953 (set_attr "mode" "<SWI24:MODE>")])
1954
;; Variant of the fnstsw-based compare in which the CCFP compare is
;; additionally wrapped in UNSPEC_NOTRAP.  Both operands are x87
;; registers, and output_fp_compare is called with its last argument
;; true (it is false in the patterns without UNSPEC_NOTRAP).
1955 (define_insn "*cmpu<mode>_i387"
1956 [(set (match_operand:HI 0 "register_operand" "=a")
1957 (unspec:HI
1958 [(unspec:CCFP
1959 [(compare:CCFP
1960 (match_operand:X87MODEF 1 "register_operand" "f")
1961 (match_operand:X87MODEF 2 "register_operand" "f"))]
1962 UNSPEC_NOTRAP)]
1963 UNSPEC_FNSTSW))]
1964 "TARGET_80387"
1965 "* return output_fp_compare (insn, operands, false, true);"
1966 [(set_attr "type" "multi")
1967 (set_attr "unit" "i387")
1968 (set_attr "mode" "<MODE>")])
1969
1970 ;; FP compares, step 2:
1971 ;; Get ax into flags, general case.
1972
;; Set the CC flags register from HImode operand 0 (constraint "a")
;; via UNSPEC_SAHF.  Normally prints "sahf"; when HAVE_AS_IX86_SAHF is
;; not defined and TARGET_64BIT holds, the raw byte 0x9e is emitted with
;; ASM_BYTE instead of the mnemonic.
1973 (define_insn "x86_sahf_1"
1974 [(set (reg:CC FLAGS_REG)
1975 (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
1976 UNSPEC_SAHF))]
1977 "TARGET_SAHF"
1978 {
1979 #ifndef HAVE_AS_IX86_SAHF
1980 if (TARGET_64BIT)
1981 return ASM_BYTE "0x9e";
1982 else
1983 #endif
1984 return "sahf";
1985 }
1986 [(set_attr "length" "1")
1987 (set_attr "athlon_decode" "vector")
1988 (set_attr "amdfam10_decode" "direct")
1989 (set_attr "bdver1_decode" "direct")
1990 (set_attr "mode" "SI")])
1991
1992 ;; Pentium Pro can do both steps in one go.
1993 ;; (these instructions set flags directly)
1994
;; Attributes tied to "unord_subst".  <unord> takes the values "" and
;; "u", <unordered> the values "false" and "true"; the second value of
;; each pair is the one used when the substitution is applied.
1995 (define_subst_attr "unord" "unord_subst" "" "u")
1996 (define_subst_attr "unordered" "unord_subst" "false" "true")
1997
;; Substitution that matches (set CCFP-op0 CCFP-op1) and rewrites the
;; source as (unspec:CCFP [op1] UNSPEC_NOTRAP), leaving the
;; destination unchanged.
1998 (define_subst "unord_subst"
1999 [(set (match_operand:CCFP 0)
2000 (match_operand:CCFP 1))]
2001 ""
2002 [(set (match_dup 0)
2003 (unspec:CCFP
2004 [(match_dup 1)]
2005 UNSPEC_NOTRAP))])
2006
;; XFmode compare of two x87 registers that sets FLAGS_REG (CCFP)
;; directly.  Requires TARGET_80387 && TARGET_CMOVE.  output_fp_compare
;; receives true as its third argument and <unordered> as its fourth.
2007 (define_insn "*cmpi<unord>xf_i387"
2008 [(set (reg:CCFP FLAGS_REG)
2009 (compare:CCFP
2010 (match_operand:XF 0 "register_operand" "f")
2011 (match_operand:XF 1 "register_operand" "f")))]
2012 "TARGET_80387 && TARGET_CMOVE"
2013 "* return output_fp_compare (insn, operands, true, <unordered>);"
2014 [(set_attr "type" "fcmp")
2015 (set_attr "mode" "XF")
2016 (set_attr "athlon_decode" "vector")
2017 (set_attr "amdfam10_decode" "direct")
2018 (set_attr "bdver1_decode" "double")
2019 (set_attr "znver1_decode" "double")])
2020
;; MODEF compare wrapped in UNSPEC_OPTCOMX that sets FLAGS_REG (CCFP).
;; Operand 0 is a "v" register, operand 1 a "v" register or memory.
;; Only available under TARGET_AVX10_2_256; prints the
;; <unord>comx<ssemodesuffix> mnemonic with an EVEX prefix attribute.
2021 (define_insn "*cmpx<unord><MODEF:mode>"
2022 [(set (reg:CCFP FLAGS_REG)
2023 (unspec:CCFP [
2024 (compare:CCFP
2025 (match_operand:MODEF 0 "register_operand" "v")
2026 (match_operand:MODEF 1 "nonimmediate_operand" "vm"))]
2027 UNSPEC_OPTCOMX))]
2028 "TARGET_AVX10_2_256"
2029 "%v<unord>comx<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
2030 [(set_attr "type" "ssecomi")
2031 (set_attr "prefix" "evex")
2032 (set_attr "mode" "<MODEF:MODE>")])
2033
;; HFmode counterpart of the UNSPEC_OPTCOMX compare; prints
;; v<unord>comxsh and is likewise gated on TARGET_AVX10_2_256.
2034 (define_insn "*cmpx<unord>hf"
2035 [(set (reg:CCFP FLAGS_REG)
2036 (unspec:CCFP [
2037 (compare:CCFP
2038 (match_operand:HF 0 "register_operand" "v")
2039 (match_operand:HF 1 "nonimmediate_operand" "vm"))]
2040 UNSPEC_OPTCOMX))]
2041 "TARGET_AVX10_2_256"
2042 "v<unord>comxsh\t{%1, %0|%0, %1}"
2043 [(set_attr "type" "ssecomi")
2044 (set_attr "prefix" "evex")
2045 (set_attr "mode" "HF")])
2046
;; MODEF compare setting FLAGS_REG (CCFP), with two alternatives:
;;   0: x87 registers ("f","f"), printed by output_fp_compare;
;;   1: SSE register vs. register/memory ("v","vm"), printed as
;;      <unord>comi<ssemodesuffix>.
;; The "enabled" attribute selects between them: when the mode is an
;; SSE float mode and TARGET_SSE_MATH holds, alternative 1 is always
;; enabled and alternative 0 only with TARGET_MIX_SSE_I387; otherwise
;; only alternative 0 is enabled.
2047 (define_insn "*cmpi<unord><MODEF:mode>"
2048 [(set (reg:CCFP FLAGS_REG)
2049 (compare:CCFP
2050 (match_operand:MODEF 0 "register_operand" "f,v")
2051 (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
2052 "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
2053 || (TARGET_80387 && TARGET_CMOVE)"
2054 "@
2055 * return output_fp_compare (insn, operands, true, <unordered>);
2056 %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
2057 [(set_attr "type" "fcmp,ssecomi")
2058 (set_attr "prefix" "orig,maybe_vex")
2059 (set_attr "mode" "<MODEF:MODE>")
2060 (set_attr "prefix_rep" "*,0")
2061 (set (attr "prefix_data16")
2062 (cond [(eq_attr "alternative" "0")
2063 (const_string "*")
2064 (eq_attr "mode" "DF")
2065 (const_string "1")
2066 ]
2067 (const_string "0")))
2068 (set_attr "athlon_decode" "vector")
2069 (set_attr "amdfam10_decode" "direct")
2070 (set_attr "bdver1_decode" "double")
2071 (set_attr "znver1_decode" "double")
2072 (set (attr "enabled")
2073 (if_then_else
2074 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
2075 (if_then_else
2076 (eq_attr "alternative" "0")
2077 (symbol_ref "TARGET_MIX_SSE_I387")
2078 (symbol_ref "true"))
2079 (if_then_else
2080 (eq_attr "alternative" "0")
2081 (symbol_ref "true")
2082 (symbol_ref "false"))))])
2083
;; HFmode compare setting FLAGS_REG (CCFP); prints v<unord>comish and
;; is available only under TARGET_AVX512FP16.
2084 (define_insn "*cmpi<unord>hf"
2085 [(set (reg:CCFP FLAGS_REG)
2086 (compare:CCFP
2087 (match_operand:HF 0 "register_operand" "v")
2088 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
2089 "TARGET_AVX512FP16"
2090 "v<unord>comish\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecomi")
2092 (set_attr "prefix" "evex")
2093 (set_attr "mode" "HF")])
2094
2095 ;; Set carry flag.
;; Modelled as FLAGS_REG (CCC) := UNSPEC_STC of constant 0; takes no
;; operands and prints "stc".  Unconditionally available.
2096 (define_insn "x86_stc"
2097 [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
2098 ""
2099 "stc"
2100 [(set_attr "length" "1")
2101 (set_attr "length_immediate" "0")
2102 (set_attr "modrm" "0")])
2103
2104 ;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al.
;; Applies only when TARGET_SLOW_STC holds, the insn is not optimized
;; for size, and a QImode scratch register (operand 0) is available.
;; The replacement loads 1 into the scratch and then adds -1 to it in a
;; parallel that also sets FLAGS_REG (CCC) from the addition.
2105 (define_peephole2
2106 [(match_scratch:QI 0 "r")
2107 (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
2108 "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
2109 [(set (match_dup 0) (const_int 1))
2110 (parallel
2111 [(set (reg:CCC FLAGS_REG)
2112 (compare:CCC (plus:QI (match_dup 0) (const_int -1))
2113 (match_dup 0)))
2114 (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
2115
2116 ;; Complement carry flag.
;; Prints "cmc".  The new CCC flags value is expressed as a compare of
;; (neg (ltu flags 0)) against (geu flags 0), both read from the
;; incoming FLAGS_REG; the "use_carry" attribute is set to "1".
2117 (define_insn "*x86_cmc"
2118 [(set (reg:CCC FLAGS_REG)
2119 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
2120 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
2121 ""
2122 "cmc"
2123 [(set_attr "length" "1")
2124 (set_attr "length_immediate" "0")
2125 (set_attr "use_carry" "1")
2126 (set_attr "modrm" "0")])
2127
2128 ;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al.
;; Same gating as the stc peephole (TARGET_SLOW_STC, not optimizing for
;; size, QImode scratch available).  The scratch is first set to
;; (ne flags 0) and then has -1 added in a parallel that sets
;; FLAGS_REG (CCC) from the addition.
2129 (define_peephole2
2130 [(match_scratch:QI 0 "r")
2131 (set (reg:CCC FLAGS_REG)
2132 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
2133 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
2134 "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
2135 [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
2136 (parallel
2137 [(set (reg:CCC FLAGS_REG)
2138 (compare:CCC (plus:QI (match_dup 0) (const_int -1))
2139 (match_dup 0)))
2140 (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
2141 \f
2142 ;; Push/pop instructions.
2143
;; Push of a V1TImode "v" register, available when TARGET_64BIT &&
;; TARGET_STV.  It always prints "#" and is split after reload into an
;; explicit stack-pointer adjustment by -PUSH_ROUNDING (size of V1TI)
;; followed by a plain store to the address in the stack pointer.
2144 (define_insn_and_split "*pushv1ti2"
2145 [(set (match_operand:V1TI 0 "push_operand" "=<")
2146 (match_operand:V1TI 1 "register_operand" "v"))]
2147 "TARGET_64BIT && TARGET_STV"
2148 "#"
2149 "&& reload_completed"
2150 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2151 (set (match_dup 0) (match_dup 1))]
2152 {
2153 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
2154 /* Preserve memory attributes. */
2155 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2156 }
2157 [(set_attr "type" "multi")
2158 (set_attr "mode" "TI")])
2159
;; Push of a DWI-mode value.  The output template is "#" for both
;; alternatives, so this insn is never printed directly and relies on
;; a later split.
2160 (define_insn "*push<mode>2"
2161 [(set (match_operand:DWI 0 "push_operand" "=<,<")
2162 (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]
2163 ""
2164 "#"
2165 [(set_attr "type" "multi")
2166 (set_attr "mode" "<MODE>")])
2167
;; After reload, split a DWI-mode push whose source satisfies
;; general_gr_operand by calling ix86_split_long_move; the
;; (const_int 0) replacement template is a placeholder because the
;; preparation code ends with DONE.
2168 (define_split
2169 [(set (match_operand:DWI 0 "push_operand")
2170 (match_operand:DWI 1 "general_gr_operand"))]
2171 "reload_completed"
2172 [(const_int 0)]
2173 "ix86_split_long_move (operands); DONE;")
2174
;; DImode push for TARGET_64BIT.  Alternative 0 prints push{q}
;; directly; alternatives 1 ("*v" source) and 2 ("n" source) print "#"
;; and have type "multi", so they must be rewritten by a split or
;; peephole.
2175 (define_insn "*pushdi2_rex64"
2176 [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
2177 (match_operand:DI 1 "general_no_elim_operand" "re*m,*v,n"))]
2178 "TARGET_64BIT"
2179 "@
2180 push{q}\t%1
2181 #
2182 #"
2183 [(set_attr "type" "push,multi,multi")
2184 (set_attr "mode" "DI")])
2185
2186 ;; Convert impossible pushes of immediate to existing instructions.
2187 ;; First try to get scratch register and go through it. In case this
2188 ;; fails, push sign extended lower part first and then overwrite
2189 ;; upper part by 32-bit move.
2190
;; Scratch-register variant: for a 64-bit push of an immediate that is
;; neither symbolic nor an x86_64_immediate_operand, load the constant
;; into DImode scratch operand 2 and push the scratch instead.
2191 (define_peephole2
2192 [(match_scratch:DI 2 "r")
2193 (set (match_operand:DI 0 "push_operand")
2194 (match_operand:DI 1 "immediate_operand"))]
2195 "TARGET_64BIT
2196 && !symbolic_operand (operands[1], DImode)
2197 && !x86_64_immediate_operand (operands[1], DImode)"
2198 [(set (match_dup 2) (match_dup 1))
2199 (set (match_dup 0) (match_dup 2))])
2200
;; Fallback once epilogue_completed holds: split_double_mode breaks the
;; constant into two parts (operands 2 and 3).  The first part is pushed
;; as a DImode value (operand 1 becomes its DImode lowpart); operand 2
;; is then redirected to an SImode memory reference at stack pointer + 4,
;; which the second emitted set fills with the other part (operand 3).
2201 (define_split
2202 [(set (match_operand:DI 0 "push_operand")
2203 (match_operand:DI 1 "immediate_operand"))]
2204 "TARGET_64BIT && epilogue_completed
2205 && !symbolic_operand (operands[1], DImode)
2206 && !x86_64_immediate_operand (operands[1], DImode)"
2207 [(set (match_dup 0) (match_dup 1))
2208 (set (match_dup 2) (match_dup 3))]
2209 {
2210 split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
2211
2212 operands[1] = gen_lowpart (DImode, operands[2]);
2213 operands[2] = gen_rtx_MEM (SImode,
2214 plus_constant (Pmode, stack_pointer_rtx, 4));
2215 })
2216
2217 ;; For TARGET_64BIT we always round up to 8 bytes.
;; SImode push on TARGET_64BIT.  Alternative 0 prints push{q} with the
;; source printed through the %q modifier; alternative 1 ("*v" source)
;; prints "#".  The "mode" attribute is DI.
2218 (define_insn "*pushsi2_rex64"
2219 [(set (match_operand:SI 0 "push_operand" "=X,X")
2220 (match_operand:SI 1 "nonmemory_no_elim_operand" "re,*v"))]
2221 "TARGET_64BIT"
2222 "@
2223 push{q}\t%q1
2224 #"
2225 [(set_attr "type" "push,multi")
2226 (set_attr "mode" "DI")])
2227
;; SImode push for !TARGET_64BIT.  Alternative 0 prints push{l};
;; alternative 1 ("*v" source) prints "#" and has type "multi".
2228 (define_insn "*pushsi2"
2229 [(set (match_operand:SI 0 "push_operand" "=<,<")
2230 (match_operand:SI 1 "general_no_elim_operand" "ri*m,*v"))]
2231 "!TARGET_64BIT"
2232 "@
2233 push{l}\t%1
2234 #"
2235 [(set_attr "type" "push,multi")
2236 (set_attr "mode" "SI")])
2237
;; After reload (and with TARGET_SSE), split a push of an SSE register
;; in any SWI48DWI mode into an explicit stack-pointer adjustment by
;; -PUSH_ROUNDING (mode size) and a store to the address in the stack
;; pointer.
2238 (define_split
2239 [(set (match_operand:SWI48DWI 0 "push_operand")
2240 (match_operand:SWI48DWI 1 "sse_reg_operand"))]
2241 "TARGET_SSE && reload_completed"
2242 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2243 (set (match_dup 0) (match_dup 1))]
2244 {
2245 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
2246 /* Preserve memory attributes. */
2247 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2248 })
2249
2250 ;; emit_push_insn when it calls move_by_pieces requires an insn to
2251 ;; "push a byte/word". But actually we use push{l,q}, which has
2252 ;; the effect of rounding the amount pushed up to a word.
2253
;; SWI12-mode push.  Prints push{q} with the %q operand modifier on
;; TARGET_64BIT, and push{l} with the %k modifier otherwise; the "mode"
;; attribute follows the same choice (DI or SI).
2254 (define_insn "*push<mode>2"
2255 [(set (match_operand:SWI12 0 "push_operand" "=X")
2256 (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
2257 ""
2258 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";"
2259 [(set_attr "type" "push")
2260 (set (attr "mode")
2261 (if_then_else (match_test "TARGET_64BIT")
2262 (const_string "DI")
2263 (const_string "SI")))])
2264
;; Word-mode (W iterator) push whose pattern also carries
;; (clobber (mem:BLK (scratch))).
;; NOTE(review): the clobber presumably acts as a memory barrier for
;; prologue pushes -- confirm against the prologue expander.
2265 (define_insn "*push<mode>2_prologue"
2266 [(set (match_operand:W 0 "push_operand" "=<")
2267 (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
2268 (clobber (mem:BLK (scratch)))]
2269 ""
2270 "push{<imodesuffix>}\t%1"
2271 [(set_attr "type" "push")
2272 (set_attr "mode" "<MODE>")])
2273
;; Word-mode (W iterator) pop from a pop_operand (constraint ">") into
;; a register or memory destination; prints pop{<imodesuffix>}.
2274 (define_insn "*pop<mode>1"
2275 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2276 (match_operand:W 1 "pop_operand" ">"))]
2277 ""
2278 "pop{<imodesuffix>}\t%0"
2279 [(set_attr "type" "pop")
2280 (set_attr "mode" "<MODE>")])
2281
;; Same as *pop<mode>1 except that the pattern additionally carries
;; (clobber (mem:BLK (scratch))).
;; NOTE(review): presumably used for epilogue pops as a memory
;; barrier -- confirm against the epilogue expander.
2282 (define_insn "*pop<mode>1_epilogue"
2283 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2284 (match_operand:W 1 "pop_operand" ">"))
2285 (clobber (mem:BLK (scratch)))]
2286 ""
2287 "pop{<imodesuffix>}\t%0"
2288 [(set_attr "type" "pop")
2289 (set_attr "mode" "<MODE>")])
2290
;; Push the flags register: word-mode push destination set to
;; UNSPEC_PUSHFL of operand 1.  Operand 1 is matched without a mode,
;; so the insn condition checks that its mode belongs to class
;; MODE_CC.  Prints pushf{<imodesuffix>}.
2291 (define_insn "@pushfl<mode>2"
2292 [(set (match_operand:W 0 "push_operand" "=<")
2293 (unspec:W [(match_operand 1 "flags_reg_operand")]
2294 UNSPEC_PUSHFL))]
2295 "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
2296 "pushf{<imodesuffix>}"
2297 [(set_attr "type" "push")
2298 (set_attr "mode" "<MODE>")])
2299
;; Pop into the flags register: CC-mode flags operand 0 is set to
;; UNSPEC_POPFL of a word-mode pop_operand.  Prints
;; popf{<imodesuffix>}.
2300 (define_insn "@popfl<mode>1"
2301 [(set (match_operand:CC 0 "flags_reg_operand")
2302 (unspec:CC [(match_operand:W 1 "pop_operand" ">")]
2303 UNSPEC_POPFL))]
2304 ""
2305 "popf{<imodesuffix>}"
2306 [(set_attr "type" "pop")
2307 (set_attr "mode" "<MODE>")])
2308
2309 \f
2310 ;; Reload patterns to support multi-word load/store
2311 ;; with non-offsettable address.
;; Store register operand 1 to memory operand 0 using DImode scratch
;; operand 2 (early-clobber, "=&r"): the memory address is first copied
;; into the scratch, the MEM is rewritten to use the scratch as its
;; address (replace_equiv_address_nv), and the store is then emitted.
;; TARGET_64BIT only.
2312 (define_expand "reload_noff_store"
2313 [(parallel [(match_operand 0 "memory_operand" "=m")
2314 (match_operand 1 "register_operand" "r")
2315 (match_operand:DI 2 "register_operand" "=&r")])]
2316 "TARGET_64BIT"
2317 {
2318 rtx mem = operands[0];
2319 rtx addr = XEXP (mem, 0);
2320
2321 emit_move_insn (operands[2], addr);
2322 mem = replace_equiv_address_nv (mem, operands[2]);
2323
2324 emit_insn (gen_rtx_SET (mem, operands[1]));
2325 DONE;
2326 })
2327
;; Load counterpart of reload_noff_store: the address of memory
;; operand 1 is copied into DImode scratch operand 2, the MEM is
;; rewritten to use that register as its address, and the load into
;; register operand 0 is emitted.  Unlike the store variant, the
;; scratch constraint here is "=r" without the early-clobber marker.
2328 (define_expand "reload_noff_load"
2329 [(parallel [(match_operand 0 "register_operand" "=r")
2330 (match_operand 1 "memory_operand" "m")
2331 (match_operand:DI 2 "register_operand" "=r")])]
2332 "TARGET_64BIT"
2333 {
2334 rtx mem = operands[1];
2335 rtx addr = XEXP (mem, 0);
2336
2337 emit_move_insn (operands[2], addr);
2338 mem = replace_equiv_address_nv (mem, operands[2]);
2339
2340 emit_insn (gen_rtx_SET (operands[0], mem));
2341 DONE;
2342 })
2343
2344 ;; Move instructions.
2345
;; XImode move expander; requires TARGET_AVX512F && TARGET_EVEX512 and
;; hands the whole expansion to ix86_expand_vector_move.
2346 (define_expand "movxi"
2347 [(set (match_operand:XI 0 "nonimmediate_operand")
2348 (match_operand:XI 1 "general_operand"))]
2349 "TARGET_AVX512F && TARGET_EVEX512"
2350 "ix86_expand_vector_move (XImode, operands); DONE;")
2351
;; OImode move expander; requires TARGET_AVX and hands the whole
;; expansion to ix86_expand_vector_move.
2352 (define_expand "movoi"
2353 [(set (match_operand:OI 0 "nonimmediate_operand")
2354 (match_operand:OI 1 "general_operand"))]
2355 "TARGET_AVX"
2356 "ix86_expand_vector_move (OImode, operands); DONE;")
2357
;; TImode move expander, available with TARGET_64BIT or TARGET_SSE.
;; On 64-bit targets it goes through ix86_expand_move; otherwise it
;; goes through ix86_expand_vector_move.
2358 (define_expand "movti"
2359 [(set (match_operand:TI 0 "nonimmediate_operand")
2360 (match_operand:TI 1 "general_operand"))]
2361 "TARGET_64BIT || TARGET_SSE"
2362 {
2363 if (TARGET_64BIT)
2364 ix86_expand_move (TImode, operands);
2365 else
2366 ix86_expand_vector_move (TImode, operands);
2367 DONE;
2368 })
2369
2370 ;; This expands to what emit_move_complex would generate if we didn't
2371 ;; have a movti pattern. Having this avoids problems with reload on
2372 ;; 32-bit targets when SSE is present, but doesn't seem to be harmful
2373 ;; to have around all the time.
;; CDImode move expander.  A push destination is handled with
;; emit_move_complex_push; every other destination is handled with
;; emit_move_complex_parts.
2374 (define_expand "movcdi"
2375 [(set (match_operand:CDI 0 "nonimmediate_operand")
2376 (match_operand:CDI 1 "general_operand"))]
2377 ""
2378 {
2379 if (push_operand (operands[0], CDImode))
2380 emit_move_complex_push (CDImode, operands[0], operands[1]);
2381 else
2382 emit_move_complex_parts (operands[0], operands[1]);
2383 DONE;
2384 })
2385
;; Move expander for every mode in the SWI1248x iterator; the whole
;; expansion is performed by ix86_expand_move.
2386 (define_expand "mov<mode>"
2387 [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
2388 (match_operand:SWI1248x 1 "general_operand"))]
2389 ""
2390 "ix86_expand_move (<MODE>mode, operands); DONE;")
2391
;; Post-reload zeroing of an SWI48 register, with a FLAGS_REG clobber.
;; The template always prints the 32-bit form (xor{l} on %k0) and the
;; "mode" attribute is SI, regardless of the iterator mode.
2392 (define_insn "*mov<mode>_xor"
2393 [(set (match_operand:SWI48 0 "register_operand" "=r")
2394 (match_operand:SWI48 1 "const0_operand"))
2395 (clobber (reg:CC FLAGS_REG))]
2396 "reload_completed"
2397 "xor{l}\t%k0, %k0"
2398 [(set_attr "type" "alu1")
2399 (set_attr "mode" "SI")
2400 (set_attr "length_immediate" "0")])
2401
;; Post-reload store of constant zero to an SWI248 memory operand,
;; printed as an "and" with the zero immediate; the pattern clobbers
;; FLAGS_REG and declares a one-byte immediate length.
2402 (define_insn "*mov<mode>_and"
2403 [(set (match_operand:SWI248 0 "memory_operand" "=m")
2404 (match_operand:SWI248 1 "const0_operand"))
2405 (clobber (reg:CC FLAGS_REG))]
2406 "reload_completed"
2407 "and{<imodesuffix>}\t{%1, %0|%0, %1}"
2408 [(set_attr "type" "alu1")
2409 (set_attr "mode" "<MODE>")
2410 (set_attr "length_immediate" "1")])
2411
;; Post-reload move of constant -1 (constm1_operand) to an SWI248
;; register or memory destination, printed as an "or" with the
;; immediate; clobbers FLAGS_REG and declares a one-byte immediate
;; length.
2412 (define_insn "*mov<mode>_or"
2413 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
2414 (match_operand:SWI248 1 "constm1_operand"))
2415 (clobber (reg:CC FLAGS_REG))]
2416 "reload_completed"
2417 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
2418 [(set_attr "type" "alu1")
2419 (set_attr "mode" "<MODE>")
2420 (set_attr "length_immediate" "1")])
2421
;; XImode move insn.  At least one operand must be a register.
;; Alternatives 0-1 (type sselog1) load an SSE constant via
;; standard_sse_constant_opcode; alternatives 2-3 (type ssemov) are
;; register/memory moves printed by ix86_output_ssemov.
2422 (define_insn "*movxi_internal_avx512f"
2423 [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
2424 (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2425 "TARGET_AVX512F && TARGET_EVEX512
2426 && (register_operand (operands[0], XImode)
2427 || register_operand (operands[1], XImode))"
2428 {
2429 switch (get_attr_type (insn))
2430 {
2431 case TYPE_SSELOG1:
2432 return standard_sse_constant_opcode (insn, operands);
2433
2434 case TYPE_SSEMOV:
2435 return ix86_output_ssemov (insn, operands);
2436
2437 default:
2438 gcc_unreachable ();
2439 }
2440 }
2441 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2442 (set_attr "prefix" "evex")
2443 (set_attr "mode" "XI")])
2444
;; OImode move insn under TARGET_AVX, structured like the XImode one:
;; alternatives 0-1 are constant loads (sselog1) and alternatives 2-3
;; are register/memory moves (ssemov).  Alternative 1 ("BC" source) is
;; additionally restricted to isa "avx2".
2445 (define_insn "*movoi_internal_avx"
2446 [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,v ,m")
2447 (match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2448 "TARGET_AVX
2449 && (register_operand (operands[0], OImode)
2450 || register_operand (operands[1], OImode))"
2451 {
2452 switch (get_attr_type (insn))
2453 {
2454 case TYPE_SSELOG1:
2455 return standard_sse_constant_opcode (insn, operands);
2456
2457 case TYPE_SSEMOV:
2458 return ix86_output_ssemov (insn, operands);
2459
2460 default:
2461 gcc_unreachable ();
2462 }
2463 }
2464 [(set_attr "isa" "*,avx2,*,*")
2465 (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2466 (set_attr "prefix" "vex")
2467 (set_attr "mode" "OI")])
2468
;; TImode move insn.  The condition accepts either a 64-bit target
;; (excluding memory-to-memory moves) or an SSE target where the source
;; is a nonimmediate/SSE-constant operand and one side is a register.
;; Output is selected by the "type" attribute:
;;   multi   (alternatives 0,1,6,7; isa x64)  -> "#", split later
;;   sselog1 (alternatives 2,3)               -> SSE constant load
;;   ssemov  (remaining alternatives)         -> ix86_output_ssemov
;; Alternatives 6 and 7 are preferred for speed only when the
;; corresponding TARGET_INTER_UNIT_MOVES_{FROM,TO}_VEC tuning is on.
2469 (define_insn "*movti_internal"
2470 [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?jc,?Yd")
2471 (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Yd,jc"))]
2472 "(TARGET_64BIT
2473 && !(MEM_P (operands[0]) && MEM_P (operands[1])))
2474 || (TARGET_SSE
2475 && nonimmediate_or_sse_const_operand (operands[1], TImode)
2476 && (register_operand (operands[0], TImode)
2477 || register_operand (operands[1], TImode)))"
2478 {
2479 switch (get_attr_type (insn))
2480 {
2481 case TYPE_MULTI:
2482 return "#";
2483
2484 case TYPE_SSELOG1:
2485 return standard_sse_constant_opcode (insn, operands);
2486
2487 case TYPE_SSEMOV:
2488 return ix86_output_ssemov (insn, operands);
2489
2490 default:
2491 gcc_unreachable ();
2492 }
2493 }
2494 [(set (attr "isa")
2495 (cond [(eq_attr "alternative" "0,1,6,7")
2496 (const_string "x64")
2497 (eq_attr "alternative" "3")
2498 (const_string "sse2")
2499 ]
2500 (const_string "*")))
2501 (set (attr "type")
2502 (cond [(eq_attr "alternative" "0,1,6,7")
2503 (const_string "multi")
2504 (eq_attr "alternative" "2,3")
2505 (const_string "sselog1")
2506 ]
2507 (const_string "ssemov")))
2508 (set (attr "prefix")
2509 (if_then_else (eq_attr "type" "sselog1,ssemov")
2510 (const_string "maybe_vex")
2511 (const_string "orig")))
2512 (set (attr "mode")
2513 (cond [(eq_attr "alternative" "0,1")
2514 (const_string "DI")
2515 (match_test "TARGET_AVX")
2516 (const_string "TI")
2517 (ior (not (match_test "TARGET_SSE2"))
2518 (match_test "optimize_function_for_size_p (cfun)"))
2519 (const_string "V4SF")
2520 (and (eq_attr "alternative" "5")
2521 (match_test "TARGET_SSE_TYPELESS_STORES"))
2522 (const_string "V4SF")
2523 ]
2524 (const_string "TI")))
2525 (set (attr "preferred_for_speed")
2526 (cond [(eq_attr "alternative" "6")
2527 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2528 (eq_attr "alternative" "7")
2529 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2530 ]
2531 (symbol_ref "true")))])
2532
;; After reload on TARGET_64BIT && TARGET_SSE4_1, split a TImode move
;; from general registers into an SSE register.  The preparation code
;; first emits a move of the low DImode halves; the replacement pattern
;; then merges the high DImode half of the source (operand 3,
;; duplicated) into the V2DImode view of the destination (operand 2)
;; with vec_merge mask (const_int 2).
2533 (define_split
2534 [(set (match_operand:TI 0 "sse_reg_operand")
2535 (match_operand:TI 1 "general_reg_operand"))]
2536 "TARGET_64BIT && TARGET_SSE4_1
2537 && reload_completed"
2538 [(set (match_dup 2)
2539 (vec_merge:V2DI
2540 (vec_duplicate:V2DI (match_dup 3))
2541 (match_dup 2)
2542 (const_int 2)))]
2543 {
2544 operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
2545 operands[3] = gen_highpart (DImode, operands[1]);
2546
2547 emit_move_insn (gen_lowpart (DImode, operands[0]),
2548 gen_lowpart (DImode, operands[1]));
2549 })
2550
;; DImode move insn with 28 alternatives (numbered 0-27).  Memory-to-
;; memory moves are rejected and ix86_hardreg_mov_ok must accept the
;; operand pair.  The output code switches on the "type" attribute,
;; which is derived from the alternative number further below:
;;   multi   (0,1,17,18)            -> "#"
;;   mmx     (6)                    -> pxor
;;   mmxmov  (7-11)                 -> movq, or movd for assemblers
;;                                     lacking HAVE_AS_IX86_INTERUNIT_MOVQ
;;                                     when a general register is involved
;;   sselog1 (12)                   -> standard_sse_constant_opcode
;;   ssemov  (13-16,19,20)          -> ix86_output_ssemov
;;   ssecvt  (21,22)                -> movq2dq / movdq2q
;;   mskmov  (23-26)                -> kmovq
;;   msklog  (27)                   -> kxorq (0) or kxnorq (-1)
;;   lea     (register dest, pic_32bit_operand source) -> lea{q}
;;   imov    (everything else)      -> mov{l} when the mode attribute
;;                                     is SI, movabs{q} for alternative 4,
;;                                     lea{q} if ix86_use_lea_for_mov says
;;                                     so, otherwise mov{q}
;; Alternatives 15 and 16 are mutually exclusive: 16 is enabled and 15
;; disabled exactly when TARGET_STV && TARGET_SSE2.
2551 (define_insn "*movdi_internal"
2552 [(set (match_operand:DI 0 "nonimmediate_operand"
2553 "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,m,?jc,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
2554 (match_operand:DI 1 "general_operand"
2555 "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,jc ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
2556 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2557 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2558 {
2559 switch (get_attr_type (insn))
2560 {
2561 case TYPE_MSKMOV:
2562 return "kmovq\t{%1, %0|%0, %1}";
2563
2564 case TYPE_MSKLOG:
2565 if (operands[1] == const0_rtx)
2566 return "kxorq\t%0, %0, %0";
2567 else if (operands[1] == constm1_rtx)
2568 return "kxnorq\t%0, %0, %0";
2569 gcc_unreachable ();
2570
2571 case TYPE_MULTI:
2572 return "#";
2573
2574 case TYPE_MMX:
2575 return "pxor\t%0, %0";
2576
2577 case TYPE_MMXMOV:
2578 /* Handle broken assemblers that require movd instead of movq. */
2579 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
2580 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
2581 return "movd\t{%1, %0|%0, %1}";
2582 return "movq\t{%1, %0|%0, %1}";
2583
2584 case TYPE_SSELOG1:
2585 return standard_sse_constant_opcode (insn, operands);
2586
2587 case TYPE_SSEMOV:
2588 return ix86_output_ssemov (insn, operands);
2589
2590 case TYPE_SSECVT:
2591 if (SSE_REG_P (operands[0]))
2592 return "movq2dq\t{%1, %0|%0, %1}";
2593 else
2594 return "movdq2q\t{%1, %0|%0, %1}";
2595
2596 case TYPE_LEA:
2597 return "lea{q}\t{%E1, %0|%0, %E1}";
2598
2599 case TYPE_IMOV:
2600 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2601 if (get_attr_mode (insn) == MODE_SI)
2602 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2603 else if (which_alternative == 4)
2604 return "movabs{q}\t{%1, %0|%0, %1}";
2605 else if (ix86_use_lea_for_mov (insn, operands))
2606 return "lea{q}\t{%E1, %0|%0, %E1}";
2607 else
2608 return "mov{q}\t{%1, %0|%0, %1}";
2609
2610 default:
2611 gcc_unreachable ();
2612 }
2613 }
2614 [(set (attr "isa")
2615 (cond [(eq_attr "alternative" "0,1,17,18")
2616 (const_string "nox64")
2617 (eq_attr "alternative" "2,3,4,5,10,11,23,25")
2618 (const_string "x64")
2619 (eq_attr "alternative" "19,20")
2620 (const_string "x64_sse2")
2621 (eq_attr "alternative" "21,22")
2622 (const_string "sse2")
2623 ]
2624 (const_string "*")))
2625 (set (attr "type")
2626 (cond [(eq_attr "alternative" "0,1,17,18")
2627 (const_string "multi")
2628 (eq_attr "alternative" "6")
2629 (const_string "mmx")
2630 (eq_attr "alternative" "7,8,9,10,11")
2631 (const_string "mmxmov")
2632 (eq_attr "alternative" "12")
2633 (const_string "sselog1")
2634 (eq_attr "alternative" "13,14,15,16,19,20")
2635 (const_string "ssemov")
2636 (eq_attr "alternative" "21,22")
2637 (const_string "ssecvt")
2638 (eq_attr "alternative" "23,24,25,26")
2639 (const_string "mskmov")
2640 (eq_attr "alternative" "27")
2641 (const_string "msklog")
2642 (and (match_operand 0 "register_operand")
2643 (match_operand 1 "pic_32bit_operand"))
2644 (const_string "lea")
2645 ]
2646 (const_string "imov")))
2647 (set (attr "modrm")
2648 (if_then_else
2649 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2650 (const_string "0")
2651 (const_string "*")))
2652 (set (attr "length_immediate")
2653 (if_then_else
2654 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2655 (const_string "8")
2656 (const_string "*")))
2657 (set (attr "prefix_rex")
2658 (if_then_else
2659 (eq_attr "alternative" "10,11,19,20")
2660 (const_string "1")
2661 (const_string "*")))
2662 (set (attr "prefix")
2663 (if_then_else (eq_attr "type" "sselog1,ssemov")
2664 (const_string "maybe_vex")
2665 (const_string "orig")))
2666 (set (attr "prefix_data16")
2667 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
2668 (const_string "1")
2669 (const_string "*")))
2670 (set (attr "mode")
2671 (cond [(eq_attr "alternative" "2")
2672 (const_string "SI")
2673 (eq_attr "alternative" "12")
2674 (cond [(match_test "TARGET_AVX")
2675 (const_string "TI")
2676 (ior (not (match_test "TARGET_SSE2"))
2677 (match_test "optimize_function_for_size_p (cfun)"))
2678 (const_string "V4SF")
2679 ]
2680 (const_string "TI"))
2681 (eq_attr "alternative" "13")
2682 (cond [(match_test "TARGET_AVX512VL")
2683 (const_string "TI")
2684 (match_test "TARGET_AVX512F")
2685 (const_string "DF")
2686 (match_test "TARGET_AVX")
2687 (const_string "TI")
2688 (ior (not (match_test "TARGET_SSE2"))
2689 (match_test "optimize_function_for_size_p (cfun)"))
2690 (const_string "V4SF")
2691 ]
2692 (const_string "TI"))
2693
2694 (and (eq_attr "alternative" "14,15,16")
2695 (not (match_test "TARGET_SSE2")))
2696 (const_string "V2SF")
2697 ]
2698 (const_string "DI")))
2699 (set (attr "preferred_for_speed")
2700 (cond [(eq_attr "alternative" "10,17,19")
2701 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2702 (eq_attr "alternative" "11,18,20")
2703 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2704 ]
2705 (symbol_ref "true")))
2706 (set (attr "enabled")
2707 (cond [(eq_attr "alternative" "15")
2708 (if_then_else
2709 (match_test "TARGET_STV && TARGET_SSE2")
2710 (symbol_ref "false")
2711 (const_string "*"))
2712 (eq_attr "alternative" "16")
2713 (if_then_else
2714 (match_test "TARGET_STV && TARGET_SSE2")
2715 (symbol_ref "true")
2716 (symbol_ref "false"))
2717 ]
2718 (const_string "*")))])
2719
;; After reload with TARGET_SSE4_1, split a double-word (<DWI>) move
;; from an SSE register into general registers.  The preparation code
;; emits a move of the low halves first; the replacement pattern then
;; extracts element 1 of the vector view of the source (operand 3) into
;; the high half of the destination (operand 2).
2720 (define_split
2721 [(set (match_operand:<DWI> 0 "general_reg_operand")
2722 (match_operand:<DWI> 1 "sse_reg_operand"))]
2723 "TARGET_SSE4_1
2724 && reload_completed"
2725 [(set (match_dup 2)
2726 (vec_select:DWIH
2727 (match_dup 3)
2728 (parallel [(const_int 1)])))]
2729 {
2730 operands[2] = gen_highpart (<MODE>mode, operands[0]);
2731 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);
2732
2733 emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
2734 gen_lowpart (<MODE>mode, operands[1]));
2735 })
2736
;; After reload, split a DWI-mode move between operands satisfying
;; nonimmediate_gr_operand / general_gr_operand by calling
;; ix86_split_long_move; the (const_int 0) template is a placeholder
;; because the preparation code ends with DONE.
2737 (define_split
2738 [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
2739 (match_operand:DWI 1 "general_gr_operand"))]
2740 "reload_completed"
2741 [(const_int 0)]
2742 "ix86_split_long_move (operands); DONE;")
2743
;; 32-bit (!TARGET_64BIT) counterpart of the TImode split: after reload
;; with TARGET_SSE4_1, a DImode move from general registers into an SSE
;; register is done by moving the low SImode half first and then
;; merging the high SImode half (operand 3, duplicated) into the
;; V4SImode view of the destination with vec_merge mask (const_int 2).
2744 (define_split
2745 [(set (match_operand:DI 0 "sse_reg_operand")
2746 (match_operand:DI 1 "general_reg_operand"))]
2747 "!TARGET_64BIT && TARGET_SSE4_1
2748 && reload_completed"
2749 [(set (match_dup 2)
2750 (vec_merge:V4SI
2751 (vec_duplicate:V4SI (match_dup 3))
2752 (match_dup 2)
2753 (const_int 2)))]
2754 {
2755 operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
2756 operands[3] = gen_highpart (SImode, operands[1]);
2757
2758 emit_move_insn (gen_lowpart (SImode, operands[0]),
2759 gen_lowpart (SImode, operands[1]));
2760 })
2761
2762 ;; movabsq $0x0012345678000000, %rax is longer
2763 ;; than movl $0x12345678, %eax; shlq $24, %rax.
;; Conditions: 64-bit target, optimizing the insn for size, destination
;; is a legacy integer register, the constant is neither an
;; x86_64_immediate_operand nor an x86_64_zext_immediate_operand, the
;; constant shifted right by its count of trailing zeros has no bits
;; outside the low 32, and FLAGS_REG is dead (the shift clobbers it).
;; The preparation code computes the shifted constant and the shift
;; count, and FAILs if the shifted constant is an
;; ix86_endbr_immediate_operand.
2764 (define_peephole2
2765 [(set (match_operand:DI 0 "register_operand")
2766 (match_operand:DI 1 "const_int_operand"))]
2767 "TARGET_64BIT
2768 && optimize_insn_for_size_p ()
2769 && LEGACY_INT_REG_P (operands[0])
2770 && !x86_64_immediate_operand (operands[1], DImode)
2771 && !x86_64_zext_immediate_operand (operands[1], DImode)
2772 && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
2773 & ~HOST_WIDE_INT_C (0xffffffff))
2774 && peep2_regno_dead_p (0, FLAGS_REG)"
2775 [(set (match_dup 0) (match_dup 1))
2776 (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
2777 (clobber (reg:CC FLAGS_REG))])]
2778 {
2779 int shift = ctz_hwi (UINTVAL (operands[1]));
2780 rtx op1 = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
2781 if (ix86_endbr_immediate_operand (op1, VOIDmode))
2782 FAIL;
2783 operands[1] = op1;
2784 operands[2] = gen_int_mode (shift, QImode);
2785 })
2786
;; SImode move insn with 18 alternatives (numbered 0-17).  Memory-to-
;; memory moves are rejected and ix86_hardreg_mov_ok must accept the
;; operand pair.  The output code switches on the "type" attribute,
;; which is derived from the alternative number further below:
;;   mmx     (2)          -> pxor
;;   mmxmov  (3-7)        -> movq when the mode attribute is DI,
;;                           movd when it is SI
;;   sselog1 (8)          -> standard_sse_constant_opcode
;;   ssemov  (9-13)       -> ix86_output_ssemov
;;   mskmov  (14-16)      -> kmovd
;;   msklog  (17)         -> kxord (0) or kxnord (-1)
;;   lea     (register dest, pic_32bit_operand source) -> lea{l}
;;   imov    (everything else) -> lea{l} if ix86_use_lea_for_mov says
;;                           so, otherwise mov{l}
2787 (define_insn "*movsi_internal"
2788 [(set (match_operand:SI 0 "nonimmediate_operand"
2789 "=r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
2790 (match_operand:SI 1 "general_operand"
2791 "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
2792 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2793 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2794 {
2795 switch (get_attr_type (insn))
2796 {
2797 case TYPE_SSELOG1:
2798 return standard_sse_constant_opcode (insn, operands);
2799
2800 case TYPE_MSKMOV:
2801 return "kmovd\t{%1, %0|%0, %1}";
2802
2803 case TYPE_MSKLOG:
2804 if (operands[1] == const0_rtx)
2805 return "kxord\t%0, %0, %0";
2806 else if (operands[1] == constm1_rtx)
2807 return "kxnord\t%0, %0, %0";
2808 gcc_unreachable ();
2809
2810 case TYPE_SSEMOV:
2811 return ix86_output_ssemov (insn, operands);
2812
2813 case TYPE_MMX:
2814 return "pxor\t%0, %0";
2815
2816 case TYPE_MMXMOV:
2817 switch (get_attr_mode (insn))
2818 {
2819 case MODE_DI:
2820 return "movq\t{%1, %0|%0, %1}";
2821 case MODE_SI:
2822 return "movd\t{%1, %0|%0, %1}";
2823
2824 default:
2825 gcc_unreachable ();
2826 }
2827
2828 case TYPE_LEA:
2829 return "lea{l}\t{%E1, %0|%0, %E1}";
2830
2831 case TYPE_IMOV:
2832 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2833 if (ix86_use_lea_for_mov (insn, operands))
2834 return "lea{l}\t{%E1, %0|%0, %E1}";
2835 else
2836 return "mov{l}\t{%1, %0|%0, %1}";
2837
2838 default:
2839 gcc_unreachable ();
2840 }
2841 }
2842 [(set (attr "isa")
2843 (cond [(eq_attr "alternative" "12,13")
2844 (const_string "sse2")
2845 ]
2846 (const_string "*")))
2847 (set (attr "type")
2848 (cond [(eq_attr "alternative" "2")
2849 (const_string "mmx")
2850 (eq_attr "alternative" "3,4,5,6,7")
2851 (const_string "mmxmov")
2852 (eq_attr "alternative" "8")
2853 (const_string "sselog1")
2854 (eq_attr "alternative" "9,10,11,12,13")
2855 (const_string "ssemov")
2856 (eq_attr "alternative" "14,15,16")
2857 (const_string "mskmov")
2858 (eq_attr "alternative" "17")
2859 (const_string "msklog")
2860 (and (match_operand 0 "register_operand")
2861 (match_operand 1 "pic_32bit_operand"))
2862 (const_string "lea")
2863 ]
2864 (const_string "imov")))
2865 (set (attr "prefix")
2866 (if_then_else (eq_attr "type" "sselog1,ssemov")
2867 (const_string "maybe_vex")
2868 (const_string "orig")))
2869 (set (attr "prefix_data16")
2870 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
2871 (const_string "1")
2872 (const_string "*")))
2873 (set (attr "mode")
2874 (cond [(eq_attr "alternative" "2,3")
2875 (const_string "DI")
2876 (eq_attr "alternative" "8")
2877 (cond [(match_test "TARGET_AVX")
2878 (const_string "TI")
2879 (ior (not (match_test "TARGET_SSE2"))
2880 (match_test "optimize_function_for_size_p (cfun)"))
2881 (const_string "V4SF")
2882 ]
2883 (const_string "TI"))
2884 (eq_attr "alternative" "9")
2885 (cond [(match_test "TARGET_AVX512VL")
2886 (const_string "TI")
2887 (match_test "TARGET_AVX512F")
2888 (const_string "SF")
2889 (match_test "TARGET_AVX")
2890 (const_string "TI")
2891 (ior (not (match_test "TARGET_SSE2"))
2892 (match_test "optimize_function_for_size_p (cfun)"))
2893 (const_string "V4SF")
2894 ]
2895 (const_string "TI"))
2896
2897 (and (eq_attr "alternative" "10,11")
2898 (not (match_test "TARGET_SSE2")))
2899 (const_string "SF")
2900 ]
2901 (const_string "SI")))
2902 (set (attr "preferred_for_speed")
2903 (cond [(eq_attr "alternative" "6,12")
2904 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2905 (eq_attr "alternative" "7,13")
2906 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2907 ]
2908 (symbol_ref "true")))])
2909
2910 ;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg.
;; Conditions: optimizing for size with optimize_size > 1, the constant
;; is nonzero and within [-128, 127], the red zone is not in use, and
;; the destination is not the stack pointer.  The destination is
;; re-expressed in word_mode if necessary; operand 2 is a word_mode
;; MEM with a PRE_DEC stack-pointer address (the push) and operand 3 a
;; word_mode MEM with a POST_INC stack-pointer address (the pop).
2911 (define_peephole2
2912 [(set (match_operand:SWI248 0 "general_reg_operand")
2913 (match_operand:SWI248 1 "const_int_operand"))]
2914 "optimize_insn_for_size_p () && optimize_size > 1
2915 && operands[1] != const0_rtx
2916 && IN_RANGE (INTVAL (operands[1]), -128, 127)
2917 && !ix86_red_zone_used
2918 && REGNO (operands[0]) != SP_REG"
2919 [(set (match_dup 2) (match_dup 1))
2920 (set (match_dup 0) (match_dup 3))]
2921 {
2922 if (GET_MODE (operands[0]) != word_mode)
2923 operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0]));
2924
2925 operands[2] = gen_rtx_MEM (word_mode,
2926 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2927 operands[3] = gen_rtx_MEM (word_mode,
2928 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
2929 })
2930
2931 ;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
2932 ;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
;; The replacement is the same set wrapped in a parallel with a FLAGS_REG
;; clobber, so it is only done when peep2_regno_dead_p reports the flags
;; register dead at this insn.
2933 (define_peephole2
2934 [(set (match_operand:SWI248 0 "memory_operand")
2935 (match_operand:SWI248 1 "const_int_operand"))]
2936 "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
2937 && optimize_insn_for_size_p () && optimize_size > 1
2938 && peep2_regno_dead_p (0, FLAGS_REG)"
2939 [(parallel [(set (match_dup 0) (match_dup 1))
2940 (clobber (reg:CC FLAGS_REG))])])
2941
;; HImode move.  Judging by the constraint strings below:
;;   alternatives 0-3   general register / memory / immediate moves,
;;   alternatives 4-8   moves involving mask (k) registers,
;;   alternatives 9-15  moves involving SSE (v/Yv/x) registers.
;; Memory-to-memory moves are rejected by the insn condition.
2942 (define_insn "*movhi_internal"
2943 [(set (match_operand:HI 0 "nonimmediate_operand"
2944 "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*Yv,*v,*v,jm,m")
2945 (match_operand:HI 1 "general_operand"
2946 "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*x,*v"))]
2947 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2948 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2949 {
/* The output is chosen from the "type" attribute computed below.  */
2950 switch (get_attr_type (insn))
2951 {
2952 case TYPE_IMOVX:
2953 /* movzwl is faster than movw on p2 due to partial word stalls,
2954 though not as fast as an aligned movl. */
2955 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
2956
2957 case TYPE_MSKMOV:
/* Alternatives 4-7: kmovw.  A general-register operand is printed
   with the %k modifier.  */
2958 switch (which_alternative)
2959 {
2960 case 4:
2961 return "kmovw\t{%k1, %0|%0, %k1}";
2962 case 6:
2963 return "kmovw\t{%1, %k0|%k0, %1}";
2964 case 5:
2965 case 7:
2966 return "kmovw\t{%1, %0|%0, %1}";
2967 default:
2968 gcc_unreachable ();
2969 }
2970
2971 case TYPE_SSEMOV:
2972 return ix86_output_ssemov (insn, operands);
2973
2974 case TYPE_SSELOG1:
/* Either a standard SSE constant (constraint C), or a word
   insert/extract at element 0.  */
2975 if (satisfies_constraint_C (operands[1]))
2976 return standard_sse_constant_opcode (insn, operands);
2977
2978 if (SSE_REG_P (operands[0]))
2979 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
2980 else
2981 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
2982
2983 case TYPE_MSKLOG:
/* Alternative 8: set a mask register to 0 (kxorw) or -1 (kxnorw).  */
2984 if (operands[1] == const0_rtx)
2985 return "kxorw\t%0, %0, %0";
2986 else if (operands[1] == constm1_rtx)
2987 return "kxnorw\t%0, %0, %0";
2988 gcc_unreachable ();
2989
2990 default:
/* Plain integer move; the 32-bit form is used when the "mode"
   attribute evaluates to SI.  */
2991 if (get_attr_mode (insn) == MODE_SI)
2992 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2993 else
2994 return "mov{w}\t{%1, %0|%0, %1}";
2995 }
2996 }
;; ISA requirement for the SSE alternatives.
2997 [(set (attr "isa")
2998 (cond [(eq_attr "alternative" "9,10,11,12,13")
2999 (const_string "sse2")
3000 (eq_attr "alternative" "14")
3001 (const_string "sse4_noavx")
3002 (eq_attr "alternative" "15")
3003 (const_string "avx")
3004 ]
3005 (const_string "*")))
3006 (set (attr "addr")
3007 (if_then_else (eq_attr "alternative" "14")
3008 (const_string "gpr16")
3009 (const_string "*")))
;; Insn type.  The cond arms are tested in order; the integer alternatives
;; fall through to "imov" unless an "imovx" arm matches first.
3010 (set (attr "type")
3011 (cond [(eq_attr "alternative" "4,5,6,7")
3012 (const_string "mskmov")
3013 (eq_attr "alternative" "8")
3014 (const_string "msklog")
3015 (eq_attr "alternative" "13,14,15")
3016 (if_then_else (match_test "TARGET_AVX512FP16")
3017 (const_string "ssemov")
3018 (const_string "sselog1"))
3019 (eq_attr "alternative" "11")
3020 (const_string "sselog1")
3021 (eq_attr "alternative" "9,10,12")
3022 (const_string "ssemov")
3023 (match_test "optimize_function_for_size_p (cfun)")
3024 (const_string "imov")
3025 (and (eq_attr "alternative" "0")
3026 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
3027 (not (match_test "TARGET_HIMODE_MATH"))))
3028 (const_string "imov")
3029 (and (eq_attr "alternative" "1,2")
3030 (match_operand:HI 1 "aligned_operand"))
3031 (const_string "imov")
3032 (and (match_test "TARGET_MOVX")
3033 (eq_attr "alternative" "0,2"))
3034 (const_string "imovx")
3035 ]
3036 (const_string "imov")))
3037 (set (attr "prefix")
3038 (cond [(eq_attr "alternative" "4,5,6,7,8")
3039 (const_string "vex")
3040 (eq_attr "alternative" "9,10,11,12,13,14,15")
3041 (const_string "maybe_evex")
3042 ]
3043 (const_string "orig")))
;; Operating mode of the emitted instruction.  For the integer alternatives
;; the SI arms below mirror the conditions under which the output code
;; emits the 32-bit mov or movz form.
3044 (set (attr "mode")
3045 (cond [(eq_attr "alternative" "9,10")
3046 (if_then_else (match_test "TARGET_AVX512FP16")
3047 (const_string "HI")
3048 (const_string "SI"))
3049 (eq_attr "alternative" "13,14,15")
3050 (if_then_else (match_test "TARGET_AVX512FP16")
3051 (const_string "HI")
3052 (const_string "TI"))
3053 (eq_attr "alternative" "11")
3054 (cond [(match_test "TARGET_AVX")
3055 (const_string "TI")
3056 (ior (not (match_test "TARGET_SSE2"))
3057 (match_test "optimize_function_for_size_p (cfun)"))
3058 (const_string "V4SF")
3059 ]
3060 (const_string "TI"))
3061 (eq_attr "alternative" "12")
3062 (cond [(match_test "TARGET_AVX512VL")
3063 (const_string "TI")
3064 (match_test "TARGET_AVX512FP16")
3065 (const_string "HF")
3066 (match_test "TARGET_AVX512F")
3067 (const_string "SF")
3068 (match_test "TARGET_AVX")
3069 (const_string "TI")
3070 (ior (not (match_test "TARGET_SSE2"))
3071 (match_test "optimize_function_for_size_p (cfun)"))
3072 (const_string "V4SF")
3073 ]
3074 (const_string "TI"))
3075 (eq_attr "type" "imovx")
3076 (const_string "SI")
3077 (and (eq_attr "alternative" "1,2")
3078 (match_operand:HI 1 "aligned_operand"))
3079 (const_string "SI")
3080 (and (eq_attr "alternative" "0")
3081 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
3082 (not (match_test "TARGET_HIMODE_MATH"))))
3083 (const_string "SI")
3084 ]
3085 (const_string "HI")))
;; The GPR<->SSE alternatives (9 and 10) are preferred for speed only when
;; the corresponding inter-unit move tuning flag is set.
3086 (set (attr "preferred_for_speed")
3087 (cond [(eq_attr "alternative" "9")
3088 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
3089 (eq_attr "alternative" "10")
3090 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
3091 ]
3092 (symbol_ref "true")))])
3093
3094 ;; The situation is quite tricky when choosing a full-sized (SImode) move
3095 ;; over a QImode move. For a Q_REG -> Q_REG move we use the full size only
3096 ;; on partial register dependency machines (such as AMD Athlon), where QImode
3097 ;; moves introduce an extra dependency, and on partial register stall machines
3098 ;; that don't use QImode patterns (where a QImode move causes a stall on the
3099 ;; next instruction).
3100 ;;
3101 ;; For loads of Q_REG to NONQ_REG we use full-sized moves, except on partial
3102 ;; register stall machines where we use QImode instructions, since a partial
3103 ;; register stall can be caused there. In that case we use movzx.
3104
;; QImode move.  Judging by the constraint strings below:
;;   alternatives 0-8   general register / memory / immediate moves,
;;   alternatives 9-15  moves involving mask (k) registers.
;; Memory-to-memory moves are rejected by the insn condition.
3105 (define_insn "*movqi_internal"
3106 [(set (match_operand:QI 0 "nonimmediate_operand"
3107 "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
3108 (match_operand:QI 1 "general_operand"
3109 "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
3110 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3111 && ix86_hardreg_mov_ok (operands[0], operands[1])"
3112
3113 {
/* buf receives the kmov template after the b/w suffix is substituted.  */
3114 char buf[128];
3115 const char *ops;
3116 const char *suffix;
3117
3118 switch (get_attr_type (insn))
3119 {
3120 case TYPE_IMOVX:
3121 gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
3122 return "movz{bl|x}\t{%1, %k0|%k0, %1}";
3123
3124 case TYPE_MSKMOV:
/* Alternatives 9-13.  "%s" is filled in with the size suffix below;
   the doubled "%%" survive snprintf as single "%" operand markers.  */
3125 switch (which_alternative)
3126 {
3127 case 9:
3128 ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
3129 break;
3130 case 11:
3131 ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
3132 break;
3133 case 12:
3134 case 13:
3135 gcc_assert (TARGET_AVX512DQ);
3136 /* FALLTHRU */
3137 case 10:
3138 ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
3139 break;
3140 default:
3141 gcc_unreachable ();
3142 }
3143
/* kmovw when the "mode" attribute is HI, kmovb otherwise.  */
3144 suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
3145
3146 snprintf (buf, sizeof (buf), ops, suffix);
3147 output_asm_insn (buf, operands);
3148 return "";
3149
3150 case TYPE_MSKLOG:
/* Alternatives 14-15: set a mask register to 0 or -1.  */
3151 if (operands[1] == const0_rtx)
3152 {
3153 if (get_attr_mode (insn) == MODE_HI)
3154 return "kxorw\t%0, %0, %0";
3155 else
3156 return "kxorb\t%0, %0, %0";
3157 }
3158 else if (operands[1] == constm1_rtx)
3159 {
3160 gcc_assert (TARGET_AVX512DQ);
3161 return "kxnorb\t%0, %0, %0";
3162 }
3163 gcc_unreachable ();
3164
3165 default:
/* Plain integer move; the 32-bit form is used when the "mode"
   attribute evaluates to SI.  */
3166 if (get_attr_mode (insn) == MODE_SI)
3167 return "mov{l}\t{%k1, %k0|%k0, %k1}";
3168 else
3169 return "mov{b}\t{%1, %0|%0, %1}";
3170 }
3171 }
3172 [(set (attr "isa")
3173 (cond [(eq_attr "alternative" "1,2")
3174 (const_string "x64")
3175 (eq_attr "alternative" "12,13,15")
3176 (const_string "avx512dq")
3177 ]
3178 (const_string "*")))
;; Insn type.  The cond arms are tested in order.
3179 (set (attr "type")
3180 (cond [(eq_attr "alternative" "9,10,11,12,13")
3181 (const_string "mskmov")
3182 (eq_attr "alternative" "14,15")
3183 (const_string "msklog")
3184 (and (eq_attr "alternative" "7")
3185 (not (match_operand:QI 1 "aligned_operand")))
3186 (const_string "imovx")
3187 (match_test "optimize_function_for_size_p (cfun)")
3188 (const_string "imov")
3189 (and (eq_attr "alternative" "5")
3190 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
3191 (not (match_test "TARGET_QIMODE_MATH"))))
3192 (const_string "imov")
3193 (eq_attr "alternative" "5,7")
3194 (const_string "imovx")
3195 (and (match_test "TARGET_MOVX")
3196 (eq_attr "alternative" "4"))
3197 (const_string "imovx")
3198 ]
3199 (const_string "imov")))
3200 (set (attr "prefix")
3201 (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
3202 (const_string "vex")
3203 (const_string "orig")))
;; Operating mode of the emitted instruction.  Mask alternatives 9, 10, 11
;; and 14 use HI (the word-sized k instructions) when AVX512DQ is not
;; available.
3204 (set (attr "mode")
3205 (cond [(eq_attr "alternative" "5,6,7")
3206 (const_string "SI")
3207 (eq_attr "alternative" "8")
3208 (const_string "QI")
3209 (and (eq_attr "alternative" "9,10,11,14")
3210 (not (match_test "TARGET_AVX512DQ")))
3211 (const_string "HI")
3212 (eq_attr "type" "imovx")
3213 (const_string "SI")
3214 ;; For -Os, 8-bit immediates are always shorter than 32-bit
3215 ;; ones.
3216 (and (eq_attr "type" "imov")
3217 (and (eq_attr "alternative" "3")
3218 (match_test "optimize_function_for_size_p (cfun)")))
3219 (const_string "QI")
3220 ;; For -Os, movl where one or both operands are NON_Q_REGS
3221 ;; and both are LEGACY_REGS is shorter than movb.
3222 ;; Otherwise movb and movl sizes are the same, so decide purely
3223 ;; based on speed factors.
3224 (and (eq_attr "type" "imov")
3225 (and (eq_attr "alternative" "1")
3226 (match_test "optimize_function_for_size_p (cfun)")))
3227 (const_string "SI")
3228 (and (eq_attr "type" "imov")
3229 (and (eq_attr "alternative" "0,1,2,3")
3230 (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
3231 (not (match_test "TARGET_PARTIAL_REG_STALL")))))
3232 (const_string "SI")
3233 ;; Avoid partial register stalls when not using QImode arithmetic
3234 (and (eq_attr "type" "imov")
3235 (and (eq_attr "alternative" "0,1,2,3")
3236 (and (match_test "TARGET_PARTIAL_REG_STALL")
3237 (not (match_test "TARGET_QIMODE_MATH")))))
3238 (const_string "SI")
3239 ]
3240 (const_string "QI")))])
3241
3242 /* Reload dislikes loading 0/-1 directly into mask registers.
3243 Try to tidy things up here. */
;; Folds "immediate -> general reg; general reg -> mask reg" into a direct
;; "immediate -> mask reg" set when the general register is dead after the
;; second insn.  Zero is always accepted; all-ones is accepted for modes
;; wider than one byte, or for any mode when TARGET_AVX512DQ is set.
3244 (define_peephole2
3245 [(set (match_operand:SWI 0 "general_reg_operand")
3246 (match_operand:SWI 1 "immediate_operand"))
3247 (set (match_operand:SWI 2 "mask_reg_operand")
3248 (match_dup 0))]
3249 "peep2_reg_dead_p (2, operands[0])
3250 && (const0_operand (operands[1], <MODE>mode)
3251 || (constm1_operand (operands[1], <MODE>mode)
3252 && (<MODE_SIZE> > 1 || TARGET_AVX512DQ)))"
3253 [(set (match_dup 2) (match_dup 1))])
3254
3255 ;; Stores and loads of ax to an arbitrary constant address.
3256 ;; We fake a second form of the instruction to force reload to load the
3257 ;; address into a register when rax is not available.
;; Store to a 64-bit absolute address (LP64 only).
;;   Alternative 0: the address is an immediate and the source is in the
;;                  "a" register; emitted as movabs.
;;   Alternative 1: the address is in a register; emitted as a plain mov.
(define_insn "*movabs<mode>_1"
  [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
	(match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
  "TARGET_LP64 && ix86_check_movabs (insn, 0)"
{
  /* Operand 0 matched only the address.  Replace it with the complete
     MEM that is the destination of the insn, so the templates below
     print the full memory reference.  */
  operands[0] = SET_DEST (PATTERN (insn));

  if (which_alternative == 0)
    return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";

  if (which_alternative == 1)
    return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";

  gcc_unreachable ();
}
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")
   (set_attr "memory" "store")
   ;; Per-alternative encoding details: the movabs form has no ModRM byte
   ;; and carries an 8-byte address.
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0,*")])
3281
;; Load from a 64-bit absolute address (LP64 only).
;;   Alternative 0: the address is an immediate and the destination is the
;;                  "a" register; emitted as movabs.
;;   Alternative 1: the address is in a register; emitted as a plain mov.
(define_insn "*movabs<mode>_2"
  [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
	(mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
  "TARGET_LP64 && ix86_check_movabs (insn, 1)"
{
  /* Operand 1 matched only the address.  Replace it with the complete
     MEM that is the source of the insn, so the templates below print
     the full memory reference.  */
  operands[1] = SET_SRC (PATTERN (insn));

  if (which_alternative == 0)
    return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";

  if (which_alternative == 1)
    return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";

  gcc_unreachable ();
}
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")
   (set_attr "memory" "load")
   ;; Per-alternative encoding details: the movabs form has no ModRM byte
   ;; and carries an 8-byte address.
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0")])
3305
;; Exchange the contents of two general registers of an SWI48 mode with a
;; single xchg.  Both operands are read and written ("+r").
3306 (define_insn "swap<mode>"
3307 [(set (match_operand:SWI48 0 "register_operand" "+r")
3308 (match_operand:SWI48 1 "register_operand" "+r"))
3309 (set (match_dup 1)
3310 (match_dup 0))]
3311 ""
3312 "xchg{<imodesuffix>}\t%1, %0"
3313 [(set_attr "type" "imov")
3314 (set_attr "mode" "<MODE>")
3315 (set_attr "pent_pair" "np")
3316 (set_attr "athlon_decode" "vector")
3317 (set_attr "amdfam10_decode" "double")
3318 (set_attr "bdver1_decode" "double")])
3319
;; Exchange two registers of an SWI12 mode.
;;   Alternative 0: xchg in the operand's own mode.
;;   Alternative 1: 32-bit xchg of the containing registers (%k modifier).
3320 (define_insn "*swap<mode>"
3321 [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
3322 (match_operand:SWI12 1 "register_operand" "+<r>,r"))
3323 (set (match_dup 1)
3324 (match_dup 0))]
3325 ""
3326 "@
3327 xchg{<imodesuffix>}\t%1, %0
3328 xchg{l}\t%k1, %k0"
3329 [(set_attr "type" "imov")
3330 (set_attr "mode" "<MODE>,SI")
;; Alternative 0 is not preferred when optimizing for size.
3331 (set (attr "preferred_for_size")
3332 (cond [(eq_attr "alternative" "0")
3333 (symbol_ref "false")]
3334 (symbol_ref "true")))
3335 ;; Potential partial reg stall on alternative 1.
3336 (set (attr "preferred_for_speed")
3337 (cond [(eq_attr "alternative" "1")
3338 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
3339 (symbol_ref "true")))
3340 (set_attr "pent_pair" "np")
3341 (set_attr "athlon_decode" "vector")
3342 (set_attr "amdfam10_decode" "double")
3343 (set_attr "bdver1_decode" "double")])
3344
;; Replace the three-move exchange "tmp = a; a = b; b = tmp" with a single
;; swap parallel of a and b, when optimizing for size and tmp (operand 0)
;; is dead after the third insn.
3345 (define_peephole2
3346 [(set (match_operand:SWI 0 "general_reg_operand")
3347 (match_operand:SWI 1 "general_reg_operand"))
3348 (set (match_dup 1)
3349 (match_operand:SWI 2 "general_reg_operand"))
3350 (set (match_dup 2) (match_dup 0))]
3351 "peep2_reg_dead_p (3, operands[0])
3352 && optimize_insn_for_size_p ()"
3353 [(parallel [(set (match_dup 1) (match_dup 2))
3354 (set (match_dup 2) (match_dup 1))])])
3355
3356 ;; Convert xchg with a REG_UNUSED note to a mov (variant #1).
;; Here operand 0 is dead after the exchange, so only "op1 = op0" is kept.
;; The conversion is skipped only when one of the registers is AX and the
;; insn is being optimized for size with optimize_size >= 2 (presumably
;; because the xchg-with-AX encoding is shorter than mov -- TODO confirm).
3357 (define_peephole2
3358 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
3359 (match_operand:SWI 1 "general_reg_operand"))
3360 (set (match_dup 1) (match_dup 0))])]
3361 "((REGNO (operands[0]) != AX_REG
3362 && REGNO (operands[1]) != AX_REG)
3363 || optimize_size < 2
3364 || !optimize_insn_for_size_p ())
3365 && peep2_reg_dead_p (1, operands[0])"
3366 [(set (match_dup 1) (match_dup 0))])
3367
3368 ;; Convert xchg with a REG_UNUSED note to a mov (variant #2).
;; Here operand 1 is dead after the exchange, so only "op0 = op1" is kept.
;; The conversion is skipped only when one of the registers is AX and the
;; insn is being optimized for size with optimize_size >= 2 (presumably
;; because the xchg-with-AX encoding is shorter than mov -- TODO confirm).
3369 (define_peephole2
3370 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
3371 (match_operand:SWI 1 "general_reg_operand"))
3372 (set (match_dup 1) (match_dup 0))])]
3373 "((REGNO (operands[0]) != AX_REG
3374 && REGNO (operands[1]) != AX_REG)
3375 || optimize_size < 2
3376 || !optimize_insn_for_size_p ())
3377 && peep2_reg_dead_p (1, operands[1])"
3378 [(set (match_dup 0) (match_dup 1))])
3379
3380 ;; Convert moves to/from AX_REG into xchg with -Oz.
;; Applies when exactly one of the two registers is AX and the source
;; register (operand 1) is dead after the move, so clobbering it with the
;; old destination value is harmless.
3381 (define_peephole2
3382 [(set (match_operand:SWI48 0 "general_reg_operand")
3383 (match_operand:SWI48 1 "general_reg_operand"))]
3384 "optimize_size > 1
3385 && ((REGNO (operands[0]) == AX_REG)
3386 != (REGNO (operands[1]) == AX_REG))
3387 && optimize_insn_for_size_p ()
3388 && peep2_reg_dead_p (1, operands[1])"
3389 [(parallel [(set (match_dup 0) (match_dup 1))
3390 (set (match_dup 1) (match_dup 0))])])
3391
;; Expander for a strict_low_part move into an SWI12-mode subreg.
;; Expansion FAILs when partial register stalls are a concern and the
;; function is optimized for speed, or when the mode of the register
;; underlying the subreg is not a valid integer mode.
3392 (define_expand "movstrict<mode>"
3393 [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
3394 (match_operand:SWI12 1 "general_operand"))]
3395 ""
3396 {
/* The destination is required to be a SUBREG; its inner register is
   inspected below.  */
3397 gcc_assert (SUBREG_P (operands[0]));
3398 if ((TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
3399 || !VALID_INT_MODE_P (GET_MODE (SUBREG_REG (operands[0]))))
3400 FAIL;
3401 })
3402
;; Move into the low part of a register, leaving the rest of the register
;; unchanged (strict_low_part).  Enabled unless TARGET_PARTIAL_REG_STALL is
;; set and the function is not optimized for size.
3403 (define_insn "*movstrict<mode>_1"
3404 [(set (strict_low_part
3405 (match_operand:SWI12 0 "register_operand" "+<r>"))
3406 (match_operand:SWI12 1 "general_operand" "<r>mn"))]
3407 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
3408 "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
3409 [(set_attr "type" "imov")
3410 (set_attr "mode" "<MODE>")])
3411
;; Zero the low part of a register with xor instead of a move of 0.
;; Only recognized after reload; the pattern carries a FLAGS_REG clobber.
3412 (define_insn "*movstrict<mode>_xor"
3413 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
3414 (match_operand:SWI12 1 "const0_operand"))
3415 (clobber (reg:CC FLAGS_REG))]
3416 "reload_completed"
3417 "xor{<imodesuffix>}\t%0, %0"
3418 [(set_attr "type" "alu1")
3419 (set_attr "mode" "<MODE>")
3420 (set_attr "length_immediate" "0")])
3421
;; Move bits 8..15 of one register (printed with the %h modifier) into the
;; low byte of another, leaving the rest of the destination unchanged.
;; Both operands use the "Q" constraint.
3422 (define_insn "*movstrictqi_ext<mode>_1"
3423 [(set (strict_low_part
3424 (match_operand:QI 0 "register_operand" "+Q"))
3425 (subreg:QI
3426 (match_operator:SWI248 2 "extract_operator"
3427 [(match_operand 1 "int248_register_operand" "Q")
3428 (const_int 8)
3429 (const_int 8)]) 0))]
3430 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
3431 "mov{b}\t{%h1, %0|%0, %h1}"
3432 [(set_attr "type" "imov")
3433 (set_attr "mode" "QI")])
3434
;; Expander for signed bit-field extraction.  Only the 8-bit field at bit
;; position 8 is handled; every other size/position FAILs.
3435 (define_expand "extv<mode>"
3436 [(set (match_operand:SWI24 0 "register_operand")
3437 (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
3438 (match_operand:QI 2 "const_int_operand")
3439 (match_operand:QI 3 "const_int_operand")))]
3440 ""
3441 {
3442 /* Handle extractions from %ah et al. */
3443 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3444 FAIL;
3445
3446 unsigned int regno = reg_or_subregno (operands[1]);
3447
3448 /* Be careful to expand only with registers having upper parts. */
/* A source register numbered at or below LAST_VIRTUAL_REGISTER that is
   not a QI_REGNO_P register is first copied into a fresh register.  */
3449 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3450 operands[1] = copy_to_reg (operands[1]);
3451 })
3452
;; Sign-extend bits 8..15 of a "Q" register (printed with %h) into the
;; destination, using movs{bl|x} on the 32-bit view (%k) of the destination.
3453 (define_insn "*extv<mode>"
3454 [(set (match_operand:SWI24 0 "register_operand" "=R")
3455 (sign_extract:SWI24 (match_operand 1 "int248_register_operand" "Q")
3456 (const_int 8)
3457 (const_int 8)))]
3458 ""
3459 "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
3460 [(set_attr "type" "imovx")
3461 (set_attr "mode" "SI")])
3462
3463 ;; Split sign-extension of single least significant bit as and x,$1;neg x
;; The source is tied to the destination (constraint "0").  The insn is
;; never output directly ("#"); it is always split into the two
;; flag-clobbering insns below.
3464 (define_insn_and_split "*extv<mode>_1_0"
3465 [(set (match_operand:SWI48 0 "register_operand" "=r")
3466 (sign_extract:SWI48 (match_operand:SWI48 1 "register_operand" "0")
3467 (const_int 1)
3468 (const_int 0)))
3469 (clobber (reg:CC FLAGS_REG))]
3470 ""
3471 "#"
3472 ""
3473 [(parallel [(set (match_dup 0) (and:SWI48 (match_dup 1) (const_int 1)))
3474 (clobber (reg:CC FLAGS_REG))])
3475 (parallel [(set (match_dup 0) (neg:SWI48 (match_dup 0)))
3476 (clobber (reg:CC FLAGS_REG))])])
3477
;; Expander for unsigned bit-field extraction.  ix86_expand_pextr is tried
;; first; otherwise only the 8-bit field at bit position 8 is handled and
;; every other size/position FAILs.
3478 (define_expand "extzv<mode>"
3479 [(set (match_operand:SWI248 0 "register_operand")
3480 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
3481 (match_operand:QI 2 "const_int_operand")
3482 (match_operand:QI 3 "const_int_operand")))]
3483 ""
3484 {
3485 if (ix86_expand_pextr (operands))
3486 DONE;
3487
3488 /* Handle extractions from %ah et al. */
3489 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3490 FAIL;
3491
3492 unsigned int regno = reg_or_subregno (operands[1]);
3493
3494 /* Be careful to expand only with registers having upper parts. */
/* A source register numbered at or below LAST_VIRTUAL_REGISTER that is
   not a QI_REGNO_P register is first copied into a fresh register.  */
3495 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3496 operands[1] = copy_to_reg (operands[1]);
3497 })
3498
;; Zero-extend bits 8..15 of a "Q" register (printed with %h) into the
;; destination, using movz{bl|x} on the 32-bit view (%k) of the destination.
3499 (define_insn "*extzv<mode>"
3500 [(set (match_operand:SWI248 0 "register_operand" "=R")
3501 (zero_extract:SWI248 (match_operand 1 "int248_register_operand" "Q")
3502 (const_int 8)
3503 (const_int 8)))]
3504 ""
3505 "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
3506 [(set_attr "type" "imovx")
3507 (set_attr "mode" "SI")])
3508
;; Copy bits 8..15 of a "Q" register (printed with %h) into a QImode
;; destination.  The "type" attribute decides between a zero-extending
;; 32-bit load of the byte (imovx) and a plain byte move (imov).
(define_insn "*extzvqi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R")
	(subreg:QI
	  (match_operator:SWI248 2 "extract_operator"
	    [(match_operand 1 "int248_register_operand" "Q,Q")
	     (const_int 8)
	     (const_int 8)]) 0))]
  ""
{
  if (get_attr_type (insn) == TYPE_IMOVX)
    return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";

  return "mov{b}\t{%h1, %0|%0, %h1}";
}
  [(set_attr "addr" "gpr8,*")
   ;; imovx is used for a register destination that either is not a
   ;; QIreg_operand or when TARGET_MOVX is set; everything else is imov.
   (set (attr "type")
     (cond [(and (match_operand:QI 0 "register_operand")
		 (ior (not (match_operand:QI 0 "QIreg_operand"))
		      (match_test "TARGET_MOVX")))
	      (const_string "imovx")
	   ]
	   (const_string "imov")))
   ;; The zero-extending form operates in SImode, the byte move in QImode.
   (set (attr "mode")
     (cond [(eq_attr "type" "imovx")
	      (const_string "SI")
	   ]
	   (const_string "QI")))])
3537
;; Expander for bit-field insertion.  ix86_expand_pinsr is tried first;
;; otherwise only the 8-bit field at bit position 8 is handled (via
;; insv<mode>_1) and every other size/position FAILs.
(define_expand "insv<mode>"
  [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
			     (match_operand:QI 1 "const_int_operand")
			     (match_operand:QI 2 "const_int_operand"))
	(match_operand:SWI248 3 "register_operand"))]
  ""
{
  if (ix86_expand_pinsr (operands))
    DONE;

  /* Handle insertions to %ah et al.  */
  if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
    FAIL;

  /* Be careful to expand only with registers having upper parts: a
     destination numbered at or below LAST_VIRTUAL_REGISTER that is not a
     QI_REGNO_P register is replaced by a copy for the insertion.  */
  unsigned int regno = reg_or_subregno (operands[0]);
  rtx dst = ((regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
	     ? copy_to_reg (operands[0])
	     : operands[0]);

  emit_insn (gen_insv_1 (<MODE>mode, dst, operands[3]));

  /* Fix up the destination if the insertion was done on a copy.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
3570
;; Insert the low byte of operand 1 into bits 8..15 of a "Q" register
;; (printed with %h).  The "@" prefix makes this a parameterized name;
;; the insv<mode> expander calls it as gen_insv_1 (<MODE>mode, ...).
3571 (define_insn "@insv<mode>_1"
3572 [(set (zero_extract:SWI248
3573 (match_operand 0 "int248_register_operand" "+Q")
3574 (const_int 8)
3575 (const_int 8))
3576 (match_operand:SWI248 1 "general_operand" "QnBn"))]
3577 ""
3578 {
/* A constant source is re-generated in QImode before it is printed
   as the byte operand of the mov.  */
3579 if (CONST_INT_P (operands[1]))
3580 operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);
3581 return "mov{b}\t{%b1, %h0|%h0, %b1}";
3582 }
3583 [(set_attr "addr" "gpr8")
3584 (set_attr "type" "imov")
3585 (set_attr "mode" "QI")])
3586
;; Insert a QImode operand (wrapped in a subreg to the SWI248 mode) into
;; bits 8..15 of a "Q" register (printed with %h).
3587 (define_insn "*insvqi_1"
3588 [(set (zero_extract:SWI248
3589 (match_operand 0 "int248_register_operand" "+Q")
3590 (const_int 8)
3591 (const_int 8))
3592 (subreg:SWI248
3593 (match_operand:QI 1 "general_operand" "QnBn") 0))]
3594 ""
3595 "mov{b}\t{%1, %h0|%h0, %1}"
3596 [(set_attr "addr" "gpr8")
3597 (set_attr "type" "imov")
3598 (set_attr "mode" "QI")])
3599
3600 ;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah
;; When a register is cleared and zero is then inserted into bits 8..15 of
;; the same hard register, only the clearing insn is kept.
3601 (define_peephole2
3602 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3603 (const_int 0))
3604 (clobber (reg:CC FLAGS_REG))])
3605 (set (zero_extract:SWI248 (match_operand 1 "int248_register_operand")
3606 (const_int 8)
3607 (const_int 8))
3608 (const_int 0))]
3609 "REGNO (operands[0]) == REGNO (operands[1])"
3610 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3611 (const_int 0))
3612 (clobber (reg:CC FLAGS_REG))])])
3613
3614 ;; Combine movl followed by movb.
;; A constant load into a register followed by a constant insertion into
;; bits 8..15 of the same hard register is folded into one constant load.
3615 (define_peephole2
3616 [(set (match_operand:SWI48 0 "general_reg_operand")
3617 (match_operand:SWI48 1 "const_int_operand"))
3618 (set (zero_extract:SWI248 (match_operand 2 "int248_register_operand")
3619 (const_int 8)
3620 (const_int 8))
3621 (match_operand:SWI248 3 "const_int_operand"))]
3622 "REGNO (operands[0]) == REGNO (operands[2])"
3623 [(set (match_operand:SWI48 0 "general_reg_operand")
3624 (match_dup 4))]
3625 {
/* Clear bits 8..15 of the first constant, then merge in the low byte of
   the inserted constant at that position.  */
3626 HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~HOST_WIDE_INT_C (0xff00);
3627 tmp |= (INTVAL (operands[3]) & 0xff) << 8;
3628 operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
3629 })
3630
;; Copy bits 8..15 of one "Q" register into bits 8..15 of another; both
;; operands are printed with the %h modifier.
3631 (define_insn "*insvqi_2"
3632 [(set (zero_extract:SWI248
3633 (match_operand 0 "int248_register_operand" "+Q")
3634 (const_int 8)
3635 (const_int 8))
3636 (match_operator:SWI248 2 "extract_operator"
3637 [(match_operand 1 "int248_register_operand" "Q")
3638 (const_int 8)
3639 (const_int 8)]))]
3640 ""
3641 "mov{b}\t{%h1, %h0|%h0, %h1}"
3642 [(set_attr "type" "imov")
3643 (set_attr "mode" "QI")])
3644
;; Same byte-to-byte copy as *insvqi_2, but with the source written as a
;; right shift by 8 (any_shiftrt) instead of an extract; the inserted field
;; is 8 bits wide, so only bits 8..15 of the source are used.
3645 (define_insn "*insvqi_3"
3646 [(set (zero_extract:SWI248
3647 (match_operand 0 "int248_register_operand" "+Q")
3648 (const_int 8)
3649 (const_int 8))
3650 (any_shiftrt:SWI248
3651 (match_operand:SWI248 1 "register_operand" "Q")
3652 (const_int 8)))]
3653 ""
3654 "mov{b}\t{%h1, %h0|%h0, %h1}"
3655 [(set_attr "type" "imov")
3656 (set_attr "mode" "QI")])
3657
;; Code iterator over plus, ior and xor.  It is used by the *insv*_1
;; patterns that follow, where the two combined values occupy disjoint
;; halves of the result.
3658 (define_code_iterator any_or_plus [plus ior xor])
3659
;; Replace the high 64 bits of a TImode value with a DImode operand.
;; The mask (operand 3) must be a two-element CONST_WIDE_INT whose low
;; element is -1 and whose high element is 0, i.e. it keeps only the low
;; half of operand 1.  After reload the insn is split by building the
;; result from the low part of operand 1 and operand 2.
3660 (define_insn_and_split "*insvti_highpart_1"
3661 [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
3662 (any_or_plus:TI
3663 (and:TI
3664 (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
3665 (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
3666 (ashift:TI
3667 (zero_extend:TI
3668 (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))
3669 (const_int 64))))]
3670 "TARGET_64BIT
3671 && CONST_WIDE_INT_P (operands[3])
3672 && CONST_WIDE_INT_NUNITS (operands[3]) == 2
3673 && CONST_WIDE_INT_ELT (operands[3], 0) == -1
3674 && CONST_WIDE_INT_ELT (operands[3], 1) == 0"
3675 "#"
3676 "&& reload_completed"
3677 [(const_int 0)]
3678 {
/* operands[4] is the preserved low half; operand 2 becomes the new
   high half.  */
3679 operands[4] = gen_lowpart (DImode, operands[1]);
3680 split_double_concat (TImode, operands[0], operands[4], operands[2]);
3681 DONE;
3682 })
3683
;; Replace the low 64 bits of a TImode value with a DImode operand.
;; The mask (operand 3) must be a two-element CONST_WIDE_INT whose low
;; element is 0 and whose high element is -1, i.e. it keeps only the high
;; half of operand 1.  After reload the insn is split by building the
;; result from operand 2 and the high part of operand 1.
3684 (define_insn_and_split "*insvti_lowpart_1"
3685 [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
3686 (any_or_plus:TI
3687 (and:TI
3688 (match_operand:TI 1 "nonimmediate_operand" "r,o,r,o")
3689 (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
3690 (zero_extend:TI
3691 (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
3692 "TARGET_64BIT
3693 && CONST_WIDE_INT_P (operands[3])
3694 && CONST_WIDE_INT_NUNITS (operands[3]) == 2
3695 && CONST_WIDE_INT_ELT (operands[3], 0) == 0
3696 && CONST_WIDE_INT_ELT (operands[3], 1) == -1"
3697 "#"
3698 "&& reload_completed"
3699 [(const_int 0)]
3700 {
/* operands[4] is the preserved high half; operand 2 becomes the new
   low half.  */
3701 operands[4] = gen_highpart (DImode, operands[1]);
3702 split_double_concat (TImode, operands[0], operands[2], operands[4]);
3703 DONE;
3704 })
3705
;; 32-bit-target counterpart of *insvti_lowpart_1: replace the low 32 bits
;; of a DImode value with an SImode operand.  The mask (operand 3) must be
;; 0xffffffff00000000, i.e. it keeps only the high half of operand 1.
3706 (define_insn_and_split "*insvdi_lowpart_1"
3707 [(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
3708 (any_or_plus:DI
3709 (and:DI
3710 (match_operand:DI 1 "nonimmediate_operand" "r,o,r,o")
3711 (match_operand:DI 3 "const_int_operand" "n,n,n,n"))
3712 (zero_extend:DI
3713 (match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
3714 "!TARGET_64BIT
3715 && CONST_INT_P (operands[3])
3716 && UINTVAL (operands[3]) == 0xffffffff00000000ll"
3717 "#"
3718 "&& reload_completed"
3719 [(const_int 0)]
3720 {
/* operands[4] is the preserved high half; operand 2 becomes the new
   low half.  */
3721 operands[4] = gen_highpart (SImode, operands[1]);
3722 split_double_concat (DImode, operands[0], operands[2], operands[4]);
3723 DONE;
3724 })
3725 \f
3726 ;; Floating point push instructions.
3727
;; Push of a TFmode value.  Alternative 0 takes an SSE ("v") register;
;; alternative 1 (x64 only) takes a general register, offsettable memory
;; or a "C" constant.  The insn is never output directly.
3728 (define_insn "*pushtf"
3729 [(set (match_operand:TF 0 "push_operand" "=<,<")
3730 (match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
3731 "TARGET_64BIT || TARGET_SSE"
3732 {
3733 /* This insn should be already split before reg-stack. */
3734 return "#";
3735 }
3736 [(set_attr "isa" "*,x64")
3737 (set_attr "type" "multi")
3738 (set_attr "unit" "sse,*")
3739 (set_attr "mode" "TF,DI")])
3740
3741 ;; %%% Kill this when call knows how to work this out.
;; After reload, split a TFmode push of an SSE register into an explicit
;; stack pointer adjustment by -16 followed by a store to the new top of
;; stack.
3742 (define_split
3743 [(set (match_operand:TF 0 "push_operand")
3744 (match_operand:TF 1 "sse_reg_operand"))]
3745 "TARGET_SSE && reload_completed"
3746 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
3747 (set (match_dup 0) (match_dup 1))]
3748 {
3749 /* Preserve memory attributes. */
3750 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3751 })
3752
;; Push of an XFmode value.  Alternative 0 takes an x87 ("f") register;
;; the remaining alternatives take general registers, offsettable memory
;; or constants.  The insn is never output directly.
3753 (define_insn "*pushxf"
3754 [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
3755 (match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
3756 ""
3757 {
3758 /* This insn should be already split before reg-stack. */
3759 return "#";
3760 }
3761 [(set_attr "isa" "*,*,*,nox64,x64")
3762 (set_attr "type" "multi")
3763 (set_attr "unit" "i387,*,*,*,*")
;; The non-x87 alternatives use DI mode on 64-bit targets, SI otherwise.
3764 (set (attr "mode")
3765 (cond [(eq_attr "alternative" "1,2,3,4")
3766 (if_then_else (match_test "TARGET_64BIT")
3767 (const_string "DI")
3768 (const_string "SI"))
3769 ]
3770 (const_string "XF")))
;; Alternative 1 is not preferred when optimizing for size.
3771 (set (attr "preferred_for_size")
3772 (cond [(eq_attr "alternative" "1")
3773 (symbol_ref "false")]
3774 (symbol_ref "true")))])
3775
3776 ;; %%% Kill this when call knows how to work this out.
;; After reload, split an XFmode push of an x87 register into an explicit
;; stack pointer adjustment followed by a store to the new top of stack.
;; The adjustment is the push-rounded size of XFmode, negated.
3777 (define_split
3778 [(set (match_operand:XF 0 "push_operand")
3779 (match_operand:XF 1 "fp_register_operand"))]
3780 "reload_completed"
3781 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3782 (set (match_dup 0) (match_dup 1))]
3783 {
3784 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
3785 /* Preserve memory attributes. */
3786 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3787 })
3788
;; Push of a DFmode value.  Alternative 0 takes an x87 ("f") register,
;; alternatives 1-3 are the non-x64 integer forms, alternative 4 the x64
;; integer form and alternative 5 an SSE ("v") register (sse2).
;; The insn is never output directly.
3789 (define_insn "*pushdf"
3790 [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
3791 (match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,v"))]
3792 ""
3793 {
3794 /* This insn should be already split before reg-stack. */
3795 return "#";
3796 }
3797 [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
3798 (set_attr "type" "multi")
3799 (set_attr "unit" "i387,*,*,*,*,sse")
3800 (set_attr "mode" "DF,SI,SI,SI,DI,DF")
;; Alternative 1 is not preferred for size, and is preferred for speed
;; only when TARGET_INTEGER_DFMODE_MOVES is set.
3801 (set (attr "preferred_for_size")
3802 (cond [(eq_attr "alternative" "1")
3803 (symbol_ref "false")]
3804 (symbol_ref "true")))
3805 (set (attr "preferred_for_speed")
3806 (cond [(eq_attr "alternative" "1")
3807 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
3808 (symbol_ref "true")))])
3809
3810 ;; %%% Kill this when call knows how to work this out.
;; After reload, split a DFmode push of any floating-point register into an
;; explicit stack pointer adjustment by -8 followed by a store to the new
;; top of stack.
3811 (define_split
3812 [(set (match_operand:DF 0 "push_operand")
3813 (match_operand:DF 1 "any_fp_register_operand"))]
3814 "reload_completed"
3815 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
3816 (set (match_dup 0) (match_dup 1))]
3817 {
3818 /* Preserve memory attributes. */
3819 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3820 })
3821
;; Mode iterator over the two 16-bit floating-point modes HF and BF; used
;; by the push<mode> patterns that follow.
3822 (define_mode_iterator HFBF [HF BF])
3823
;; 64-bit push of an HF/BF value.  Only alternative 0 (general register)
;; is output directly, as a 64-bit push of the containing register (%q);
;; the SSE-register alternative (sse4) must have been split earlier.
3824 (define_insn "*push<mode>_rex64"
3825 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3826 (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
3827 "TARGET_64BIT"
3828 {
3829 /* Anything else should be already split before reg-stack. */
3830 gcc_assert (which_alternative == 0);
3831 return "push{q}\t%q1";
3832 }
3833 [(set_attr "isa" "*,sse4")
3834 (set_attr "type" "push,multi")
3835 (set_attr "mode" "DI,TI")])
3836
;; 32-bit push of an HF/BF value.  Only alternative 0 (general register,
;; memory or FP constant) is output directly, as a 32-bit push (%k); the
;; SSE-register alternative (sse4) must have been split earlier.
3837 (define_insn "*push<mode>"
3838 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3839 (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
3840 "!TARGET_64BIT"
3841 {
3842 /* Anything else should be already split before reg-stack. */
3843 gcc_assert (which_alternative == 0);
3844 return "push{l}\t%k1";
3845 }
3846 [(set_attr "isa" "*,sse4")
3847 (set_attr "type" "push,multi")
3848 (set_attr "mode" "SI,TI")])
3849
;; APX push2: push two DImode registers with one instruction.  The pair is
;; modelled as a single TImode push of an UNSPEC_APXPUSH2 of both registers.
3850 (define_insn "push2_di"
3851 [(set (match_operand:TI 0 "push_operand" "=<")
3852 (unspec:TI [(match_operand:DI 1 "register_operand" "r")
3853 (match_operand:DI 2 "register_operand" "r")]
3854 UNSPEC_APXPUSH2))]
3855 "TARGET_APX_PUSH2POP2"
3856 "push2\t{%2, %1|%1, %2}"
3857 [(set_attr "mode" "TI")
3858 (set_attr "type" "multi")
3859 (set_attr "prefix" "evex")])
3860
;; APX pop2: pop two DImode registers with one instruction.  Operand 0 is
;; set from the TImode pop slot (UNSPEC_APXPOP2_LOW); operand 2 is set
;; from a separate UNSPEC_APXPOP2_HIGH that has no explicit input.
3861 (define_insn "pop2_di"
3862 [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
3863 (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
3864 UNSPEC_APXPOP2_LOW))
3865 (set (match_operand:DI 2 "register_operand" "=r")
3866 (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))])]
3867 "TARGET_APX_PUSH2POP2"
3868 "pop2\t{%2, %0|%0, %2}"
3869 [(set_attr "mode" "TI")
3870 (set_attr "prefix" "evex")])
3871
;; APX push with the PPX hint: a DImode register push carrying an extra
;; UNSPEC_APX_PPX marker, emitted as "pushp".
;; The insn condition is TARGET_APX_PPX, like popp_di, push2p_di and
;; pop2p_di: pushp is an APX instruction, so the pattern must not be
;; available on a 64-bit target that lacks APX PPX.
(define_insn "pushp_di"
  [(set (match_operand:DI 0 "push_operand" "=<")
	(match_operand:DI 1 "register_operand" "r"))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_APX_PPX"
  "pushp\t%1"
  [(set_attr "mode" "DI")])
3879
;; APX popp: a DImode pop into a register, tagged with UNSPEC_APX_PPX.
;; Available only under TARGET_APX_PPX.
3880 (define_insn "popp_di"
3881 [(set (match_operand:DI 0 "register_operand" "=r")
3882 (match_operand:DI 1 "pop_operand" ">"))
3883 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
3884 "TARGET_APX_PPX"
3885 "popp\t%0"
3886 [(set_attr "mode" "DI")])
3887
;; APX push2p: the same TImode two-register push as push2_di, with an
;; additional UNSPEC_APX_PPX marker.  Requires both TARGET_APX_PUSH2POP2
;; and TARGET_APX_PPX.
3888 (define_insn "push2p_di"
3889 [(set (match_operand:TI 0 "push_operand" "=<")
3890 (unspec:TI [(match_operand:DI 1 "register_operand" "r")
3891 (match_operand:DI 2 "register_operand" "r")]
3892 UNSPEC_APXPUSH2))
3893 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
3894 "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
3895 "push2p\t{%2, %1|%1, %2}"
3896 [(set_attr "mode" "TI")
3897 (set_attr "type" "multi")
3898 (set_attr "prefix" "evex")])
3899
;; APX pop2p: the same two-register pop as pop2_di, with an additional
;; UNSPEC_APX_PPX marker inside the parallel.  Requires both
;; TARGET_APX_PUSH2POP2 and TARGET_APX_PPX.
3900 (define_insn "pop2p_di"
3901 [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
3902 (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
3903 UNSPEC_APXPOP2_LOW))
3904 (set (match_operand:DI 2 "register_operand" "=r")
3905 (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))
3906 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)])]
3907 "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
3908 "pop2p\t{%2, %0|%0, %2}"
3909 [(set_attr "mode" "TI")
3910 (set_attr "prefix" "evex")])
3911
;; SFmode push when TARGET_64BIT.  Alternative 1 (constraint "rF") is
;; output as a push{q}; alternatives 0 ("f") and 2 ("v") have type
;; "multi" and are output as "#", i.e. they must be split.
3912 (define_insn "*pushsf_rex64"
3913 [(set (match_operand:SF 0 "push_operand" "=X,X,X")
3914 (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
3915 "TARGET_64BIT"
3916 {
3917 /* Only alternative 1 is emitted as a push; anything else should
3918 already be split before reg-stack. */
3919 return (which_alternative == 1
3920 ? "push{q}\t%q1" : "#");
3921 }
3922 [(set_attr "type" "multi,push,multi")
3923 (set_attr "unit" "i387,*,*")
3924 (set_attr "mode" "SF,DI,SF")])
3925
;; SFmode push when !TARGET_64BIT.  Alternative 1 (constraint "rmF") is
;; output as a push{l}; alternatives 0 ("f") and 2 ("v") have type
;; "multi" and are output as "#", i.e. they must be split.
3926 (define_insn "*pushsf"
3927 [(set (match_operand:SF 0 "push_operand" "=<,<,<")
3928 (match_operand:SF 1 "general_no_elim_operand" "f,rmF,v"))]
3929 "!TARGET_64BIT"
3930 {
3931 /* Only alternative 1 is emitted as a push; anything else should
3932 already be split before reg-stack. */
3933 return (which_alternative == 1
3934 ? "push{l}\t%1" : "#");
3935 }
3936 [(set_attr "type" "multi,push,multi")
3937 (set_attr "unit" "i387,*,*")
3938 (set_attr "mode" "SF,SI,SF")])
3939
;; Modes (SFmode, HFmode, BFmode) handled by the FP-register push split
;; that follows.
3940 (define_mode_iterator MODESH [SF HF BF])
3941 ;; %%% Kill this when call knows how to work this out.
;; After reload, split a push of an FP register into an explicit stack
;; pointer adjustment followed by a plain store through the stack pointer.
;; operands[2] is the adjustment: -4 when the push address is a PRE_DEC
;; (asserted to occur only for !TARGET_64BIT), otherwise the constant
;; extracted from the push address expression (asserted to be a CONST_INT).
;; operands[0] is rewritten to address the stack pointer directly while
;; keeping its memory attributes.
3942 (define_split
3943 [(set (match_operand:MODESH 0 "push_operand")
3944 (match_operand:MODESH 1 "any_fp_register_operand"))]
3945 "reload_completed"
3946 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3947 (set (match_dup 0) (match_dup 1))]
3948 {
3949 rtx op = XEXP (operands[0], 0);
3950 if (GET_CODE (op) == PRE_DEC)
3951 {
3952 gcc_assert (!TARGET_64BIT);
3953 op = GEN_INT (-4);
3954 }
3955 else
3956 {
3957 op = XEXP (XEXP (op, 1), 1);
3958 gcc_assert (CONST_INT_P (op));
3959 }
3960 operands[2] = op;
3961 /* Preserve memory attributes. */
3962 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3963 })
3964
;; After reload, when find_constant_src succeeds for an SFmode push from
;; memory, replace the memory source with the value it returns
;; (operands[2]), so the push takes its source from that value instead.
3965 (define_split
3966 [(set (match_operand:SF 0 "push_operand")
3967 (match_operand:SF 1 "memory_operand"))]
3968 "reload_completed
3969 && find_constant_src (insn)"
3970 [(set (match_dup 0) (match_dup 2))]
3971 "operands[2] = find_constant_src (curr_insn);")
3972
;; After reload, a TFmode/XFmode/DFmode push whose source satisfies
;; general_gr_operand is handed to ix86_split_long_move, which emits the
;; replacement sequence itself (the split template is a dummy const_int).
3973 (define_split
3974 [(set (match_operand 0 "push_operand")
3975 (match_operand 1 "general_gr_operand"))]
3976 "reload_completed
3977 && (GET_MODE (operands[0]) == TFmode
3978 || GET_MODE (operands[0]) == XFmode
3979 || GET_MODE (operands[0]) == DFmode)"
3980 [(const_int 0)]
3981 "ix86_split_long_move (operands); DONE;")
3982 \f
3983 ;; Floating point move instructions.
3984
;; TFmode move expander, available when TARGET_64BIT or TARGET_SSE.
;; All expansion work is delegated to ix86_expand_move.
3985 (define_expand "movtf"
3986 [(set (match_operand:TF 0 "nonimmediate_operand")
3987 (match_operand:TF 1 "nonimmediate_operand"))]
3988 "TARGET_64BIT || TARGET_SSE"
3989 "ix86_expand_move (TFmode, operands); DONE;")
3990
;; Unconditional move expander for every mode in the X87MODEFH iterator
;; (defined outside this section).  All expansion work is delegated to
;; ix86_expand_move.
3991 (define_expand "mov<mode>"
3992 [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
3993 (match_operand:X87MODEFH 1 "general_operand"))]
3994 ""
3995 "ix86_expand_move (<MODE>mode, operands); DONE;")
3996
;; TFmode move.  Requires TARGET_64BIT or TARGET_SSE and rejects
;; memory-to-memory moves.
;; Alternatives (see the "type" and "isa" attributes):
;;   0     sselog1 - load of a standard SSE constant ("C")
;;   1, 2  ssemov  - SSE register/memory moves
;;   3, 4  multi   - x64-only moves through general registers or
;;                   offsettable memory; output as "#" so they get split.
3997 (define_insn "*movtf_internal"
3998 [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
3999 (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))]
4000 "(TARGET_64BIT || TARGET_SSE)
4001 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
4002 && (lra_in_progress || reload_completed
4003 || !CONST_DOUBLE_P (operands[1])
4004 || (standard_sse_constant_p (operands[1], TFmode) == 1
4005 && !memory_operand (operands[0], TFmode))
4006 || (!TARGET_MEMORY_MISMATCH_STALL
4007 && memory_operand (operands[0], TFmode)))"
4008 {
4009 switch (get_attr_type (insn))
4010 {
4011 case TYPE_SSELOG1:
4012 return standard_sse_constant_opcode (insn, operands);
4013
4014 case TYPE_SSEMOV:
4015 return ix86_output_ssemov (insn, operands);
4016
4017 case TYPE_MULTI:
4018 return "#";
4019
4020 default:
4021 gcc_unreachable ();
4022 }
4023 }
4024 [(set_attr "isa" "*,*,*,x64,x64")
4025 (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
4026 (set (attr "prefix")
4027 (if_then_else (eq_attr "type" "sselog1,ssemov")
4028 (const_string "maybe_vex")
4029 (const_string "orig")))
;; Mode selection: DI for the general-register alternatives; for the SSE
;; alternatives the first matching test below picks TI or V4SF.
4030 (set (attr "mode")
4031 (cond [(eq_attr "alternative" "3,4")
4032 (const_string "DI")
4033 (match_test "TARGET_AVX")
4034 (const_string "TI")
4035 (ior (not (match_test "TARGET_SSE2"))
4036 (match_test "optimize_function_for_size_p (cfun)"))
4037 (const_string "V4SF")
4038 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4039 (const_string "V4SF")
4040 (and (eq_attr "alternative" "2")
4041 (match_test "TARGET_SSE_TYPELESS_STORES"))
4042 (const_string "V4SF")
4043 ]
4044 (const_string "TI")))])
4045
;; After reload, a TFmode move between nonimmediate_gr_operand and
;; general_gr_operand operands is handed to ix86_split_long_move, which
;; emits the replacement sequence itself.
4046 (define_split
4047 [(set (match_operand:TF 0 "nonimmediate_gr_operand")
4048 (match_operand:TF 1 "general_gr_operand"))]
4049 "reload_completed"
4050 [(const_int 0)]
4051 "ix86_split_long_move (operands); DONE;")
4052
;; XFmode move; memory-to-memory moves are rejected.
;; Alternatives (see the "type", "isa" and "enabled" attributes):
;;   0-2    fmov  - x87 moves; alternative 2 loads a standard 80387
;;                  constant ("G")
;;   3-11   multi - output as "#" so they get split
;;   7, 10  nox64 only;  8, 11  x64 only
;;   9-11   enabled only when !TARGET_HARD_XF_REGS; every other
;;          alternative is enabled only when TARGET_HARD_XF_REGS.
4053 ;; Possible store forwarding (partial memory) stall
4054 ;; in alternatives 4, 6, 7 and 8.
4055 (define_insn "*movxf_internal"
4056 [(set (match_operand:XF 0 "nonimmediate_operand"
4057 "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r ,o ,o")
4058 (match_operand:XF 1 "general_operand"
4059 "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
4060 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4061 && (lra_in_progress || reload_completed
4062 || !CONST_DOUBLE_P (operands[1])
4063 || ((optimize_function_for_size_p (cfun)
4064 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4065 && standard_80387_constant_p (operands[1]) > 0
4066 && !memory_operand (operands[0], XFmode))
4067 || (!TARGET_MEMORY_MISMATCH_STALL
4068 && memory_operand (operands[0], XFmode))
4069 || !TARGET_HARD_XF_REGS)"
4070 {
4071 switch (get_attr_type (insn))
4072 {
4073 case TYPE_FMOV:
4074 if (which_alternative == 2)
4075 return standard_80387_constant_opcode (operands[1]);
4076 return output_387_reg_move (insn, operands);
4077
4078 case TYPE_MULTI:
4079 return "#";
4080
4081 default:
4082 gcc_unreachable ();
4083 }
4084 }
4085 [(set (attr "isa")
4086 (cond [(eq_attr "alternative" "7,10")
4087 (const_string "nox64")
4088 (eq_attr "alternative" "8,11")
4089 (const_string "x64")
4090 ]
4091 (const_string "*")))
4092 (set (attr "type")
4093 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
4094 (const_string "multi")
4095 ]
4096 (const_string "fmov")))
;; The integer alternatives use the word-sized mode of the target.
4097 (set (attr "mode")
4098 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
4099 (if_then_else (match_test "TARGET_64BIT")
4100 (const_string "DI")
4101 (const_string "SI"))
4102 ]
4103 (const_string "XF")))
4104 (set (attr "preferred_for_size")
4105 (cond [(eq_attr "alternative" "3,4")
4106 (symbol_ref "false")]
4107 (symbol_ref "true")))
;; Alternatives 9-11 are the soft-register variants: they are disabled
;; when TARGET_HARD_XF_REGS, and all remaining alternatives are disabled
;; when !TARGET_HARD_XF_REGS.
4108 (set (attr "enabled")
4109 (cond [(eq_attr "alternative" "9,10,11")
4110 (if_then_else
4111 (match_test "TARGET_HARD_XF_REGS")
4112 (symbol_ref "false")
4113 (const_string "*"))
4114 (not (match_test "TARGET_HARD_XF_REGS"))
4115 (symbol_ref "false")
4116 ]
4117 (const_string "*")))])
4118
;; After reload, an XFmode move between nonimmediate_gr_operand and
;; general_gr_operand operands is handed to ix86_split_long_move, which
;; emits the replacement sequence itself.
4119 (define_split
4120 [(set (match_operand:XF 0 "nonimmediate_gr_operand")
4121 (match_operand:XF 1 "general_gr_operand"))]
4122 "reload_completed"
4123 [(const_int 0)]
4124 "ix86_split_long_move (operands); DONE;")
4125
;; DFmode move; memory-to-memory moves are rejected.
;; Alternatives (see the "type", "isa" and "enabled" attributes):
;;   0-2            fmov    - x87 moves; alternative 2 loads a standard
;;                            80387 constant ("G")
;;   3-7, 22, 23    multi   - nox64; output as "#" so they get split
;;   8-11, 24, 25   imov    - x64 integer moves; alternative 11 is a
;;                            movabs with an 8-byte immediate
;;   12, 16         sselog1 - load of a standard SSE constant ("C")
;;   13-15, 17-21   ssemov  - SSE moves (20, 21 are x64_sse2 moves between
;;                            general and vector registers)
;;   22-25          enabled only when !TARGET_HARD_DF_REGS; every other
;;                  alternative is enabled only when TARGET_HARD_DF_REGS.
4126 ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
4127 (define_insn "*movdf_internal"
4128 [(set (match_operand:DF 0 "nonimmediate_operand"
4129 "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,Yv,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
4130 (match_operand:DF 1 "general_operand"
4131 "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C ,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
4132 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4133 && (lra_in_progress || reload_completed
4134 || !CONST_DOUBLE_P (operands[1])
4135 || ((optimize_function_for_size_p (cfun)
4136 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4137 && IS_STACK_MODE (DFmode)
4138 && standard_80387_constant_p (operands[1]) > 0
4139 && !memory_operand (operands[0], DFmode))
4140 || (TARGET_SSE2 && TARGET_SSE_MATH
4141 && standard_sse_constant_p (operands[1], DFmode) == 1
4142 && !memory_operand (operands[0], DFmode))
4143 || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
4144 && memory_operand (operands[0], DFmode))
4145 || !TARGET_HARD_DF_REGS)"
4146 {
4147 switch (get_attr_type (insn))
4148 {
4149 case TYPE_FMOV:
4150 if (which_alternative == 2)
4151 return standard_80387_constant_opcode (operands[1]);
4152 return output_387_reg_move (insn, operands);
4153
4154 case TYPE_MULTI:
4155 return "#";
4156
4157 case TYPE_IMOV:
4158 if (get_attr_mode (insn) == MODE_SI)
4159 return "mov{l}\t{%1, %k0|%k0, %1}";
4160 else if (which_alternative == 11)
4161 return "movabs{q}\t{%1, %0|%0, %1}";
4162 else
4163 return "mov{q}\t{%1, %0|%0, %1}";
4164
4165 case TYPE_SSELOG1:
4166 return standard_sse_constant_opcode (insn, operands);
4167
4168 case TYPE_SSEMOV:
4169 return ix86_output_ssemov (insn, operands);
4170
4171 default:
4172 gcc_unreachable ();
4173 }
4174 }
4175 [(set (attr "isa")
4176 (cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
4177 (const_string "nox64")
4178 (eq_attr "alternative" "8,9,10,11,24,25")
4179 (const_string "x64")
4180 (eq_attr "alternative" "12,13,14,15")
4181 (const_string "sse2")
4182 (eq_attr "alternative" "20,21")
4183 (const_string "x64_sse2")
4184 ]
4185 (const_string "*")))
4186 (set (attr "type")
4187 (cond [(eq_attr "alternative" "0,1,2")
4188 (const_string "fmov")
4189 (eq_attr "alternative" "3,4,5,6,7,22,23")
4190 (const_string "multi")
4191 (eq_attr "alternative" "8,9,10,11,24,25")
4192 (const_string "imov")
4193 (eq_attr "alternative" "12,16")
4194 (const_string "sselog1")
4195 ]
4196 (const_string "ssemov")))
;; Alternative 11 is the movabs form: no modrm byte and an 8-byte
;; immediate, as recorded by the next two attributes.
4197 (set (attr "modrm")
4198 (if_then_else (eq_attr "alternative" "11")
4199 (const_string "0")
4200 (const_string "*")))
4201 (set (attr "length_immediate")
4202 (if_then_else (eq_attr "alternative" "11")
4203 (const_string "8")
4204 (const_string "*")))
4205 (set (attr "prefix")
4206 (if_then_else (eq_attr "type" "sselog1,ssemov")
4207 (const_string "maybe_vex")
4208 (const_string "orig")))
4209 (set (attr "prefix_data16")
4210 (if_then_else
4211 (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
4212 (eq_attr "mode" "V1DF"))
4213 (const_string "1")
4214 (const_string "*")))
4215 (set (attr "mode")
4216 (cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
4217 (const_string "SI")
4218 (eq_attr "alternative" "8,9,11,20,21,24,25")
4219 (const_string "DI")
4220
4221 /* xorps is one byte shorter for non-AVX targets. */
4222 (eq_attr "alternative" "12,16")
4223 (cond [(match_test "TARGET_AVX")
4224 (const_string "V2DF")
4225 (ior (not (match_test "TARGET_SSE2"))
4226 (match_test "optimize_function_for_size_p (cfun)"))
4227 (const_string "V4SF")
4228 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4229 (const_string "TI")
4230 ]
4231 (const_string "V2DF"))
4232
4233 /* For architectures resolving dependencies on
4234 whole SSE registers use movapd to break dependency
4235 chains, otherwise use short move to avoid extra work. */
4236
4237 /* movaps is one byte shorter for non-AVX targets. */
4238 (eq_attr "alternative" "13,17")
4239 (cond [(match_test "TARGET_AVX512VL")
4240 (const_string "V2DF")
4241 (match_test "TARGET_AVX512F")
4242 (const_string "DF")
4243 (match_test "TARGET_AVX")
4244 (const_string "V2DF")
4245 (ior (not (match_test "TARGET_SSE2"))
4246 (match_test "optimize_function_for_size_p (cfun)"))
4247 (const_string "V4SF")
4248 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4249 (const_string "V4SF")
4250 (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4251 (const_string "V2DF")
4252 ]
4253 (const_string "DF"))
4254
4255 /* For architectures resolving dependencies on register
4256 parts we may avoid extra work to zero out upper part
4257 of register. */
4258 (eq_attr "alternative" "14,18")
4259 (cond [(not (match_test "TARGET_SSE2"))
4260 (const_string "V2SF")
4261 (match_test "TARGET_AVX")
4262 (const_string "DF")
4263 (match_test "TARGET_SSE_SPLIT_REGS")
4264 (const_string "V1DF")
4265 ]
4266 (const_string "DF"))
4267
4268 (and (eq_attr "alternative" "15,19")
4269 (not (match_test "TARGET_SSE2")))
4270 (const_string "V2SF")
4271 ]
4272 (const_string "DF")))
4273 (set (attr "preferred_for_size")
4274 (cond [(eq_attr "alternative" "3,4")
4275 (symbol_ref "false")]
4276 (symbol_ref "true")))
4277 (set (attr "preferred_for_speed")
4278 (cond [(eq_attr "alternative" "3,4")
4279 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
4280 (eq_attr "alternative" "20")
4281 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4282 (eq_attr "alternative" "21")
4283 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4284 ]
4285 (symbol_ref "true")))
;; Alternatives 22-25 are disabled when TARGET_HARD_DF_REGS, and all
;; remaining alternatives are disabled when !TARGET_HARD_DF_REGS.
4286 (set (attr "enabled")
4287 (cond [(eq_attr "alternative" "22,23,24,25")
4288 (if_then_else
4289 (match_test "TARGET_HARD_DF_REGS")
4290 (symbol_ref "false")
4291 (const_string "*"))
4292 (not (match_test "TARGET_HARD_DF_REGS"))
4293 (symbol_ref "false")
4294 ]
4295 (const_string "*")))])
4296
;; For !TARGET_64BIT only: after reload, a DFmode move between
;; nonimmediate_gr_operand and general_gr_operand operands is handed to
;; ix86_split_long_move, which emits the replacement sequence itself.
4297 (define_split
4298 [(set (match_operand:DF 0 "nonimmediate_gr_operand")
4299 (match_operand:DF 1 "general_gr_operand"))]
4300 "!TARGET_64BIT && reload_completed"
4301 [(const_int 0)]
4302 "ix86_split_long_move (operands); DONE;")
4303
;; SFmode move; memory-to-memory moves are rejected.
;; Alternatives (see the "type", "isa" and "enabled" attributes):
;;   0-2          fmov    - x87 moves; alternative 2 loads a standard
;;                          80387 constant ("G")
;;   3, 4, 16, 17 imov    - 32-bit integer moves (mov{l})
;;   5            sselog1 - load of a standard SSE constant ("C")
;;   6-10         ssemov  - SSE moves (9, 10 require sse2)
;;   11-15        mmxmov  - MMX moves; movq for mode DI, movd for mode SI
;;   16, 17       enabled only when !TARGET_HARD_SF_REGS; every other
;;                alternative is enabled only when TARGET_HARD_SF_REGS.
4304 (define_insn "*movsf_internal"
4305 [(set (match_operand:SF 0 "nonimmediate_operand"
4306 "=Yf*f,m ,Yf*f,?r ,?m,Yv,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
4307 (match_operand:SF 1 "general_operand"
4308 "Yf*fm,Yf*f,G ,rmF,rF,C ,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
4309 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4310 && (lra_in_progress || reload_completed
4311 || !CONST_DOUBLE_P (operands[1])
4312 || ((optimize_function_for_size_p (cfun)
4313 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4314 && IS_STACK_MODE (SFmode)
4315 && standard_80387_constant_p (operands[1]) > 0)
4316 || (TARGET_SSE && TARGET_SSE_MATH
4317 && standard_sse_constant_p (operands[1], SFmode) == 1)
4318 || memory_operand (operands[0], SFmode)
4319 || !TARGET_HARD_SF_REGS)"
4320 {
4321 switch (get_attr_type (insn))
4322 {
4323 case TYPE_FMOV:
4324 if (which_alternative == 2)
4325 return standard_80387_constant_opcode (operands[1]);
4326 return output_387_reg_move (insn, operands);
4327
4328 case TYPE_IMOV:
4329 return "mov{l}\t{%1, %0|%0, %1}";
4330
4331 case TYPE_SSELOG1:
4332 return standard_sse_constant_opcode (insn, operands);
4333
4334 case TYPE_SSEMOV:
4335 return ix86_output_ssemov (insn, operands);
4336
4337 case TYPE_MMXMOV:
4338 switch (get_attr_mode (insn))
4339 {
4340 case MODE_DI:
4341 return "movq\t{%1, %0|%0, %1}";
4342 case MODE_SI:
4343 return "movd\t{%1, %0|%0, %1}";
4344
4345 default:
4346 gcc_unreachable ();
4347 }
4348
4349 default:
4350 gcc_unreachable ();
4351 }
4352 }
4353 [(set (attr "isa")
4354 (cond [(eq_attr "alternative" "9,10")
4355 (const_string "sse2")
4356 ]
4357 (const_string "*")))
4358 (set (attr "type")
4359 (cond [(eq_attr "alternative" "0,1,2")
4360 (const_string "fmov")
4361 (eq_attr "alternative" "3,4,16,17")
4362 (const_string "imov")
4363 (eq_attr "alternative" "5")
4364 (const_string "sselog1")
4365 (eq_attr "alternative" "11,12,13,14,15")
4366 (const_string "mmxmov")
4367 ]
4368 (const_string "ssemov")))
4369 (set (attr "prefix")
4370 (if_then_else (eq_attr "type" "sselog1,ssemov")
4371 (const_string "maybe_vex")
4372 (const_string "orig")))
4373 (set (attr "prefix_data16")
4374 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
4375 (const_string "1")
4376 (const_string "*")))
4377 (set (attr "mode")
4378 (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
4379 (const_string "SI")
4380 (eq_attr "alternative" "11")
4381 (const_string "DI")
4382 (eq_attr "alternative" "5")
4383 (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
4384 (not (match_test "TARGET_PREFER_AVX256")))
4385 (const_string "V16SF")
4386 (match_test "TARGET_AVX")
4387 (const_string "V4SF")
4388 (ior (not (match_test "TARGET_SSE2"))
4389 (match_test "optimize_function_for_size_p (cfun)"))
4390 (const_string "V4SF")
4391 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4392 (const_string "TI")
4393 ]
4394 (const_string "V4SF"))
4395
4396 /* For architectures resolving dependencies on
4397 whole SSE registers use APS move to break dependency
4398 chains, otherwise use short move to avoid extra work.
4399
4400 Do the same for architectures resolving dependencies on
4401 the parts. While in DF mode it is better to always handle
4402 just register parts, the SF mode is different due to lack
4403 of instructions to load just part of the register. It is
4404 better to maintain the whole registers in single format
4405 to avoid problems on using packed logical operations. */
4406 (eq_attr "alternative" "6")
4407 (cond [(match_test "TARGET_AVX512VL")
4408 (const_string "V4SF")
4409 (match_test "TARGET_AVX512F")
4410 (const_string "SF")
4411 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4412 (match_test "TARGET_SSE_SPLIT_REGS"))
4413 (const_string "V4SF")
4414 ]
4415 (const_string "SF"))
4416 ]
4417 (const_string "SF")))
4418 (set (attr "preferred_for_speed")
4419 (cond [(eq_attr "alternative" "9,14")
4420 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4421 (eq_attr "alternative" "10,15")
4422 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4423 ]
4424 (symbol_ref "true")))
;; Alternatives 16 and 17 are disabled when TARGET_HARD_SF_REGS, and all
;; remaining alternatives are disabled when !TARGET_HARD_SF_REGS.
4425 (set (attr "enabled")
4426 (cond [(eq_attr "alternative" "16,17")
4427 (if_then_else
4428 (match_test "TARGET_HARD_SF_REGS")
4429 (symbol_ref "false")
4430 (const_string "*"))
4431 (not (match_test "TARGET_HARD_SF_REGS"))
4432 (symbol_ref "false")
4433 ]
4434 (const_string "*")))])
4435
;; Constraint fragment appended to the source constraint of alternative 3
;; of *mov<mode>_internal: "F" for HFmode, nothing for BFmode.
4436 (define_mode_attr hfbfconstf
4437 [(HF "F") (BF "")])
4438
;; HFmode/BFmode move; memory-to-memory moves are rejected.
;; Alternatives (see the "type", "isa" and "enabled" attributes):
;;   0-3      integer moves: type imov or imovx, chosen by the tests in
;;            the "type" attribute; emitted as movz{wl|x}, mov{l} or
;;            mov{w}
;;   4        sselog1 - load of a standard SSE constant ("C")
;;   5, 6, 9  ssemov
;;   7, 8, 10 ssemov when TARGET_AVX512FP16, otherwise sselog1, in which
;;            case the output code picks pinsrw or pextrw
;;   1        disabled for BFmode by the "enabled" attribute.
4439 (define_insn "*mov<mode>_internal"
4440 [(set (match_operand:HFBF 0 "nonimmediate_operand"
4441 "=?r,?r,?r,?m ,Yv,v,?r,jm,m,?v,v")
4442 (match_operand:HFBF 1 "general_operand"
4443 "r ,F ,m ,r<hfbfconstf>,C ,v, v,v ,v,r ,m"))]
4444 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4445 && (lra_in_progress
4446 || reload_completed
4447 || !CONST_DOUBLE_P (operands[1])
4448 || (TARGET_SSE2
4449 && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
4450 || memory_operand (operands[0], <MODE>mode))"
4451 {
4452 switch (get_attr_type (insn))
4453 {
4454 case TYPE_IMOVX:
4455 /* movzwl is faster than movw on p2 due to partial word stalls,
4456 though not as fast as an aligned movl. */
4457 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
4458
4459 case TYPE_SSEMOV:
4460 return ix86_output_ssemov (insn, operands);
4461
4462 case TYPE_SSELOG1:
4463 if (satisfies_constraint_C (operands[1]))
4464 return standard_sse_constant_opcode (insn, operands);
4465
4466 if (SSE_REG_P (operands[0]))
4467 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
4468 else
4469 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
4470
4471 default:
4472 if (get_attr_mode (insn) == MODE_SI)
4473 return "mov{l}\t{%k1, %k0|%k0, %k1}";
4474 else
4475 return "mov{w}\t{%1, %0|%0, %1}";
4476 }
4477 }
4478 [(set (attr "isa")
4479 (cond [(eq_attr "alternative" "4,5,6,9,10")
4480 (const_string "sse2")
4481 (eq_attr "alternative" "7")
4482 (const_string "sse4_noavx")
4483 (eq_attr "alternative" "8")
4484 (const_string "avx")
4485 ]
4486 (const_string "*")))
4487 (set (attr "addr")
4488 (if_then_else (eq_attr "alternative" "7")
4489 (const_string "gpr16")
4490 (const_string "*")))
;; For the integer alternatives the first matching test decides between
;; imov and imovx; imov is also the fallback.
4491 (set (attr "type")
4492 (cond [(eq_attr "alternative" "4")
4493 (const_string "sselog1")
4494 (eq_attr "alternative" "5,6,9")
4495 (const_string "ssemov")
4496 (eq_attr "alternative" "7,8,10")
4497 (if_then_else
4498 (match_test ("TARGET_AVX512FP16"))
4499 (const_string "ssemov")
4500 (const_string "sselog1"))
4501 (match_test "optimize_function_for_size_p (cfun)")
4502 (const_string "imov")
4503 (and (eq_attr "alternative" "0")
4504 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4505 (not (match_test "TARGET_HIMODE_MATH"))))
4506 (const_string "imov")
4507 (and (eq_attr "alternative" "1,2")
4508 (match_operand:HI 1 "aligned_operand"))
4509 (const_string "imov")
4510 (and (match_test "TARGET_MOVX")
4511 (eq_attr "alternative" "0,2"))
4512 (const_string "imovx")
4513 ]
4514 (const_string "imov")))
4515 (set (attr "prefix")
4516 (cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
4517 (const_string "maybe_vex")
4518 ]
4519 (const_string "orig")))
4520 (set (attr "mode")
4521 (cond [(eq_attr "alternative" "4")
4522 (const_string "V4SF")
4523 (eq_attr "alternative" "6,9")
4524 (if_then_else
4525 (match_test "TARGET_AVX512FP16")
4526 (const_string "HI")
4527 (const_string "SI"))
4528 (eq_attr "alternative" "7,8,10")
4529 (if_then_else
4530 (match_test "TARGET_AVX512FP16")
4531 (const_string "HI")
4532 (const_string "TI"))
4533 (eq_attr "alternative" "5")
4534 (cond [(match_test "TARGET_AVX512VL")
4535 (const_string "V4SF")
4536 (match_test "TARGET_AVX512FP16")
4537 (const_string "HF")
4538 (match_test "TARGET_AVX512F")
4539 (const_string "SF")
4540 (match_test "TARGET_AVX")
4541 (const_string "V4SF")
4542 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4543 (match_test "TARGET_SSE_SPLIT_REGS"))
4544 (const_string "V4SF")
4545 ]
4546 (const_string "SF"))
4547 (eq_attr "type" "imovx")
4548 (const_string "SI")
4549 (and (eq_attr "alternative" "1,2")
4550 (match_operand:HI 1 "aligned_operand"))
4551 (const_string "SI")
4552 (and (eq_attr "alternative" "0")
4553 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4554 (not (match_test "TARGET_HIMODE_MATH"))))
4555 (const_string "SI")
4556 ]
4557 (const_string "HI")))
;; The constant-to-register alternative (1) is disabled for BFmode.
4558 (set (attr "enabled")
4559 (cond [(and (match_test "<MODE>mode == BFmode")
4560 (eq_attr "alternative" "1"))
4561 (symbol_ref "false")
4562 ]
4563 (const_string "*")))])
4564
;; After reload, when a TFmode/XFmode/DFmode/SFmode load from memory into
;; an FP register satisfies ix86_standard_x87sse_constant_load_p, replace
;; the memory source with the value returned by find_constant_src.
4565 (define_split
4566 [(set (match_operand 0 "any_fp_register_operand")
4567 (match_operand 1 "memory_operand"))]
4568 "reload_completed
4569 && (GET_MODE (operands[0]) == TFmode
4570 || GET_MODE (operands[0]) == XFmode
4571 || GET_MODE (operands[0]) == DFmode
4572 || GET_MODE (operands[0]) == SFmode)
4573 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4574 [(set (match_dup 0) (match_dup 2))]
4575 "operands[2] = find_constant_src (curr_insn);")
4576
;; Same replacement as the plain FP constant-load split, but for a
;; float_extend of a memory operand into a TFmode/XFmode/DFmode FP
;; register: the whole float_extend is replaced by the value returned by
;; find_constant_src.
4577 (define_split
4578 [(set (match_operand 0 "any_fp_register_operand")
4579 (float_extend (match_operand 1 "memory_operand")))]
4580 "reload_completed
4581 && (GET_MODE (operands[0]) == TFmode
4582 || GET_MODE (operands[0]) == XFmode
4583 || GET_MODE (operands[0]) == DFmode)
4584 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4585 [(set (match_dup 0) (match_dup 2))]
4586 "operands[2] = find_constant_src (curr_insn);")
4587
4588 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
4589 (define_split
4590 [(set (match_operand:X87MODEF 0 "fp_register_operand")
4591 (match_operand:X87MODEF 1 "immediate_operand"))]
4592 "reload_completed
4593 && (standard_80387_constant_p (operands[1]) == 8
4594 || standard_80387_constant_p (operands[1]) == 9)"
4595 [(set (match_dup 0)(match_dup 1))
4596 (set (match_dup 0)
4597 (neg:X87MODEF (match_dup 0)))]
4598 {
4599 /* Load the positive counterpart; the neg in the template flips it. */
4600 operands[1]
4601 = (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1]))
4602 ? CONST0_RTX (<MODE>mode) : CONST1_RTX (<MODE>mode));
4603 })
4604
;; Exchange of two XFmode x87 registers (both operands are read-write),
;; available under TARGET_80387 and emitted as a single fxch.
4605 (define_insn "*swapxf"
4606 [(set (match_operand:XF 0 "register_operand" "+f")
4607 (match_operand:XF 1 "register_operand" "+f"))
4608 (set (match_dup 1)
4609 (match_dup 0))]
4610 "TARGET_80387"
4611 {
4612 /* Name the other register when operand 0 is the stack top. */
4613 return (STACK_TOP_P (operands[0])
4614 ? "fxch\t%1"
4615 : "fxch\t%0");
4616 }
4617 [(set_attr "type" "fxch")
4618 (set_attr "mode" "XF")])
4619 \f
4620
4621 ;; Zero extension instructions
4622
;; Zero-extend DImode to TImode on TARGET_64BIT.  Always output as "#";
;; after reload it is split into a move of the source into operands[3]
;; and a store of zero into operands[4], the two DImode parts that
;; split_double_mode produces from operands[0].
4623 (define_insn_and_split "zero_extendditi2"
4624 [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
4625 (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
4626 "TARGET_64BIT"
4627 "#"
4628 "&& reload_completed"
4629 [(set (match_dup 3) (match_dup 1))
4630 (set (match_dup 4) (const_int 0))]
4631 "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
4632
;; Named expander for SImode -> DImode zero extension.  It has no
;; condition and no preparation code; it only emits the pattern as given.
4633 (define_expand "zero_extendsidi2"
4634 [(set (match_operand:DI 0 "nonimmediate_operand")
4635 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])
4636
;; SImode -> DImode zero extension.
;; Alternatives (see the "type" and "isa" attributes):
;;   0-2     multi  - nox64; output as "#" so they get split
;;   3       imovx  - x64; a 32-bit mov, or lea when
;;                    ix86_use_lea_for_mov says so
;;   4       multi  - memory destination with source tied to operand 0
;;   5, 6    mmxmov - movd into an MMX register (mmx_isa "native")
;;   7       ssemov when TARGET_64BIT, otherwise multi (sse2)
;;   8-11    ssemov - movd/pmovzxdq forms (8, 9 sse2; 10 sse4; 11 avx512f)
;;   12, 13  mskmov - kmovd (12 x64_avx512bw, 13 avx512bw)
4637 (define_insn "*zero_extendsidi2"
4638 [(set (match_operand:DI 0 "nonimmediate_operand"
4639 "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k")
4640 (zero_extend:DI
4641 (match_operand:SI 1 "x86_64_zext_operand"
4642 "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,?k,?km")))]
4643 ""
4644 {
4645 switch (get_attr_type (insn))
4646 {
4647 case TYPE_IMOVX:
4648 if (ix86_use_lea_for_mov (insn, operands))
4649 return "lea{l}\t{%E1, %k0|%k0, %E1}";
4650 else
4651 return "mov{l}\t{%1, %k0|%k0, %1}";
4652
4653 case TYPE_MULTI:
4654 return "#";
4655
4656 case TYPE_MMXMOV:
4657 return "movd\t{%1, %0|%0, %1}";
4658
4659 case TYPE_SSEMOV:
4660 if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
4661 {
4662 if (EXT_REX_SSE_REG_P (operands[0])
4663 || EXT_REX_SSE_REG_P (operands[1]))
4664 return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
4665 else
4666 return "%vpmovzxdq\t{%1, %0|%0, %1}";
4667 }
4668
4669 if (GENERAL_REG_P (operands[0]))
4670 return "%vmovd\t{%1, %k0|%k0, %1}";
4671
4672 return "%vmovd\t{%1, %0|%0, %1}";
4673
4674 case TYPE_MSKMOV:
4675 return "kmovd\t{%1, %k0|%k0, %1}";
4676
4677 default:
4678 gcc_unreachable ();
4679 }
4680 }
4681 [(set (attr "isa")
4682 (cond [(eq_attr "alternative" "0,1,2")
4683 (const_string "nox64")
4684 (eq_attr "alternative" "3")
4685 (const_string "x64")
4686 (eq_attr "alternative" "7,8,9")
4687 (const_string "sse2")
4688 (eq_attr "alternative" "10")
4689 (const_string "sse4")
4690 (eq_attr "alternative" "11")
4691 (const_string "avx512f")
4692 (eq_attr "alternative" "12")
4693 (const_string "x64_avx512bw")
4694 (eq_attr "alternative" "13")
4695 (const_string "avx512bw")
4696 ]
4697 (const_string "*")))
4698 (set (attr "mmx_isa")
4699 (if_then_else (eq_attr "alternative" "5,6")
4700 (const_string "native")
4701 (const_string "*")))
;; Alternative 3 is the only one not listed here, so it is the one that
;; receives the default type, imovx.
4702 (set (attr "type")
4703 (cond [(eq_attr "alternative" "0,1,2,4")
4704 (const_string "multi")
4705 (eq_attr "alternative" "5,6")
4706 (const_string "mmxmov")
4707 (eq_attr "alternative" "7")
4708 (if_then_else (match_test "TARGET_64BIT")
4709 (const_string "ssemov")
4710 (const_string "multi"))
4711 (eq_attr "alternative" "8,9,10,11")
4712 (const_string "ssemov")
4713 (eq_attr "alternative" "12,13")
4714 (const_string "mskmov")
4715 ]
4716 (const_string "imovx")))
4717 (set (attr "prefix_extra")
4718 (if_then_else (eq_attr "alternative" "10,11")
4719 (const_string "1")
4720 (const_string "*")))
4721 (set (attr "prefix")
4722 (if_then_else (eq_attr "type" "ssemov")
4723 (const_string "maybe_vex")
4724 (const_string "orig")))
4725 (set (attr "prefix_0f")
4726 (if_then_else (eq_attr "type" "imovx")
4727 (const_string "0")
4728 (const_string "*")))
4729 (set (attr "mode")
4730 (cond [(eq_attr "alternative" "5,6")
4731 (const_string "DI")
4732 (and (eq_attr "alternative" "7")
4733 (match_test "TARGET_64BIT"))
4734 (const_string "TI")
4735 (eq_attr "alternative" "8,10,11")
4736 (const_string "TI")
4737 ]
4738 (const_string "SI")))
4739 (set (attr "preferred_for_speed")
4740 (cond [(eq_attr "alternative" "7")
4741 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4742 (eq_attr "alternative" "5,8")
4743 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4744 ]
4745 (symbol_ref "true")))])
4746
;; After reload, a memory-to-memory SImode -> DImode zero extension is
;; reduced to a single store of zero into operands[4], one of the two
;; parts split_double_mode produces from operands[0].
;; NOTE(review): this presumably relies on operand 1 already being the
;; low part of operand 0 (the "o"/"0" alternative of *zero_extendsidi2),
;; so only the high part needs writing - confirm.
4747 (define_split
4748 [(set (match_operand:DI 0 "memory_operand")
4749 (zero_extend:DI (match_operand:SI 1 "memory_operand")))]
4750 "reload_completed"
4751 [(set (match_dup 4) (const_int 0))]
4752 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4753
;; For !TARGET_64BIT, after reload: when source and destination are
;; general registers with the same register number, only a store of zero
;; into operands[4] (a part of operands[0] produced by split_double_mode)
;; is emitted.
4754 (define_split
4755 [(set (match_operand:DI 0 "general_reg_operand")
4756 (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
4757 "!TARGET_64BIT && reload_completed
4758 && REGNO (operands[0]) == REGNO (operands[1])"
4759 [(set (match_dup 4) (const_int 0))]
4760 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4761
;; For !TARGET_64BIT, after reload: general case of SImode -> DImode zero
;; extension (memory-to-memory excluded).  The source is moved into
;; operands[3] and zero is stored into operands[4], the two parts that
;; split_double_mode produces from operands[0].
4762 (define_split
4763 [(set (match_operand:DI 0 "nonimmediate_gr_operand")
4764 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
4765 "!TARGET_64BIT && reload_completed
4766 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4767 [(set (match_dup 3) (match_dup 1))
4768 (set (match_dup 4) (const_int 0))]
4769 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4770
;; Per-mode "isa" attribute value used for the kmov (mask register)
;; alternatives of the zero-extension patterns.
4771 (define_mode_attr kmov_isa
4772 [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
4773
;; Zero-extend an SWI12-mode operand to DImode on TARGET_64BIT.
;; Alternative 0 is a movz into the 32-bit view of the destination (%k0);
;; alternatives 1 and 2 are kmov forms gated by <kmov_isa>.
4774 (define_insn "zero_extend<mode>di2"
4775 [(set (match_operand:DI 0 "register_operand" "=r,?r,?k")
4776 (zero_extend:DI
4777 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
4778 "TARGET_64BIT"
4779 "@
4780 movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
4781 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
4782 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
4783 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4784 (set_attr "type" "imovx,mskmov,mskmov")
4785 (set_attr "mode" "SI,<MODE>,<MODE>")])
4786
;; Expander for SWI12 -> SImode zero extension.  When
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed, the source is forced into a register and the
;; zero_extend<mode>si2_and pattern is emitted instead; otherwise the
;; pattern is emitted as written.
4787 (define_expand "zero_extend<mode>si2"
4788 [(set (match_operand:SI 0 "register_operand")
4789 (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
4790 ""
4791 {
4792 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4793 {
4794 operands[1] = force_reg (<MODE>mode, operands[1]);
4795 emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
4796 DONE;
4797 }
4798 })
4799
;; SWI12 -> SImode zero extension for TARGET_ZERO_EXTEND_WITH_AND when
;; optimizing for speed; clobbers the flags register.  Always output as
;; "#" and split after reload:
;;  - if the source is not a register, or is a different register from
;;    the destination, the destination is cleared with ix86_expand_clear
;;    and the source is stored into its low part (strict_low_part);
;;  - otherwise the destination is ANDed in SImode with the mode mask of
;;    <MODE>mode (operands[2]).
4800 (define_insn_and_split "zero_extend<mode>si2_and"
4801 [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
4802 (zero_extend:SI
4803 (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
4804 (clobber (reg:CC FLAGS_REG))]
4805 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4806 "#"
4807 "&& reload_completed"
4808 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
4809 (clobber (reg:CC FLAGS_REG))])]
4810 {
4811 if (!REG_P (operands[1])
4812 || REGNO (operands[0]) != REGNO (operands[1]))
4813 {
4814 ix86_expand_clear (operands[0]);
4815
4816 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4817 emit_insn (gen_rtx_SET
4818 (gen_rtx_STRICT_LOW_PART
4819 (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
4820 operands[1]));
4821 DONE;
4822 }
4823
4824 operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
4825 }
4826 [(set_attr "type" "alu1")
4827 (set_attr "mode" "SI")])
4828
;; SWI12 -> SImode zero extension used whenever the AND-based variant is
;; not selected (the condition is the negation of that pattern's
;; condition).  Alternative 0 is a movz; alternatives 1 and 2 are kmov
;; forms gated by <kmov_isa>.
4829 (define_insn "*zero_extend<mode>si2"
4830 [(set (match_operand:SI 0 "register_operand" "=r,?r,?k")
4831 (zero_extend:SI
4832 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
4833 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4834 "@
4835 movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
4836 kmov<mskmodesuffix>\t{%1, %0|%0, %1}
4837 kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
4838 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4839 (set_attr "type" "imovx,mskmov,mskmov")
4840 (set_attr "mode" "SI,<MODE>,<MODE>")])
4841
;; Expander for QImode -> HImode zero extension.  When
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed, the source is forced into a register and the
;; zero_extendqihi2_and pattern is emitted instead; otherwise the pattern
;; is emitted as written.
4842 (define_expand "zero_extendqihi2"
4843 [(set (match_operand:HI 0 "register_operand")
4844 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
4845 ""
4846 {
4847 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4848 {
4849 operands[1] = force_reg (QImode, operands[1]);
4850 emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
4851 DONE;
4852 }
4853 })
4854
;; QImode -> HImode zero extension implemented with AND, for the case where
;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
;; speed.  The output template is "#", so the insn is always split.
;; Alternative 0 ties the source to the destination register; alternative 1
;; uses an early-clobbered "q" destination with a "q" register or memory
;; source.
4855 (define_insn_and_split "zero_extendqihi2_and"
4856 [(set (match_operand:HI 0 "register_operand" "=r,?&q")
4857 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
4858 (clobber (reg:CC FLAGS_REG))]
4859 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4860 "#"
4861 "&& reload_completed"
;; Default replacement: an SImode AND with 255.  The preparation code
;; rewrites operands[0] as its SImode lowpart so that (match_dup 0) fits the
;; and:SI.
4862 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
4863 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: when the source is not the destination register, clear
;; the destination and store the byte into its strict low part instead; DONE
;; then suppresses the AND template.
4864 {
4865 if (!REG_P (operands[1])
4866 || REGNO (operands[0]) != REGNO (operands[1]))
4867 {
4868 ix86_expand_clear (operands[0]);
4869
4870 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4871 emit_insn (gen_rtx_SET
4872 (gen_rtx_STRICT_LOW_PART
4873 (VOIDmode, gen_lowpart (QImode, operands[0])),
4874 operands[1]));
4875 DONE;
4876 }
4877
4878 operands[0] = gen_lowpart (SImode, operands[0]);
4879 }
4880 [(set_attr "type" "alu1")
4881 (set_attr "mode" "SI")])
4882
4883 ; Zero-extend to SImode to avoid partial register stalls.
;; Single-instruction QImode -> HImode zero extension, used when the
;; AND-based pattern is not selected (the condition is the negation of the
;; one on zero_extendqihi2_and).  Alternative 0 emits movz and prints the
;; destination with the %k0 modifier (its "mode" attribute is SI);
;; alternatives 1 and 2 emit kmovb and require the avx512dq "isa".
4884 (define_insn "*zero_extendqihi2"
4885 [(set (match_operand:HI 0 "register_operand" "=r,?r,?k")
4886 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,?k,?km")))]
4887 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4888 "@
4889 movz{bl|x}\t{%1, %k0|%k0, %1}
4890 kmovb\t{%1, %k0|%k0, %1}
4891 kmovb\t{%1, %0|%0, %1}"
4892 [(set_attr "isa" "*,avx512dq,avx512dq")
4893 (set_attr "type" "imovx,mskmov,mskmov")
4894 (set_attr "mode" "SI,QI,QI")])
4895
4896 ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
;; Matches a flags-clobbering clear of an SWI48 general register followed
;; by a strict_low_part store of an SWI12 value into the same hard register
;; (REGNO equality), and replaces the pair with a single zero_extend.
;; For an SImode destination the rewrite is done only when the AND-based
;; zero extension is not in effect (TARGET_ZERO_EXTEND_WITH_AND unset or
;; the function not optimized for speed).
4897 (define_peephole2
4898 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
4899 (const_int 0))
4900 (clobber (reg:CC FLAGS_REG))])
4901 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4902 (match_operand:SWI12 2 "nonimmediate_operand"))]
4903 "REGNO (operands[0]) == REGNO (operands[1])
4904 && (<SWI48:MODE>mode != SImode
4905 || !TARGET_ZERO_EXTEND_WITH_AND
4906 || !optimize_function_for_speed_p (cfun))"
4907 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4908
4909 ;; Likewise, but preserving FLAGS_REG.
;; Here the clear is a plain (set reg (const_int 0)) without a FLAGS_REG
;; clobber.  It is followed by a strict_low_part store into the same hard
;; register, and the pair is replaced by one zero_extend under the same
;; condition as the flags-clobbering variant.
4910 (define_peephole2
4911 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
4912 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4913 (match_operand:SWI12 2 "nonimmediate_operand"))]
4914 "REGNO (operands[0]) == REGNO (operands[1])
4915 && (<SWI48:MODE>mode != SImode
4916 || !TARGET_ZERO_EXTEND_WITH_AND
4917 || !optimize_function_for_speed_p (cfun))"
4918 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4919 \f
4920 ;; Sign extension instructions
4921
;; Expander for SImode -> DImode sign extension.  Without TARGET_64BIT it
;; emits extendsidi2_1 (which carries flags and scratch clobbers and is
;; split later); with TARGET_64BIT the plain sign_extend RTL of the template
;; is generated.
4922 (define_expand "extendsidi2"
4923 [(set (match_operand:DI 0 "register_operand")
4924 (sign_extend:DI (match_operand:SI 1 "register_operand")))]
4925 ""
4926 {
4927 if (!TARGET_64BIT)
4928 {
4929 emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
4930 DONE;
4931 }
4932 })
4933
;; SImode -> DImode sign extension for TARGET_64BIT.
;; Alternative 0 has the source tied to an "a"-constrained destination and
;; emits the operand-less cltq/cdqe (modrm attribute 0); alternative 1 emits
;; movs{lq|x} from a register or memory source (modrm attribute 1).
4934 (define_insn "*extendsidi2_rex64"
4935 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4936 (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
4937 "TARGET_64BIT"
4938 "@
4939 {cltq|cdqe}
4940 movs{lq|x}\t{%1, %0|%0, %1}"
4941 [(set_attr "type" "imovx")
4942 (set_attr "mode" "DI")
4943 (set_attr "prefix_0f" "0")
4944 (set_attr "modrm" "0,1")])
4945
;; SImode -> DImode sign extension for !TARGET_64BIT.  The template is "#",
;; so the insn is never output directly and has to be split.
;; The SImode scratch (operand 2) is a real register ("&r") only in the last
;; alternative, whose destination is offsettable memory; the other
;; alternatives use "X" for it.
4946 (define_insn "extendsidi2_1"
4947 [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4948 (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
4949 (clobber (reg:CC FLAGS_REG))
4950 (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
4951 "!TARGET_64BIT"
4952 "#")
4953
;; DImode -> TImode sign extension for TARGET_64BIT; the double-word
;; counterpart of extendsidi2_1 with the same alternatives and clobbers.
;; The template is "#", so the insn has to be split.
4954 (define_insn "extendditi2"
4955 [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4956 (sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r")))
4957 (clobber (reg:CC FLAGS_REG))
4958 (clobber (match_scratch:DI 2 "=X,X,X,&r"))]
4959 "TARGET_64BIT"
4960 "#")
4961
4962 ;; Split the memory case. If the source register doesn't die, it will stay
4963 ;; this way, if it does die, following peephole2s take care of it.
;; The <DWI> memory destination is split into two DWIH halves: operands[3]
;; receives the source value and operands[4] receives the sign word.  The
;; sign word is computed in the scratch register operands[2] by an
;; arithmetic right shift by <MODE_SIZE> * BITS_PER_UNIT - 1 bits.
;; When the source is AX and the scratch is DX, and either the function is
;; optimized for size or TARGET_USE_CLTD is set, the shift reads the source
;; directly; otherwise the source is first copied into the scratch and
;; shifted in place.
4964 (define_split
4965 [(set (match_operand:<DWI> 0 "memory_operand")
4966 (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
4967 (clobber (reg:CC FLAGS_REG))
4968 (clobber (match_operand:DWIH 2 "register_operand"))]
4969 "reload_completed"
4970 [(const_int 0)]
4971 {
4972 rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
4973
4974 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
4975
4976 emit_move_insn (operands[3], operands[1]);
4977
4978 /* Generate a cltd if possible and doing so it profitable. */
4979 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4980 && REGNO (operands[1]) == AX_REG
4981 && REGNO (operands[2]) == DX_REG)
4982 {
4983 emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits));
4984 }
4985 else
4986 {
4987 emit_move_insn (operands[2], operands[1]);
4988 emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits));
4989 }
4990 emit_move_insn (operands[4], operands[2]);
4991 DONE;
4992 })
4993
4994 ;; Peepholes for the case where the source register does die, after
4995 ;; being split with the above splitter.
;; Matched sequence:
;;   mem0 = op1; op2 = op1; op2 >>= op4 (arithmetic); mem3 = op2
;; Replacement:
;;   mem0 = op1; op1 >>= op4 (arithmetic); mem3 = op1
;; so the register copy is dropped and the shift is done on op1 itself.
;; Requires op1 and op2 to be different registers, the shift count to be
;; the sign-bit position of the mode, peep2_reg_dead_p to hold for op1 at
;; position 2 and for op2 at position 4, and op2 not to be mentioned in the
;; second memory operand.
4996 (define_peephole2
4997 [(set (match_operand:DWIH 0 "memory_operand")
4998 (match_operand:DWIH 1 "general_reg_operand"))
4999 (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1))
5000 (parallel [(set (match_dup 2)
5001 (ashiftrt:DWIH (match_dup 2)
5002 (match_operand 4 "const_int_operand")))
5003 (clobber (reg:CC FLAGS_REG))])
5004 (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
5005 "REGNO (operands[1]) != REGNO (operands[2])
5006 && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
5007 && peep2_reg_dead_p (2, operands[1])
5008 && peep2_reg_dead_p (4, operands[2])
5009 && !reg_mentioned_p (operands[2], operands[3])"
5010 [(set (match_dup 0) (match_dup 1))
5011 (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
5012 (clobber (reg:CC FLAGS_REG))])
5013 (set (match_dup 3) (match_dup 1))])
5014
;; Variant of the preceding peephole for the AX -> DX form, where the shift
;; already reads the source register directly.
;; Matched sequence:
;;   mem0 = op1; op2 = op1 >> op4 (arithmetic); mem3 = op2
;; Replacement:
;;   mem0 = op1; op1 >>= op4 (arithmetic); mem3 = op1
;; Applied only when the function is not optimized for size (see the
;; comment in the condition), op1 is AX, op2 is DX, the shift count is the
;; sign-bit position of the mode, peep2_reg_dead_p holds for op1 at
;; position 2 and for op2 at position 3, and op2 is not mentioned in the
;; second memory operand.
5015 (define_peephole2
5016 [(set (match_operand:DWIH 0 "memory_operand")
5017 (match_operand:DWIH 1 "general_reg_operand"))
5018 (parallel [(set (match_operand:DWIH 2 "general_reg_operand")
5019 (ashiftrt:DWIH (match_dup 1)
5020 (match_operand 4 "const_int_operand")))
5021 (clobber (reg:CC FLAGS_REG))])
5022 (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
5023 "/* cltd is shorter than sarl $31, %eax */
5024 !optimize_function_for_size_p (cfun)
5025 && REGNO (operands[1]) == AX_REG
5026 && REGNO (operands[2]) == DX_REG
5027 && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
5028 && peep2_reg_dead_p (2, operands[1])
5029 && peep2_reg_dead_p (3, operands[2])
5030 && !reg_mentioned_p (operands[2], operands[3])"
5031 [(set (match_dup 0) (match_dup 1))
5032 (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
5033 (clobber (reg:CC FLAGS_REG))])
5034 (set (match_dup 3) (match_dup 1))])
5035
5036 ;; Extend to register case. Optimize case where source and destination
5037 ;; registers match and cases where we can use cltd.
;; The <DWI> register destination is split into operands[3] (receives the
;; source value) and operands[4] (receives the sign word).  Steps:
;;   1. Copy the source into operands[3] unless they are the same register.
;;   2. Pick "src" for the shift: operands[3] when it is AX, otherwise the
;;      original source.
;;   3. If src is AX, operands[4] is DX, and the function is optimized for
;;      size or TARGET_USE_CLTD is set, emit the shift from src into
;;      operands[4] directly and stop.
;;   4. Otherwise copy the source into operands[4] (unless it is already
;;      that register) and shift operands[4] in place.
;; The scratch operand 2 of the matched insn is not used by this code.
5038 (define_split
5039 [(set (match_operand:<DWI> 0 "register_operand")
5040 (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
5041 (clobber (reg:CC FLAGS_REG))
5042 (clobber (match_scratch:DWIH 2))]
5043 "reload_completed"
5044 [(const_int 0)]
5045 {
5046 rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
5047
5048 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
5049
5050 if (REGNO (operands[3]) != REGNO (operands[1]))
5051 emit_move_insn (operands[3], operands[1]);
5052
5053 rtx src = operands[1];
5054 if (REGNO (operands[3]) == AX_REG)
5055 src = operands[3];
5056
5057 /* Generate a cltd if possible and doing so it profitable. */
5058 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
5059 && REGNO (src) == AX_REG
5060 && REGNO (operands[4]) == DX_REG)
5061 {
5062 emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits));
5063 DONE;
5064 }
5065
5066 if (REGNO (operands[4]) != REGNO (operands[1]))
5067 emit_move_insn (operands[4], operands[1]);
5068
5069 emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits));
5070 DONE;
5071 })
5072
;; Rewrite a DImode sign-extension sequence so that the shift goes directly
;; from AX into DX.
;; Matched sequence:
;;   op0 = op1; op0 >>= 63 (arithmetic); op2 = op1; op3 = op0
;; Replacement:
;;   op2 = op1; op3 = op2 >> 63 (arithmetic)
;; Applied when the function is optimized for size or TARGET_USE_CLTD is
;; set, op2 is AX, op3 is DX, peep2_reg_dead_p holds for op0 at position 4,
;; op0 is not mentioned in op1, and op2 is not mentioned in op0.
5073 (define_peephole2
5074 [(set (match_operand:DI 0 "general_reg_operand")
5075 (match_operand:DI 1 "general_reg_operand"))
5076 (parallel [(set (match_dup 0)
5077 (ashiftrt:DI (match_dup 0)
5078 (const_int 63)))
5079 (clobber (reg:CC FLAGS_REG))])
5080 (set (match_operand:DI 2 "general_reg_operand") (match_dup 1))
5081 (set (match_operand:DI 3 "general_reg_operand") (match_dup 0))]
5082 "(optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
5083 && REGNO (operands[2]) == AX_REG
5084 && REGNO (operands[3]) == DX_REG
5085 && peep2_reg_dead_p (4, operands[0])
5086 && !reg_mentioned_p (operands[0], operands[1])
5087 && !reg_mentioned_p (operands[2], operands[0])"
5088 [(set (match_dup 2) (match_dup 1))
5089 (parallel [(set (match_dup 3) (ashiftrt:DI (match_dup 2) (const_int 63)))
5090 (clobber (reg:CC FLAGS_REG))])])
5091
;; Sign-extend an SWI12-mode register or memory operand to DImode with
;; movs; only available with TARGET_64BIT.
5092 (define_insn "extend<mode>di2"
5093 [(set (match_operand:DI 0 "register_operand" "=r")
5094 (sign_extend:DI
5095 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
5096 "TARGET_64BIT"
5097 "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
5098 [(set_attr "type" "imovx")
5099 (set_attr "mode" "DI")])
5100
;; HImode -> SImode sign extension.
;; Alternative 0 ties the source to an "a"-constrained destination;
;; alternative 1 takes a register or memory source.
;; The output code keys off the "prefix_0f" attribute rather than the
;; alternative number: value 0 emits the operand-less cwtl/cwde, anything
;; else emits movs.  prefix_0f is 0 only for alternative 0 when the "cpu"
;; attribute is not k6.  The znver1_decode and modrm attributes are derived
;; from prefix_0f in the same way.
5101 (define_insn "extendhisi2"
5102 [(set (match_operand:SI 0 "register_operand" "=*a,r")
5103 (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
5104 ""
5105 {
5106 switch (get_attr_prefix_0f (insn))
5107 {
5108 case 0:
5109 return "{cwtl|cwde}";
5110 default:
5111 return "movs{wl|x}\t{%1, %0|%0, %1}";
5112 }
5113 }
5114 [(set_attr "type" "imovx")
5115 (set_attr "mode" "SI")
5116 (set (attr "prefix_0f")
5117 ;; movsx is short decodable while cwtl is vector decoded.
5118 (if_then_else (and (eq_attr "cpu" "!k6")
5119 (eq_attr "alternative" "0"))
5120 (const_string "0")
5121 (const_string "1")))
5122 (set (attr "znver1_decode")
5123 (if_then_else (eq_attr "prefix_0f" "0")
5124 (const_string "double")
5125 (const_string "direct")))
5126 (set (attr "modrm")
5127 (if_then_else (eq_attr "prefix_0f" "0")
5128 (const_string "0")
5129 (const_string "1")))])
5130
;; HImode -> SImode sign extension whose SImode result is zero-extended to
;; DImode; TARGET_64BIT only.  Same instruction selection as extendhisi2:
;; cwtl/cwde when the "prefix_0f" attribute is 0 (alternative 0 and "cpu"
;; not k6), otherwise movs with the destination printed through %k0.
5131 (define_insn "*extendhisi2_zext"
5132 [(set (match_operand:DI 0 "register_operand" "=*a,r")
5133 (zero_extend:DI
5134 (sign_extend:SI
5135 (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
5136 "TARGET_64BIT"
5137 {
5138 switch (get_attr_prefix_0f (insn))
5139 {
5140 case 0:
5141 return "{cwtl|cwde}";
5142 default:
5143 return "movs{wl|x}\t{%1, %k0|%k0, %1}";
5144 }
5145 }
5146 [(set_attr "type" "imovx")
5147 (set_attr "mode" "SI")
5148 (set (attr "prefix_0f")
5149 ;; movsx is short decodable while cwtl is vector decoded.
5150 (if_then_else (and (eq_attr "cpu" "!k6")
5151 (eq_attr "alternative" "0"))
5152 (const_string "0")
5153 (const_string "1")))
5154 (set (attr "modrm")
5155 (if_then_else (eq_attr "prefix_0f" "0")
5156 (const_string "0")
5157 (const_string "1")))])
5158
;; QImode -> SImode sign extension with movs from a "q" register or memory
;; source; available unconditionally (empty condition).
5159 (define_insn "extendqisi2"
5160 [(set (match_operand:SI 0 "register_operand" "=r")
5161 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
5162 ""
5163 "movs{bl|x}\t{%1, %0|%0, %1}"
5164 [(set_attr "type" "imovx")
5165 (set_attr "mode" "SI")])
5166
;; QImode -> SImode sign extension whose SImode result is zero-extended to
;; DImode; TARGET_64BIT only.  Emits the same movs as extendqisi2 with the
;; destination printed through the %k0 modifier.
5167 (define_insn "*extendqisi2_zext"
5168 [(set (match_operand:DI 0 "register_operand" "=r")
5169 (zero_extend:DI
5170 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
5171 "TARGET_64BIT"
5172 "movs{bl|x}\t{%1, %k0|%k0, %1}"
5173 [(set_attr "type" "imovx")
5174 (set_attr "mode" "SI")])
5175
;; QImode -> HImode sign extension.
;; Alternative 0 ties the source to an "a"-constrained destination;
;; alternative 1 takes a "q" register or memory source.
;; As in extendhisi2, the output code switches on the "prefix_0f"
;; attribute: 0 emits the operand-less cbtw/cbw, anything else emits movs.
;; prefix_0f is 0 only for alternative 0 when the "cpu" attribute is not k6.
5176 (define_insn "extendqihi2"
5177 [(set (match_operand:HI 0 "register_operand" "=*a,r")
5178 (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
5179 ""
5180 {
5181 switch (get_attr_prefix_0f (insn))
5182 {
5183 case 0:
5184 return "{cbtw|cbw}";
5185 default:
5186 return "movs{bw|x}\t{%1, %0|%0, %1}";
5187 }
5188 }
5189 [(set_attr "type" "imovx")
5190 (set_attr "mode" "HI")
5191 (set (attr "prefix_0f")
5192 ;; movsx is short decodable while cwtl is vector decoded.
5193 (if_then_else (and (eq_attr "cpu" "!k6")
5194 (eq_attr "alternative" "0"))
5195 (const_string "0")
5196 (const_string "1")))
5197 (set (attr "modrm")
5198 (if_then_else (eq_attr "prefix_0f" "0")
5199 (const_string "0")
5200 (const_string "1")))])
5201
;; Sign-extend to an SWI24 mode the byte obtained by an 8-bit extract at bit
;; position 8 of a register (matched through "extract_operator" and taken
;; as a QImode subreg).  The source is printed with the %h1 modifier and is
;; restricted to "Q" registers; the destination uses the "R" constraint.
5202 (define_insn "*extendqi<SWI24:mode>_ext_1"
5203 [(set (match_operand:SWI24 0 "register_operand" "=R")
5204 (sign_extend:SWI24
5205 (subreg:QI
5206 (match_operator:SWI248 2 "extract_operator"
5207 [(match_operand 1 "int248_register_operand" "Q")
5208 (const_int 8)
5209 (const_int 8)]) 0)))]
5210 ""
5211 "movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
5212 [(set_attr "type" "imovx")
5213 (set_attr "mode" "<SWI24:MODE>")])
5214 \f
5215 ;; Conversions between float and double.
5216
5217 ;; These are all no-ops in the model used for the 80387.
5218 ;; So just emit moves.
5219
5220 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
;; After reload, split a push of an SFmode x87 register extended to DFmode
;; into an explicit stack-pointer decrement by 8 followed by a
;; float-extending store to the new top of stack.
5221 (define_split
5222 [(set (match_operand:DF 0 "push_operand")
5223 (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
5224 "reload_completed"
5225 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
5226 (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
5227
;; After reload, split a push of a MODEF x87 register extended to XFmode
;; into a stack-pointer adjustment followed by a float-extending store.
;; The adjustment, operands[2], is minus GET_MODE_SIZE (XFmode), computed in
;; the preparation statement.
5228 (define_split
5229 [(set (match_operand:XF 0 "push_operand")
5230 (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
5231 "reload_completed"
5232 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
5233 (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
5234 "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
5235
;; Expander for SFmode -> DFmode extension; needs x87 or SSE2 with SSE
;; math.  A CONST_DOUBLE source is handled here:
;;   - when SSE2 is unavailable or x87/SSE mixing is enabled, and the value
;;     is a standard 80387 constant, it is folded to a DFmode constant and
;;     moved directly;
;;   - otherwise it is forced into the constant pool and loaded from memory.
;; Non-constant sources fall through to the float_extend template.
5236 (define_expand "extendsfdf2"
5237 [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
5238 (float_extend:DF (match_operand:SF 1 "general_operand")))]
5239 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5240 {
5241 /* ??? Needed for compress_float_constant since all fp constants
5242 are TARGET_LEGITIMATE_CONSTANT_P. */
5243 if (CONST_DOUBLE_P (operands[1]))
5244 {
5245 if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
5246 && standard_80387_constant_p (operands[1]) > 0)
5247 {
5248 operands[1] = simplify_const_unary_operation
5249 (FLOAT_EXTEND, DFmode, operands[1], SFmode);
5250 emit_move_insn_1 (operands[0], operands[1]);
5251 DONE;
5252 }
5253 operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
5254 }
5255 })
5256
;; SFmode -> DFmode extension insn with four alternatives:
;;   0, 1: x87 ("f") moves, output through output_387_reg_move;
;;   2:    cvtss2sd with a "v" register source;
;;   3:    cvtss2sd with a memory source (marked avx_partial_xmm_update).
;; The "enabled" attribute selects alternatives as follows: with
;; TARGET_SSE2 && TARGET_SSE_MATH the x87 alternatives need
;; TARGET_MIX_SSE_I387 and the SSE ones are always on; otherwise only the
;; x87 alternatives are enabled.
5257 (define_insn "*extendsfdf2"
5258 [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
5259 (float_extend:DF
5260 (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
5261 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5262 {
5263 switch (which_alternative)
5264 {
5265 case 0:
5266 case 1:
5267 return output_387_reg_move (insn, operands);
5268
5269 case 2:
5270 return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
5271 case 3:
5272 return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
5273
5274 default:
5275 gcc_unreachable ();
5276 }
5277 }
5278 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5279 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5280 (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
5281 (set_attr "mode" "SF,XF,DF,DF")
5282 (set (attr "enabled")
5283 (if_then_else
5284 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5285 (if_then_else
5286 (eq_attr "alternative" "0,1")
5287 (symbol_ref "TARGET_MIX_SSE_I387")
5288 (symbol_ref "true"))
5289 (if_then_else
5290 (eq_attr "alternative" "0,1")
5291 (symbol_ref "true")
5292 (symbol_ref "false"))))])
5293
5294 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
5295 cvtss2sd:
5296 unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5297 cvtps2pd xmm2,xmm1
5298 We do the conversion post-reload to avoid producing 128-bit spills
5299 that might lead to an ICE on 32-bit targets. The sequence is unlikely
5300 to combine anyway. */
;; Post-reload split of a scalar SF -> DF extension into a packed
;; conversion, for TARGET_USE_VECTOR_FP_CONVERTS when optimizing the insn
;; for speed.  It is not applied to an extended REX SSE destination unless
;; TARGET_AVX512VL.
;; The preparation code sets operands[2] to the V2DF view of the
;; destination and operands[3] to a V4SF view that holds the packed source:
;;   - register source: the source is first moved into the destination when
;;     the registers differ or the source is an extended REX SSE register
;;     without AVX512VL; otherwise operands[3] becomes the V4SF view of the
;;     source itself.  operands[3] is then interleaved with itself
;;     (vec_interleave_lowv4sf), or, for an extended REX SSE register
;;     number, filled via avx512f_vec_dupv16sf_1 on its V16SF view;
;;   - memory source: element 0 is loaded into a zero vector with
;;     vec_setv4sf_0.
;; The emitted template then converts the low two SF elements to V2DF.
5301 (define_split
5302 [(set (match_operand:DF 0 "sse_reg_operand")
5303 (float_extend:DF
5304 (match_operand:SF 1 "nonimmediate_operand")))]
5305 "TARGET_USE_VECTOR_FP_CONVERTS
5306 && optimize_insn_for_speed_p ()
5307 && reload_completed
5308 && (!EXT_REX_SSE_REG_P (operands[0])
5309 || TARGET_AVX512VL)"
5310 [(set (match_dup 2)
5311 (float_extend:V2DF
5312 (vec_select:V2SF
5313 (match_dup 3)
5314 (parallel [(const_int 0) (const_int 1)]))))]
5315 {
5316 operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
5317 operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
5318 /* Use movss for loading from memory, unpcklps reg, reg for registers.
5319 Try to avoid move when unpacking can be done in source. */
5320 if (REG_P (operands[1]))
5321 {
5322 /* If it is unsafe to overwrite upper half of source, we need
5323 to move to destination and unpack there. */
5324 if (REGNO (operands[0]) != REGNO (operands[1])
5325 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5326 {
5327 rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
5328 emit_move_insn (tmp, operands[1]);
5329 }
5330 else
5331 operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
5332 /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
5333 =v, v, then vbroadcastss will be only needed for AVX512F without
5334 AVX512VL. */
5335 if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
5336 emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
5337 operands[3]));
5338 else
5339 {
5340 rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
5341 emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
5342 }
5343 }
5344 else
5345 emit_insn (gen_vec_setv4sf_0 (operands[3],
5346 CONST0_RTX (V4SFmode), operands[1]));
5347 })
5348
5349 ;; It's more profitable to split and then extend in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS, when optimizing the insn for
;; speed, an SF -> DF extension from memory becomes a load of the SFmode
;; value into the destination register (operands[2] is the SFmode lowpart
;; of operand 0) followed by a register-to-register extension.
5350 (define_peephole2
5351 [(set (match_operand:DF 0 "sse_reg_operand")
5352 (float_extend:DF
5353 (match_operand:SF 1 "memory_operand")))]
5354 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5355 && optimize_insn_for_speed_p ()"
5356 [(set (match_dup 2) (match_dup 1))
5357 (set (match_dup 0) (float_extend:DF (match_dup 2)))]
5358 "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
5359
5360 ;; Break partial SSE register dependency stall. This splitter should split
5361 ;; late in the pass sequence (after register rename pass), so allocated
5362 ;; registers won't change anymore
5363
;; Applies without AVX, with TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
;; after epilogue_completed, when the function is optimized for speed, and
;; only when the source is not the destination register.  The preparation
;; code rewrites operands[0] as the V2DF view of the destination and emits a
;; move of a zero vector into it; the extension is then expressed as a
;; vec_merge that writes element 0 of that zeroed register.
5364 (define_split
5365 [(set (match_operand:DF 0 "sse_reg_operand")
5366 (float_extend:DF
5367 (match_operand:SF 1 "nonimmediate_operand")))]
5368 "!TARGET_AVX
5369 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5370 && epilogue_completed
5371 && optimize_function_for_speed_p (cfun)
5372 && (!REG_P (operands[1])
5373 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
5374 && (!EXT_REX_SSE_REG_P (operands[0])
5375 || TARGET_AVX512VL)"
5376 [(set (match_dup 0)
5377 (vec_merge:V2DF
5378 (vec_duplicate:V2DF
5379 (float_extend:DF
5380 (match_dup 1)))
5381 (match_dup 0)
5382 (const_int 1)))]
5383 {
5384 operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
5385 emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
5386 })
5387
;; Expander for HFmode -> SFmode extension.  With TARGET_AVX512FP16 the
;; float_extend template is used as is.  Otherwise (F16C or AVX512VL) the
;; scalar is placed in element 0 of a zeroed V8HF register, converted with
;; vcvtph2ps into a V4SF register, and the SFmode lowpart of that result is
;; moved to the destination.
5388 (define_expand "extendhfsf2"
5389 [(set (match_operand:SF 0 "register_operand")
5390 (float_extend:SF
5391 (match_operand:HF 1 "nonimmediate_operand")))]
5392 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5393 {
5394 if (!TARGET_AVX512FP16)
5395 {
5396 rtx res = gen_reg_rtx (V4SFmode);
5397 rtx tmp = gen_reg_rtx (V8HFmode);
5398 rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
5399
5400 emit_insn (gen_vec_setv8hf_0 (tmp, zero, operands[1]));
5401 emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
5402 emit_move_insn (operands[0], gen_lowpart (SFmode, res));
5403 DONE;
5404 }
5405 })
5406
;; Expander for HFmode -> DFmode extension; available only with
;; TARGET_AVX512FP16 and has no preparation code, so it simply generates the
;; float_extend RTL.
5407 (define_expand "extendhfdf2"
5408 [(set (match_operand:DF 0 "register_operand")
5409 (float_extend:DF
5410 (match_operand:HF 1 "nonimmediate_operand")))]
5411 "TARGET_AVX512FP16")
5412
;; HFmode -> MODEF extension insn for TARGET_AVX512FP16, emitted as
;; vcvtsh2<ssemodesuffix>.  The destination register is printed twice in
;; the template (as destination and as the first source operand).
5413 (define_insn "*extendhf<mode>2"
5414 [(set (match_operand:MODEF 0 "register_operand" "=v")
5415 (float_extend:MODEF
5416 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5417 "TARGET_AVX512FP16"
5418 "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
5419 [(set_attr "type" "ssecvt")
5420 (set_attr "prefix" "evex")
5421 (set_attr "mode" "<MODE>")])
5422
;; Expander for BFmode -> SFmode extension, represented as an
;; UNSPEC_CVTBFSF unspec rather than float_extend.  Available with
;; TARGET_SSE2 only when NaNs need not be honored for BFmode.
5423 (define_expand "extendbfsf2"
5424 [(set (match_operand:SF 0 "register_operand")
5425 (unspec:SF
5426 [(match_operand:BF 1 "register_operand")]
5427 UNSPEC_CVTBFSF))]
5428 "TARGET_SSE2 && !HONOR_NANS (BFmode)")
5429
5430 ;; Don't use float_extend since pslld doesn't raise
5431 ;; exceptions or turn a sNaN into a qNaN.
;; BFmode -> SFmode conversion implemented as a left shift by 16.
;; Alternatives:
;;   0: pslld in place (source tied to destination), isa noavx;
;;   1: vpslld, isa avx;
;;   2: vpslld with both operands printed through the %g modifier (mode
;;      attribute XI); enabled only for TARGET_AVX512F && TARGET_EVEX512
;;      without TARGET_AVX512VL and without TARGET_PREFER_AVX256.
5432 (define_insn "extendbfsf2_1"
5433 [(set (match_operand:SF 0 "register_operand" "=x,Yv,v")
5434 (unspec:SF
5435 [(match_operand:BF 1 "register_operand" " 0,Yv,v")]
5436 UNSPEC_CVTBFSF))]
5437 "TARGET_SSE2"
5438 "@
5439 pslld\t{$16, %0|%0, 16}
5440 vpslld\t{$16, %1, %0|%0, %1, 16}
5441 vpslld\t{$16, %g1, %g0|%g0, %g1, 16}"
5442 [(set_attr "isa" "noavx,avx,*")
5443 (set_attr "type" "sseishft1")
5444 (set_attr "length_immediate" "1")
5445 (set_attr "prefix_data16" "1,*,*")
5446 (set_attr "prefix" "orig,maybe_evex,evex")
5447 (set_attr "mode" "TI,TI,XI")
5448 (set_attr "memory" "none")
5449 (set (attr "enabled")
5450 (if_then_else (eq_attr "alternative" "2")
5451 (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
5452 && !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
5453 (const_string "*")))])
5454
;; Expander for MODEF -> XFmode extension (x87 only).  A CONST_DOUBLE
;; source that is a standard 80387 constant is folded to an XFmode constant
;; and moved directly; any other constant is forced into the constant pool
;; and loaded from memory.  Non-constant sources fall through to the
;; float_extend template.
5455 (define_expand "extend<mode>xf2"
5456 [(set (match_operand:XF 0 "nonimmediate_operand")
5457 (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
5458 "TARGET_80387"
5459 {
5460 /* ??? Needed for compress_float_constant since all fp constants
5461 are TARGET_LEGITIMATE_CONSTANT_P. */
5462 if (CONST_DOUBLE_P (operands[1]))
5463 {
5464 if (standard_80387_constant_p (operands[1]) > 0)
5465 {
5466 operands[1] = simplify_const_unary_operation
5467 (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
5468 emit_move_insn_1 (operands[0], operands[1]);
5469 DONE;
5470 }
5471 operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
5472 }
5473 })
5474
;; MODEF -> XFmode extension on the x87, output through
;; output_387_reg_move.  Alternative 0 loads an "f" register from a register
;; or memory source; alternative 1 stores an "f" register to memory.
5475 (define_insn "*extend<mode>xf2_i387"
5476 [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
5477 (float_extend:XF
5478 (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
5479 "TARGET_80387"
5480 "* return output_387_reg_move (insn, operands);"
5481 [(set_attr "type" "fmov")
5482 (set_attr "mode" "<MODE>,XF")])
5483
5484 ;; %%% This seems like bad news.
5485 ;; This cannot output into an f-reg because there is no way to be sure
5486 ;; of truncating in that case. Otherwise this is just like a simple move
5487 ;; insn. So we pretend we can output to a reg in order to get better
5488 ;; register preferencing, but we really use a stack slot.
5489
5490 ;; Conversion from DFmode to SFmode.
5491
;; DFmode -> SFmode truncation insn with four alternatives:
;;   0: x87 register stored to memory;
;;   1: x87 register to x87 register;
;;   2: cvtsd2ss with a "v" register source;
;;   3: cvtsd2ss with a memory source (marked avx_partial_xmm_update).
;; "enabled" attribute: with TARGET_SSE2 && TARGET_SSE_MATH, alternative 0
;; needs TARGET_MIX_SSE_I387, alternative 1 additionally needs
;; flag_unsafe_math_optimizations, and the SSE alternatives are always on.
;; Without SSE math, alternative 0 is always on, alternative 1 needs
;; flag_unsafe_math_optimizations, and the SSE alternatives are off.
5492 (define_insn "truncdfsf2"
5493 [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
5494 (float_truncate:SF
5495 (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
5496 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5497 {
5498 switch (which_alternative)
5499 {
5500 case 0:
5501 case 1:
5502 return output_387_reg_move (insn, operands);
5503
5504 case 2:
5505 return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
5506 case 3:
5507 return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
5508
5509 default:
5510 gcc_unreachable ();
5511 }
5512 }
5513 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5514 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5515 (set_attr "mode" "SF")
5516 (set (attr "enabled")
5517 (if_then_else
5518 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5519 (cond [(eq_attr "alternative" "0")
5520 (symbol_ref "TARGET_MIX_SSE_I387")
5521 (eq_attr "alternative" "1")
5522 (symbol_ref "TARGET_MIX_SSE_I387
5523 && flag_unsafe_math_optimizations")
5524 ]
5525 (symbol_ref "true"))
5526 (cond [(eq_attr "alternative" "0")
5527 (symbol_ref "true")
5528 (eq_attr "alternative" "1")
5529 (symbol_ref "flag_unsafe_math_optimizations")
5530 ]
5531 (symbol_ref "false"))))])
5532
5533 /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
5534 cvtsd2ss:
5535 unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5536 cvtpd2ps xmm2,xmm1
5537 We do the conversion post-reload to avoid producing 128-bit spills
5538 that might lead to an ICE on 32-bit targets. The sequence is unlikely
5539 to combine anyway. */
;; Post-reload split of a scalar DF -> SF truncation into a packed
;; conversion, for TARGET_USE_VECTOR_FP_CONVERTS when optimizing the insn
;; for speed.  It is not applied to an extended REX SSE destination unless
;; TARGET_AVX512VL.
;; The preparation code sets operands[2] to the V4SF view of the
;; destination, operands[3] to a zero V2SF vector for the upper half of the
;; result, and operands[4] to the V2DF register holding the packed source:
;;   - register source: it is first moved into the destination when
;;     (no SSE3 and the registers differ) or when the source is an extended
;;     REX SSE register without AVX512VL; otherwise, without SSE3,
;;     operands[4] becomes the V2DF view of the source itself.  The value is
;;     then duplicated into operands[4] with vec_dupv2df;
;;   - memory source: it is concatenated with a DFmode zero into operands[4]
;;     with vec_concatv2df.
5540 (define_split
5541 [(set (match_operand:SF 0 "sse_reg_operand")
5542 (float_truncate:SF
5543 (match_operand:DF 1 "nonimmediate_operand")))]
5544 "TARGET_USE_VECTOR_FP_CONVERTS
5545 && optimize_insn_for_speed_p ()
5546 && reload_completed
5547 && (!EXT_REX_SSE_REG_P (operands[0])
5548 || TARGET_AVX512VL)"
5549 [(set (match_dup 2)
5550 (vec_concat:V4SF
5551 (float_truncate:V2SF
5552 (match_dup 4))
5553 (match_dup 3)))]
5554 {
5555 operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5556 operands[3] = CONST0_RTX (V2SFmode);
5557 operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
5558 /* Use movsd for loading from memory, unpcklpd for registers.
5559 Try to avoid move when unpacking can be done in source, or SSE3
5560 movddup is available. */
5561 if (REG_P (operands[1]))
5562 {
5563 if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
5564 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5565 {
5566 rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
5567 emit_move_insn (tmp, operands[1]);
5568 operands[1] = tmp;
5569 }
5570 else if (!TARGET_SSE3)
5571 operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
5572 emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
5573 }
5574 else
5575 emit_insn (gen_vec_concatv2df (operands[4], operands[1],
5576 CONST0_RTX (DFmode)));
5577 })
5578
5579 ;; It's more profitable to split and then truncate in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS, when optimizing the insn for
;; speed, a DF -> SF truncation from memory becomes a load of the DFmode
;; value into the destination register (operands[2] is the DFmode view of
;; operand 0) followed by a register-to-register truncation.
5580 (define_peephole2
5581 [(set (match_operand:SF 0 "sse_reg_operand")
5582 (float_truncate:SF
5583 (match_operand:DF 1 "memory_operand")))]
5584 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5585 && optimize_insn_for_speed_p ()"
5586 [(set (match_dup 2) (match_dup 1))
5587 (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
5588 "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
5589
5590 ;; Break partial SSE register dependency stall. This splitter should split
5591 ;; late in the pass sequence (after register rename pass), so allocated
5592 ;; registers won't change anymore
5593
;; Applies without AVX, with TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
;; after epilogue_completed, when the function is optimized for speed, and
;; only when the source is not the destination register.  The preparation
;; code rewrites operands[0] as the V4SF view of the destination and emits a
;; move of a zero vector into it; the truncation is then expressed as a
;; vec_merge that writes element 0 of that zeroed register.
5594 (define_split
5595 [(set (match_operand:SF 0 "sse_reg_operand")
5596 (float_truncate:SF
5597 (match_operand:DF 1 "nonimmediate_operand")))]
5598 "!TARGET_AVX
5599 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5600 && epilogue_completed
5601 && optimize_function_for_speed_p (cfun)
5602 && (!REG_P (operands[1])
5603 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
5604 && (!EXT_REX_SSE_REG_P (operands[0])
5605 || TARGET_AVX512VL)"
5606 [(set (match_dup 0)
5607 (vec_merge:V4SF
5608 (vec_duplicate:V4SF
5609 (float_truncate:SF
5610 (match_dup 1)))
5611 (match_dup 0)
5612 (const_int 1)))]
5613 {
5614 operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5615 emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
5616 })
5617
5618 ;; Conversion from XFmode to {SF,DF}mode
5619
;; XFmode -> MODEF truncation on the x87, output through
;; output_387_reg_move.  Alternative 0 stores to memory and is always
;; enabled; alternative 1 (register to register) is enabled only with
;; flag_unsafe_math_optimizations.
5620 (define_insn "truncxf<mode>2"
5621 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
5622 (float_truncate:MODEF
5623 (match_operand:XF 1 "register_operand" "f,f")))]
5624 "TARGET_80387"
5625 "* return output_387_reg_move (insn, operands);"
5626 [(set_attr "type" "fmov")
5627 (set_attr "mode" "<MODE>")
5628 (set (attr "enabled")
5629 (cond [(eq_attr "alternative" "1")
5630 (symbol_ref "flag_unsafe_math_optimizations")
5631 ]
5632 (symbol_ref "true")))])
5633
5634 ;; Conversion from {SF,DF}mode to HFmode.
5635
;; Expander for SFmode -> HFmode truncation.  With TARGET_AVX512FP16 the
;; float_truncate template is used as is.  Otherwise (F16C or AVX512VL) the
;; scalar is placed in element 0 of a zeroed V4SF register, converted with
;; vcvtps2ph using immediate 4 into a V8HF register (accessed through its
;; V8HImode lowpart), and the HFmode lowpart of that result is moved to the
;; destination.
5636 (define_expand "truncsfhf2"
5637 [(set (match_operand:HF 0 "register_operand")
5638 (float_truncate:HF
5639 (match_operand:SF 1 "nonimmediate_operand")))]
5640 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5641 {
5642 if (!TARGET_AVX512FP16)
5643 {
5644 rtx res = gen_reg_rtx (V8HFmode);
5645 rtx tmp = gen_reg_rtx (V4SFmode);
5646 rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
5647
5648 emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
5649 emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
5650 emit_move_insn (operands[0], gen_lowpart (HFmode, res));
5651 DONE;
5652 }
5653 })
5654
;; Expander for DFmode -> HFmode truncation; available only with
;; TARGET_AVX512FP16 and has no preparation code, so it simply generates the
;; float_truncate RTL.
5655 (define_expand "truncdfhf2"
5656 [(set (match_operand:HF 0 "register_operand")
5657 (float_truncate:HF
5658 (match_operand:DF 1 "nonimmediate_operand")))]
5659 "TARGET_AVX512FP16")
5660
;; MODEF -> HFmode truncation insn for TARGET_AVX512FP16, emitted as
;; vcvt<ssemodesuffix>2sh with the destination printed through the %d0
;; modifier.
5661 (define_insn "*trunc<mode>hf2"
5662 [(set (match_operand:HF 0 "register_operand" "=v")
5663 (float_truncate:HF
5664 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5665 "TARGET_AVX512FP16"
5666 "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
5667 [(set_attr "type" "ssecvt")
5668 (set_attr "prefix" "evex")
5669 (set_attr "mode" "HF")])
5670
;; SFmode -> BFmode truncation via vcvtneps2bf16.  Requires either
;; AVX512BF16 together with AVX512VL, or AVXNECONVERT, and is only used when
;; NaNs need not be honored for BFmode and flag_unsafe_math_optimizations is
;; set.  Alternative 0 (isa avxneconvert) forces the VEX encoding with the
;; {vex} pseudo-prefix; alternative 1 (isa avx512bf16vl) uses EVEX.
5671 (define_insn "truncsfbf2"
5672 [(set (match_operand:BF 0 "register_operand" "=x, v")
5673 (float_truncate:BF
5674 (match_operand:SF 1 "register_operand" "x,v")))]
5675 "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
5676 && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
5677 "@
5678 %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
5679 vcvtneps2bf16\t{%1, %0|%0, %1}"
5680 [(set_attr "isa" "avxneconvert,avx512bf16vl")
5681 (set_attr "prefix" "vex,evex")])
5682
5683 ;; Signed conversion to DImode.
5684
;; Expander for signed XFmode -> DImode conversion on the x87.  With
;; TARGET_FISTTP it emits fix_truncdi_i387_fisttp; otherwise the
;; flags-clobbering fix:DI parallel of the template is generated.
5685 (define_expand "fix_truncxfdi2"
5686 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5687 (fix:DI (match_operand:XF 1 "register_operand")))
5688 (clobber (reg:CC FLAGS_REG))])]
5689 "TARGET_80387"
5690 {
5691 if (TARGET_FISTTP)
5692 {
5693 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5694 DONE;
5695 }
5696 })
5697
;; Expander for signed MODEF -> DImode conversion.  Selection order:
;;   1. TARGET_FISTTP, unless the 64-bit SSE path with TARGET_SSE_MATH
;;      applies: emit fix_truncdi_i387_fisttp.
;;   2. TARGET_64BIT with an SSE float mode: emit fix_trunc<mode>di_sse into
;;      a register (a fresh pseudo when the destination is not a register)
;;      and copy to the destination if needed.
;;   3. Otherwise generate the flags-clobbering fix:DI parallel.
5698 (define_expand "fix_trunc<mode>di2"
5699 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5700 (fix:DI (match_operand:MODEF 1 "register_operand")))
5701 (clobber (reg:CC FLAGS_REG))])]
5702 "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
5703 {
5704 if (TARGET_FISTTP
5705 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5706 {
5707 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5708 DONE;
5709 }
5710 if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
5711 {
5712 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
5713 emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
5714 if (out != operands[0])
5715 emit_move_insn (operands[0], out);
5716 DONE;
5717 }
5718 })
5719
;; HFmode -> SWI48 integer conversion for TARGET_AVX512FP16, covering every
;; code of the any_fix iterator; <fixunssuffix> and <fixsuffix> vary the
;; pattern name and the vcvttsh2...si mnemonic accordingly.
5720 (define_insn "fix<fixunssuffix>_trunchf<mode>2"
5721 [(set (match_operand:SWI48 0 "register_operand" "=r")
5722 (any_fix:SWI48
5723 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5724 "TARGET_AVX512FP16"
5725 "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
5726 [(set_attr "type" "sseicvt")
5727 (set_attr "prefix" "evex")
5728 (set_attr "mode" "<MODE>")])
5729
5730 ;; Signed conversion to SImode.
5731
;; Expander for signed XFmode -> SImode conversion on the x87.  With
;; TARGET_FISTTP it emits fix_truncsi_i387_fisttp; otherwise the
;; flags-clobbering fix:SI parallel of the template is generated.
5732 (define_expand "fix_truncxfsi2"
5733 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5734 (fix:SI (match_operand:XF 1 "register_operand")))
5735 (clobber (reg:CC FLAGS_REG))])]
5736 "TARGET_80387"
5737 {
5738 if (TARGET_FISTTP)
5739 {
5740 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5741 DONE;
5742 }
5743 })
5744
;; Expander for signed MODEF -> SImode conversion.  Selection order:
;;   1. TARGET_FISTTP, unless the mode is an SSE float mode with
;;      TARGET_SSE_MATH: emit fix_truncsi_i387_fisttp.
;;   2. SSE float mode: emit fix_trunc<mode>si_sse into a register (a fresh
;;      pseudo when the destination is not a register) and copy to the
;;      destination if needed.
;;   3. Otherwise generate the flags-clobbering fix:SI parallel.
5745 (define_expand "fix_trunc<mode>si2"
5746 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5747 (fix:SI (match_operand:MODEF 1 "register_operand")))
5748 (clobber (reg:CC FLAGS_REG))])]
5749 "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
5750 {
5751 if (TARGET_FISTTP
5752 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5753 {
5754 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5755 DONE;
5756 }
5757 if (SSE_FLOAT_MODE_P (<MODE>mode))
5758 {
5759 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
5760 emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
5761 if (out != operands[0])
5762 emit_move_insn (operands[0], out);
5763 DONE;
5764 }
5765 })
5766
5767 ;; Signed conversion to HImode.
5768
;; Signed x87 float -> HImode truncation.  The expander is disabled for
;; SSE float modes unless fisttp is available and SSE math is off.
;; With fisttp the insn is emitted directly; otherwise the parallel
;; below is emitted as written.
(define_expand "fix_trunc<mode>hi2"
  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
                   (fix:HI (match_operand:X87MODEF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387
   && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
{
  if (TARGET_FISTTP)
    {
      emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
      DONE;
    }
})
5782
5783 ;; Unsigned conversion to DImode
5784
;; Unsigned SFmode/DFmode -> DImode truncation using the AVX512F
;; vcvtt*2usi instruction.  64-bit targets with SSE math only.
(define_insn "fixuns_trunc<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unsigned_fix:DI
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])
5794
5795 ;; Unsigned conversion to SImode.
5796
;; Unsigned SFmode/DFmode -> SImode truncation.
;;   * With AVX512F the dedicated insn is emitted.
;;   * Otherwise (32-bit SSE2 only, per the condition) the expander
;;     fails when optimizing for size, or else supplies operand 2, a
;;     vector constant built from 2**31, for the pattern below.
(define_expand "fixuns_trunc<mode>si2"
  [(parallel
    [(set (match_operand:SI 0 "register_operand")
          (unsigned_fix:SI
            (match_operand:MODEF 1 "nonimmediate_operand")))
     (use (match_dup 2))
     (clobber (scratch:<ssevecmode>))
     (clobber (scratch:<ssevecmode>))])]
  "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
{
  if (TARGET_AVX512F)
    {
      emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
      DONE;
    }

  if (optimize_insn_for_size_p ())
    FAIL;

  const machine_mode scalar_mode = <MODE>mode;
  const machine_mode vector_mode = <ssevecmode>mode;

  /* 1.0 scaled by 2**31, first as a scalar constant, then as a vector
     constant, finally forced into a register for the (use ...).  */
  REAL_VALUE_TYPE two31_real;
  real_ldexp (&two31_real, &dconst1, 31);

  rtx two31_scalar = const_double_from_real_value (two31_real, scalar_mode);
  rtx two31_vector = ix86_build_const_vector (vector_mode, true, two31_scalar);

  operands[2] = force_reg (vector_mode, two31_vector);
})
5826
;; AVX512F form of the unsigned SFmode/DFmode -> SImode truncation; it
;; is emitted by the fixuns_trunc<mode>si2 expander.
(define_insn "fixuns_trunc<mode>si2_avx512f"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unsigned_fix:SI
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
5836
;; Unsigned HFmode -> SImode truncation whose result is zero-extended
;; to DImode.  The instruction is printed with the 32-bit name of the
;; destination (%k0); the pattern models the result as a zero_extend.
(define_insn "*fixuns_trunchfsi2zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unsigned_fix:SI
            (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
  "TARGET_64BIT && TARGET_AVX512FP16"
  "vcvttsh2usi\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
5847
;; Unsigned SFmode/DFmode -> SImode truncation combined with a zero
;; extension to DImode.  As in the HFmode variant, the 32-bit register
;; name (%k0) is printed.
(define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unsigned_fix:SI
            (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
5858
;; 32-bit SSE2 unsigned float -> SImode truncation without AVX512F.
;; Kept as a single "#" insn until reload has completed, then handed to
;; ix86_split_convert_uns_si_sse.  Operand 4 is the vector constant
;; supplied by the expander; operands 1 and 2 are vector scratches.
(define_insn_and_split "*fixuns_trunc<mode>_1"
  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
        (unsigned_fix:SI
          (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
   (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
   && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The helper emits the whole replacement sequence itself.  */
  ix86_split_convert_uns_si_sse (operands);
  DONE;
})
5875
5876 ;; Unsigned conversion to HImode.
5877 ;; Without these patterns, we'll try the unsigned SI conversion which
5878 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
5879
;; Unsigned HFmode -> HImode: perform the signed conversion to a fresh
;; SImode pseudo (operand 2) and take its low HImode part.
(define_expand "fixuns_trunchfhi2"
  [(set (match_dup 2)
        (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
   (set (match_operand:HI 0 "nonimmediate_operand")
        (subreg:HI (match_dup 2) 0))]
  "TARGET_AVX512FP16"
{
  operands[2] = gen_reg_rtx (SImode);
})
5887
;; Unsigned SFmode/DFmode -> HImode under SSE math: perform the signed
;; conversion to a fresh SImode pseudo (operand 2) and take its low
;; HImode part.
(define_expand "fixuns_trunc<mode>hi2"
  [(set (match_dup 2)
        (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
   (set (match_operand:HI 0 "nonimmediate_operand")
        (subreg:HI (match_dup 2) 0))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
{
  operands[2] = gen_reg_rtx (SImode);
})
5895
;; When SSE is available, it is always faster to use it!
;; Signed truncation from an SSE register (alternative 0) or memory
;; (alternative 1) into a general register.  A REX prefix is requested
;; for the DImode destination.
(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
        (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
  "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "maybe_vex")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "<SWI48:MODE>mode == DImode")
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "<MODEF:MODE>")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")])
5914
;; Avoid vector decoded forms of the instruction: when an SSE scratch
;; register is free, load the memory operand into it first and convert
;; from the register instead of converting straight from memory.
(define_peephole2
  [(match_scratch:MODEF 2 "x")
   (set (match_operand:SWI48 0 "register_operand")
        (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
  "TARGET_AVOID_VECTOR_DECODE
   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
5925
;; x87 truncation using fisttp.  The result is stored to memory
;; (operand 0), the source is an x87 register (operand 1), and an XF
;; x87 scratch is clobbered.  Not used when SSE math handles the
;; conversion (DImode results count only on 64-bit targets).
(define_insn "fix_trunc<mode>_i387_fisttp"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
        (fix:SWI248x (match_operand 1 "register_operand" "f")))
   (clobber (match_scratch:XF 2 "=&f"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && TARGET_FISTTP
   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
         && (TARGET_64BIT || <MODE>mode != DImode))
        && TARGET_SSE_MATH)"
{
  return output_fix_trunc (insn, operands, true);
}
  [(set_attr "type" "fisttp")
   (set_attr "mode" "<MODE>")])
5938
;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
;; of the machinery.  Please note the clobber of FLAGS_REG: the i387 control
;; word calculation (inserted by LCM in the mode switching pass) may use insns
;; that clobber FLAGS_REG.  Look at the emit_i387_cw_initialization ()
;; function in i386.cc.
;;
;; This pattern exists only before reload (ix86_pre_reload_split) and is
;; split unconditionally into the fix_trunc<mode>_i387 insn, which takes
;; two HImode stack slots for the control word.
(define_insn_and_split "*fix_trunc<mode>_i387_1"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
        (fix:SWI248x (match_operand 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && !TARGET_FISTTP
   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
        && (TARGET_64BIT || <MODE>mode != DImode))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* Record that this function needs the I387_TRUNC mode.  */
  ix86_optimize_mode_switching[I387_TRUNC] = 1;

  /* Stack slots for the saved and the truncating control word.  */
  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);

  emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
                                       operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "<MODE>")])
5969
;; x87 truncation to DImode without fisttp.  Operands 2 and 3 are the
;; HImode control-word memory slots; an XF x87 scratch (operand 4) is
;; clobbered.  Excluded when a 64-bit target handles the mode with SSE.
(define_insn "fix_truncdi_i387"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
        (fix:DI (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))
   (clobber (match_scratch:XF 4 "=&f"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && !TARGET_FISTTP
   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
{
  return output_fix_trunc (insn, operands, false);
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "DI")])
5983
;; x87 truncation to the SWI24 integer modes without fisttp.  Operands
;; 2 and 3 are the HImode control-word memory slots.  Unlike the DImode
;; variant no x87 scratch is clobbered.
(define_insn "fix_trunc<mode>_i387"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
        (fix:SWI24 (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
   && !TARGET_FISTTP
   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
{
  return output_fix_trunc (insn, operands, false);
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "<MODE>")])
5996
;; Store the x87 control word to an HImode memory location (fnstcw).
;; The length is computed as the default address length plus two bytes.
(define_insn "x86_fnstcw_1"
  [(set (match_operand:HI 0 "memory_operand" "=m")
        (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
  "TARGET_80387"
  "fnstcw\t%0"
  [(set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
   (set_attr "mode" "HI")
   (set_attr "unit" "i387")
   (set_attr "bdver1_decode" "vector")])
6007 \f
6008 ;; Conversion between fixed point and floating point.
6009
6010 ;; Even though we only accept memory inputs, the backend _really_
6011 ;; wants to be able to do this between registers. Thankfully, LRA
6012 ;; will fix this up for us during register allocation.
6013
;; HImode integer (memory only) -> x87 float via fild.  Available when
;; the mode is not handled by SSE math, or when SSE and x87 math may be
;; mixed.
(define_insn "floathi<mode>2"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
        (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
  "TARGET_80387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])
6025
;; SWI48x integer (memory only) -> XFmode via fild.
(define_insn "float<SWI48x:mode>xf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "XF")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])
6035
;; Signed integer -> SFmode/DFmode.  Enabled either for the x87 (when
;; X87_ENABLE_FLOAT allows the mode pair) or for SSE math; the SSE path
;; accepts a DImode source only on 64-bit targets.  There is no
;; preparation code: the pattern is emitted as written.
(define_expand "float<SWI48x:mode><MODEF:mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
        (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
   || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
       && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")
6042
;; Signed SWI48 integer -> SFmode/DFmode.
;;   alternative 0: x87 fild from memory
;;   alternative 1: SSE cvtsi2ss/sd from a general register
;;   alternative 2: SSE cvtsi2ss/sd from memory
;; The "enabled" attribute selects the usable alternatives: under SSE
;; math the x87 alternative is kept only when TARGET_MIX_SSE_I387 and
;; X87_ENABLE_FLOAT allow it; without SSE math only the x87 alternative
;; is enabled.  Alternative 1 is preferred for speed only when
;; TARGET_INTER_UNIT_CONVERSIONS holds.
(define_insn "*float<SWI48:mode><MODEF:mode>2"
  [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
        (float:MODEF
          (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
  "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
   || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
  "@
   fild%Z1\t%1
   %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
   %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "fmov,sseicvt,sseicvt")
   (set_attr "avx_partial_xmm_update" "false,true,true")
   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
   (set_attr "mode" "<MODEF:MODE>")
   (set (attr "prefix_rex")
        (if_then_else
          (and (eq_attr "prefix" "maybe_vex")
               (match_test "<SWI48:MODE>mode == DImode"))
          (const_string "1")
          (const_string "*")))
   (set_attr "unit" "i387,*,*")
   (set_attr "athlon_decode" "*,double,direct")
   (set_attr "amdfam10_decode" "*,vector,double")
   (set_attr "bdver1_decode" "*,double,direct")
   (set_attr "znver1_decode" "double,*,*")
   (set_attr "fp_int_src" "true")
   (set (attr "enabled")
        (if_then_else
          (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
          (if_then_else
            (eq_attr "alternative" "0")
            (symbol_ref "TARGET_MIX_SSE_I387
                         && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
                                              <SWI48:MODE>mode)")
            (symbol_ref "true"))
          (if_then_else
            (eq_attr "alternative" "0")
            (symbol_ref "true")
            (symbol_ref "false"))))
   (set (attr "preferred_for_speed")
        (cond [(eq_attr "alternative" "1")
                 (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
              (symbol_ref "true")))])
6086
;; SWI48 integer (register or memory) -> HFmode.  The any_float
;; iterator and the <floatsuffix>/<floatunssuffix> attributes are
;; defined elsewhere (presumably selecting signed vs. unsigned).
;; Requires AVX512-FP16.
(define_insn "float<floatunssuffix><mode>hf2"
  [(set (match_operand:HF 0 "register_operand" "=v")
        (any_float:HF
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
  "TARGET_AVX512FP16"
  "vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "HF")])
6096
;; 32-bit only: DImode integer (memory) -> SFmode/DFmode via x87 fild.
(define_insn "*floatdi<MODEF:mode>2_i387"
  [(set (match_operand:MODEF 0 "register_operand" "=f")
        (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
  "!TARGET_64BIT
   && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODEF:MODE>")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])
6107
;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
;; alternative in sse2_loadld.
;;
;; After reload, an SImode -> SFmode/DFmode conversion into an SSE
;; register is replaced by a load of the integer into the low element
;; of the destination register viewed as V4SI, followed by a packed
;; conversion.
(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
        (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
  "TARGET_SSE2
   && TARGET_USE_VECTOR_CONVERTS
   && optimize_function_for_speed_p (cfun)
   && reload_completed
   && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(const_int 0)]
{
  /* Two views of the destination register: as the float vector mode
     and as V4SI.  */
  operands[3] = lowpart_subreg (<ssevecmode>mode, operands[0], <MODE>mode);
  operands[4] = lowpart_subreg (V4SImode, operands[0], <MODE>mode);

  emit_insn (gen_sse2_loadld (operands[4],
                              CONST0_RTX (V4SImode), operands[1]));

  if (<ssevecmode>mode != V4SFmode)
    emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
  else
    emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
  DONE;
})
6135
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers.
;;
;; 32-bit only, while pseudos can still be created: a DImode register
;; converted to an x87 float is rewritten into the
;; floatdi<mode>2_i387_with_xmm insn, which receives a DImode stack
;; slot.

(define_split
  [(set (match_operand:X87MODEF 0 "register_operand")
        (float:X87MODEF
          (match_operand:DI 1 "register_operand")))]
  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
   && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
   && can_create_pseudo_p ()"
  [(const_int 0)]
{
  rtx stack_slot = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387);
  emit_insn (gen_floatdi<mode>2_i387_with_xmm (operands[0], operands[1],
                                               stack_slot));
  DONE;
})
6153
;; 32-bit DImode register pair -> x87 float, routed through an XMM
;; register and a DImode stack slot (operand 2).  After reload the insn
;; splits into: assemble the 64-bit value in xmm operand 3, store it to
;; the slot, then convert from the slot.  Alternative 0 (isa sse4) does
;; not need the second scratch, operand 4; alternative 1 does.
(define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
        (float:X87MODEF
          (match_operand:DI 1 "register_operand" "r,r")))
   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
   (clobber (match_scratch:V4SI 3 "=x,x"))
   (clobber (match_scratch:V4SI 4 "=X,x"))]
  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
   && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 3))
   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
{
  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
     Assemble the 64-bit DImode value in an xmm register.  */
  rtx lo_half = gen_lowpart (SImode, operands[1]);
  rtx hi_half = gen_highpart (SImode, operands[1]);

  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), lo_half));

  if (!TARGET_SSE4_1)
    {
      /* Load the high half into the second scratch and interleave it
         with the low half.  */
      emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
                                  hi_half));
      emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
                                             operands[4]));
    }
  else
    emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
                                  hi_half, GEN_INT (2)));

  /* The split template stores operand 3 as a DImode value.  */
  operands[3] = gen_lowpart (DImode, operands[3]);
}
  [(set_attr "isa" "sse4,*")
   (set_attr "type" "multi")
   (set_attr "mode" "<X87MODEF:MODE>")
   (set_attr "unit" "i387")
   (set_attr "fp_int_src" "true")])
6191
;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore.
;;
;; The destination register, viewed in its vector mode, is first set to
;; zero; the conversion is then expressed as a vec_merge that writes
;; element 0 of that register.

(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
        (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
  "!TARGET_AVX
   && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
   && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(set (match_dup 0)
        (vec_merge:<MODEF:ssevecmode>
          (vec_duplicate:<MODEF:ssevecmode>
            (float:MODEF
              (match_dup 1)))
          (match_dup 0)
          (const_int 1)))]
{
  const machine_mode vec_mode = <MODEF:ssevecmode>mode;

  /* From here on operand 0 is the vector view of the destination.  */
  operands[0] = lowpart_subreg (vec_mode, operands[0], <MODEF:MODE>mode);
  emit_move_insn (operands[0], CONST0_RTX (vec_mode));
})
6218
;; Unsigned SWI12 integer -> SFmode/DFmode on 32-bit SSE-math targets:
;; widen the source to SImode as an unsigned value, then use the signed
;; SImode conversion.
(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
        (unsigned_float:MODEF
          (match_operand:SWI12 1 "nonimmediate_operand")))]
  "!TARGET_64BIT
   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
{
  /* The final argument 1 requests an unsigned conversion.  */
  operands[1] = convert_to_mode (SImode, operands[1], 1);
  emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
  DONE;
})
6230
;; Unsigned SWI48 integer -> SFmode/DFmode using the AVX512F vcvtusi2*
;; instruction.  The destination register is also printed as the first
;; source operand.
(define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
  [(set (match_operand:MODEF 0 "register_operand" "=v")
        (unsigned_float:MODEF
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
  "TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "avx_partial_xmm_update" "true")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODEF:MODE>")])
6241
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack.  Also note that fild loads from memory only.
;;
;; After reload the insn splits into three steps: zero-extend the
;; source into the XMM scratch (operand 3), store that to the DImode
;; slot (operand 2), and convert from the slot as a signed DImode
;; value.

(define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
        (unsigned_float:X87MODEF
          (match_operand:SI 1 "nonimmediate_operand" "rm")))
   (clobber (match_operand:DI 2 "memory_operand" "=m"))
   (clobber (match_scratch:DI 3 "=x"))]
  "!TARGET_64BIT
   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 0)
        (float:X87MODEF (match_dup 2)))]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])
6264
;; Unsigned SImode -> float.
;;   * If the mode is not handled by SSE math, use the x87 sequence
;;     through an XMM register and a temporary DImode stack slot.
;;   * Otherwise, without AVX512F, call the SSE helper.
;;   * With AVX512F the pattern below is emitted as written.
(define_expand "floatunssi<mode>2"
  [(set (match_operand:X87MODEF 0 "register_operand")
        (unsigned_float:X87MODEF
          (match_operand:SI 1 "nonimmediate_operand")))]
  "(!TARGET_64BIT
    && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
    && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
   || ((!TARGET_64BIT || TARGET_AVX512F)
       && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  const bool sse_math_p = SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH;

  if (!sse_math_p)
    {
      rtx slot = assign_386_stack_local (DImode, SLOT_TEMP);
      emit_insn (gen_floatunssi<mode>2_i387_with_xmm
                  (operands[0], operands[1], slot));
      DONE;
    }

  if (!TARGET_AVX512F)
    {
      ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
      DONE;
    }
})
6288
;; Unsigned DImode -> SFmode on 64-bit SSE-math targets.  Without
;; AVX512F the conversion is delegated to x86_emit_floatuns; with
;; AVX512F the pattern below is emitted as written.
(define_expand "floatunsdisf2"
  [(set (match_operand:SF 0 "register_operand")
        (unsigned_float:SF
          (match_operand:DI 1 "nonimmediate_operand")))]
  "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
{
  if (!TARGET_AVX512F)
    {
      x86_emit_floatuns (operands);
      DONE;
    }
})
6301
;; Unsigned DImode -> DFmode under SSE2 math.
;;   * 32-bit targets: ix86_expand_convert_uns_didf_sse.
;;   * 64-bit without AVX512F: x86_emit_floatuns.
;;   * 64-bit with AVX512F: the pattern below is emitted as written.
(define_expand "floatunsdidf2"
  [(set (match_operand:DF 0 "register_operand")
        (unsigned_float:DF
          (match_operand:DI 1 "nonimmediate_operand")))]
  "((TARGET_64BIT && TARGET_AVX512F)
    || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
   && TARGET_SSE2 && TARGET_SSE_MATH"
{
  if (!TARGET_64BIT)
    {
      ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
      DONE;
    }
  else if (!TARGET_AVX512F)
    {
      x86_emit_floatuns (operands);
      DONE;
    }
})
6321 \f
6322 ;; Load effective address instructions
6323
;; Load effective address into a general register.  An address that
;; satisfies SImode_address_operand is emitted as a 32-bit lea writing
;; the 32-bit name of the destination (%k0); this case is asserted to
;; occur only on 64-bit targets, and the "mode" attribute is SI for it.
(define_insn "*lea<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
  "ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  if (!SImode_address_operand (operands[1], VOIDmode))
    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";

  gcc_assert (TARGET_64BIT);
  return "lea{l}\t{%E1, %k0|%k0, %E1}";
}
  [(set_attr "type" "lea")
   (set (attr "mode")
        (if_then_else
          (match_operand 1 "SImode_address_operand")
          (const_string "SI")
          (const_string "<MODE>")))])
6343
;; Replace an lea by the sequence produced by ix86_split_lea_for_addr
;; when FLAGS_REG is dead at this point and ix86_avoid_lea_for_addr
;; asks for it.
(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
        (match_operand:SWI48 1 "address_no_seg_operand"))]
  "ix86_hardreg_mov_ok (operands[0], operands[1])
   && peep2_regno_dead_p (0, FLAGS_REG)
   && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
  [(const_int 0)]
{
  machine_mode split_mode = <MODE>mode;

  /* Emit all operations in SImode for zero-extended addresses.  */
  if (SImode_address_operand (operands[1], VOIDmode))
    split_mode = SImode;

  ix86_split_lea_for_addr (peep2_next_insn (0), operands, split_mode);

  /* Zero-extend return register to DImode for zero-extended addresses.  */
  if (split_mode != <MODE>mode)
    emit_insn (gen_zero_extendsidi2 (operands[0],
                                     gen_lowpart (split_mode, operands[0])));

  DONE;
})
6367
;; ix86_split_lea_for_addr emits the shifts as MULT to prevent them from
;; being peephole2 optimized back into a lea.  Split that into the shift
;; during the following split pass.  The multiplier (operand 1, matched
;; by const1248_operand) is replaced by its base-2 logarithm, i.e. the
;; shift count.
(define_split
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));
})
6379
;; TARGET_APX_NF counterpart of the MULT-to-shift split: the same
;; rewrite for a multiply that carries no FLAGS_REG clobber.
(define_split
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
  "TARGET_APX_NF && reload_completed"
  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
{
  operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));
})
6386
;; The peephole2 pass may expose consecutive additions suitable for lea:
;; a register add followed by an immediate add into the same register
;; is merged into one three-term sum without a FLAGS_REG clobber.
(define_peephole2
  [(parallel [(set (match_operand:SWI48 0 "register_operand")
                   (plus:SWI48 (match_dup 0)
                               (match_operand 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (plus:SWI48 (match_dup 0)
                               (match_operand 2 "x86_64_immediate_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)"
  [(set (match_dup 0) (plus:SWI48 (plus:SWI48 (match_dup 0)
                                              (match_dup 1))
                                  (match_dup 2)))])
6401 \f
6402 ;; Add instructions
6403
;; Generic integer addition expander.  All the work is delegated to
;; ix86_expand_binary_operator; TARGET_APX_NDD is passed through as its
;; last argument.
(define_expand "add<mode>3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                    (match_operand:SDWIM 2 "<general_hilo_operand>")))]
  ""
{
  ix86_expand_binary_operator (PLUS, <MODE>mode, operands, TARGET_APX_NDD);
  DONE;
})
6413
;; Double-word addition.  After reload it splits into an add of the low
;; halves that also sets the carry (CCC compare) and an add-with-carry
;; of the high halves.  split_double_mode leaves the low halves in
;; operands 0-2 and puts the high halves in operands 3-5.  When the low
;; half of the addend is zero, the carry chain is skipped and only the
;; necessary moves and the high-half add are emitted.
(define_insn_and_split "*add<dwi>3_doubleword"
  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
        (plus:<DWI>
          (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
          (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,K,<di>,r")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);

  if (operands[2] == const0_rtx)
    {
      /* Under NDD op0 and op1 may not equal, do not delete insn then.  */
      const bool low_half_moved_p = !rtx_equal_p (operands[0], operands[1]);
      if (low_half_moved_p)
        emit_move_insn (operands[0], operands[1]);

      if (operands[5] != const0_rtx)
        ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3],
                                     TARGET_APX_NDD);
      else if (!rtx_equal_p (operands[3], operands[4]))
        emit_move_insn (operands[3], operands[4]);
      else if (!low_half_moved_p)
        /* Nothing was emitted at all; leave a deleted-insn note.  */
        emit_note (NOTE_INSN_DELETED);

      DONE;
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
6458
;; Double-word addition of a zero-extended single-word value
;; (operand 2) to a double-word value (operand 1).  After reload: add
;; operand 2 to the low half with carry out, then add only the carry to
;; the high half.  split_double_mode splits operands 0 and 1; their
;; high halves land in operands 3 and 4.
(define_insn_and_split "*add<dwi>3_doubleword_zext"
  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
        (plus:<DWI>
          (zero_extend:<DWI>
            (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
          (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (const_int 0)))
              (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
6484
;; Double-word addition where one addend is a concatenation of two
;; single-word values: operand 2 shifted left by a full word, combined
;; (any_or_plus) with zero-extended operand 4.  The insn condition
;; requires the shift count to be the word size in bits.  After reload:
;; add operand 4 to the low half with carry out, then add operand 2
;; plus the carry to the high half.  split_double_mode splits operands
;; 0 and 1; their high halves land in operands 5 and 6.
(define_insn_and_split "*add<dwi>3_doubleword_concat"
  [(set (match_operand:<DWI> 0 "register_operand" "=&r")
        (plus:<DWI>
          (any_or_plus:<DWI>
            (ashift:<DWI>
              (zero_extend:<DWI>
                (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
              (match_operand:QI 3 "const_int_operand"))
            (zero_extend:<DWI>
              (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
          (match_operand:<DWI> 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 4))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 4)))])
   (parallel [(set (match_dup 5)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 6))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[5]);
})
6514
;; As *add<dwi>3_doubleword_concat, but the other addend is a
;; zero-extended single-word value (operand 1).  After reload: copy
;; operand 4 into the low half of the destination, add operand 1 to it
;; with carry out, copy operand 2 into the high half (operand 5), and
;; add the carry to it.  split_double_mode splits only operand 0; its
;; high half lands in operand 5.
(define_insn_and_split "*add<dwi>3_doubleword_concat_zext"
  [(set (match_operand:<DWI> 0 "register_operand" "=&r")
        (plus:<DWI>
          (any_or_plus:<DWI>
            (ashift:<DWI>
              (zero_extend:<DWI>
                (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
              (match_operand:QI 3 "const_int_operand"))
            (zero_extend:<DWI>
              (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
          (zero_extend:<DWI>
            (match_operand:DWIH 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))
   (parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 0) (match_dup 1))
                     (match_dup 0)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 0) (match_dup 1)))])
   (set (match_dup 5) (match_dup 2))
   (parallel [(set (match_dup 5)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 5))
                     (const_int 0)))
              (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);
})
6547
;; Substitution attributes tied to "nf_subst".  Per the define_subst_attr
;; syntax, the third string is the value used in the pattern as written
;; (substitution not applied) and the fourth string is the value used
;; in the pattern generated by the substitution.
;;
;;   nf_name                pattern-name suffix
;;   nf_prefix              assembler prefix placed before the mnemonic
;;   nf_condition           extra term for the insn condition
;;   nf_add_mem_constraint  memory constraint for add destinations
;;   nf_mem_constraint      memory constraint for other destinations
;;   nf_applied             C boolean usable inside output code
;;   nf_nonf_attr           value for an "isa"-style attribute
;;   nf_nonf_x64_attr       likewise, with "x64" as the other value
(define_subst_attr "nf_name"               "nf_subst" "_nf"           "")
(define_subst_attr "nf_prefix"             "nf_subst" "%{nf%} "       "")
(define_subst_attr "nf_condition"          "nf_subst" "TARGET_APX_NF" "true")
(define_subst_attr "nf_add_mem_constraint" "nf_subst" "je"            "m")
(define_subst_attr "nf_mem_constraint"     "nf_subst" "jM"            "m")
(define_subst_attr "nf_applied"            "nf_subst" "true"          "false")
(define_subst_attr "nf_nonf_attr"          "nf_subst" "noapx_nf"      "*")
(define_subst_attr "nf_nonf_x64_attr"      "nf_subst" "noapx_nf"      "x64")
6556
;; Given a pattern consisting of a single SWIDWI set, generate a second
;; pattern with the same set plus a clobber of FLAGS_REG.  Patterns
;; that use the nf_* attributes are therefore written without the
;; clobber and obtain their flag-clobbering twin from this
;; substitution.
(define_subst "nf_subst"
  [(set (match_operand:SWIDWI 0)
        (match_operand:SWIDWI 1))]
  ""
  [(set (match_dup 0)
        (match_dup 1))
   (clobber (reg:CC FLAGS_REG))])
6564
;; Single-word SWI48 addition, written without a FLAGS_REG clobber and
;; duplicated by nf_subst.  Alternatives 5-7 are the APX NDD forms
;; (per the "isa" attribute).  Alternative 4 has type "lea"; an
;; operand 2 matching incdec_operand gives type "incdec"; everything
;; else is "alu".  The output code picks inc/dec, sub (when the
;; immediate is better negated) or add, using the three-operand
;; spelling for the NDD alternatives.
(define_insn "*add<mode>_1<nf_name>"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r<nf_add_mem_constraint>,r<nf_mem_constraint>,r,r,r,r,r,r")
        (plus:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
          (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
   && <nf_condition>"
{
  const bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      /* Only the NF pattern on an NDD target prints an add here; in
         every other case the insn is left to be split.  */
      if (!(TARGET_APX_NDD && <nf_applied>))
        return "#";
      return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

    case TYPE_INCDEC:
      if (operands[2] != const1_rtx)
        {
          gcc_assert (operands[2] == constm1_rtx);
          if (use_ndd)
            return "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "<nf_prefix>dec{<imodesuffix>}\t%0";
        }
      if (use_ndd)
        return "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "<nf_prefix>inc{<imodesuffix>}\t%0";

    default:
      /* For most processors, ADD is faster than LEA.  This alternative
         was added to use ADD as much as possible.  */
      if (which_alternative == 3)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        {
          if (use_ndd)
            return "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
          return "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
        }

      if (use_ndd)
        return "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
   (set (attr "type")
        (cond [(eq_attr "alternative" "4")
                 (const_string "lea")
               (match_operand:SWI48 2 "incdec_operand")
                 (const_string "incdec")
              ]
              (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "has_nf" "1")
   (set_attr "mode" "<MODE>")])
6622
6623 ;; For APX instruction with an NDD, the destination GPR will get the
6624 ;; instruction’s result in bits [OSIZE-1:0] and, if OSIZE < 64b, have
6625 ;; its upper bits [63:OSIZE] zeroed.
6626
;; QImode add whose result is zero-extended into the wider register
;; operand 0.  Enabled only under TARGET_APX_NDD (plus <nf_condition> and
;; the no-two-memory-operands check); every output template names the
;; destination (%b0) separately from source operand 1.
6627 (define_insn "*addqi_1_zext<mode><nf_name>"
6628 [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
6629 (zero_extend:SWI248x
6630 (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
6631 (match_operand:QI 2 "general_operand" "rn,m"))))]
6632 "TARGET_APX_NDD && <nf_condition>
6633 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6634 {
6635 switch (get_attr_type (insn))
6636 {
6637 case TYPE_INCDEC:
/* The "type" attribute is "incdec" only when operand 2 matches
   incdec_operand; the code below expects that to mean 1 or -1.  */
6638 if (operands[2] == const1_rtx)
6639 return "<nf_prefix>inc{b}\t{%1, %b0|%b0, %1}";
6640 else
6641 {
6642 gcc_assert (operands[2] == constm1_rtx);
6643 return "<nf_prefix>dec{b}\t{%1, %b0|%b0, %1}";
6644 }
6645
6646 default:
/* x86_maybe_negate_const_int is handed the address of operand 2 and
   may rewrite it; a true return selects the sub template.
   NOTE(review): presumably it negates the constant -- confirm in
   i386.cc.  */
6647 if (x86_maybe_negate_const_int (&operands[2], QImode))
6648 return "<nf_prefix>sub{b}\t{%2, %1, %b0|%b0, %1, %2}";
6649 return "<nf_prefix>add{b}\t{%2, %1, %b0|%b0, %1, %2}";
6650 }
6651 }
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
6652 [(set (attr "type")
6653 (cond [(match_operand:QI 2 "incdec_operand")
6654 (const_string "incdec")
6655 ]
6656 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
6657 (set (attr "length_immediate")
6658 (if_then_else
6659 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6660 (const_string "1")
6661 (const_string "*")))
6662 (set_attr "has_nf" "1")
6663 (set_attr "mode" "QI")])
6664
;; HImode add whose result is zero-extended into the wider register
;; operand 0.  Enabled only under TARGET_APX_NDD (plus <nf_condition> and
;; the no-two-memory-operands check); every output template names the
;; destination (%w0) separately from source operand 1.
6665 (define_insn "*addhi_1_zext<mode><nf_name>"
6666 [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
6667 (zero_extend:SWI48x
6668 (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
6669 (match_operand:HI 2 "general_operand" "rn,m"))))]
6670 "TARGET_APX_NDD && <nf_condition>
6671 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6672 {
6673 switch (get_attr_type (insn))
6674 {
6675 case TYPE_INCDEC:
/* The "type" attribute is "incdec" only when operand 2 matches
   incdec_operand; the code below expects that to mean 1 or -1.  */
6676 if (operands[2] == const1_rtx)
6677 return "<nf_prefix>inc{w}\t{%1, %w0|%w0, %1}";
6678 else
6679 {
6680 gcc_assert (operands[2] == constm1_rtx);
6681 return "<nf_prefix>dec{w}\t{%1, %w0|%w0, %1}";
6682 }
6683
6684 default:
/* x86_maybe_negate_const_int is handed the address of operand 2 and
   may rewrite it; a true return selects the sub template.  */
6685 if (x86_maybe_negate_const_int (&operands[2], HImode))
6686 return "<nf_prefix>sub{w}\t{%2, %1, %w0|%w0, %1, %2}";
6687 return "<nf_prefix>add{w}\t{%2, %1, %w0|%w0, %1, %2}";
6688 }
6689 }
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
;; Operand 2 is declared HImode in the pattern, so it is tested in HImode
;; here, the same way *addhi_1 tests its operand 2.
6690 [(set (attr "type")
6691 (cond [(match_operand:HI 2 "incdec_operand")
6692 (const_string "incdec")
6693 ]
6694 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
6695 (set (attr "length_immediate")
6696 (if_then_else
6697 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6698 (const_string "1")
6699 (const_string "*")))
6700 (set_attr "has_nf" "1")
6701 (set_attr "mode" "HI")])
6702
6703 ;; It may seem that nonimmediate operand is proper one for operand 1.
6704 ;; The addsi_1 pattern allows nonimmediate operand at that place and
6705 ;; we take care in ix86_binary_operator_ok to not allow two memory
6706 ;; operands so proper swapping will be done in reload. This allows
6707 ;; patterns constructed from addsi_1 to match.
6708
;; SImode add zero-extended into a DImode register, clobbering the flags.
;; 64-bit only.  Alternatives 0-2 are the plain forms (alternative 2 is
;; the "lea" type); alternatives 3-5 carry isa "apx_ndd" and use the
;; templates that name the destination separately from operand 1.
6709 (define_insn "addsi_1_zext"
6710 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
6711 (zero_extend:DI
6712 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r,rm,rjM")
6713 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe,r,e"))))
6714 (clobber (reg:CC FLAGS_REG))]
6715 "TARGET_64BIT
6716 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
6717 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
6718 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6719 switch (get_attr_type (insn))
6720 {
6721 case TYPE_LEA:
/* No direct template for the lea alternative; "#" leaves the insn to
   be split (see the lea-related define_splits later in this file).  */
6722 return "#";
6723
6724 case TYPE_INCDEC:
6725 if (operands[2] == const1_rtx)
6726 return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
6727 : "inc{l}\t%k0";
6728 else
6729 {
6730 gcc_assert (operands[2] == constm1_rtx);
6731 return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
6732 : "dec{l}\t%k0";
6733 }
6734
6735 default:
6736 /* For most processors, ADD is faster than LEA. This alternative
6737 was added to use ADD as much as possible. */
/* In alternative 1 it is operand 2 that is tied to the destination
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
6738 if (which_alternative == 1)
6739 std::swap (operands[1], operands[2]);
6740
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
6741 if (x86_maybe_negate_const_int (&operands[2], SImode))
6742 return use_ndd ? "sub{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
6743 : "sub{l}\t{%2, %k0|%k0, %2}";
6744
6745 return use_ndd ? "add{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
6746 : "add{l}\t{%2, %k0|%k0, %2}";
6747 }
6748 }
6749 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
;; Alternative 2 is always "lea"; otherwise "incdec" when operand 2
;; matches incdec_operand, else "alu".
6750 (set (attr "type")
6751 (cond [(eq_attr "alternative" "2")
6752 (const_string "lea")
6753 (match_operand:SI 2 "incdec_operand")
6754 (const_string "incdec")
6755 ]
6756 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
6757 (set (attr "length_immediate")
6758 (if_then_else
6759 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6760 (const_string "1")
6761 (const_string "*")))
6762 (set_attr "mode" "SI")])
6763
;; HImode add.  Alternatives 0-3 are the plain forms (alternative 3 is the
;; "lea" type and is marked SImode in the "mode" attribute); alternatives
;; 4-5 carry isa "apx_ndd" and use the templates that name the destination
;; separately from operand 1.
6764 (define_insn "*addhi_1<nf_name>"
6765 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
6766 (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
6767 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
6768 "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
6769 && <nf_condition>"
6770 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
6771 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6772 switch (get_attr_type (insn))
6773 {
6774 case TYPE_LEA:
/* The lea alternative is emitted directly as an {nf} add when
   TARGET_APX_NDD and <nf_applied> both hold; otherwise "#" leaves
   the insn to be split.  */
6775 if (TARGET_APX_NDD && <nf_applied>)
6776 return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
6777 else
6778 return "#";
6779
6780 case TYPE_INCDEC:
6781 if (operands[2] == const1_rtx)
6782 return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}"
6783 : "<nf_prefix>inc{w}\t%0";
6784 else
6785 {
6786 gcc_assert (operands[2] == constm1_rtx);
6787 return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}"
6788 : "<nf_prefix>dec{w}\t%0";
6789 }
6790
6791 default:
6792 /* For most processors, ADD is faster than LEA. This alternative
6793 was added to use ADD as much as possible. */
/* In alternative 2 it is operand 2 that is tied to the destination
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
6794 if (which_alternative == 2)
6795 std::swap (operands[1], operands[2]);
6796
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
6797 if (x86_maybe_negate_const_int (&operands[2], HImode))
6798 return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
6799 : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
6800
6801 return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
6802 : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
6803 }
6804 }
6805 [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
;; Alternative 3 is always "lea"; otherwise "incdec" when operand 2
;; matches incdec_operand, else "alu".
6806 (set (attr "type")
6807 (cond [(eq_attr "alternative" "3")
6808 (const_string "lea")
6809 (match_operand:HI 2 "incdec_operand")
6810 (const_string "incdec")
6811 ]
6812 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
6813 (set (attr "length_immediate")
6814 (if_then_else
6815 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6816 (const_string "1")
6817 (const_string "*")))
6818 (set_attr "has_nf" "1")
6819 (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
6820
;; QImode add.  Alternatives 0-2 operate in QImode; alternatives 3-5 are
;; marked SImode in the "mode" attribute (alternative 5 is the "lea"
;; type); alternatives 6-7 carry isa "apx_ndd" and use the templates that
;; name the destination separately from operand 1.
6821 (define_insn "*addqi_1<nf_name>"
6822 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
6823 (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
6824 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
6825 "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
6826 && <nf_condition>"
6827 {
/* widen: the chosen alternative's "mode" attribute is not QI, so the
   non-NDD templates use the 32-bit mnemonic on %k0.
   use_ndd: the chosen alternative's "isa" attribute is apx_ndd.  */
6828 bool widen = (get_attr_mode (insn) != MODE_QI);
6829 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6830 switch (get_attr_type (insn))
6831 {
6832 case TYPE_LEA:
/* The lea alternative is emitted directly as an {nf} add when
   TARGET_APX_NDD and <nf_applied> both hold; otherwise "#" leaves
   the insn to be split.  */
6833 if (TARGET_APX_NDD && <nf_applied>)
6834 return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
6835 else
6836 return "#";
6837
6838 case TYPE_INCDEC:
/* Note the unbraced nesting: the first "else" below pairs with
   "if (use_ndd)", the second with the const1_rtx test.  */
6839 if (operands[2] == const1_rtx)
6840 if (use_ndd)
6841 return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
6842 else
6843 return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
6844 else
6845 {
6846 gcc_assert (operands[2] == constm1_rtx);
6847 if (use_ndd)
6848 return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
6849 else
6850 return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
6851 }
6852
6853 default:
6854 /* For most processors, ADD is faster than LEA. These alternatives
6855 were added to use ADD as much as possible. */
/* In alternatives 2 and 4 it is operand 2 that is tied to the
   destination (constraint "0"), so swap to keep the tied operand in
   slot 1.  */
6856 if (which_alternative == 2 || which_alternative == 4)
6857 std::swap (operands[1], operands[2]);
6858
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
6859 if (x86_maybe_negate_const_int (&operands[2], QImode))
6860 {
6861 if (use_ndd)
6862 return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
6863 else
6864 return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
6865 : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
6866 }
6867 if (use_ndd)
6868 return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
6869 else
6870 return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
6871 : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
6872 }
6873 }
6874 [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
;; Alternative 5 is always "lea"; otherwise "incdec" when operand 2
;; matches incdec_operand, else "alu".
6875 (set (attr "type")
6876 (cond [(eq_attr "alternative" "5")
6877 (const_string "lea")
6878 (match_operand:QI 2 "incdec_operand")
6879 (const_string "incdec")
6880 ]
6881 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
6882 (set (attr "length_immediate")
6883 (if_then_else
6884 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6885 (const_string "1")
6886 (const_string "*")))
6887 (set_attr "has_nf" "1")
6888 (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI")
6889 ;; Potential partial reg stall on alternatives 3 and 4.
6890 (set (attr "preferred_for_speed")
6891 (cond [(eq_attr "alternative" "3,4")
6892 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
6893 (symbol_ref "true")))])
6894
6895 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add into the strict_low_part of register operand 0, clobbering the
;; flags.  Alternative 0 ties operand 1 to the destination and is emitted
;; directly.  Alternative 1 is emitted as "#" and, after reload, is split
;; into a copy of operand 1 into the low part of operand 0 followed by an
;; add of operand 2, unless operand 0 already equals operand 1 or 2.
6896 (define_insn_and_split "*add<mode>_1_slp"
6897 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
6898 (plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
6899 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
6900 (clobber (reg:CC FLAGS_REG))]
6901 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6902 {
/* Any alternative other than 0 has no direct template.  */
6903 if (which_alternative)
6904 return "#";
6905
6906 switch (get_attr_type (insn))
6907 {
6908 case TYPE_INCDEC:
6909 if (operands[2] == const1_rtx)
6910 return "inc{<imodesuffix>}\t%0";
6911 else
6912 {
6913 gcc_assert (operands[2] == constm1_rtx);
6914 return "dec{<imodesuffix>}\t%0";
6915 }
6916
6917 default:
/* Pass the pattern's own mode, as *add<mode>_2 and *add<mode>_4 do,
   instead of a hard-coded QImode: the operands are SWI12, which
   presumably also covers HImode.  A true return selects sub.  */
6918 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6919 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6920
6921 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6922 }
6923 }
6924 "&& reload_completed
6925 && !(rtx_equal_p (operands[0], operands[1])
6926 || rtx_equal_p (operands[0], operands[2]))"
6927 [(set (strict_low_part (match_dup 0)) (match_dup 1))
6928 (parallel
6929 [(set (strict_low_part (match_dup 0))
6930 (plus:SWI12 (match_dup 0) (match_dup 2)))
6931 (clobber (reg:CC FLAGS_REG))])]
6932 ""
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
;; The operand is tested in the pattern's own mode rather than in QImode.
6933 [(set (attr "type")
6934 (if_then_else (match_operand:<MODE> 2 "incdec_operand")
6935 (const_string "incdec")
6936 (const_string "alu")))
6937 (set_attr "mode" "<MODE>")])
6938
6939 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add the 8-bit field extracted from operand 2 (size 8, position 8,
;; printed with %h2) to QImode operand 1 and store the sum into the
;; strict_low_part of register operand 0.  Alternative 0 ties operand 1 to
;; the destination; alternative 1 is emitted as "#" and, after reload, is
;; split into a copy of operand 1 followed by the add when operand 0 and
;; operand 1 differ.
6940 (define_insn_and_split "*addqi_ext<mode>_1_slp"
6941 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
6942 (plus:QI
6943 (subreg:QI
6944 (match_operator:SWI248 3 "extract_operator"
6945 [(match_operand 2 "int248_register_operand" "Q,Q")
6946 (const_int 8)
6947 (const_int 8)]) 0)
6948 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
6949 (clobber (reg:CC FLAGS_REG))]
6950 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6951 "@
6952 add{b}\t{%h2, %0|%0, %h2}
6953 #"
6954 "&& reload_completed
6955 && !rtx_equal_p (operands[0], operands[1])"
;; Replacement sequence: first copy operand 1 into the low part of
;; operand 0, then add the extracted field to operand 0 in place.
6956 [(set (strict_low_part (match_dup 0)) (match_dup 1))
6957 (parallel
6958 [(set (strict_low_part (match_dup 0))
6959 (plus:QI
6960 (subreg:QI
6961 (match_op_dup 3
6962 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
6963 (match_dup 0)))
6964 (clobber (reg:CC FLAGS_REG))])]
6965 ""
6966 [(set_attr "type" "alu")
6967 (set_attr "mode" "QI")])
6968
;; Add two extracted 8-bit fields (size 8, position 8, of operands 1 and
;; 2) and store the sum into the strict_low_part of register operand 0.
;; There is no direct template ("#"): after reload the insn is always
;; split into a copy of operand 2's field into operand 0 followed by an
;; add of operand 1's field.
6969 (define_insn_and_split "*addqi_ext<mode>_2_slp"
6970 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
6971 (plus:QI
6972 (subreg:QI
6973 (match_operator:SWI248 3 "extract_operator"
6974 [(match_operand 1 "int248_register_operand" "Q")
6975 (const_int 8)
6976 (const_int 8)]) 0)
6977 (subreg:QI
6978 (match_operator:SWI248 4 "extract_operator"
6979 [(match_operand 2 "int248_register_operand" "Q")
6980 (const_int 8)
6981 (const_int 8)]) 0)))
6982 (clobber (reg:CC FLAGS_REG))]
6983 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6984 "#"
6985 "&& reload_completed"
;; Step 1: low part of operand 0 = field extracted from operand 2.
6986 [(set (strict_low_part (match_dup 0))
6987 (subreg:QI
6988 (match_op_dup 4
6989 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
;; Step 2: low part of operand 0 += field extracted from operand 1.
6990 (parallel
6991 [(set (strict_low_part (match_dup 0))
6992 (plus:QI
6993 (subreg:QI
6994 (match_op_dup 3
6995 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
6996 (match_dup 0)))
6997 (clobber (reg:CC FLAGS_REG))])]
6998 ""
6999 [(set_attr "type" "alu")
7000 (set_attr "mode" "QI")])
7001
7002 ;; Split non destructive adds if we cannot use lea.
;; Applies after reload when ix86_avoid_lea_for_add accepts the insn:
;; the flag-clobbering add of a register and a non-memory operand is
;; rewritten as a copy of operand 1 into operand 0 followed by an
;; in-place add of operand 2.
7003 (define_split
7004 [(set (match_operand:SWI48 0 "register_operand")
7005 (plus:SWI48 (match_operand:SWI48 1 "register_operand")
7006 (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
7007 (clobber (reg:CC FLAGS_REG))]
7008 "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
7009 [(set (match_dup 0) (match_dup 1))
7010 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
7011 (clobber (reg:CC FLAGS_REG))])])
7012
7013 ;; Split non destructive adds if we cannot use lea.
;; Zero-extending (SImode add into a DImode register) variant, 64-bit
;; only: after reload, when ix86_avoid_lea_for_add accepts the insn, copy
;; operand 1 into the SImode low part of operand 0 (operand 3) and then
;; do the zero-extended add on that low part.
7014 (define_split
7015 [(set (match_operand:DI 0 "register_operand")
7016 (zero_extend:DI
7017 (plus:SI (match_operand:SI 1 "register_operand")
7018 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
7019 (clobber (reg:CC FLAGS_REG))]
7020 "TARGET_64BIT
7021 && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
7022 [(set (match_dup 3) (match_dup 1))
7023 (parallel [(set (match_dup 0)
7024 (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
7025 (clobber (reg:CC FLAGS_REG))])]
7026 "operands[3] = gen_lowpart (SImode, operands[0]);")
7027
7028 ;; Convert add to the lea pattern to avoid flags dependency.
;; Applies after reload when ix86_lea_for_add_ok accepts the insn: the
;; replacement is the same sum in <LEAMODE> without the flags clobber.
;; When <LEAMODE> differs from the pattern's mode, all three operands are
;; first re-expressed in <LEAMODE> with gen_lowpart.
7029 (define_split
7030 [(set (match_operand:SWI 0 "register_operand")
7031 (plus:SWI (match_operand:SWI 1 "register_operand")
7032 (match_operand:SWI 2 "<nonmemory_operand>")))
7033 (clobber (reg:CC FLAGS_REG))]
7034 "reload_completed && ix86_lea_for_add_ok (insn, operands)"
7035 [(set (match_dup 0)
7036 (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
7037 {
7038 if (<MODE>mode != <LEAMODE>mode)
7039 {
7040 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
7041 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
7042 operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
7043 }
7044 })
7045
;; Same conversion for an add that has no flags clobber in its pattern.
;; Requires TARGET_APX_NF, reload_completed and ix86_lea_for_add_ok; the
;; replacement is the sum in <LEAMODE>, with the operands re-expressed via
;; gen_lowpart when <LEAMODE> differs from the pattern's mode.
7046 (define_split
7047 [(set (match_operand:SWI 0 "register_operand")
7048 (plus:SWI (match_operand:SWI 1 "register_operand")
7049 (match_operand:SWI 2 "<nonmemory_operand>")))]
7050 "TARGET_APX_NF && reload_completed
7051 && ix86_lea_for_add_ok (insn, operands)"
7052 [(set (match_dup 0)
7053 (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
7054 {
7055 if (<MODE>mode != <LEAMODE>mode)
7056 {
7057 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
7058 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
7059 operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
7060 }
7061 })
7062
7063 ;; Convert add to the lea pattern to avoid flags dependency.
;; Zero-extending variant, 64-bit only: after reload, when
;; ix86_lea_for_add_ok accepts the insn, the replacement is the same
;; zero-extended SImode sum with the flags clobber dropped.
7064 (define_split
7065 [(set (match_operand:DI 0 "register_operand")
7066 (zero_extend:DI
7067 (plus:SI (match_operand:SI 1 "register_operand")
7068 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
7069 (clobber (reg:CC FLAGS_REG))]
7070 "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
7071 [(set (match_dup 0)
7072 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
7073
;; Add that both stores the sum in operand 0 and sets the flags from
;; comparing the sum with zero; the flags use must be acceptable to
;; ix86_match_ccmode with CCGOCmode.  Alternatives 0-2 are the plain
;; forms; alternatives 3-4 carry isa "apx_ndd" and use the templates that
;; name the destination separately from operand 1.
7074 (define_insn "*add<mode>_2"
7075 [(set (reg FLAGS_REG)
7076 (compare
7077 (plus:SWI
7078 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>,rm,r")
7079 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0,r<i>,<m>"))
7080 (const_int 0)))
7081 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>,r,r")
7082 (plus:SWI (match_dup 1) (match_dup 2)))]
7083 "ix86_match_ccmode (insn, CCGOCmode)
7084 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
7085 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
7086 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
7087 switch (get_attr_type (insn))
7088 {
7089 case TYPE_INCDEC:
7090 if (operands[2] == const1_rtx)
7091 return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
7092 : "inc{<imodesuffix>}\t%0";
7093 else
7094 {
7095 gcc_assert (operands[2] == constm1_rtx);
7096 return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
7097 : "dec{<imodesuffix>}\t%0";
7098 }
7099
7100 default:
/* In alternative 2 it is operand 2 that is tied to the destination
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
7101 if (which_alternative == 2)
7102 std::swap (operands[1], operands[2]);
7103
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
7104 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
7105 return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7106 : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
7107
7108 return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7109 : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
7110 }
7111 }
7112 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7113 (set (attr "type")
7114 (if_then_else (match_operand:SWI 2 "incdec_operand")
7115 (const_string "incdec")
7116 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7117 (set (attr "length_immediate")
7118 (if_then_else
7119 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7120 (const_string "1")
7121 (const_string "*")))
7122 (set_attr "mode" "<MODE>")])
7123
7124 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
;; 64-bit only: SImode add that sets the flags from comparing the sum
;; with zero (CCGOCmode) and stores the zero-extended sum in DImode
;; register operand 0.  Alternatives 0-1 are the plain forms;
;; alternatives 2-3 carry isa "apx_ndd".
7125 (define_insn "*addsi_2_zext"
7126 [(set (reg FLAGS_REG)
7127 (compare
7128 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")
7129 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
7130 (const_int 0)))
7131 (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
7132 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
7133 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
7134 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
7135 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
7136 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
7137 switch (get_attr_type (insn))
7138 {
7139 case TYPE_INCDEC:
7140 if (operands[2] == const1_rtx)
7141 return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
7142 : "inc{l}\t%k0";
7143 else
7144 {
7145 gcc_assert (operands[2] == constm1_rtx);
7146 return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
7147 : "dec{l}\t%k0";
7148 }
7149
7150 default:
/* In alternative 1 it is operand 2 that is tied to the destination
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
7151 if (which_alternative == 1)
7152 std::swap (operands[1], operands[2]);
7153
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
7154 if (x86_maybe_negate_const_int (&operands[2], SImode))
7155 return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
7156 : "sub{l}\t{%2, %k0|%k0, %2}";
7157
7158 return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
7159 : "add{l}\t{%2, %k0|%k0, %2}";
7160 }
7161 }
7162 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7163 (set (attr "type")
7164 (if_then_else (match_operand:SI 2 "incdec_operand")
7165 (const_string "incdec")
7166 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7167 (set (attr "length_immediate")
7168 (if_then_else
7169 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7170 (const_string "1")
7171 (const_string "*")))
7172 (set_attr "mode" "SI")])
7173
;; Flags-only form: the flags are set from comparing the negation of
;; operand 2 with operand 1 (CCZmode), and the instruction is emitted as
;; an add whose result lands in scratch operand 0.  Alternatives 0-1 are
;; the plain forms; alternatives 2-3 carry isa "apx_ndd".
7174 (define_insn "*add<mode>_3"
7175 [(set (reg FLAGS_REG)
7176 (compare
7177 (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
7178 (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")))
7179 (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
7180 "ix86_match_ccmode (insn, CCZmode)
7181 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7182 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
7183 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
7184 switch (get_attr_type (insn))
7185 {
7186 case TYPE_INCDEC:
7187 if (operands[2] == const1_rtx)
7188 return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
7189 : "inc{<imodesuffix>}\t%0";
7190 else
7191 {
7192 gcc_assert (operands[2] == constm1_rtx);
7193 return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
7194 : "dec{<imodesuffix>}\t%0";
7195 }
7196
7197 default:
/* In alternative 1 it is operand 2 that is tied to the scratch
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
7198 if (which_alternative == 1)
7199 std::swap (operands[1], operands[2]);
7200
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
7201 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
7202 return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7203 : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
7204
7205 return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7206 : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
7207 }
7208 }
7209 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7210 (set (attr "type")
7211 (if_then_else (match_operand:SWI 2 "incdec_operand")
7212 (const_string "incdec")
7213 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7214 (set (attr "length_immediate")
7215 (if_then_else
7216 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7217 (const_string "1")
7218 (const_string "*")))
7219 (set_attr "mode" "<MODE>")])
7220
7221 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
;; 64-bit only: the flags are set from comparing the negation of operand
;; 2 with operand 1 (CCZmode), and the zero-extended SImode sum is stored
;; in DImode register operand 0.  Alternatives 0-1 are the plain forms;
;; alternatives 2-3 carry isa "apx_ndd".
7222 (define_insn "*addsi_3_zext"
7223 [(set (reg FLAGS_REG)
7224 (compare
7225 (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
7226 (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")))
7227 (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
7228 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
7229 "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
7230 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
7231 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
7232 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
7233 switch (get_attr_type (insn))
7234 {
7235 case TYPE_INCDEC:
7236 if (operands[2] == const1_rtx)
7237 return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0";
7238 else
7239 {
7240 gcc_assert (operands[2] == constm1_rtx);
7241 return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0";
7242 }
7243
7244 default:
/* In alternative 1 it is operand 2 that is tied to the destination
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
7245 if (which_alternative == 1)
7246 std::swap (operands[1], operands[2]);
7247
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
7248 if (x86_maybe_negate_const_int (&operands[2], SImode))
7249 return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
7250 : "sub{l}\t{%2, %k0|%k0, %2}";
7251
7252 return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
7253 : "add{l}\t{%2, %k0|%k0, %2}";
7254 }
7255 }
7256 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7257 (set (attr "type")
7258 (if_then_else (match_operand:SI 2 "incdec_operand")
7259 (const_string "incdec")
7260 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7261 (set (attr "length_immediate")
7262 (if_then_else
7263 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7264 (const_string "1")
7265 (const_string "*")))
7266 (set_attr "mode" "SI")])
7267
7268 ; For comparisons against 1, -1 and 128, we may generate better code
7269 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
7270 ; is matched then. We can't accept general immediate, because for
7271 ; case of overflows, the result is messed up.
7272 ; Also carry flag is reversed compared to cmp, so this conversion is valid
7273 ; only for comparisons not depending on it.
7274
;; 64-bit only: a compare of operand 1 against an immediate, emitted as
;; an arithmetic instruction whose result goes to scratch operand 0
;; (tied to operand 1 by the "0" constraint).  Because the pattern is a
;; compare, the mapping is the reverse of the add patterns: -1 gives inc,
;; 1 gives dec, and a true return from x86_maybe_negate_const_int selects
;; add rather than sub.
7275 (define_insn "*adddi_4"
7276 [(set (reg FLAGS_REG)
7277 (compare
7278 (match_operand:DI 1 "nonimmediate_operand" "0")
7279 (match_operand:DI 2 "x86_64_immediate_operand" "e")))
7280 (clobber (match_scratch:DI 0 "=r"))]
7281 "TARGET_64BIT
7282 && ix86_match_ccmode (insn, CCGCmode)"
7283 {
7284 switch (get_attr_type (insn))
7285 {
7286 case TYPE_INCDEC:
/* Comparing against -1 is emitted as inc, against 1 as dec.  */
7287 if (operands[2] == constm1_rtx)
7288 return "inc{q}\t%0";
7289 else
7290 {
7291 gcc_assert (operands[2] == const1_rtx);
7292 return "dec{q}\t%0";
7293 }
7294
7295 default:
/* x86_maybe_negate_const_int may rewrite operand 2; when it returns
   true the rewritten operand is added, otherwise operand 2 is
   subtracted as is.  */
7296 if (x86_maybe_negate_const_int (&operands[2], DImode))
7297 return "add{q}\t{%2, %0|%0, %2}";
7298
7299 return "sub{q}\t{%2, %0|%0, %2}";
7300 }
7301 }
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7302 [(set (attr "type")
7303 (if_then_else (match_operand:DI 2 "incdec_operand")
7304 (const_string "incdec")
7305 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7306 (set (attr "length_immediate")
7307 (if_then_else
7308 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7309 (const_string "1")
7310 (const_string "*")))
7311 (set_attr "mode" "DI")])
7312
7313 ; For comparisons against 1, -1 and 128, we may generate better code
7314 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
7315 ; is matched then. We can't accept general immediate, because for
7316 ; case of overflows, the result is messed up.
7317 ; Also carry flag is reversed compared to cmp, so this conversion is valid
7318 ; only for comparisons not depending on it.
7319
;; SWI124 counterpart of *adddi_4: a compare of operand 1 against a
;; const_int, emitted as an arithmetic instruction whose result goes to
;; scratch operand 0 (tied to operand 1 by the "0" constraint).  As a
;; compare, -1 gives inc, 1 gives dec, and a true return from
;; x86_maybe_negate_const_int selects add rather than sub.
7320 (define_insn "*add<mode>_4"
7321 [(set (reg FLAGS_REG)
7322 (compare
7323 (match_operand:SWI124 1 "nonimmediate_operand" "0")
7324 (match_operand:SWI124 2 "const_int_operand")))
7325 (clobber (match_scratch:SWI124 0 "=<r>"))]
7326 "ix86_match_ccmode (insn, CCGCmode)"
7327 {
7328 switch (get_attr_type (insn))
7329 {
7330 case TYPE_INCDEC:
/* Comparing against -1 is emitted as inc, against 1 as dec.  */
7331 if (operands[2] == constm1_rtx)
7332 return "inc{<imodesuffix>}\t%0";
7333 else
7334 {
7335 gcc_assert (operands[2] == const1_rtx);
7336 return "dec{<imodesuffix>}\t%0";
7337 }
7338
7339 default:
/* x86_maybe_negate_const_int may rewrite operand 2; when it returns
   true the rewritten operand is added, otherwise operand 2 is
   subtracted as is.  */
7340 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
7341 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
7342
7343 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
7344 }
7345 }
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7346 [(set (attr "type")
7347 (if_then_else (match_operand:<MODE> 2 "incdec_operand")
7348 (const_string "incdec")
7349 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7350 (set (attr "length_immediate")
7351 (if_then_else
7352 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7353 (const_string "1")
7354 (const_string "*")))
7355 (set_attr "mode" "<MODE>")])
7356
;; Flags-only add: the flags are set from comparing the sum of operands 1
;; and 2 with zero (CCGOCmode) and the sum itself lands in scratch
;; operand 0.  Alternatives 0-1 are the plain forms; alternatives 2-3
;; carry isa "apx_ndd".
7357 (define_insn "*add<mode>_5"
7358 [(set (reg FLAGS_REG)
7359 (compare
7360 (plus:SWI
7361 (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")
7362 (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
7363 (const_int 0)))
7364 (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
7365 "ix86_match_ccmode (insn, CCGOCmode)
7366 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7367 {
/* True for the alternatives whose "isa" attribute is apx_ndd.  */
7368 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
7369 switch (get_attr_type (insn))
7370 {
7371 case TYPE_INCDEC:
7372 if (operands[2] == const1_rtx)
7373 return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
7374 : "inc{<imodesuffix>}\t%0";
7375 else
7376 {
7377 gcc_assert (operands[2] == constm1_rtx);
7378 return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
7379 : "dec{<imodesuffix>}\t%0";
7380 }
7381
7382 default:
/* In alternative 1 it is operand 2 that is tied to the scratch
   (constraint "0"), so swap to keep the tied operand in slot 1.  */
7383 if (which_alternative == 1)
7384 std::swap (operands[1], operands[2]);
7385
/* A true return from x86_maybe_negate_const_int (which may rewrite
   operand 2) selects sub instead of add.  */
7386 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
7387 return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7388 : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
7389
7390 return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7391 : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
7392 }
7393 }
7394 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7395 (set (attr "type")
7396 (if_then_else (match_operand:SWI 2 "incdec_operand")
7397 (const_string "incdec")
7398 (const_string "alu")))
;; An "alu" insn whose operand 2 matches const128_operand is given an
;; immediate length of 1; everything else keeps the default ("*").
7399 (set (attr "length_immediate")
7400 (if_then_else
7401 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
7402 (const_string "1")
7403 (const_string "*")))
7404 (set_attr "mode" "<MODE>")])
7405
;; Add the 8-bit field extracted from operand 2 (size 8, position 8,
;; printed with %h2) to QImode operand 1, which is tied to the
;; destination operand 0.  Clobbers the flags and has no enabling
;; condition.
7406 (define_insn "*addqi_ext<mode>_0"
7407 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
7408 (plus:QI
7409 (subreg:QI
7410 (match_operator:SWI248 3 "extract_operator"
7411 [(match_operand 2 "int248_register_operand" "Q")
7412 (const_int 8)
7413 (const_int 8)]) 0)
7414 (match_operand:QI 1 "nonimmediate_operand" "0")))
7415 (clobber (reg:CC FLAGS_REG))]
7416 ""
7417 "add{b}\t{%h2, %0|%0, %h2}"
7418 [(set_attr "addr" "gpr8")
7419 (set_attr "type" "alu")
7420 (set_attr "mode" "QI")])
7421
;; Add two extracted 8-bit fields (size 8, position 8, of operands 1 and
;; 2) into QImode register operand 0.  There is no direct template
;; ("#"): after reload the insn is split into a copy of operand 2's field
;; into operand 0 followed by an add of operand 1's field.
7422 (define_insn_and_split "*addqi_ext2<mode>_0"
7423 [(set (match_operand:QI 0 "register_operand" "=&Q")
7424 (plus:QI
7425 (subreg:QI
7426 (match_operator:SWI248 3 "extract_operator"
7427 [(match_operand 1 "int248_register_operand" "Q")
7428 (const_int 8)
7429 (const_int 8)]) 0)
7430 (subreg:QI
7431 (match_operator:SWI248 4 "extract_operator"
7432 [(match_operand 2 "int248_register_operand" "Q")
7433 (const_int 8)
7434 (const_int 8)]) 0)))
7435 (clobber (reg:CC FLAGS_REG))]
7436 ""
7437 "#"
7438 "&& reload_completed"
;; Step 1: operand 0 = field extracted from operand 2.
7439 [(set (match_dup 0)
7440 (subreg:QI
7441 (match_op_dup 4
7442 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
;; Step 2: operand 0 += field extracted from operand 1.
7443 (parallel
7444 [(set (match_dup 0)
7445 (plus:QI
7446 (subreg:QI
7447 (match_op_dup 3
7448 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
7449 (match_dup 0)))
7450 (clobber (reg:CC FLAGS_REG))])]
7451 ""
7452 [(set_attr "type" "alu")
7453 (set_attr "mode" "QI")])
7454
;; Expander with no condition and no preparation code.  It generates a
;; flag-clobbering parallel that stores into the 8-bit field (size 8,
;; position 8) of HImode register operand 0 the QImode sum of the same
;; field of operand 1 and the constant operand 2.
7455 (define_expand "addqi_ext_1"
7456 [(parallel
7457 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
7458 (const_int 8)
7459 (const_int 8))
7460 (subreg:HI
7461 (plus:QI
7462 (subreg:QI
7463 (zero_extract:HI (match_operand:HI 1 "register_operand")
7464 (const_int 8)
7465 (const_int 8)) 0)
7466 (match_operand:QI 2 "const_int_operand")) 0))
7467 (clobber (reg:CC FLAGS_REG))])])
7468
7469 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Store into the 8-bit field (size 8, position 8) of operand 0 the sum
;; of the same field of operand 1 and QImode operand 2.  Alternative 0
;; ties operand 1 to operand 0 and is emitted directly on %h0.
;; Alternative 1 is emitted as "#" and, after reload, is split into a
;; field-to-field copy followed by the in-place add when operand 0 and
;; operand 1 differ.
7470 (define_insn_and_split "*addqi_ext<mode>_1"
7471 [(set (zero_extract:SWI248
7472 (match_operand 0 "int248_register_operand" "+Q,&Q")
7473 (const_int 8)
7474 (const_int 8))
7475 (subreg:SWI248
7476 (plus:QI
7477 (subreg:QI
7478 (match_operator:SWI248 3 "extract_operator"
7479 [(match_operand 1 "int248_register_operand" "0,!Q")
7480 (const_int 8)
7481 (const_int 8)]) 0)
7482 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
7483 (clobber (reg:CC FLAGS_REG))]
7484 ""
7485 {
/* Any alternative other than 0 has no direct template.  */
7486 if (which_alternative)
7487 return "#";
7488
7489 switch (get_attr_type (insn))
7490 {
7491 case TYPE_INCDEC:
7492 if (operands[2] == const1_rtx)
7493 return "inc{b}\t%h0";
7494 else
7495 {
7496 gcc_assert (operands[2] == constm1_rtx);
7497 return "dec{b}\t%h0";
7498 }
7499
7500 default:
7501 return "add{b}\t{%2, %h0|%h0, %2}";
7502 }
7503 }
7504 "reload_completed
7505 && !rtx_equal_p (operands[0], operands[1])"
;; Step 1: copy the field of operand 1 into the field of operand 0.
7506 [(set (zero_extract:SWI248
7507 (match_dup 0) (const_int 8) (const_int 8))
7508 (zero_extract:SWI248
7509 (match_dup 1) (const_int 8) (const_int 8)))
;; Step 2: add operand 2 to the field of operand 0 in place.
7510 (parallel
7511 [(set (zero_extract:SWI248
7512 (match_dup 0) (const_int 8) (const_int 8))
7513 (subreg:SWI248
7514 (plus:QI
7515 (subreg:QI
7516 (match_op_dup 3
7517 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
7518 (match_dup 2)) 0))
7519 (clobber (reg:CC FLAGS_REG))])]
7520 ""
7521 [(set_attr "addr" "gpr8")
;; "incdec" when operand 2 matches incdec_operand, otherwise "alu".
7522 (set (attr "type")
7523 (if_then_else (match_operand:QI 2 "incdec_operand")
7524 (const_string "incdec")
7525 (const_string "alu")))
7526 (set_attr "mode" "QI")])
7527
7528 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; plusminus (add or subtract) of two 8-bit fields (size 8, position 8,
;; of operands 1 and 2), stored into the same field of operand 0.
;; Alternative 0 ties operand 1 to operand 0 and is emitted directly on
;; %h0/%h2.  Alternative 1 is emitted as "#" and, after reload, is split
;; into a field copy from operand 1 followed by the in-place operation,
;; unless operand 0 equals operand 1 (or, for PLUS only, operand 2).
7529 (define_insn_and_split "*<insn>qi_ext<mode>_2"
7530 [(set (zero_extract:SWI248
7531 (match_operand 0 "int248_register_operand" "+Q,&Q")
7532 (const_int 8)
7533 (const_int 8))
7534 (subreg:SWI248
7535 (plusminus:QI
7536 (subreg:QI
7537 (match_operator:SWI248 3 "extract_operator"
7538 [(match_operand 1 "int248_register_operand" "<comm>0,!Q")
7539 (const_int 8)
7540 (const_int 8)]) 0)
7541 (subreg:QI
7542 (match_operator:SWI248 4 "extract_operator"
7543 [(match_operand 2 "int248_register_operand" "Q,Q")
7544 (const_int 8)
7545 (const_int 8)]) 0)) 0))
7546 (clobber (reg:CC FLAGS_REG))]
7547 ""
7548 "@
7549 <insn>{b}\t{%h2, %h0|%h0, %h2}
7550 #"
7551 "reload_completed
7552 && !(rtx_equal_p (operands[0], operands[1])
7553 || (<CODE> == PLUS && rtx_equal_p (operands[0], operands[2])))"
;; Step 1: copy the field of operand 1 into the field of operand 0.
7554 [(set (zero_extract:SWI248
7555 (match_dup 0) (const_int 8) (const_int 8))
7556 (zero_extract:SWI248
7557 (match_dup 1) (const_int 8) (const_int 8)))
;; Step 2: apply the operation with operand 2's field, in place.
7558 (parallel
7559 [(set (zero_extract:SWI248
7560 (match_dup 0) (const_int 8) (const_int 8))
7561 (subreg:SWI248
7562 (plusminus:QI
7563 (subreg:QI
7564 (match_op_dup 3
7565 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
7566 (subreg:QI
7567 (match_op_dup 4
7568 [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
7569 (clobber (reg:CC FLAGS_REG))])]
7570 ""
7571 [(set_attr "type" "alu")
7572 (set_attr "mode" "QI")])
7573
7574 ;; Like DWI, but use POImode instead of OImode.
;; Maps QI->HI, HI->SI, SI->DI, DI->TI and TI->POI; the addv<mode>4
;; expander below uses <DPWI> as the mode of its sign-extended operands.
7575 (define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])
7576
7577 ;; Add with jump on overflow.
;; Operand 0 receives operand 1 + operand 2.  The CCO flags result is
;; defined by comparing the sum of the operands widened to <DPWI> with
;; the sign-extended <MODE> sum; the following jump goes to the label in
;; operand 3 when (eq (reg:CCO FLAGS_REG) (const_int 0)) holds.
;; Operand 4 is the widened form of operand 2, set up in the C block.
7578 (define_expand "addv<mode>4"
7579 [(parallel [(set (reg:CCO FLAGS_REG)
7580 (eq:CCO
7581 (plus:<DPWI>
7582 (sign_extend:<DPWI>
7583 (match_operand:SWIDWI 1 "nonimmediate_operand"))
7584 (match_dup 4))
7585 (sign_extend:<DPWI>
7586 (plus:SWIDWI (match_dup 1)
7587 (match_operand:SWIDWI 2
7588 "<general_hilo_operand>")))))
7589 (set (match_operand:SWIDWI 0 "register_operand")
7590 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
7591 (set (pc) (if_then_else
7592 (eq (reg:CCO FLAGS_REG) (const_int 0))
7593 (label_ref (match_operand 3))
7594 (pc)))]
7595 ""
7596 {
7597 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
/* A constant operand 2 is used for operand 4 unchanged; anything else
   is wrapped in a sign_extend to <DPWI>mode.  */
7598 if (CONST_SCALAR_INT_P (operands[2]))
7599 operands[4] = operands[2];
7600 else
7601 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
7602 })
7603
;; Signed add that also sets the CCO flags: the flags compare the <DWI>-mode
;; sum of the sign-extended operands with the sign-extended SWI-mode sum.
;; Alternatives 0 and 1 tie operand 1 to operand 0 and emit the two-operand
;; add; alternatives 2 and 3 emit the three-operand form and are restricted
;; to isa apx_ndd.
7604 (define_insn "*addv<mode>4"
7605 [(set (reg:CCO FLAGS_REG)
7606 (eq:CCO (plus:<DWI>
7607 (sign_extend:<DWI>
7608 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
7609 (sign_extend:<DWI>
7610 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
7611 (sign_extend:<DWI>
7612 (plus:SWI (match_dup 1) (match_dup 2)))))
7613 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
7614 (plus:SWI (match_dup 1) (match_dup 2)))]
7615 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
7616 "@
7617 add{<imodesuffix>}\t{%2, %0|%0, %2}
7618 add{<imodesuffix>}\t{%2, %0|%0, %2}
7619 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7620 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7621 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7622 (set_attr "type" "alu")
7623 (set_attr "mode" "<MODE>")])
7624
;; Immediate form of the overflow-checking add.  Operand 2 is the immediate
;; in SWI mode and operand 3 is a <DWI>-mode const_int used in the wide sum;
;; the insn condition requires both to have the same INTVAL.  Alternative 1
;; is the three-operand apx_ndd form.
;; length_immediate is 1 when the immediate is in [-128, 127], otherwise 4
;; for 8-byte modes and <MODE_SIZE> for the remaining modes.
7625 (define_insn "addv<mode>4_1"
7626 [(set (reg:CCO FLAGS_REG)
7627 (eq:CCO (plus:<DWI>
7628 (sign_extend:<DWI>
7629 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
7630 (match_operand:<DWI> 3 "const_int_operand"))
7631 (sign_extend:<DWI>
7632 (plus:SWI
7633 (match_dup 1)
7634 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
7635 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
7636 (plus:SWI (match_dup 1) (match_dup 2)))]
7637 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
7638 && CONST_INT_P (operands[2])
7639 && INTVAL (operands[2]) == INTVAL (operands[3])"
7640 "@
7641 add{<imodesuffix>}\t{%2, %0|%0, %2}
7642 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7643 [(set_attr "isa" "*,apx_ndd")
7644 (set_attr "type" "alu")
7645 (set_attr "mode" "<MODE>")
7646 (set (attr "length_immediate")
7647 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7648 (const_string "1")
7649 (match_test "<MODE_SIZE> == 8")
7650 (const_string "4")]
7651 (const_string "<MODE_SIZE>")))])
7652
7653 ;; Quad word integer modes as mode attribute.
;; Maps SI to TI and DI to POI.  The doubleword overflow patterns below use
;; it as the mode in which two sign-extended <DWI> operands are combined.
7654 (define_mode_attr QPWI [(SI "TI") (DI "POI")])
7655
;; Doubleword (<DWI>-mode) signed add that sets the CCO flags.  It is always
;; emitted as "#" and split after reload into two half-word (DWIH) insns:
;; an add of the first halves that sets the CCC flags by comparing the sum
;; with operand 1, then an add of the second halves plus the carry
;; (ltu on the flags) that sets the CCO flags and writes operand 3.
;; split_double_mode is called on operands 0-2; the replacement template
;; then uses operands 0-2 for the first insn and operands 3-5 for the
;; second (presumably low and high halves respectively -- confirm against
;; split_double_mode).
;; Alternatives 2 and 3 use an early-clobber destination and are restricted
;; to isa apx_ndd.
7656 (define_insn_and_split "*addv<dwi>4_doubleword"
7657 [(set (reg:CCO FLAGS_REG)
7658 (eq:CCO
7659 (plus:<QPWI>
7660 (sign_extend:<QPWI>
7661 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r"))
7662 (sign_extend:<QPWI>
7663 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
7664 (sign_extend:<QPWI>
7665 (plus:<DWI> (match_dup 1) (match_dup 2)))))
7666 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
7667 (plus:<DWI> (match_dup 1) (match_dup 2)))]
7668 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
7669 "#"
7670 "&& reload_completed"
7671 [(parallel [(set (reg:CCC FLAGS_REG)
7672 (compare:CCC
7673 (plus:DWIH (match_dup 1) (match_dup 2))
7674 (match_dup 1)))
7675 (set (match_dup 0)
7676 (plus:DWIH (match_dup 1) (match_dup 2)))])
7677 (parallel [(set (reg:CCO FLAGS_REG)
7678 (eq:CCO
7679 (plus:<DWI>
7680 (plus:<DWI>
7681 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7682 (sign_extend:<DWI> (match_dup 4)))
7683 (sign_extend:<DWI> (match_dup 5)))
7684 (sign_extend:<DWI>
7685 (plus:DWIH
7686 (plus:DWIH
7687 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7688 (match_dup 4))
7689 (match_dup 5)))))
7690 (set (match_dup 3)
7691 (plus:DWIH
7692 (plus:DWIH
7693 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7694 (match_dup 4))
7695 (match_dup 5)))])]
7696 {
7697 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7698 }
7699 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7700
;; Doubleword signed add with overflow flags where operand 2 is a constant.
;; Operand 3 is the same constant in <QPWI> mode; the insn condition
;; requires operand 2 to be a constant scalar integer rtx_equal_p to
;; operand 3.  Always emitted as "#" and split after reload into a
;; first-half add that sets the CCC flags and a second-half add-with-carry
;; that sets the CCO flags, as in the non-constant doubleword pattern.
;; Special case in the preparation code: when operands[2] is const0_rtx
;; after split_double_mode, the first half is only copied (if source and
;; destination differ) and a single addv<mode>4_1 is emitted on operands
;; 3-5, passing operands[5] as both the immediate and its wide copy.
7701 (define_insn_and_split "*addv<dwi>4_doubleword_1"
7702 [(set (reg:CCO FLAGS_REG)
7703 (eq:CCO
7704 (plus:<QPWI>
7705 (sign_extend:<QPWI>
7706 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rjO"))
7707 (match_operand:<QPWI> 3 "const_scalar_int_operand" "n,n"))
7708 (sign_extend:<QPWI>
7709 (plus:<DWI>
7710 (match_dup 1)
7711 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
7712 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
7713 (plus:<DWI> (match_dup 1) (match_dup 2)))]
7714 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)
7715 && CONST_SCALAR_INT_P (operands[2])
7716 && rtx_equal_p (operands[2], operands[3])"
7717 "#"
7718 "&& reload_completed"
7719 [(parallel [(set (reg:CCC FLAGS_REG)
7720 (compare:CCC
7721 (plus:DWIH (match_dup 1) (match_dup 2))
7722 (match_dup 1)))
7723 (set (match_dup 0)
7724 (plus:DWIH (match_dup 1) (match_dup 2)))])
7725 (parallel [(set (reg:CCO FLAGS_REG)
7726 (eq:CCO
7727 (plus:<DWI>
7728 (plus:<DWI>
7729 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7730 (sign_extend:<DWI> (match_dup 4)))
7731 (match_dup 5))
7732 (sign_extend:<DWI>
7733 (plus:DWIH
7734 (plus:DWIH
7735 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7736 (match_dup 4))
7737 (match_dup 5)))))
7738 (set (match_dup 3)
7739 (plus:DWIH
7740 (plus:DWIH
7741 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7742 (match_dup 4))
7743 (match_dup 5)))])]
7744 {
7745 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7746 if (operands[2] == const0_rtx)
7747 {
7748 if (!rtx_equal_p (operands[0], operands[1]))
7749 emit_move_insn (operands[0], operands[1]);
7750 emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5],
7751 operands[5]));
7752 DONE;
7753 }
7754 }
7755 [(set_attr "isa" "*,apx_ndd")])
7756
;; Add-with-carry that sets the CCO flags.  Operators 4 and 5 are carry-flag
;; operators applied to the flags register operand 3, in <DWI> and SWI mode
;; respectively.  The flags compare the <DWI>-mode sum (carry + sign-extended
;; operand 1 + sign-extended operand 2) with the sign-extended SWI-mode sum,
;; and operand 0 receives the SWI-mode sum.  Emitted as adc; alternatives 2
;; and 3 are the three-operand apx_ndd form.
7757 (define_insn "*addv<mode>4_overflow_1"
7758 [(set (reg:CCO FLAGS_REG)
7759 (eq:CCO
7760 (plus:<DWI>
7761 (plus:<DWI>
7762 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7763 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7764 (sign_extend:<DWI>
7765 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")))
7766 (sign_extend:<DWI>
7767 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
7768 (sign_extend:<DWI>
7769 (plus:SWI
7770 (plus:SWI
7771 (match_operator:SWI 5 "ix86_carry_flag_operator"
7772 [(match_dup 3) (const_int 0)])
7773 (match_dup 1))
7774 (match_dup 2)))))
7775 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
7776 (plus:SWI
7777 (plus:SWI
7778 (match_op_dup 5 [(match_dup 3) (const_int 0)])
7779 (match_dup 1))
7780 (match_dup 2)))]
7781 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
7782 "@
7783 adc{<imodesuffix>}\t{%2, %0|%0, %2}
7784 adc{<imodesuffix>}\t{%2, %0|%0, %2}
7785 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7786 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7787 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7788 (set_attr "type" "alu")
7789 (set_attr "mode" "<MODE>")])
7790
;; Immediate form of the overflow-checking add-with-carry.  Operand 2 is the
;; SWI-mode immediate and operand 6 is a <DWI>-mode const_int used in the
;; wide sum; the insn condition requires both to have the same INTVAL.
;; Emitted as adc; alternative 1 is the three-operand apx_ndd form.
;; NOTE(review): length_immediate falls back to "4" for every mode, whereas
;; addv<mode>4_1 selects the fallback from <MODE_SIZE>.  This looks like an
;; over-estimate for modes with a shorter immediate -- confirm whether it is
;; intentional.
7791 (define_insn "*addv<mode>4_overflow_2"
7792 [(set (reg:CCO FLAGS_REG)
7793 (eq:CCO
7794 (plus:<DWI>
7795 (plus:<DWI>
7796 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7797 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7798 (sign_extend:<DWI>
7799 (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")))
7800 (match_operand:<DWI> 6 "const_int_operand" "n,n"))
7801 (sign_extend:<DWI>
7802 (plus:SWI
7803 (plus:SWI
7804 (match_operator:SWI 5 "ix86_carry_flag_operator"
7805 [(match_dup 3) (const_int 0)])
7806 (match_dup 1))
7807 (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
7808 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
7809 (plus:SWI
7810 (plus:SWI
7811 (match_op_dup 5 [(match_dup 3) (const_int 0)])
7812 (match_dup 1))
7813 (match_dup 2)))]
7814 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
7815 && CONST_INT_P (operands[2])
7816 && INTVAL (operands[2]) == INTVAL (operands[6])"
7817 "@
7818 adc{<imodesuffix>}\t{%2, %0|%0, %2}
7819 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7820 [(set_attr "isa" "*,apx_ndd")
7821 (set_attr "type" "alu")
7822 (set_attr "mode" "<MODE>")
7823 (set (attr "length_immediate")
7824 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7825 (const_string "1")
7826 (const_string "4")))])
7827
;; Expander for unsigned addition with a conditional jump.  The first
;; parallel stores operand 1 + operand 2 into operand 0 and sets the CCC
;; flags by comparing the sum with operand 1; the second insn jumps to label
;; operand 3 when ltu holds on those flags.
;; NOTE(review): the operand fixup here is called without TARGET_APX_NDD,
;; while subv<mode>4 passes it -- confirm whether that is intentional.
7828 (define_expand "uaddv<mode>4"
7829 [(parallel [(set (reg:CCC FLAGS_REG)
7830 (compare:CCC
7831 (plus:SWIDWI
7832 (match_operand:SWIDWI 1 "nonimmediate_operand")
7833 (match_operand:SWIDWI 2 "<general_hilo_operand>"))
7834 (match_dup 1)))
7835 (set (match_operand:SWIDWI 0 "register_operand")
7836 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
7837 (set (pc) (if_then_else
7838 (ltu (reg:CCC FLAGS_REG) (const_int 0))
7839 (label_ref (match_operand 3))
7840 (pc)))]
7841 ""
7842 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
7843
7844 ;; The lea patterns for modes less than 32 bits need to be matched by
7845 ;; several insns converted to real lea by splitters.
7846
;; SWI12-mode register + register + immediate.  Enabled unless
;; TARGET_PARTIAL_REG_STALL applies to a function not optimized for size.
;; Emitted as "#" and split after reload into the same sum in SImode, with
;; all four operands replaced by their SImode lowparts.
7847 (define_insn_and_split "*lea<mode>_general_1"
7848 [(set (match_operand:SWI12 0 "register_operand" "=r")
7849 (plus:SWI12
7850 (plus:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7851 (match_operand:SWI12 2 "register_operand" "r"))
7852 (match_operand:SWI12 3 "immediate_operand" "i")))]
7853 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7854 "#"
7855 "&& reload_completed"
7856 [(set (match_dup 0)
7857 (plus:SI
7858 (plus:SI (match_dup 1) (match_dup 2))
7859 (match_dup 3)))]
7860 {
7861 operands[0] = gen_lowpart (SImode, operands[0]);
7862 operands[1] = gen_lowpart (SImode, operands[1]);
7863 operands[2] = gen_lowpart (SImode, operands[2]);
7864 operands[3] = gen_lowpart (SImode, operands[3]);
7865 }
7866 [(set_attr "type" "lea")
7867 (set_attr "mode" "SI")])
7868
;; SWI12-mode register * constant (const248_operand) + register-or-immediate.
;; Emitted as "#" and split after reload into the same expression in SImode;
;; operands 0, 1 and 3 are replaced by their SImode lowparts and the scale
;; operand 2 is reused unchanged.
7869 (define_insn_and_split "*lea<mode>_general_2"
7870 [(set (match_operand:SWI12 0 "register_operand" "=r")
7871 (plus:SWI12
7872 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7873 (match_operand 2 "const248_operand" "n"))
7874 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
7875 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7876 "#"
7877 "&& reload_completed"
7878 [(set (match_dup 0)
7879 (plus:SI
7880 (mult:SI (match_dup 1) (match_dup 2))
7881 (match_dup 3)))]
7882 {
7883 operands[0] = gen_lowpart (SImode, operands[0]);
7884 operands[1] = gen_lowpart (SImode, operands[1]);
7885 operands[3] = gen_lowpart (SImode, operands[3]);
7886 }
7887 [(set_attr "type" "lea")
7888 (set_attr "mode" "SI")])
7889
;; Shift variant of the scaled SWI12 pattern: (register << constant) +
;; register-or-immediate, with the shift count a const123_operand.  Emitted
;; as "#" and split after reload into the same ashift/plus expression in
;; SImode using the SImode lowparts of operands 0, 1 and 3.
7890 (define_insn_and_split "*lea<mode>_general_2b"
7891 [(set (match_operand:SWI12 0 "register_operand" "=r")
7892 (plus:SWI12
7893 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7894 (match_operand 2 "const123_operand" "n"))
7895 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
7896 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7897 "#"
7898 "&& reload_completed"
7899 [(set (match_dup 0)
7900 (plus:SI
7901 (ashift:SI (match_dup 1) (match_dup 2))
7902 (match_dup 3)))]
7903 {
7904 operands[0] = gen_lowpart (SImode, operands[0]);
7905 operands[1] = gen_lowpart (SImode, operands[1]);
7906 operands[3] = gen_lowpart (SImode, operands[3]);
7907 }
7908 [(set_attr "type" "lea")
7909 (set_attr "mode" "SI")])
7910
;; SWI12-mode register * constant (const248_operand) + register + immediate.
;; Emitted as "#" and split after reload into the same expression in SImode;
;; operands 0, 1, 3 and 4 are replaced by their SImode lowparts and the
;; scale operand 2 is reused unchanged.
7911 (define_insn_and_split "*lea<mode>_general_3"
7912 [(set (match_operand:SWI12 0 "register_operand" "=r")
7913 (plus:SWI12
7914 (plus:SWI12
7915 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7916 (match_operand 2 "const248_operand" "n"))
7917 (match_operand:SWI12 3 "register_operand" "r"))
7918 (match_operand:SWI12 4 "immediate_operand" "i")))]
7919 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7920 "#"
7921 "&& reload_completed"
7922 [(set (match_dup 0)
7923 (plus:SI
7924 (plus:SI
7925 (mult:SI (match_dup 1) (match_dup 2))
7926 (match_dup 3))
7927 (match_dup 4)))]
7928 {
7929 operands[0] = gen_lowpart (SImode, operands[0]);
7930 operands[1] = gen_lowpart (SImode, operands[1]);
7931 operands[3] = gen_lowpart (SImode, operands[3]);
7932 operands[4] = gen_lowpart (SImode, operands[4]);
7933 }
7934 [(set_attr "type" "lea")
7935 (set_attr "mode" "SI")])
7936
;; Shift variant of the three-term SWI12 pattern: (register << constant) +
;; register + immediate, with the shift count a const123_operand.  Emitted
;; as "#" and split after reload into the same expression in SImode using
;; the SImode lowparts of operands 0, 1, 3 and 4.
7937 (define_insn_and_split "*lea<mode>_general_3b"
7938 [(set (match_operand:SWI12 0 "register_operand" "=r")
7939 (plus:SWI12
7940 (plus:SWI12
7941 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7942 (match_operand 2 "const123_operand" "n"))
7943 (match_operand:SWI12 3 "register_operand" "r"))
7944 (match_operand:SWI12 4 "immediate_operand" "i")))]
7945 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7946 "#"
7947 "&& reload_completed"
7948 [(set (match_dup 0)
7949 (plus:SI
7950 (plus:SI
7951 (ashift:SI (match_dup 1) (match_dup 2))
7952 (match_dup 3))
7953 (match_dup 4)))]
7954 {
7955 operands[0] = gen_lowpart (SImode, operands[0]);
7956 operands[1] = gen_lowpart (SImode, operands[1]);
7957 operands[3] = gen_lowpart (SImode, operands[3]);
7958 operands[4] = gen_lowpart (SImode, operands[4]);
7959 }
7960 [(set_attr "type" "lea")
7961 (set_attr "mode" "SI")])
7962
;; SWI12-mode any_or of (register << count) with a constant, for shift
;; counts accepted by const_0_to_3_operand.  The insn condition requires the
;; constant, taken as unsigned, to be smaller than 1 << count, i.e. to fit
;; entirely in the bits vacated by the shift, so the any_or can be rewritten
;; as an addition.  Emitted as "#" and split after reload into an SImode
;; multiply-and-add: operands 0 and 1 become SImode lowparts and operand 2
;; is replaced by the scale 1 << count.
7963 (define_insn_and_split "*lea<mode>_general_4"
7964 [(set (match_operand:SWI12 0 "register_operand" "=r")
7965 (any_or:SWI12
7966 (ashift:SWI12
7967 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7968 (match_operand 2 "const_0_to_3_operand"))
7969 (match_operand 3 "const_int_operand")))]
7970 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
7971 && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
7972 < (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
7973 "#"
7974 "&& reload_completed"
7975 [(set (match_dup 0)
7976 (plus:SI
7977 (mult:SI (match_dup 1) (match_dup 2))
7978 (match_dup 3)))]
7979 {
7980 operands[0] = gen_lowpart (SImode, operands[0]);
7981 operands[1] = gen_lowpart (SImode, operands[1]);
7982 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
7983 }
7984 [(set_attr "type" "lea")
7985 (set_attr "mode" "SI")])
7986
;; SWI48-mode counterpart of the any_or pattern above: any_or of
;; (register << count) with a constant that, taken as unsigned, is smaller
;; than 1 << count.  Emitted as "#" and split after reload into a
;; multiply-and-add in the same mode, with operand 2 replaced by the scale
;; 1 << count.  No lowpart conversion is done here.
7987 (define_insn_and_split "*lea<mode>_general_4"
7988 [(set (match_operand:SWI48 0 "register_operand" "=r")
7989 (any_or:SWI48
7990 (ashift:SWI48
7991 (match_operand:SWI48 1 "register_no_SP_operand" "l")
7992 (match_operand 2 "const_0_to_3_operand"))
7993 (match_operand 3 "const_int_operand")))]
7994 "(unsigned HOST_WIDE_INT) INTVAL (operands[3])
7995 < (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
7996 "#"
7997 "&& reload_completed"
7998 [(set (match_dup 0)
7999 (plus:SWI48
8000 (mult:SWI48 (match_dup 1) (match_dup 2))
8001 (match_dup 3)))]
8002 "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
8003 [(set_attr "type" "lea")
8004 (set_attr "mode" "<MODE>")])
8005 \f
8006 ;; Subtract instructions
8007
;; Generic subtract expander for the SDWIM modes: operand 0 = operand 1 -
;; operand 2.  The RTL template is not emitted directly; the preparation
;; code hands the operands to ix86_expand_binary_operator with MINUS and
;; TARGET_APX_NDD and then finishes with DONE.
8008 (define_expand "sub<mode>3"
8009 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
8010 (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
8011 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
8012 ""
8013 {
8014 ix86_expand_binary_operator (MINUS, <MODE>mode, operands, TARGET_APX_NDD);
8015 DONE;
8016 })
8017
;; Doubleword (<DWI>-mode) subtract that clobbers the flags.  Always emitted
;; as "#" and split after reload into two DWIH insns: a subtract of the
;; first halves that also sets the CC flags from comparing operands 1 and 2,
;; then a subtract of the second halves minus the borrow (ltu on the flags).
;; Special case in the preparation code: when operands[2] is const0_rtx
;; after split_double_mode, the first half is only copied (if source and
;; destination differ) and a single subtract is expanded on operands 3-5 via
;; ix86_expand_binary_operator.
;; Alternatives 2 and 3 use an early-clobber destination and are restricted
;; to isa apx_ndd.
8018 (define_insn_and_split "*sub<dwi>3_doubleword"
8019 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
8020 (minus:<DWI>
8021 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r")
8022 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
8023 (clobber (reg:CC FLAGS_REG))]
8024 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8025 "#"
8026 "&& reload_completed"
8027 [(parallel [(set (reg:CC FLAGS_REG)
8028 (compare:CC (match_dup 1) (match_dup 2)))
8029 (set (match_dup 0)
8030 (minus:DWIH (match_dup 1) (match_dup 2)))])
8031 (parallel [(set (match_dup 3)
8032 (minus:DWIH
8033 (minus:DWIH
8034 (match_dup 4)
8035 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8036 (match_dup 5)))
8037 (clobber (reg:CC FLAGS_REG))])]
8038 {
8039 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8040 if (operands[2] == const0_rtx)
8041 {
8042 if (!rtx_equal_p (operands[0], operands[1]))
8043 emit_move_insn (operands[0], operands[1]);
8044 ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3],
8045 TARGET_APX_NDD);
8046 DONE;
8047 }
8048 }
8049 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
8050
;; Doubleword subtract whose subtrahend is a DWIH-mode operand zero-extended
;; to <DWI>.  Always emitted as "#" and split after reload: the first insn
;; subtracts operand 2 from the first half and sets the CC flags, the second
;; subtracts only the borrow (and a literal zero) from the other half.
;; split_double_mode is applied to operands 0 and 1 only (count 2), since
;; operand 2 is already DWIH mode; the second-half pieces land in operands
;; 3 and 4.
8051 (define_insn_and_split "*sub<dwi>3_doubleword_zext"
8052 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
8053 (minus:<DWI>
8054 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")
8055 (zero_extend:<DWI>
8056 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))))
8057 (clobber (reg:CC FLAGS_REG))]
8058 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
8059 "#"
8060 "&& reload_completed"
8061 [(parallel [(set (reg:CC FLAGS_REG)
8062 (compare:CC (match_dup 1) (match_dup 2)))
8063 (set (match_dup 0)
8064 (minus:DWIH (match_dup 1) (match_dup 2)))])
8065 (parallel [(set (match_dup 3)
8066 (minus:DWIH
8067 (minus:DWIH
8068 (match_dup 4)
8069 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8070 (const_int 0)))
8071 (clobber (reg:CC FLAGS_REG))])]
8072 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
8073 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
8074
;; Plain SWI-mode subtract, operand 0 = operand 1 - operand 2, instantiated
;; through the <nf_name>/<nf_condition>/<nf_prefix> substitutions (has_nf is
;; set to 1).  Alternatives 0-2 tie operand 1 to operand 0 and emit the
;; two-operand sub; alternatives 3-5 emit the three-operand form and are
;; restricted to isa apx_ndd.
8075 (define_insn "*sub<mode>_1<nf_name>"
8076 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r<nf_mem_constraint>,<r>,r,r,r")
8077 (minus:SWI
8078 (match_operand:SWI 1 "nonimmediate_operand" "0,0,0,rm,rjM,r")
8079 (match_operand:SWI 2 "<general_operand>" "<r>,<i>,<m>,r,<i>,<m>")))]
8080 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8081 && <nf_condition>"
8082 "@
8083 <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}
8084 <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}
8085 <nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}
8086 <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8087 <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8088 <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8089 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
8090 (set_attr "type" "alu")
8091 (set_attr "has_nf" "1")
8092 (set_attr "mode" "<MODE>")])
8093
;; QImode subtract whose result is zero-extended into a SWI248x register.
;; Requires TARGET_APX_NDD and rejects the case where both sources are
;; memory.  Emitted as a three-operand byte sub writing the byte part (%b0)
;; of operand 0.
8094 (define_insn "*subqi_1_zext<mode><nf_name>"
8095 [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
8096 (zero_extend:SWI248x
8097 (minus:QI (match_operand:QI 1 "nonimmediate_operand" "rm,r")
8098 (match_operand:QI 2 "x86_64_general_operand" "rn,m"))))]
8099 "TARGET_APX_NDD && <nf_condition>
8100 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8101 "@
8102 <nf_prefix>sub{b}\t{%2, %1, %b0|%b0, %1, %2}
8103 <nf_prefix>sub{b}\t{%2, %1, %b0|%b0, %1, %2}"
8104 [(set_attr "type" "alu")
8105 (set_attr "has_nf" "1")
8106 (set_attr "mode" "QI")])
8107
;; HImode subtract whose result is zero-extended into a SWI48x register.
;; Requires TARGET_APX_NDD and rejects the case where both sources are
;; memory.  Emitted as a three-operand word sub writing the word part (%w0)
;; of operand 0.
8108 (define_insn "*subhi_1_zext<mode><nf_name>"
8109 [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
8110 (zero_extend:SWI48x
8111 (minus:HI (match_operand:HI 1 "nonimmediate_operand" "rm,r")
8112 (match_operand:HI 2 "x86_64_general_operand" "rn,m"))))]
8113 "TARGET_APX_NDD && <nf_condition>
8114 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8115 "@
8116 <nf_prefix>sub{w}\t{%2, %1, %w0|%w0, %1, %2}
8117 <nf_prefix>sub{w}\t{%2, %1, %w0|%w0, %1, %2}"
8118 [(set_attr "type" "alu")
8119 (set_attr "has_nf" "1")
8120 (set_attr "mode" "HI")])
8121
;; SImode subtract whose result is zero-extended into a DImode register;
;; clobbers the flags and requires TARGET_64BIT.  The output templates write
;; the %k0 form of operand 0.  Alternative 0 ties operand 1 to operand 0;
;; alternatives 1 and 2 are the three-operand apx_ndd form.
8122 (define_insn "*subsi_1_zext"
8123 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8124 (zero_extend:DI
8125 (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8126 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
8127 (clobber (reg:CC FLAGS_REG))]
8128 "TARGET_64BIT
8129 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
8130 "@
8131 sub{l}\t{%2, %k0|%k0, %2}
8132 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
8133 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
8134 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8135 (set_attr "type" "alu")
8136 (set_attr "mode" "SI")])
8137
8138 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; SWI12-mode subtract into the strict_low_part of operand 0.
;; Alternative 0 ties operand 1 to operand 0 and emits one sub.
;; Alternative 1 emits "#"; after reload, when operand 0 differs from
;; operand 1, it is split into a strict_low_part copy of operand 1 into
;; operand 0 followed by the tied subtract.
8139 (define_insn_and_split "*sub<mode>_1_slp"
8140 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
8141 (minus:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
8142 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
8143 (clobber (reg:CC FLAGS_REG))]
8144 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
8145 "@
8146 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8147 #"
8148 "&& reload_completed
8149 && !(rtx_equal_p (operands[0], operands[1]))"
8150 [(set (strict_low_part (match_dup 0)) (match_dup 1))
8151 (parallel
8152 [(set (strict_low_part (match_dup 0))
8153 (minus:SWI12 (match_dup 0) (match_dup 2)))
8154 (clobber (reg:CC FLAGS_REG))])]
8155 ""
8156 [(set_attr "type" "alu")
8157 (set_attr "mode" "<MODE>")])
8158
8159 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode subtract into the strict_low_part of operand 0, where the
;; subtrahend is the 8-bit field at bit offset 8 of operand 2 (printed as
;; %h2).  Alternative 0 ties operand 1 to operand 0.  Alternative 1 emits
;; "#"; after reload, when operand 0 differs from operand 1, it is split
;; into a strict_low_part copy of operand 1 followed by the tied subtract.
8160 (define_insn_and_split "*subqi_ext<mode>_1_slp"
8161 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
8162 (minus:QI
8163 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")
8164 (subreg:QI
8165 (match_operator:SWI248 3 "extract_operator"
8166 [(match_operand 2 "int248_register_operand" "Q,Q")
8167 (const_int 8)
8168 (const_int 8)]) 0)))
8169 (clobber (reg:CC FLAGS_REG))]
8170 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
8171 "@
8172 sub{b}\t{%h2, %0|%0, %h2}
8173 #"
8174 "&& reload_completed
8175 && !rtx_equal_p (operands[0], operands[1])"
8176 [(set (strict_low_part (match_dup 0)) (match_dup 1))
8177 (parallel
8178 [(set (strict_low_part (match_dup 0))
8179 (minus:QI
8180 (match_dup 0)
8181 (subreg:QI
8182 (match_op_dup 3
8183 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
8184 (clobber (reg:CC FLAGS_REG))])]
8185 ""
8186 [(set_attr "type" "alu")
8187 (set_attr "mode" "QI")])
8188
;; QImode subtract into the strict_low_part of operand 0 where both sources
;; are 8-bit fields at bit offset 8 (of operands 1 and 2).  There is no
;; direct output form: the insn is always "#" and is split after reload into
;; a strict_low_part copy of operand 1's field into operand 0, followed by a
;; subtract of operand 2's field from operand 0.  Operand 0 is early-clobber.
8189 (define_insn_and_split "*subqi_ext<mode>_2_slp"
8190 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
8191 (minus:QI
8192 (subreg:QI
8193 (match_operator:SWI248 3 "extract_operator"
8194 [(match_operand 1 "int248_register_operand" "Q")
8195 (const_int 8)
8196 (const_int 8)]) 0)
8197 (subreg:QI
8198 (match_operator:SWI248 4 "extract_operator"
8199 [(match_operand 2 "int248_register_operand" "Q")
8200 (const_int 8)
8201 (const_int 8)]) 0)))
8202 (clobber (reg:CC FLAGS_REG))]
8203 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
8204 "#"
8205 "&& reload_completed"
8206 [(set (strict_low_part (match_dup 0))
8207 (subreg:QI
8208 (match_op_dup 3
8209 [(match_dup 1) (const_int 8) (const_int 8)]) 0))
8210 (parallel
8211 [(set (strict_low_part (match_dup 0))
8212 (minus:QI
8213 (match_dup 0)
8214 (subreg:QI
8215 (match_op_dup 4
8216 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
8217 (clobber (reg:CC FLAGS_REG))])]
8218 ""
8219 [(set_attr "type" "alu")
8220 (set_attr "mode" "QI")])
8221
;; SWI-mode subtract that both stores the difference in operand 0 and sets
;; the flags register from comparing that difference with zero.  Only valid
;; when ix86_match_ccmode accepts CCGOCmode for the insn.  Alternatives 0
;; and 1 tie operand 1 to operand 0; alternatives 2 and 3 are the
;; three-operand apx_ndd form.
8222 (define_insn "*sub<mode>_2"
8223 [(set (reg FLAGS_REG)
8224 (compare
8225 (minus:SWI
8226 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
8227 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
8228 (const_int 0)))
8229 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8230 (minus:SWI (match_dup 1) (match_dup 2)))]
8231 "ix86_match_ccmode (insn, CCGOCmode)
8232 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8233 "@
8234 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8235 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8236 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8237 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8238 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8239 (set_attr "type" "alu")
8240 (set_attr "mode" "<MODE>")])
8241
;; SImode subtract that sets the flags from comparing the difference with
;; zero and stores the difference zero-extended into a DImode register.
;; Requires TARGET_64BIT and a flags mode accepted by ix86_match_ccmode for
;; CCGOCmode.  Alternative 0 ties operand 1 to operand 0; alternatives 1 and
;; 2 are the three-operand apx_ndd form.
8242 (define_insn "*subsi_2_zext"
8243 [(set (reg FLAGS_REG)
8244 (compare
8245 (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8246 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
8247 (const_int 0)))
8248 (set (match_operand:DI 0 "register_operand" "=r,r,r")
8249 (zero_extend:DI
8250 (minus:SI (match_dup 1)
8251 (match_dup 2))))]
8252 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
8253 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
8254 "@
8255 sub{l}\t{%2, %k0|%k0, %2}
8256 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
8257 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
8258 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8259 (set_attr "type" "alu")
8260 (set_attr "mode" "SI")])
8261
;; QImode subtract of the 8-bit field at bit offset 8 of operand 2 (printed
;; as %h2) from operand 1, which is tied to the destination operand 0.
;; Clobbers the flags.
8262 (define_insn "*subqi_ext<mode>_0"
8263 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
8264 (minus:QI
8265 (match_operand:QI 1 "nonimmediate_operand" "0")
8266 (subreg:QI
8267 (match_operator:SWI248 3 "extract_operator"
8268 [(match_operand 2 "int248_register_operand" "Q")
8269 (const_int 8)
8270 (const_int 8)]) 0)))
8271 (clobber (reg:CC FLAGS_REG))]
8272 ""
8273 "sub{b}\t{%h2, %0|%0, %h2}"
8274 [(set_attr "addr" "gpr8")
8275 (set_attr "type" "alu")
8276 (set_attr "mode" "QI")])
8277
;; QImode difference of two 8-bit fields at bit offset 8 (of operands 1 and
;; 2), stored in the early-clobber register operand 0.  There is no direct
;; output form: the insn is always "#" and is split after reload into a copy
;; of operand 1's field into operand 0, followed by a subtract of operand
;; 2's field from operand 0.
8278 (define_insn_and_split "*subqi_ext2<mode>_0"
8279 [(set (match_operand:QI 0 "register_operand" "=&Q")
8280 (minus:QI
8281 (subreg:QI
8282 (match_operator:SWI248 3 "extract_operator"
8283 [(match_operand 1 "int248_register_operand" "Q")
8284 (const_int 8)
8285 (const_int 8)]) 0)
8286 (subreg:QI
8287 (match_operator:SWI248 4 "extract_operator"
8288 [(match_operand 2 "int248_register_operand" "Q")
8289 (const_int 8)
8290 (const_int 8)]) 0)))
8291 (clobber (reg:CC FLAGS_REG))]
8292 ""
8293 "#"
8294 "&& reload_completed"
8295 [(set (match_dup 0)
8296 (subreg:QI
8297 (match_op_dup 3
8298 [(match_dup 1) (const_int 8) (const_int 8)]) 0))
8299 (parallel
8300 [(set (match_dup 0)
8301 (minus:QI
8302 (match_dup 0)
8303 (subreg:QI
8304 (match_op_dup 4
8305 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
8306 (clobber (reg:CC FLAGS_REG))])]
8307 ""
8308 [(set_attr "type" "alu")
8309 (set_attr "mode" "QI")])
8310
8311 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; High-byte subtract: the 8-bit field at bit offset 8 of operand 0 is set
;; to the same field of operand 1 minus the QImode operand 2, clobbering the
;; flags.  Alternative 0 ties operand 1 to operand 0 and emits one sub on
;; %h0.  Alternative 1 emits "#"; after reload, when operand 0 differs from
;; operand 1, it is split into a copy of operand 1's field into operand 0
;; followed by the tied subtract.
8312 (define_insn_and_split "*subqi_ext<mode>_1"
8313 [(set (zero_extract:SWI248
8314 (match_operand 0 "int248_register_operand" "+Q,&Q")
8315 (const_int 8)
8316 (const_int 8))
8317 (subreg:SWI248
8318 (minus:QI
8319 (subreg:QI
8320 (match_operator:SWI248 3 "extract_operator"
8321 [(match_operand 1 "int248_register_operand" "0,!Q")
8322 (const_int 8)
8323 (const_int 8)]) 0)
8324 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
8325 (clobber (reg:CC FLAGS_REG))]
8326 ""
8327 "@
8328 sub{b}\t{%2, %h0|%h0, %2}
8329 #"
8330 "reload_completed
8331 && !(rtx_equal_p (operands[0], operands[1]))"
8332 [(set (zero_extract:SWI248
8333 (match_dup 0) (const_int 8) (const_int 8))
8334 (zero_extract:SWI248
8335 (match_dup 1) (const_int 8) (const_int 8)))
8336 (parallel
8337 [(set (zero_extract:SWI248
8338 (match_dup 0) (const_int 8) (const_int 8))
8339 (subreg:SWI248
8340 (minus:QI
8341 (subreg:QI
8342 (match_op_dup 3
8343 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
8344 (match_dup 2)) 0))
8345 (clobber (reg:CC FLAGS_REG))])]
8346 ""
8347 [(set_attr "addr" "gpr8")
8348 (set_attr "type" "alu")
8349 (set_attr "mode" "QI")])
8350
8351 ;; Subtract with jump on overflow.
;; Expander for signed subtraction with a jump on overflow.  Operand 0
;; receives operand 1 - operand 2; the first parallel also sets the CCO
;; flags by comparing the difference formed in the wide mode <DPWI> with the
;; sign-extended narrow difference, and the second insn jumps to label
;; operand 3 depending on those flags.  operands[4] is the wide form of
;; operand 2: the constant itself when operand 2 is a constant scalar
;; integer, otherwise its sign_extend.
8352 (define_expand "subv<mode>4"
8353 [(parallel [(set (reg:CCO FLAGS_REG)
8354 (eq:CCO
8355 (minus:<DPWI>
8356 (sign_extend:<DPWI>
8357 (match_operand:SWIDWI 1 "nonimmediate_operand"))
8358 (match_dup 4))
8359 (sign_extend:<DPWI>
8360 (minus:SWIDWI (match_dup 1)
8361 (match_operand:SWIDWI 2
8362 "<general_hilo_operand>")))))
8363 (set (match_operand:SWIDWI 0 "register_operand")
8364 (minus:SWIDWI (match_dup 1) (match_dup 2)))])
8365 (set (pc) (if_then_else
8366 (eq (reg:CCO FLAGS_REG) (const_int 0))
8367 (label_ref (match_operand 3))
8368 (pc)))]
8369 ""
8370 {
8371 ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
8372 TARGET_APX_NDD);
8373 if (CONST_SCALAR_INT_P (operands[2]))
8374 operands[4] = operands[2];
8375 else
8376 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
8377 })
8378
;; Signed subtract that also sets the CCO flags: the flags compare the
;; <DWI>-mode difference of the sign-extended operands with the
;; sign-extended SWI-mode difference.  Alternatives 0 and 1 tie operand 1 to
;; operand 0 and emit the two-operand sub; alternatives 2 and 3 emit the
;; three-operand form and are restricted to isa apx_ndd.
8379 (define_insn "*subv<mode>4"
8380 [(set (reg:CCO FLAGS_REG)
8381 (eq:CCO (minus:<DWI>
8382 (sign_extend:<DWI>
8383 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
8384 (sign_extend:<DWI>
8385 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
8386 (sign_extend:<DWI>
8387 (minus:SWI (match_dup 1) (match_dup 2)))))
8388 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8389 (minus:SWI (match_dup 1) (match_dup 2)))]
8390 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8391 "@
8392 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8393 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8394 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8395 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8396 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8397 (set_attr "type" "alu")
8398 (set_attr "mode" "<MODE>")])
8399
;; Immediate form of the overflow-checking subtract.  Operand 2 is the
;; immediate in SWI mode and operand 3 is a <DWI>-mode const_int used in the
;; wide difference; the insn condition requires both to have the same
;; INTVAL.  Alternative 1 is the three-operand apx_ndd form.
;; length_immediate is 1 when the immediate is in [-128, 127], otherwise 4
;; for 8-byte modes and <MODE_SIZE> for the remaining modes.
8400 (define_insn "subv<mode>4_1"
8401 [(set (reg:CCO FLAGS_REG)
8402 (eq:CCO (minus:<DWI>
8403 (sign_extend:<DWI>
8404 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
8405 (match_operand:<DWI> 3 "const_int_operand"))
8406 (sign_extend:<DWI>
8407 (minus:SWI
8408 (match_dup 1)
8409 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
8410 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
8411 (minus:SWI (match_dup 1) (match_dup 2)))]
8412 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8413 && CONST_INT_P (operands[2])
8414 && INTVAL (operands[2]) == INTVAL (operands[3])"
8415 "@
8416 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8417 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8418 [(set_attr "isa" "*,apx_ndd")
8419 (set_attr "type" "alu")
8420 (set_attr "mode" "<MODE>")
8421 (set (attr "length_immediate")
8422 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8423 (const_string "1")
8424 (match_test "<MODE_SIZE> == 8")
8425 (const_string "4")]
8426 (const_string "<MODE_SIZE>")))])
8427
;; Doubleword (<DWI>-mode) signed subtract that sets the CCO flags.  Always
;; emitted as "#" and split after reload into two DWIH insns: a subtract of
;; the first halves that sets the CC flags from comparing operands 1 and 2,
;; then a subtract of the second halves minus the borrow (ltu on the flags)
;; that sets the CCO flags and writes operand 3.
;; split_double_mode is called on operands 0-2; the replacement template
;; uses operands 0-2 for the first insn and operands 3-5 for the second.
;; Alternatives 2 and 3 use an early-clobber destination and are restricted
;; to isa apx_ndd.
8428 (define_insn_and_split "*subv<dwi>4_doubleword"
8429 [(set (reg:CCO FLAGS_REG)
8430 (eq:CCO
8431 (minus:<QPWI>
8432 (sign_extend:<QPWI>
8433 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r"))
8434 (sign_extend:<QPWI>
8435 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
8436 (sign_extend:<QPWI>
8437 (minus:<DWI> (match_dup 1) (match_dup 2)))))
8438 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
8439 (minus:<DWI> (match_dup 1) (match_dup 2)))]
8440 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8441 "#"
8442 "&& reload_completed"
8443 [(parallel [(set (reg:CC FLAGS_REG)
8444 (compare:CC (match_dup 1) (match_dup 2)))
8445 (set (match_dup 0)
8446 (minus:DWIH (match_dup 1) (match_dup 2)))])
8447 (parallel [(set (reg:CCO FLAGS_REG)
8448 (eq:CCO
8449 (minus:<DWI>
8450 (minus:<DWI>
8451 (sign_extend:<DWI> (match_dup 4))
8452 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
8453 (sign_extend:<DWI> (match_dup 5)))
8454 (sign_extend:<DWI>
8455 (minus:DWIH
8456 (minus:DWIH
8457 (match_dup 4)
8458 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8459 (match_dup 5)))))
8460 (set (match_dup 3)
8461 (minus:DWIH
8462 (minus:DWIH
8463 (match_dup 4)
8464 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8465 (match_dup 5)))])]
8466 {
8467 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8468 }
8469 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
8470
;; Doubleword signed subtract with overflow flags where operand 2 is a
;; constant.  Operand 3 is the same constant in <QPWI> mode; the insn
;; condition requires operand 2 to be a constant scalar integer rtx_equal_p
;; to operand 3.  Always emitted as "#" and split after reload into a
;; first-half subtract that sets the CC flags and a second-half
;; subtract-with-borrow that sets the CCO flags.
;; Special case in the preparation code: when operands[2] is const0_rtx
;; after split_double_mode, the first half is only copied (if source and
;; destination differ) and a single subv<mode>4_1 is emitted on operands
;; 3-5, passing operands[5] as both the immediate and its wide copy.
8471 (define_insn_and_split "*subv<dwi>4_doubleword_1"
8472 [(set (reg:CCO FLAGS_REG)
8473 (eq:CCO
8474 (minus:<QPWI>
8475 (sign_extend:<QPWI>
8476 (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro"))
8477 (match_operand:<QPWI> 3 "const_scalar_int_operand"))
8478 (sign_extend:<QPWI>
8479 (minus:<DWI>
8480 (match_dup 1)
8481 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
8482 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
8483 (minus:<DWI> (match_dup 1) (match_dup 2)))]
8484 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8485 && CONST_SCALAR_INT_P (operands[2])
8486 && rtx_equal_p (operands[2], operands[3])"
8487 "#"
8488 "&& reload_completed"
8489 [(parallel [(set (reg:CC FLAGS_REG)
8490 (compare:CC (match_dup 1) (match_dup 2)))
8491 (set (match_dup 0)
8492 (minus:DWIH (match_dup 1) (match_dup 2)))])
8493 (parallel [(set (reg:CCO FLAGS_REG)
8494 (eq:CCO
8495 (minus:<DWI>
8496 (minus:<DWI>
8497 (sign_extend:<DWI> (match_dup 4))
8498 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
8499 (match_dup 5))
8500 (sign_extend:<DWI>
8501 (minus:DWIH
8502 (minus:DWIH
8503 (match_dup 4)
8504 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8505 (match_dup 5)))))
8506 (set (match_dup 3)
8507 (minus:DWIH
8508 (minus:DWIH
8509 (match_dup 4)
8510 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8511 (match_dup 5)))])]
8512 {
8513 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8514 if (operands[2] == const0_rtx)
8515 {
8516 if (!rtx_equal_p (operands[0], operands[1]))
8517 emit_move_insn (operands[0], operands[1]);
8518 emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
8519 operands[5]));
8520 DONE;
8521 }
8522 }
8523 [(set_attr "isa" "*,apx_ndd")])
8524
;; Subtract with borrow that also sets the overflow flag (CCO):
;; operand 0 = operand 1 - carry - operand 2, with CCO recording whether
;; the sign-extended <DWI>-mode result equals the sign extension of the
;; SWI-mode result.  Alternatives 0-1 are the two-operand SBB tied to
;; operand 1; alternatives 2-3 are three-operand forms enabled only for
;; isa "apx_ndd".
;;
;; Operand 1 must NOT carry the commutative modifier '%': subtraction is
;; not commutative, and '%' would allow the register allocator to swap
;; operands 1 and 2, computing operand 2 - carry - operand 1 instead.
8525 (define_insn "*subv<mode>4_overflow_1"
8526 [(set (reg:CCO FLAGS_REG)
8527 (eq:CCO
8528 (minus:<DWI>
8529 (minus:<DWI>
8530 (sign_extend:<DWI>
8531 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
8532 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8533 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8534 (sign_extend:<DWI>
8535 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
8536 (sign_extend:<DWI>
8537 (minus:SWI
8538 (minus:SWI
8539 (match_dup 1)
8540 (match_operator:SWI 5 "ix86_carry_flag_operator"
8541 [(match_dup 3) (const_int 0)]))
8542 (match_dup 2)))))
8543 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
8544 (minus:SWI
8545 (minus:SWI
8546 (match_dup 1)
8547 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
8548 (match_dup 2)))]
8549 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8550 "@
8551 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8552 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8553 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8554 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8555 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8556 (set_attr "type" "alu")
8557 (set_attr "mode" "<MODE>")])
8558
;; Immediate variant of the overflow-setting subtract with borrow:
;; operand 0 = operand 1 - carry - operand 2, where operand 2 is an
;; immediate and operand 6 is the <DWI>-mode constant used in the
;; widened comparison.  The insn condition requires both to be
;; CONST_INTs with the same INTVAL.  Alternative 1 is the three-operand
;; form enabled only for isa "apx_ndd".
;;
;; Operand 1 must NOT carry the commutative modifier '%': the operation
;; is a MINUS, so operands 1 and 2 are not interchangeable.
8559 (define_insn "*subv<mode>4_overflow_2"
8560 [(set (reg:CCO FLAGS_REG)
8561 (eq:CCO
8562 (minus:<DWI>
8563 (minus:<DWI>
8564 (sign_extend:<DWI>
8565 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
8566 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8567 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8568 (match_operand:<DWI> 6 "const_int_operand" "n,n"))
8569 (sign_extend:<DWI>
8570 (minus:SWI
8571 (minus:SWI
8572 (match_dup 1)
8573 (match_operator:SWI 5 "ix86_carry_flag_operator"
8574 [(match_dup 3) (const_int 0)]))
8575 (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
8576 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
8577 (minus:SWI
8578 (minus:SWI
8579 (match_dup 1)
8580 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
8581 (match_dup 2)))]
8582 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
8583 && CONST_INT_P (operands[2])
8584 && INTVAL (operands[2]) == INTVAL (operands[6])"
8585 "@
8586 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8587 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8588 [(set_attr "isa" "*,apx_ndd")
8589 (set_attr "type" "alu")
8590 (set_attr "mode" "<MODE>")
;; The immediate is encoded in one byte when it fits in [-128, 127],
;; otherwise in four bytes.
8591 (set (attr "length_immediate")
8592 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8593 (const_string "1")
8594 (const_string "4")))])
8595
;; Expander for unsigned subtraction with overflow check: emits a
;; compare-and-subtract parallel (operand 0 = operand 1 - operand 2,
;; flags set from comparing operand 1 with operand 2) followed by a
;; conditional jump to label operand 3 taken when the flags test LTU.
;; The preparation statement lets ix86_fixup_binary_operands_no_copy
;; adjust the operands before the template is emitted.
8596 (define_expand "usubv<mode>4"
8597 [(parallel [(set (reg:CC FLAGS_REG)
8598 (compare:CC
8599 (match_operand:SWI 1 "nonimmediate_operand")
8600 (match_operand:SWI 2 "<general_operand>")))
8601 (set (match_operand:SWI 0 "register_operand")
8602 (minus:SWI (match_dup 1) (match_dup 2)))])
8603 (set (pc) (if_then_else
8604 (ltu (reg:CC FLAGS_REG) (const_int 0))
8605 (label_ref (match_operand 3))
8606 (pc)))]
8607 ""
8608 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
8609 TARGET_APX_NDD);")
8610
;; Expander producing a single parallel that both compares operand 1
;; with operand 2 (setting FLAGS_REG in CCmode) and stores
;; operand 1 - operand 2 into operand 0.  It has no condition and no
;; preparation statements.
8611 (define_expand "sub<mode>_3"
8612 [(parallel [(set (reg:CC FLAGS_REG)
8613 (compare:CC
8614 (match_operand:SWI 1 "nonimmediate_operand")
8615 (match_operand:SWI 2 "<general_operand>")))
8616 (set (match_operand:SWI 0 "register_operand")
8617 (minus:SWI (match_dup 1) (match_dup 2)))])])
8618
;; Subtract that also sets the flags from comparing operand 1 with
;; operand 2: operand 0 = operand 1 - operand 2.  It matches only when
;; ix86_match_ccmode accepts CCmode for this insn.  Alternatives 0-1 are
;; the two-operand SUB tied to operand 1; alternatives 2-3 are
;; three-operand forms enabled only for isa "apx_ndd".
;;
;; The destination constraint of alternative 1 is "<r>" (a register):
;; an output operand can never be an immediate, so an "i" constraint
;; has no meaning there.
8619 (define_insn "*sub<mode>_3"
8620 [(set (reg FLAGS_REG)
8621 (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
8622 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8623 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8624 (minus:SWI (match_dup 1) (match_dup 2)))]
8625 "ix86_match_ccmode (insn, CCmode)
8626 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
8627 "@
8628 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8629 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8630 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8631 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8632 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8633 (set_attr "type" "alu")
8634 (set_attr "mode" "<MODE>")])
8635
;; When an in-place compare-and-subtract on a general register carries
;; a REG_UNUSED note for that register (i.e. only the flags result is
;; wanted), replace it with a plain compare that leaves the register
;; untouched.
8636 (define_peephole2
8637 [(parallel
8638 [(set (reg:CC FLAGS_REG)
8639 (compare:CC (match_operand:SWI 0 "general_reg_operand")
8640 (match_operand:SWI 1 "general_gr_operand")))
8641 (set (match_dup 0)
8642 (minus:SWI (match_dup 0) (match_dup 1)))])]
8643 "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
8644 [(set (reg:CC FLAGS_REG)
8645 (compare:CC (match_dup 0) (match_dup 1)))])
8646
;; Turn "load reg from mem1; reg -= mem2 (setting flags); store reg to
;; mem1" into "load reg from mem2; mem1 -= reg (setting flags)", i.e. a
;; read-modify-write subtract on memory operand 1.  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size,
;; the register is dead after the third insn, and the register is not
;; mentioned in either memory operand.
8647 (define_peephole2
8648 [(set (match_operand:SWI 0 "general_reg_operand")
8649 (match_operand:SWI 1 "memory_operand"))
8650 (parallel [(set (reg:CC FLAGS_REG)
8651 (compare:CC (match_dup 0)
8652 (match_operand:SWI 2 "memory_operand")))
8653 (set (match_dup 0)
8654 (minus:SWI (match_dup 0) (match_dup 2)))])
8655 (set (match_dup 1) (match_dup 0))]
8656 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8657 && peep2_reg_dead_p (3, operands[0])
8658 && !reg_overlap_mentioned_p (operands[0], operands[1])
8659 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8660 [(set (match_dup 0) (match_dup 2))
8661 (parallel [(set (reg:CC FLAGS_REG)
8662 (compare:CC (match_dup 1) (match_dup 0)))
8663 (set (match_dup 1)
8664 (minus:SWI (match_dup 1) (match_dup 0)))])])
8665
8666 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
8667 ;; subl $1, %eax; jnc .Lxx;
;; The match covers three insns: the decrement (add of -1 that clobbers
;; the flags), a CCZ comparison of the result against -1, and a
;; conditional jump on that comparison.  It is applied only when
;; FLAGS_REG is dead after the jump.  The replacement is a subtract of 1
;; that sets the flags, followed by a jump on the new condition built in
;; the preparation code below.
8668 (define_peephole2
8669 [(parallel
8670 [(set (match_operand:SWI 0 "general_reg_operand")
8671 (plus:SWI (match_dup 0) (const_int -1)))
8672 (clobber (reg FLAGS_REG))])
8673 (set (reg:CCZ FLAGS_REG)
8674 (compare:CCZ (match_dup 0) (const_int -1)))
8675 (set (pc)
8676 (if_then_else (match_operator 1 "bt_comparison_operator"
8677 [(reg:CCZ FLAGS_REG) (const_int 0)])
8678 (match_operand 2)
8679 (pc)))]
8680 "peep2_regno_dead_p (3, FLAGS_REG)"
8681 [(parallel
8682 [(set (reg:CC FLAGS_REG)
8683 (compare:CC (match_dup 0) (const_int 1)))
8684 (set (match_dup 0)
8685 (minus:SWI (match_dup 0) (const_int 1)))])
8686 (set (pc)
8687 (if_then_else (match_dup 3)
8688 (match_dup 2)
8689 (pc)))]
8690 {
/* Build the replacement branch condition on the flags register: an
   original NE test becomes GEU, any other matched test becomes LTU.  */
8691 rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
8692 operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
8693 ? GEU : LTU, VOIDmode, cc, const0_rtx);
8694 })
8695
8696 ;; Help combine use borrow flag to test for -1 after dec (add $-1).
;; The pattern selects operand 2 - 1 when comparison operator 1 (operand
;; 2 against zero) holds, and operand 3 otherwise; operand 2 is tied to
;; the destination.  It requires TARGET_CMOVE, is emitted as "#", and is
;; split after reload into a subtract of 1 that sets the flags followed
;; by a conditional select between the new value and operand 3.
8697 (define_insn_and_split "*dec_cmov<mode>"
8698 [(set (match_operand:SWI248 0 "register_operand" "=r")
8699 (if_then_else:SWI248
8700 (match_operator 1 "bt_comparison_operator"
8701 [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
8702 (plus:SWI248 (match_dup 2) (const_int -1))
8703 (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
8704 (clobber (reg:CC FLAGS_REG))]
8705 "TARGET_CMOVE"
8706 "#"
8707 "&& reload_completed"
8708 [(parallel [(set (reg:CC FLAGS_REG)
8709 (compare:CC (match_dup 2) (const_int 1)))
8710 (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
8711 (set (match_dup 0)
8712 (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
8713 {
/* Condition for the conditional select, tested on the flags register
   in CCCmode: an original NE test becomes GEU, anything else LTU.  */
8714 rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
8715 operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
8716 ? GEU : LTU, VOIDmode, cc, const0_rtx);
8717 })
8718
;; 64-bit only: SImode subtract that sets the flags from comparing
;; operand 1 with operand 2 and writes the zero-extended difference to
;; DImode operand 0.  In alternative 0 operand 1 is tied to operand 0,
;; so the template names %1 as the destination; alternatives 1-2 are
;; three-operand forms enabled only for isa "apx_ndd" and write %k0.
8719 (define_insn "*subsi_3_zext"
8720 [(set (reg FLAGS_REG)
8721 (compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8722 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))
8723 (set (match_operand:DI 0 "register_operand" "=r,r,r")
8724 (zero_extend:DI
8725 (minus:SI (match_dup 1)
8726 (match_dup 2))))]
8727 "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
8728 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
8729 "@
8730 sub{l}\t{%2, %1|%1, %2}
8731 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
8732 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
8733 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8734 (set_attr "type" "alu")
8735 (set_attr "mode" "SI")])
8736 \f
8737 ;; Add with carry and subtract with borrow
8738
;; Add with carry: operand 0 = carry + operand 1 + operand 2, where the
;; carry is operator 4 applied to the flags register (operand 3).  The
;; flags are clobbered.  Alternatives 0-1 are the two-operand ADC tied
;; to operand 1; alternatives 2-3 are three-operand forms enabled only
;; for isa "apx_ndd".
8739 (define_insn "@add<mode>3_carry"
8740 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8741 (plus:SWI
8742 (plus:SWI
8743 (match_operator:SWI 4 "ix86_carry_flag_operator"
8744 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8745 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
8746 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8747 (clobber (reg:CC FLAGS_REG))]
8748 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
8749 "@
8750 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8751 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8752 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8753 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8754 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8755 (set_attr "type" "alu")
8756 (set_attr "use_carry" "1")
8757 (set_attr "pent_pair" "pu")
8758 (set_attr "mode" "<MODE>")])
8759
;; Turn "load reg from mem1; reg = carry + reg + mem2; store reg to
;; mem1" into "load reg from mem2; mem1 = carry + mem1 + reg", i.e. an
;; add-with-carry performed directly on memory operand 1.  Applied only
;; when TARGET_READ_MODIFY_WRITE is set or the insn is optimized for
;; size, the register is dead after the third insn, and the register is
;; not mentioned in either memory operand.
8760 (define_peephole2
8761 [(set (match_operand:SWI 0 "general_reg_operand")
8762 (match_operand:SWI 1 "memory_operand"))
8763 (parallel [(set (match_dup 0)
8764 (plus:SWI
8765 (plus:SWI
8766 (match_operator:SWI 4 "ix86_carry_flag_operator"
8767 [(match_operand 3 "flags_reg_operand")
8768 (const_int 0)])
8769 (match_dup 0))
8770 (match_operand:SWI 2 "memory_operand")))
8771 (clobber (reg:CC FLAGS_REG))])
8772 (set (match_dup 1) (match_dup 0))]
8773 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8774 && peep2_reg_dead_p (3, operands[0])
8775 && !reg_overlap_mentioned_p (operands[0], operands[1])
8776 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8777 [(set (match_dup 0) (match_dup 2))
8778 (parallel [(set (match_dup 1)
8779 (plus:SWI (plus:SWI (match_op_dup 4
8780 [(match_dup 3) (const_int 0)])
8781 (match_dup 1))
8782 (match_dup 0)))
8783 (clobber (reg:CC FLAGS_REG))])])
8784
;; Four-insn variant of the add-with-carry read-modify-write peephole:
;; the sum is first copied into another general register (operand 5)
;; and that register is stored back to memory operand 1.  In addition
;; to the three-insn conditions, operand 5 must be dead after the
;; fourth insn and must not be mentioned in memory operand 1.  The
;; replacement is the same two-insn sequence: load the register from
;; mem2, then add-with-carry it into mem1.
8785 (define_peephole2
8786 [(set (match_operand:SWI 0 "general_reg_operand")
8787 (match_operand:SWI 1 "memory_operand"))
8788 (parallel [(set (match_dup 0)
8789 (plus:SWI
8790 (plus:SWI
8791 (match_operator:SWI 4 "ix86_carry_flag_operator"
8792 [(match_operand 3 "flags_reg_operand")
8793 (const_int 0)])
8794 (match_dup 0))
8795 (match_operand:SWI 2 "memory_operand")))
8796 (clobber (reg:CC FLAGS_REG))])
8797 (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
8798 (set (match_dup 1) (match_dup 5))]
8799 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8800 && peep2_reg_dead_p (3, operands[0])
8801 && peep2_reg_dead_p (4, operands[5])
8802 && !reg_overlap_mentioned_p (operands[0], operands[1])
8803 && !reg_overlap_mentioned_p (operands[0], operands[2])
8804 && !reg_overlap_mentioned_p (operands[5], operands[1])"
8805 [(set (match_dup 0) (match_dup 2))
8806 (parallel [(set (match_dup 1)
8807 (plus:SWI (plus:SWI (match_op_dup 4
8808 [(match_dup 3) (const_int 0)])
8809 (match_dup 1))
8810 (match_dup 0)))
8811 (clobber (reg:CC FLAGS_REG))])])
8812
;; Add just the carry to operand 1 in place (operand 1 is tied to
;; operand 0): emitted as "adc $0".  A memory destination is accepted
;; only when it is rtx-equal to operand 1.
8813 (define_insn "*add<mode>3_carry_0"
8814 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8815 (plus:SWI
8816 (match_operator:SWI 2 "ix86_carry_flag_operator"
8817 [(reg FLAGS_REG) (const_int 0)])
8818 (match_operand:SWI 1 "nonimmediate_operand" "0")))
8819 (clobber (reg:CC FLAGS_REG))]
8820 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8821 "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
8822 [(set_attr "type" "alu")
8823 (set_attr "use_carry" "1")
8824 (set_attr "pent_pair" "pu")
8825 (set_attr "mode" "<MODE>")])
8826
;; Same shape as the "adc $0" pattern, but operator 2 is matched by
;; ix86_carry_flag_unset_operator (presumably the inverted carry test;
;; the predicate is defined elsewhere).  It is emitted as "sbb $-1",
;; which computes operand 1 + 1 - carry.
8827 (define_insn "*add<mode>3_carry_0r"
8828 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8829 (plus:SWI
8830 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
8831 [(reg FLAGS_REG) (const_int 0)])
8832 (match_operand:SWI 1 "nonimmediate_operand" "0")))
8833 (clobber (reg:CC FLAGS_REG))]
8834 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8835 "sbb{<imodesuffix>}\t{$-1, %0|%0, -1}"
8836 [(set_attr "type" "alu")
8837 (set_attr "use_carry" "1")
8838 (set_attr "pent_pair" "pu")
8839 (set_attr "mode" "<MODE>")])
8840
;; QImode add with carry whose result is zero-extended into a wider
;; register (modes from the SWI248x iterator).  Available only with
;; TARGET_APX_NDD and only when operands 1 and 2 are not both memory;
;; both alternatives use the three-operand ADC writing %b0.
8841 (define_insn "*addqi3_carry_zext<mode>"
8842 [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
8843 (zero_extend:SWI248x
8844 (plus:QI
8845 (plus:QI (match_operator:QI 3 "ix86_carry_flag_operator"
8846 [(reg FLAGS_REG) (const_int 0)])
8847 (match_operand:QI 1 "nonimmediate_operand" "%rm,r"))
8848 (match_operand:QI 2 "x86_64_general_operand" "rn,m"))))
8849 (clobber (reg:CC FLAGS_REG))]
8850 "TARGET_APX_NDD
8851 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8852 "@
8853 adc{b}\t{%2, %1, %b0|%b0, %1, %2}
8854 adc{b}\t{%2, %1, %b0|%b0, %1, %2}"
8855 [(set_attr "type" "alu")
8856 (set_attr "use_carry" "1")
8857 (set_attr "pent_pair" "pu")
8858 (set_attr "mode" "QI")])
8859
;; HImode add with carry whose result is zero-extended into a wider
;; register (modes from the SWI48x iterator).  Available only with
;; TARGET_APX_NDD and only when operands 1 and 2 are not both memory;
;; both alternatives use the three-operand ADC writing %w0.
8860 (define_insn "*addhi3_carry_zext<mode>"
8861 [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
8862 (zero_extend:SWI48x
8863 (plus:HI
8864 (plus:HI (match_operator:HI 3 "ix86_carry_flag_operator"
8865 [(reg FLAGS_REG) (const_int 0)])
8866 (match_operand:HI 1 "nonimmediate_operand" "%rm,r"))
8867 (match_operand:HI 2 "x86_64_general_operand" "rn,m"))))
8868 (clobber (reg:CC FLAGS_REG))]
8869 "TARGET_APX_NDD
8870 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8871 "@
8872 adc{w}\t{%2, %1, %w0|%w0, %1, %2}
8873 adc{w}\t{%2, %1, %w0|%w0, %1, %2}"
8874 [(set_attr "type" "alu")
8875 (set_attr "use_carry" "1")
8876 (set_attr "pent_pair" "pu")
8877 (set_attr "mode" "HI")])
8878
;; 64-bit only: SImode add with carry whose result is zero-extended
;; into DImode operand 0.  Alternative 0 is the two-operand ADC with
;; operand 1 tied to operand 0; alternatives 1-2 are three-operand
;; forms enabled only for isa "apx_ndd".  All templates write %k0.
8879 (define_insn "*addsi3_carry_zext"
8880 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8881 (zero_extend:DI
8882 (plus:SI
8883 (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
8884 [(reg FLAGS_REG) (const_int 0)])
8885 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm"))
8886 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
8887 (clobber (reg:CC FLAGS_REG))]
8888 "TARGET_64BIT
8889 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
8890 "@
8891 adc{l}\t{%2, %k0|%k0, %2}
8892 adc{l}\t{%2, %1, %k0|%k0, %1, %2}
8893 adc{l}\t{%2, %1, %k0|%k0, %1, %2}"
8894 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8895 (set_attr "type" "alu")
8896 (set_attr "use_carry" "1")
8897 (set_attr "pent_pair" "pu")
8898 (set_attr "mode" "SI")])
8899
;; QImode "operand 1 + carry" with the result zero-extended into a
;; wider register.  TARGET_APX_NDD only; emitted as the three-operand
;; "adc $0" writing %b0.
8900 (define_insn "*addqi3_carry_zext<mode>_0"
8901 [(set (match_operand:SWI248x 0 "register_operand" "=r")
8902 (zero_extend:SWI248x
8903 (plus:QI (match_operator:QI 2 "ix86_carry_flag_operator"
8904 [(reg FLAGS_REG) (const_int 0)])
8905 (match_operand:QI 1 "nonimmediate_operand" "rm"))))
8906 (clobber (reg:CC FLAGS_REG))]
8907 "TARGET_APX_NDD"
8908 "adc{b}\t{$0, %1, %b0|%b0, %1, 0}"
8909 [(set_attr "type" "alu")
8910 (set_attr "use_carry" "1")
8911 (set_attr "pent_pair" "pu")
8912 (set_attr "mode" "QI")])
8913
;; HImode "operand 1 + carry" with the result zero-extended into a
;; wider register.  TARGET_APX_NDD only; emitted as the three-operand
;; "adc $0" writing %w0.
8914 (define_insn "*addhi3_carry_zext<mode>_0"
8915 [(set (match_operand:SWI48x 0 "register_operand" "=r")
8916 (zero_extend:SWI48x
8917 (plus:HI (match_operator:HI 2 "ix86_carry_flag_operator"
8918 [(reg FLAGS_REG) (const_int 0)])
8919 (match_operand:HI 1 "nonimmediate_operand" "rm"))))
8920 (clobber (reg:CC FLAGS_REG))]
8921 "TARGET_APX_NDD"
8922 "adc{w}\t{$0, %1, %w0|%w0, %1, 0}"
8923 [(set_attr "type" "alu")
8924 (set_attr "use_carry" "1")
8925 (set_attr "pent_pair" "pu")
8926 (set_attr "mode" "HI")])
8927
;; 64-bit only: SImode "operand 1 + carry" zero-extended into DImode
;; operand 0.  Alternative 0 is the in-place "adc $0" with operand 1
;; tied to operand 0; alternative 1 is the three-operand form enabled
;; only for isa "apx_ndd".
8928 (define_insn "*addsi3_carry_zext_0"
8929 [(set (match_operand:DI 0 "register_operand" "=r,r")
8930 (zero_extend:DI
8931 (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
8932 [(reg FLAGS_REG) (const_int 0)])
8933 (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
8934 (clobber (reg:CC FLAGS_REG))]
8935 "TARGET_64BIT"
8936 "@
8937 adc{l}\t{$0, %k0|%k0, 0}
8938 adc{l}\t{$0, %1, %k0|%k0, %1, 0}"
8939 [(set_attr "isa" "*,apx_ndd")
8940 (set_attr "type" "alu")
8941 (set_attr "use_carry" "1")
8942 (set_attr "pent_pair" "pu")
8943 (set_attr "mode" "SI")])
8944
;; QImode add of the operator matched by ix86_carry_flag_unset_operator
;; (presumably the inverted carry test; defined elsewhere) to operand 1,
;; zero-extended into a wider register.  TARGET_APX_NDD only; emitted
;; as the three-operand "sbb $-1" writing %b0.
8945 (define_insn "*addqi3_carry_zext<mode>_0r"
8946 [(set (match_operand:SWI248x 0 "register_operand" "=r")
8947 (zero_extend:SWI248x
8948 (plus:QI (match_operator:QI 2 "ix86_carry_flag_unset_operator"
8949 [(reg FLAGS_REG) (const_int 0)])
8950 (match_operand:QI 1 "nonimmediate_operand" "rm"))))
8951 (clobber (reg:CC FLAGS_REG))]
8952 "TARGET_APX_NDD"
8953 "sbb{b}\t{$-1, %1, %b0|%b0, %1, -1}"
8954 [(set_attr "type" "alu")
8955 (set_attr "use_carry" "1")
8956 (set_attr "pent_pair" "pu")
8957 (set_attr "mode" "QI")])
8958
;; HImode add of the operator matched by ix86_carry_flag_unset_operator
;; (presumably the inverted carry test; defined elsewhere) to operand 1,
;; zero-extended into a wider register.  TARGET_APX_NDD only; emitted
;; as the three-operand "sbb $-1" writing %w0.
8959 (define_insn "*addhi3_carry_zext<mode>_0r"
8960 [(set (match_operand:SWI48x 0 "register_operand" "=r")
8961 (zero_extend:SWI48x
8962 (plus:HI (match_operator:HI 2 "ix86_carry_flag_unset_operator"
8963 [(reg FLAGS_REG) (const_int 0)])
8964 (match_operand:HI 1 "nonimmediate_operand" "rm"))))
8965 (clobber (reg:CC FLAGS_REG))]
8966 "TARGET_APX_NDD"
8967 "sbb{w}\t{$-1, %1, %w0|%w0, %1, -1}"
8968 [(set_attr "type" "alu")
8969 (set_attr "use_carry" "1")
8970 (set_attr "pent_pair" "pu")
8971 (set_attr "mode" "HI")])
8972
;; 64-bit only: SImode add of the operator matched by
;; ix86_carry_flag_unset_operator (presumably the inverted carry test;
;; defined elsewhere) to operand 1, zero-extended into DImode operand 0.
;; Alternative 0 is the in-place "sbb $-1"; alternative 1 is the
;; three-operand form enabled only for isa "apx_ndd".
8973 (define_insn "*addsi3_carry_zext_0r"
8974 [(set (match_operand:DI 0 "register_operand" "=r,r")
8975 (zero_extend:DI
8976 (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
8977 [(reg FLAGS_REG) (const_int 0)])
8978 (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
8979 (clobber (reg:CC FLAGS_REG))]
8980 "TARGET_64BIT"
8981 "@
8982 sbb{l}\t{$-1, %k0|%k0, -1}
8983 sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}"
8984 [(set_attr "isa" "*,apx_ndd")
8985 (set_attr "type" "alu")
8986 (set_attr "use_carry" "1")
8987 (set_attr "pent_pair" "pu")
8988 (set_attr "mode" "SI")])
8989
8990 ;; There is no point in generating the ADCX instruction; ADC is shorter and faster.
8991
;; Add with carry-in that also produces a carry-out: operand 0 =
;; carry + operand 1 + operand 2, and FLAGS_REG is set in CCCmode by
;; comparing the zero-extended SWI48-mode sum against the <DWI>-mode
;; value "zero_extend (operand 2) + carry".  Operators 5 and 4 are the
;; incoming carry in SWI48 and <DWI> mode respectively, both applied to
;; flags operand 3.  Alternatives 0-1 are the two-operand ADC;
;; alternatives 2-3 are three-operand forms enabled only for isa
;; "apx_ndd".
8992 (define_insn "addcarry<mode>"
8993 [(set (reg:CCC FLAGS_REG)
8994 (compare:CCC
8995 (zero_extend:<DWI>
8996 (plus:SWI48
8997 (plus:SWI48
8998 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8999 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9000 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r"))
9001 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m")))
9002 (plus:<DWI>
9003 (zero_extend:<DWI> (match_dup 2))
9004 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9005 [(match_dup 3) (const_int 0)]))))
9006 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
9007 (plus:SWI48 (plus:SWI48 (match_op_dup 5
9008 [(match_dup 3) (const_int 0)])
9009 (match_dup 1))
9010 (match_dup 2)))]
9011 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9012 "@
9013 adc{<imodesuffix>}\t{%2, %0|%0, %2}
9014 adc{<imodesuffix>}\t{%2, %0|%0, %2}
9015 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9016 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9017 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9018 (set_attr "type" "alu")
9019 (set_attr "use_carry" "1")
9020 (set_attr "pent_pair" "pu")
9021 (set_attr "mode" "<MODE>")])
9022
;; Two-insn match: "reg = carry + reg + mem (setting CCC); store reg to
;; the same mem" becomes a single carry-setting add-with-carry performed
;; directly on the memory operand with the register as the addend.
;; Applied only when TARGET_READ_MODIFY_WRITE is set or the insn is
;; optimized for size, the register is dead after the second insn, and
;; the register is not mentioned in the memory operand.
9023 (define_peephole2
9024 [(parallel [(set (reg:CCC FLAGS_REG)
9025 (compare:CCC
9026 (zero_extend:<DWI>
9027 (plus:SWI48
9028 (plus:SWI48
9029 (match_operator:SWI48 4 "ix86_carry_flag_operator"
9030 [(match_operand 2 "flags_reg_operand")
9031 (const_int 0)])
9032 (match_operand:SWI48 0 "general_reg_operand"))
9033 (match_operand:SWI48 1 "memory_operand")))
9034 (plus:<DWI>
9035 (zero_extend:<DWI> (match_dup 1))
9036 (match_operator:<DWI> 3 "ix86_carry_flag_operator"
9037 [(match_dup 2) (const_int 0)]))))
9038 (set (match_dup 0)
9039 (plus:SWI48 (plus:SWI48 (match_op_dup 4
9040 [(match_dup 2) (const_int 0)])
9041 (match_dup 0))
9042 (match_dup 1)))])
9043 (set (match_dup 1) (match_dup 0))]
9044 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9045 && peep2_reg_dead_p (2, operands[0])
9046 && !reg_overlap_mentioned_p (operands[0], operands[1])"
9047 [(parallel [(set (reg:CCC FLAGS_REG)
9048 (compare:CCC
9049 (zero_extend:<DWI>
9050 (plus:SWI48
9051 (plus:SWI48
9052 (match_op_dup 4
9053 [(match_dup 2) (const_int 0)])
9054 (match_dup 1))
9055 (match_dup 0)))
9056 (plus:<DWI>
9057 (zero_extend:<DWI> (match_dup 0))
9058 (match_op_dup 3
9059 [(match_dup 2) (const_int 0)]))))
9060 (set (match_dup 1)
9061 (plus:SWI48 (plus:SWI48 (match_op_dup 4
9062 [(match_dup 2) (const_int 0)])
9063 (match_dup 1))
9064 (match_dup 0)))])])
9065
;; Three-insn match: "load reg from mem1; reg = carry + reg + mem2
;; (setting CCC); store reg to mem1" becomes "load reg from mem2;
;; mem1 = carry + mem1 + reg (setting CCC)".  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size,
;; the register is dead after the third insn, and the register is not
;; mentioned in either memory operand.
9066 (define_peephole2
9067 [(set (match_operand:SWI48 0 "general_reg_operand")
9068 (match_operand:SWI48 1 "memory_operand"))
9069 (parallel [(set (reg:CCC FLAGS_REG)
9070 (compare:CCC
9071 (zero_extend:<DWI>
9072 (plus:SWI48
9073 (plus:SWI48
9074 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9075 [(match_operand 3 "flags_reg_operand")
9076 (const_int 0)])
9077 (match_dup 0))
9078 (match_operand:SWI48 2 "memory_operand")))
9079 (plus:<DWI>
9080 (zero_extend:<DWI> (match_dup 2))
9081 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9082 [(match_dup 3) (const_int 0)]))))
9083 (set (match_dup 0)
9084 (plus:SWI48 (plus:SWI48 (match_op_dup 5
9085 [(match_dup 3) (const_int 0)])
9086 (match_dup 0))
9087 (match_dup 2)))])
9088 (set (match_dup 1) (match_dup 0))]
9089 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9090 && peep2_reg_dead_p (3, operands[0])
9091 && !reg_overlap_mentioned_p (operands[0], operands[1])
9092 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9093 [(set (match_dup 0) (match_dup 2))
9094 (parallel [(set (reg:CCC FLAGS_REG)
9095 (compare:CCC
9096 (zero_extend:<DWI>
9097 (plus:SWI48
9098 (plus:SWI48
9099 (match_op_dup 5
9100 [(match_dup 3) (const_int 0)])
9101 (match_dup 1))
9102 (match_dup 0)))
9103 (plus:<DWI>
9104 (zero_extend:<DWI> (match_dup 0))
9105 (match_op_dup 4
9106 [(match_dup 3) (const_int 0)]))))
9107 (set (match_dup 1)
9108 (plus:SWI48 (plus:SWI48 (match_op_dup 5
9109 [(match_dup 3) (const_int 0)])
9110 (match_dup 1))
9111 (match_dup 0)))])])
9112
;; Four-insn match: a carry-setting add-with-carry of a memory operand
;; into a register, a set of QImode register operand 5 from the
;; resulting carry (LTU on CCC), a zero-extension of operand 5 into
;; operand 6, and finally a store of the sum back to the same memory.
;; The replacement performs the add-with-carry directly on memory and
;; then re-emits the carry capture and its zero-extension.  Applied
;; only when TARGET_READ_MODIFY_WRITE is set or the insn is optimized
;; for size, the sum register is dead after the fourth insn, and none
;; of operands 0, 5 and 6 overlap the memory operand or (for 5 and 6)
;; the sum register.
9113 (define_peephole2
9114 [(parallel [(set (reg:CCC FLAGS_REG)
9115 (compare:CCC
9116 (zero_extend:<DWI>
9117 (plus:SWI48
9118 (plus:SWI48
9119 (match_operator:SWI48 4 "ix86_carry_flag_operator"
9120 [(match_operand 2 "flags_reg_operand")
9121 (const_int 0)])
9122 (match_operand:SWI48 0 "general_reg_operand"))
9123 (match_operand:SWI48 1 "memory_operand")))
9124 (plus:<DWI>
9125 (zero_extend:<DWI> (match_dup 1))
9126 (match_operator:<DWI> 3 "ix86_carry_flag_operator"
9127 [(match_dup 2) (const_int 0)]))))
9128 (set (match_dup 0)
9129 (plus:SWI48 (plus:SWI48 (match_op_dup 4
9130 [(match_dup 2) (const_int 0)])
9131 (match_dup 0))
9132 (match_dup 1)))])
9133 (set (match_operand:QI 5 "general_reg_operand")
9134 (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9135 (set (match_operand:SWI48 6 "general_reg_operand")
9136 (zero_extend:SWI48 (match_dup 5)))
9137 (set (match_dup 1) (match_dup 0))]
9138 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9139 && peep2_reg_dead_p (4, operands[0])
9140 && !reg_overlap_mentioned_p (operands[0], operands[1])
9141 && !reg_overlap_mentioned_p (operands[0], operands[5])
9142 && !reg_overlap_mentioned_p (operands[5], operands[1])
9143 && !reg_overlap_mentioned_p (operands[0], operands[6])
9144 && !reg_overlap_mentioned_p (operands[6], operands[1])"
9145 [(parallel [(set (reg:CCC FLAGS_REG)
9146 (compare:CCC
9147 (zero_extend:<DWI>
9148 (plus:SWI48
9149 (plus:SWI48
9150 (match_op_dup 4
9151 [(match_dup 2) (const_int 0)])
9152 (match_dup 1))
9153 (match_dup 0)))
9154 (plus:<DWI>
9155 (zero_extend:<DWI> (match_dup 0))
9156 (match_op_dup 3
9157 [(match_dup 2) (const_int 0)]))))
9158 (set (match_dup 1)
9159 (plus:SWI48 (plus:SWI48 (match_op_dup 4
9160 [(match_dup 2) (const_int 0)])
9161 (match_dup 1))
9162 (match_dup 0)))])
9163 (set (match_dup 5) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9164 (set (match_dup 6) (zero_extend:SWI48 (match_dup 5)))])
9165
;; Expander for an add with no carry-in that produces a carry-out:
;; operand 0 = operand 1 + operand 2, with FLAGS_REG set in CCCmode by
;; comparing the sum against operand 1.  The expander's condition is
;; the ix86_binary_operator_ok check below.
9166 (define_expand "addcarry<mode>_0"
9167 [(parallel
9168 [(set (reg:CCC FLAGS_REG)
9169 (compare:CCC
9170 (plus:SWI48
9171 (match_operand:SWI48 1 "nonimmediate_operand")
9172 (match_operand:SWI48 2 "x86_64_general_operand"))
9173 (match_dup 1)))
9174 (set (match_operand:SWI48 0 "nonimmediate_operand")
9175 (plus:SWI48 (match_dup 1) (match_dup 2)))])]
9176 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)")
9177
;; Immediate variant of the carry-in/carry-out add: operand 2 is an
;; immediate and operand 6 is the <DWI>-mode constant appearing on the
;; comparison side.  The insn condition accepts operand 6 either as a
;; CONST_INT equal to operand 2 masked to <MODE>mode (used for SImode
;; or a non-negative operand 2), or as a two-element CONST_WIDE_INT
;; whose low element equals operand 2 and whose high element is zero.
;; Alternative 1 is the three-operand form enabled only for isa
;; "apx_ndd".
9178 (define_insn "*addcarry<mode>_1"
9179 [(set (reg:CCC FLAGS_REG)
9180 (compare:CCC
9181 (zero_extend:<DWI>
9182 (plus:SWI48
9183 (plus:SWI48
9184 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9185 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9186 (match_operand:SWI48 1 "nonimmediate_operand" "%0,rm"))
9187 (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e")))
9188 (plus:<DWI>
9189 (match_operand:<DWI> 6 "const_scalar_int_operand")
9190 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9191 [(match_dup 3) (const_int 0)]))))
9192 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
9193 (plus:SWI48 (plus:SWI48 (match_op_dup 5
9194 [(match_dup 3) (const_int 0)])
9195 (match_dup 1))
9196 (match_dup 2)))]
9197 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
9198 && CONST_INT_P (operands[2])
9199 /* Check that operands[6] is operands[2] zero extended from
9200 <MODE>mode to <DWI>mode. */
9201 && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0)
9202 ? (CONST_INT_P (operands[6])
9203 && UINTVAL (operands[6]) == (UINTVAL (operands[2])
9204 & GET_MODE_MASK (<MODE>mode)))
9205 : (CONST_WIDE_INT_P (operands[6])
9206 && CONST_WIDE_INT_NUNITS (operands[6]) == 2
9207 && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
9208 == UINTVAL (operands[2]))
9209 && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
9210 "@
9211 adc{<imodesuffix>}\t{%2, %0|%0, %2}
9212 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9213 [(set_attr "isa" "*,apx_ndd")
9214 (set_attr "type" "alu")
9215 (set_attr "use_carry" "1")
9216 (set_attr "pent_pair" "pu")
9217 (set_attr "mode" "<MODE>")
;; The immediate is encoded in one byte when it fits in [-128, 127],
;; otherwise in four bytes.
9218 (set (attr "length_immediate")
9219 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
9220 (const_string "1")
9221 (const_string "4")))])
9222
;; Subtract with borrow: operand 0 = operand 1 - carry - operand 2,
;; where the carry is operator 4 applied to the flags register
;; (operand 3).  The flags are clobbered.  Alternatives 0-1 are the
;; two-operand SBB tied to operand 1; alternatives 2-3 are
;; three-operand forms enabled only for isa "apx_ndd".
9223 (define_insn "@sub<mode>3_carry"
9224 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
9225 (minus:SWI
9226 (minus:SWI
9227 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
9228 (match_operator:SWI 4 "ix86_carry_flag_operator"
9229 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
9230 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
9231 (clobber (reg:CC FLAGS_REG))]
9232 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
9233 "@
9234 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9235 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9236 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9237 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9238 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9239 (set_attr "type" "alu")
9240 (set_attr "use_carry" "1")
9241 (set_attr "pent_pair" "pu")
9242 (set_attr "mode" "<MODE>")])
9243
;; Turn "load reg from mem1; reg = reg - carry - mem2; store reg to
;; mem1" into "load reg from mem2; mem1 = mem1 - carry - reg", i.e. a
;; subtract-with-borrow performed directly on memory operand 1.
;; Applied only when TARGET_READ_MODIFY_WRITE is set or the insn is
;; optimized for size, the register is dead after the third insn, and
;; the register is not mentioned in either memory operand.
9244 (define_peephole2
9245 [(set (match_operand:SWI 0 "general_reg_operand")
9246 (match_operand:SWI 1 "memory_operand"))
9247 (parallel [(set (match_dup 0)
9248 (minus:SWI
9249 (minus:SWI
9250 (match_dup 0)
9251 (match_operator:SWI 4 "ix86_carry_flag_operator"
9252 [(match_operand 3 "flags_reg_operand")
9253 (const_int 0)]))
9254 (match_operand:SWI 2 "memory_operand")))
9255 (clobber (reg:CC FLAGS_REG))])
9256 (set (match_dup 1) (match_dup 0))]
9257 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9258 && peep2_reg_dead_p (3, operands[0])
9259 && !reg_overlap_mentioned_p (operands[0], operands[1])
9260 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9261 [(set (match_dup 0) (match_dup 2))
9262 (parallel [(set (match_dup 1)
9263 (minus:SWI (minus:SWI (match_dup 1)
9264 (match_op_dup 4
9265 [(match_dup 3) (const_int 0)]))
9266 (match_dup 0)))
9267 (clobber (reg:CC FLAGS_REG))])])
9268
;; Four-insn variant of the subtract-with-borrow read-modify-write
;; peephole: the difference is first copied into another general
;; register (operand 5) and that register is stored back to memory
;; operand 1.  In addition to the three-insn conditions, operand 5 must
;; be dead after the fourth insn and must not be mentioned in memory
;; operand 1.  The replacement is the same two-insn sequence: load the
;; register from mem2, then subtract-with-borrow it from mem1 in place.
9269 (define_peephole2
9270 [(set (match_operand:SWI 0 "general_reg_operand")
9271 (match_operand:SWI 1 "memory_operand"))
9272 (parallel [(set (match_dup 0)
9273 (minus:SWI
9274 (minus:SWI
9275 (match_dup 0)
9276 (match_operator:SWI 4 "ix86_carry_flag_operator"
9277 [(match_operand 3 "flags_reg_operand")
9278 (const_int 0)]))
9279 (match_operand:SWI 2 "memory_operand")))
9280 (clobber (reg:CC FLAGS_REG))])
9281 (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
9282 (set (match_dup 1) (match_dup 5))]
9283 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9284 && peep2_reg_dead_p (3, operands[0])
9285 && peep2_reg_dead_p (4, operands[5])
9286 && !reg_overlap_mentioned_p (operands[0], operands[1])
9287 && !reg_overlap_mentioned_p (operands[0], operands[2])
9288 && !reg_overlap_mentioned_p (operands[5], operands[1])"
9289 [(set (match_dup 0) (match_dup 2))
9290 (parallel [(set (match_dup 1)
9291 (minus:SWI (minus:SWI (match_dup 1)
9292 (match_op_dup 4
9293 [(match_dup 3) (const_int 0)]))
9294 (match_dup 0)))
9295 (clobber (reg:CC FLAGS_REG))])])
9296
;; Subtract just the carry from operand 1 in place (operand 1 is tied
;; to operand 0): emitted as "sbb $0".  A memory destination is
;; accepted only when it is rtx-equal to operand 1.
9297 (define_insn "*sub<mode>3_carry_0"
9298 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
9299 (minus:SWI
9300 (match_operand:SWI 1 "nonimmediate_operand" "0")
9301 (match_operator:SWI 2 "ix86_carry_flag_operator"
9302 [(reg FLAGS_REG) (const_int 0)])))
9303 (clobber (reg:CC FLAGS_REG))]
9304 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
9305 "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
9306 [(set_attr "type" "alu")
9307 (set_attr "use_carry" "1")
9308 (set_attr "pent_pair" "pu")
9309 (set_attr "mode" "<MODE>")])
9310
;; Same shape as the "sbb $0" pattern, but operator 2 is matched by
;; ix86_carry_flag_unset_operator (presumably the inverted carry test;
;; the predicate is defined elsewhere).  It is emitted as "adc $-1",
;; which computes operand 1 - 1 + carry.
9311 (define_insn "*sub<mode>3_carry_0r"
9312 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
9313 (minus:SWI
9314 (match_operand:SWI 1 "nonimmediate_operand" "0")
9315 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
9316 [(reg FLAGS_REG) (const_int 0)])))
9317 (clobber (reg:CC FLAGS_REG))]
9318 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
9319 "adc{<imodesuffix>}\t{$-1, %0|%0, -1}"
9320 [(set_attr "type" "alu")
9321 (set_attr "use_carry" "1")
9322 (set_attr "pent_pair" "pu")
9323 (set_attr "mode" "<MODE>")])
9324
;; QImode subtract with borrow whose result is zero-extended into a
;; wider register (modes from the SWI248x iterator).  Available only
;; with TARGET_APX_NDD and only when operands 1 and 2 are not both
;; memory; both alternatives use the three-operand SBB writing %b0.
9325 (define_insn "*subqi3_carry_zext<mode>"
9326 [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
9327 (zero_extend:SWI248x
9328 (minus:QI
9329 (minus:QI
9330 (match_operand:QI 1 "nonimmediate_operand" "r,rm")
9331 (match_operator:QI 3 "ix86_carry_flag_operator"
9332 [(reg FLAGS_REG) (const_int 0)]))
9333 (match_operand:QI 2 "x86_64_general_operand" "rBMe,re"))))
9334 (clobber (reg:CC FLAGS_REG))]
9335 "TARGET_APX_NDD
9336 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9337 "@
9338 sbb{b}\t{%2, %1, %b0|%b0, %1, %2}
9339 sbb{b}\t{%2, %1, %b0|%b0, %1, %2}"
9340 [(set_attr "type" "alu")
9341 (set_attr "use_carry" "1")
9342 (set_attr "pent_pair" "pu")
9343 (set_attr "mode" "QI")])
9344
;; HImode subtract with borrow whose result is zero-extended into a
;; wider register (modes from the SWI48x iterator).  Available only
;; with TARGET_APX_NDD and only when operands 1 and 2 are not both
;; memory; both alternatives use the three-operand SBB writing %w0.
9345 (define_insn "*subhi3_carry_zext<mode>"
9346 [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
9347 (zero_extend:SWI48x
9348 (minus:HI
9349 (minus:HI
9350 (match_operand:HI 1 "nonimmediate_operand" "r,rm")
9351 (match_operator:HI 3 "ix86_carry_flag_operator"
9352 [(reg FLAGS_REG) (const_int 0)]))
9353 (match_operand:HI 2 "x86_64_general_operand" "rBMe,re"))))
9354 (clobber (reg:CC FLAGS_REG))]
9355 "TARGET_APX_NDD
9356 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9357 "@
9358 sbb{w}\t{%2, %1, %w0|%w0, %1, %2}
9359 sbb{w}\t{%2, %1, %w0|%w0, %1, %2}"
9360 [(set_attr "type" "alu")
9361 (set_attr "use_carry" "1")
9362 (set_attr "pent_pair" "pu")
9363 (set_attr "mode" "HI")])
9364
;; 64-bit only: SImode subtract with borrow whose result is
;; zero-extended into DImode operand 0.  Alternative 0 is the
;; two-operand SBB with operand 1 tied to operand 0; alternatives 1-2
;; are three-operand forms enabled only for isa "apx_ndd".  All
;; templates write %k0.
9365 (define_insn "*subsi3_carry_zext"
9366 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
9367 (zero_extend:DI
9368 (minus:SI
9369 (minus:SI
9370 (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
9371 (match_operator:SI 3 "ix86_carry_flag_operator"
9372 [(reg FLAGS_REG) (const_int 0)]))
9373 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
9374 (clobber (reg:CC FLAGS_REG))]
9375 "TARGET_64BIT
9376 && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
9377 "@
9378 sbb{l}\t{%2, %k0|%k0, %2}
9379 sbb{l}\t{%2, %1, %k0|%k0, %1, %2}
9380 sbb{l}\t{%2, %1, %k0|%k0, %1, %2}"
9381 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9382 (set_attr "type" "alu")
9383 (set_attr "use_carry" "1")
9384 (set_attr "pent_pair" "pu")
9385 (set_attr "mode" "SI")])
9386
;; QImode "subtract only the borrow", zero-extended:
;; op0 = zero_extend (op1 - carry).  Operand 2 is the carry test on
;; FLAGS_REG.  Only enabled for TARGET_APX_NDD; emitted as a three-operand
;; sbb{b} with an immediate 0 as the subtrahend.
9387 (define_insn "*subqi3_carry_zext<mode>_0"
9388 [(set (match_operand:SWI248x 0 "register_operand" "=r")
9389 (zero_extend:SWI248x
9390 (minus:QI
9391 (match_operand:QI 1 "nonimmediate_operand" "rm")
9392 (match_operator:QI 2 "ix86_carry_flag_operator"
9393 [(reg FLAGS_REG) (const_int 0)]))))
9394 (clobber (reg:CC FLAGS_REG))]
9395 "TARGET_APX_NDD"
9396 "sbb{b}\t{$0, %1, %b0|%b0, %1, 0}"
9397 [(set_attr "type" "alu")
9398 (set_attr "use_carry" "1")
9399 (set_attr "pent_pair" "pu")
9400 (set_attr "mode" "QI")])
9401
;; HImode "subtract only the borrow", zero-extended:
;; op0 = zero_extend (op1 - carry).  Operand 2 is the carry test on
;; FLAGS_REG.  Only enabled for TARGET_APX_NDD; emitted as a three-operand
;; sbb{w} with an immediate 0 as the subtrahend.
9402 (define_insn "*subhi3_carry_zext<mode>_0"
9403 [(set (match_operand:SWI48x 0 "register_operand" "=r")
9404 (zero_extend:SWI48x
9405 (minus:HI
9406 (match_operand:HI 1 "nonimmediate_operand" "rm")
9407 (match_operator:HI 2 "ix86_carry_flag_operator"
9408 [(reg FLAGS_REG) (const_int 0)]))))
9409 (clobber (reg:CC FLAGS_REG))]
9410 "TARGET_APX_NDD"
9411 "sbb{w}\t{$0, %1, %w0|%w0, %1, 0}"
9412 [(set_attr "type" "alu")
9413 (set_attr "use_carry" "1")
9414 (set_attr "pent_pair" "pu")
9415 (set_attr "mode" "HI")])
9416
;; SImode "subtract only the borrow", zero-extended to DImode:
;; op0 = zero_extend:DI (op1 - carry).  Requires TARGET_64BIT.
;; Alternative 0 ties operand 1 to the destination (two-operand sbb{l} $0);
;; alternative 1 is tagged "apx_ndd" and uses the three-operand form with
;; a register-or-memory source.
9417 (define_insn "*subsi3_carry_zext_0"
9418 [(set (match_operand:DI 0 "register_operand" "=r,r")
9419 (zero_extend:DI
9420 (minus:SI
9421 (match_operand:SI 1 "nonimmediate_operand" "0,rm")
9422 (match_operator:SI 2 "ix86_carry_flag_operator"
9423 [(reg FLAGS_REG) (const_int 0)]))))
9424 (clobber (reg:CC FLAGS_REG))]
9425 "TARGET_64BIT"
9426 "@
9427 sbb{l}\t{$0, %k0|%k0, 0}
9428 sbb{l}\t{$0, %1, %k0|%k0, %1, 0}"
9429 [(set_attr "isa" "*,apx_ndd")
9430 (set_attr "type" "alu")
9431 (set_attr "use_carry" "1")
9432 (set_attr "pent_pair" "pu")
9433 (set_attr "mode" "SI")])
9434
;; QImode variant of the "_0" pattern for the reversed carry test:
;; op0 = zero_extend (op1 - <test>), where operand 2 must satisfy
;; ix86_carry_flag_unset_operator.  Only enabled for TARGET_APX_NDD.
;; The subtraction is emitted as a three-operand adc{b} of -1.
;; NOTE(review): this relies on the predicate accepting only tests that are
;; true when the carry flag is clear (x - !CF == x + CF - 1) -- confirm
;; against the predicate's definition.
9435 (define_insn "*subqi3_carry_zext<mode>_0r"
9436 [(set (match_operand:SWI248x 0 "register_operand" "=r")
9437 (zero_extend:SWI248x
9438 (minus:QI
9439 (match_operand:QI 1 "nonimmediate_operand" "rm")
9440 (match_operator:QI 2 "ix86_carry_flag_unset_operator"
9441 [(reg FLAGS_REG) (const_int 0)]))))
9442 (clobber (reg:CC FLAGS_REG))]
9443 "TARGET_APX_NDD"
9444 "adc{b}\t{$-1, %1, %b0|%b0, %1, -1}"
9445 [(set_attr "type" "alu")
9446 (set_attr "use_carry" "1")
9447 (set_attr "pent_pair" "pu")
9448 (set_attr "mode" "QI")])
9449
;; HImode variant of the "_0" pattern for the reversed carry test:
;; op0 = zero_extend (op1 - <test>), where operand 2 must satisfy
;; ix86_carry_flag_unset_operator.  Only enabled for TARGET_APX_NDD.
;; The subtraction is emitted as a three-operand adc{w} of -1.
;; NOTE(review): this relies on the predicate accepting only tests that are
;; true when the carry flag is clear (x - !CF == x + CF - 1) -- confirm
;; against the predicate's definition.
9450 (define_insn "*subhi3_carry_zext<mode>_0r"
9451 [(set (match_operand:SWI48x 0 "register_operand" "=r")
9452 (zero_extend:SWI48x
9453 (minus:HI
9454 (match_operand:HI 1 "nonimmediate_operand" "rm")
9455 (match_operator:HI 2 "ix86_carry_flag_unset_operator"
9456 [(reg FLAGS_REG) (const_int 0)]))))
9457 (clobber (reg:CC FLAGS_REG))]
9458 "TARGET_APX_NDD"
9459 "adc{w}\t{$-1, %1, %w0|%w0, %1, -1}"
9460 [(set_attr "type" "alu")
9461 (set_attr "use_carry" "1")
9462 (set_attr "pent_pair" "pu")
9463 (set_attr "mode" "HI")])
9464
;; SImode variant of the "_0" pattern for the reversed carry test,
;; zero-extended to DImode: op0 = zero_extend:DI (op1 - <test>), where
;; operand 2 must satisfy ix86_carry_flag_unset_operator.  Requires
;; TARGET_64BIT.  Emitted as adc{l} of -1: alternative 0 is the
;; two-operand form with operand 1 tied to the destination, alternative 1
;; is tagged "apx_ndd" and uses the three-operand form.
;; NOTE(review): this relies on the predicate accepting only tests that are
;; true when the carry flag is clear (x - !CF == x + CF - 1) -- confirm
;; against the predicate's definition.
9465 (define_insn "*subsi3_carry_zext_0r"
9466 [(set (match_operand:DI 0 "register_operand" "=r,r")
9467 (zero_extend:DI
9468 (minus:SI
9469 (match_operand:SI 1 "nonimmediate_operand" "0,rm")
9470 (match_operator:SI 2 "ix86_carry_flag_unset_operator"
9471 [(reg FLAGS_REG) (const_int 0)]))))
9472 (clobber (reg:CC FLAGS_REG))]
9473 "TARGET_64BIT"
9474 "@
9475 adc{l}\t{$-1, %k0|%k0, -1}
9476 adc{l}\t{$-1, %1, %k0|%k0, %1, -1}"
9477 [(set_attr "isa" "*,apx_ndd")
9478 (set_attr "type" "alu")
9479 (set_attr "use_carry" "1")
9480 (set_attr "pent_pair" "pu")
9481 (set_attr "mode" "SI")])
9482
;; Flags-only subtract-with-borrow.  Sets FLAGS_REG in CCCmode from the
;; double-width comparison of zero_extend (op1) against
;; (carry + zero_extend (op2)).  The arithmetic result of the sbb is not
;; used: it lands in scratch operand 0, to which operand 1 is tied by its
;; "0" constraint.  The pattern has no enabling condition.
9483 (define_insn "@sub<mode>3_carry_ccc"
9484 [(set (reg:CCC FLAGS_REG)
9485 (compare:CCC
9486 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
9487 (plus:<DWI>
9488 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
9489 (zero_extend:<DWI>
9490 (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
9491 (clobber (match_scratch:DWIH 0 "=r"))]
9492 ""
9493 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
9494 [(set_attr "type" "alu")
9495 (set_attr "mode" "<MODE>")])
9496
;; Immediate form of the flags-only subtract-with-borrow.  Here operand 2
;; is already a <DWI>-mode constant (matched by
;; x86_64_dwzext_immediate_operand) rather than a zero_extend of a
;; <MODE>-mode operand.  The output code takes the <MODE>-mode subreg at
;; byte offset 0 of that constant as operands[3] and prints the sbb with it.
;; The arithmetic result goes to scratch operand 0 (tied to operand 1).
9497 (define_insn "*sub<mode>3_carry_ccc_1"
9498 [(set (reg:CCC FLAGS_REG)
9499 (compare:CCC
9500 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
9501 (plus:<DWI>
9502 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
9503 (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
9504 (clobber (match_scratch:DWIH 0 "=r"))]
9505 ""
9506 {
9507 operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
9508 return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
9509 }
9510 [(set_attr "type" "alu")
9511 (set_attr "mode" "<MODE>")])
9512
9513 ;; The sign flag is set from the
9514 ;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
9515 ;; result, the overflow flag likewise, but the overflow flag is also
9516 ;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
;; Because of that, the flags result is represented as an UNSPEC_SBB of
;; (op1, op2, carry) in CCGZmode rather than as a plain compare.  The
;; arithmetic result of the sbb is discarded into scratch operand 0, to
;; which operand 1 is tied.
9517 (define_insn "@sub<mode>3_carry_ccgz"
9518 [(set (reg:CCGZ FLAGS_REG)
9519 (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
9520 (match_operand:DWIH 2 "x86_64_general_operand" "rBMe")
9521 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
9522 UNSPEC_SBB))
9523 (clobber (match_scratch:DWIH 0 "=r"))]
9524 ""
9525 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
9526 [(set_attr "type" "alu")
9527 (set_attr "mode" "<MODE>")])
9528
;; Subtract with borrow-in and borrow-out.  In one parallel this
;;   - sets FLAGS_REG (CCCmode) from the double-width comparison of
;;     zero_extend (op1) against (carry + zero_extend (op2)), and
;;   - sets op0 = (op1 - carry) - op2.
;; Operand 3 is the incoming flags register; operators 4 and 5 are the
;; carry tests on it in <DWI> mode and in the operation mode respectively.
;; Alternatives 0 and 1 tie operand 1 to the destination (two-operand sbb);
;; alternatives 2 and 3 are tagged "apx_ndd" and use the three-operand form.
9529 (define_insn "subborrow<mode>"
9530 [(set (reg:CCC FLAGS_REG)
9531 (compare:CCC
9532 (zero_extend:<DWI>
9533 (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm"))
9534 (plus:<DWI>
9535 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9536 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9537 (zero_extend:<DWI>
9538 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r")))))
9539 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
9540 (minus:SWI48 (minus:SWI48
9541 (match_dup 1)
9542 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9543 [(match_dup 3) (const_int 0)]))
9544 (match_dup 2)))]
9545 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
9546 "@
9547 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9548 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9549 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9550 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9551 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9552 (set_attr "type" "alu")
9553 (set_attr "use_carry" "1")
9554 (set_attr "pent_pair" "pu")
9555 (set_attr "mode" "<MODE>")])
9556
;; Turn
;;   reg0 = mem1;  {flags, reg0} = subborrow (reg0, mem2);  mem1 = reg0
;; into
;;   reg0 = mem2;  {flags, mem1} = subborrow (mem1, reg0)
;; i.e. a read-modify-write sbb directly on mem1.  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size, reg0
;; is dead after the third matched insn, and reg0 is not mentioned in
;; either memory operand.
9557 (define_peephole2
9558 [(set (match_operand:SWI48 0 "general_reg_operand")
9559 (match_operand:SWI48 1 "memory_operand"))
9560 (parallel [(set (reg:CCC FLAGS_REG)
9561 (compare:CCC
9562 (zero_extend:<DWI> (match_dup 0))
9563 (plus:<DWI>
9564 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9565 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9566 (zero_extend:<DWI>
9567 (match_operand:SWI48 2 "memory_operand")))))
9568 (set (match_dup 0)
9569 (minus:SWI48
9570 (minus:SWI48
9571 (match_dup 0)
9572 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9573 [(match_dup 3) (const_int 0)]))
9574 (match_dup 2)))])
9575 (set (match_dup 1) (match_dup 0))]
9576 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9577 && peep2_reg_dead_p (3, operands[0])
9578 && !reg_overlap_mentioned_p (operands[0], operands[1])
9579 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9580 [(set (match_dup 0) (match_dup 2))
9581 (parallel [(set (reg:CCC FLAGS_REG)
9582 (compare:CCC
9583 (zero_extend:<DWI> (match_dup 1))
9584 (plus:<DWI> (match_op_dup 4
9585 [(match_dup 3) (const_int 0)])
9586 (zero_extend:<DWI> (match_dup 0)))))
9587 (set (match_dup 1)
9588 (minus:SWI48 (minus:SWI48 (match_dup 1)
9589 (match_op_dup 5
9590 [(match_dup 3) (const_int 0)]))
9591 (match_dup 0)))])])
9592
;; Like the preceding subborrow read-modify-write peephole, but both
;; inputs were loaded into registers first:
;;   reg6 = mem7;  reg8 = mem9;
;;   {flags, reg0} = subborrow (reg0, reg2);  mem1 = reg0
;; The final clause of the condition accepts the two loads in either
;; order: one of them must load reg0 from the very memory (mem1) that is
;; stored at the end, and the other must load reg2.  The replacement loads
;; the subtrahend into reg0 and performs the sbb directly on mem1.  The
;; replacement's load reads operands[9]; the preparation statement
;; redirects operands[9] to operands[7] when it was the first load that
;; fetched the subtrahend.
;; Other requirements: reg0 dead after insn 4, reg2 dead after insn 3,
;; neither mentioned in mem1, and reg6 not mentioned in mem9.
9593 (define_peephole2
9594 [(set (match_operand:SWI48 6 "general_reg_operand")
9595 (match_operand:SWI48 7 "memory_operand"))
9596 (set (match_operand:SWI48 8 "general_reg_operand")
9597 (match_operand:SWI48 9 "memory_operand"))
9598 (parallel [(set (reg:CCC FLAGS_REG)
9599 (compare:CCC
9600 (zero_extend:<DWI>
9601 (match_operand:SWI48 0 "general_reg_operand"))
9602 (plus:<DWI>
9603 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9604 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9605 (zero_extend:<DWI>
9606 (match_operand:SWI48 2 "general_reg_operand")))))
9607 (set (match_dup 0)
9608 (minus:SWI48
9609 (minus:SWI48
9610 (match_dup 0)
9611 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9612 [(match_dup 3) (const_int 0)]))
9613 (match_dup 2)))])
9614 (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
9615 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9616 && peep2_reg_dead_p (4, operands[0])
9617 && peep2_reg_dead_p (3, operands[2])
9618 && !reg_overlap_mentioned_p (operands[0], operands[1])
9619 && !reg_overlap_mentioned_p (operands[2], operands[1])
9620 && !reg_overlap_mentioned_p (operands[6], operands[9])
9621 && (rtx_equal_p (operands[6], operands[0])
9622 ? (rtx_equal_p (operands[7], operands[1])
9623 && rtx_equal_p (operands[8], operands[2]))
9624 : (rtx_equal_p (operands[8], operands[0])
9625 && rtx_equal_p (operands[9], operands[1])
9626 && rtx_equal_p (operands[6], operands[2])))"
9627 [(set (match_dup 0) (match_dup 9))
9628 (parallel [(set (reg:CCC FLAGS_REG)
9629 (compare:CCC
9630 (zero_extend:<DWI> (match_dup 1))
9631 (plus:<DWI> (match_op_dup 4
9632 [(match_dup 3) (const_int 0)])
9633 (zero_extend:<DWI> (match_dup 0)))))
9634 (set (match_dup 1)
9635 (minus:SWI48 (minus:SWI48 (match_dup 1)
9636 (match_op_dup 5
9637 [(match_dup 3) (const_int 0)]))
9638 (match_dup 0)))])]
9639 {
9640 if (!rtx_equal_p (operands[6], operands[0]))
9641 operands[9] = operands[7];
9642 })
9643
;; Same transformation as the two-load subborrow read-modify-write
;; peephole, for the case where the borrow-out is also materialized
;; between the sbb and the store:
;;   reg6 = mem7;  reg8 = mem9;
;;   {flags, reg0} = subborrow (reg0, reg2);
;;   reg10:QI = ltu (flags);  reg11 = zero_extend (reg10);
;;   mem1 = reg0
;; The replacement loads the subtrahend into reg0, performs the sbb
;; directly on mem1, and then re-emits the two carry-extraction insns
;; unchanged.  In addition to the two-load conditions (with reg0 now
;; required dead after insn 6), reg10 and reg11 must not overlap reg0 and
;; must not be mentioned in mem1.
9644 (define_peephole2
9645 [(set (match_operand:SWI48 6 "general_reg_operand")
9646 (match_operand:SWI48 7 "memory_operand"))
9647 (set (match_operand:SWI48 8 "general_reg_operand")
9648 (match_operand:SWI48 9 "memory_operand"))
9649 (parallel [(set (reg:CCC FLAGS_REG)
9650 (compare:CCC
9651 (zero_extend:<DWI>
9652 (match_operand:SWI48 0 "general_reg_operand"))
9653 (plus:<DWI>
9654 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9655 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9656 (zero_extend:<DWI>
9657 (match_operand:SWI48 2 "general_reg_operand")))))
9658 (set (match_dup 0)
9659 (minus:SWI48
9660 (minus:SWI48
9661 (match_dup 0)
9662 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9663 [(match_dup 3) (const_int 0)]))
9664 (match_dup 2)))])
9665 (set (match_operand:QI 10 "general_reg_operand")
9666 (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9667 (set (match_operand:SWI48 11 "general_reg_operand")
9668 (zero_extend:SWI48 (match_dup 10)))
9669 (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
9670 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9671 && peep2_reg_dead_p (6, operands[0])
9672 && peep2_reg_dead_p (3, operands[2])
9673 && !reg_overlap_mentioned_p (operands[0], operands[1])
9674 && !reg_overlap_mentioned_p (operands[2], operands[1])
9675 && !reg_overlap_mentioned_p (operands[6], operands[9])
9676 && !reg_overlap_mentioned_p (operands[0], operands[10])
9677 && !reg_overlap_mentioned_p (operands[10], operands[1])
9678 && !reg_overlap_mentioned_p (operands[0], operands[11])
9679 && !reg_overlap_mentioned_p (operands[11], operands[1])
9680 && (rtx_equal_p (operands[6], operands[0])
9681 ? (rtx_equal_p (operands[7], operands[1])
9682 && rtx_equal_p (operands[8], operands[2]))
9683 : (rtx_equal_p (operands[8], operands[0])
9684 && rtx_equal_p (operands[9], operands[1])
9685 && rtx_equal_p (operands[6], operands[2])))"
9686 [(set (match_dup 0) (match_dup 9))
9687 (parallel [(set (reg:CCC FLAGS_REG)
9688 (compare:CCC
9689 (zero_extend:<DWI> (match_dup 1))
9690 (plus:<DWI> (match_op_dup 4
9691 [(match_dup 3) (const_int 0)])
9692 (zero_extend:<DWI> (match_dup 0)))))
9693 (set (match_dup 1)
9694 (minus:SWI48 (minus:SWI48 (match_dup 1)
9695 (match_op_dup 5
9696 [(match_dup 3) (const_int 0)]))
9697 (match_dup 0)))])
9698 (set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9699 (set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))]
9700 {
9701 if (!rtx_equal_p (operands[6], operands[0]))
9702 operands[9] = operands[7];
9703 })
9704
;; Subtract without a borrow-in.  Expands to a parallel that sets
;; FLAGS_REG (CCmode) from comparing op1 with op2 and sets
;; op0 = op1 - op2.  The usubc<mode>5 expander uses this when its
;; carry-in operand is constant zero.
9705 (define_expand "subborrow<mode>_0"
9706 [(parallel
9707 [(set (reg:CC FLAGS_REG)
9708 (compare:CC
9709 (match_operand:SWI48 1 "nonimmediate_operand")
9710 (match_operand:SWI48 2 "<general_operand>")))
9711 (set (match_operand:SWI48 0 "register_operand")
9712 (minus:SWI48 (match_dup 1) (match_dup 2)))])]
9713 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)")
9714
;; Unsigned add with carry-in and carry-out.
;;   operand 0: sum            operand 1: carry-out (zero-extended QI setcc)
;;   operands 2, 3: addends    operand 4: carry-in
;; When the carry-in is the constant zero, addcarry<mode>_0 is emitted.
;; Otherwise ix86_expand_carry is called on operand 4 and addcarry<mode>
;; is emitted with LTU tests of the CCCmode flags register in <DWI> and
;; <MODE> modes.  In both cases the carry-out is then read with an LTU
;; test of the CCCmode flags into a fresh QImode pseudo and zero-extended
;; into operand 1.
9715 (define_expand "uaddc<mode>5"
9716 [(match_operand:SWI48 0 "register_operand")
9717 (match_operand:SWI48 1 "register_operand")
9718 (match_operand:SWI48 2 "register_operand")
9719 (match_operand:SWI48 3 "register_operand")
9720 (match_operand:SWI48 4 "nonmemory_operand")]
9721 ""
9722 {
9723 rtx cf = gen_rtx_REG (CCCmode, FLAGS_REG), pat, pat2;
9724 if (operands[4] == const0_rtx)
9725 emit_insn (gen_addcarry<mode>_0 (operands[0], operands[2], operands[3]));
9726 else
9727 {
9728 ix86_expand_carry (operands[4]);
9729 pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx);
9730 pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx);
9731 emit_insn (gen_addcarry<mode> (operands[0], operands[2], operands[3],
9732 cf, pat, pat2));
9733 }
9734 rtx cc = gen_reg_rtx (QImode);
9735 pat = gen_rtx_LTU (QImode, cf, const0_rtx);
9736 emit_insn (gen_rtx_SET (cc, pat));
9737 emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc));
9738 DONE;
9739 })
9740
;; Unsigned subtract with borrow-in and borrow-out.
;;   operand 0: difference     operand 1: borrow-out (zero-extended QI setcc)
;;   operand 2: minuend        operand 3: subtrahend
;;   operand 4: borrow-in
;; When the borrow-in is the constant zero, subborrow<mode>_0 is emitted
;; and the borrow-out is read from the flags register in CCmode (the mode
;; that expander sets).  Otherwise ix86_expand_carry is called on operand 4,
;; subborrow<mode> is emitted, and the flags register is read in CCCmode.
;; The borrow-out is an LTU test of the flags stored to a fresh QImode
;; pseudo and zero-extended into operand 1.
9741 (define_expand "usubc<mode>5"
9742 [(match_operand:SWI48 0 "register_operand")
9743 (match_operand:SWI48 1 "register_operand")
9744 (match_operand:SWI48 2 "register_operand")
9745 (match_operand:SWI48 3 "register_operand")
9746 (match_operand:SWI48 4 "nonmemory_operand")]
9747 ""
9748 {
9749 rtx cf, pat, pat2;
9750 if (operands[4] == const0_rtx)
9751 {
9752 cf = gen_rtx_REG (CCmode, FLAGS_REG);
9753 emit_insn (gen_subborrow<mode>_0 (operands[0], operands[2],
9754 operands[3]));
9755 }
9756 else
9757 {
9758 cf = gen_rtx_REG (CCCmode, FLAGS_REG);
9759 ix86_expand_carry (operands[4]);
9760 pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx);
9761 pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx);
9762 emit_insn (gen_subborrow<mode> (operands[0], operands[2], operands[3],
9763 cf, pat, pat2));
9764 }
9765 rtx cc = gen_reg_rtx (QImode);
9766 pat = gen_rtx_LTU (QImode, cf, const0_rtx);
9767 emit_insn (gen_rtx_SET (cc, pat));
9768 emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc));
9769 DONE;
9770 })
9771
;; Iterates over the CC and CCC flags modes, so that the splitters which
;; follow match a flags-register source in either mode.
9772 (define_mode_iterator CC_CCC [CC CCC])
9773
9774 ;; Pre-reload splitter to optimize
9775 ;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI
9776 ;; operand and no intervening flags modifications into nothing.
;; The combined form matched here compares (neg (geu flags 0)) with
;; (ltu flags 0) and writes the result back to the CCCmode flags.  It is
;; only valid while ix86_pre_reload_split () holds, and it always splits
;; ("&& 1") into no insns at all, leaving just a NOTE_INSN_DELETED.
9777 (define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>"
9778 [(set (reg:CCC FLAGS_REG)
9779 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
9780 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))]
9781 "ix86_pre_reload_split ()"
9782 "#"
9783 "&& 1"
9784 [(const_int 0)]
9785 "emit_note (NOTE_INSN_DELETED); DONE;")
9786
9787 ;; Set the carry flag from the carry flag.
;; This is a plain copy of the CCCmode flags register onto itself.  It is
;; only valid while ix86_pre_reload_split () holds, and it always splits
;; ("&& 1") into no insns, leaving just a NOTE_INSN_DELETED.
9788 (define_insn_and_split "*setccc"
9789 [(set (reg:CCC FLAGS_REG)
9790 (reg:CCC FLAGS_REG))]
9791 "ix86_pre_reload_split ()"
9792 "#"
9793 "&& 1"
9794 [(const_int 0)]
9795 "emit_note (NOTE_INSN_DELETED); DONE;")
9796
9797 ;; Set the carry flag from the carry flag.
;; Here the source is written as an LTU test of the flags register (in CC
;; or CCC mode) evaluated in CCCmode.  Only valid while
;; ix86_pre_reload_split () holds; always splits ("&& 1") into no insns,
;; leaving just a NOTE_INSN_DELETED.
9798 (define_insn_and_split "*setcc_qi_negqi_ccc_1_<mode>"
9799 [(set (reg:CCC FLAGS_REG)
9800 (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))]
9801 "ix86_pre_reload_split ()"
9802 "#"
9803 "&& 1"
9804 [(const_int 0)]
9805 "emit_note (NOTE_INSN_DELETED); DONE;")
9806
9807 ;; Set the carry flag from the carry flag.
;; Here the source is an UNSPEC_CC_NE of the QImode LTU test of the flags
;; register (in CC or CCC mode) and constant 0.  Only valid while
;; ix86_pre_reload_split () holds; always splits ("&& 1") into no insns,
;; leaving just a NOTE_INSN_DELETED.
9808 (define_insn_and_split "*setcc_qi_negqi_ccc_2_<mode>"
9809 [(set (reg:CCC FLAGS_REG)
9810 (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
9811 (const_int 0)] UNSPEC_CC_NE))]
9812 "ix86_pre_reload_split ()"
9813 "#"
9814 "&& 1"
9815 [(const_int 0)]
9816 "emit_note (NOTE_INSN_DELETED); DONE;")
9817 \f
9818 ;; Overflow setting add instructions
9819
;; Flags-only QImode add used to detect unsigned overflow: sets the
;; CCCmode flags from comparing (op0 + op1) with op0, and clobbers a
;; QImode scratch for the unused sum.  Rejected when both operands are
;; memory.
9820 (define_expand "addqi3_cconly_overflow"
9821 [(parallel
9822 [(set (reg:CCC FLAGS_REG)
9823 (compare:CCC
9824 (plus:QI
9825 (match_operand:QI 0 "nonimmediate_operand")
9826 (match_operand:QI 1 "general_operand"))
9827 (match_dup 0)))
9828 (clobber (scratch:QI))])]
9829 "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
9830
;; Flags-only add: sets the CCCmode flags from comparing (op1 + op2) with
;; op1 (the first addend); the sum itself goes to scratch operand 0.
;; The two sources may not both be memory.  Alternative 0 ties operand 1
;; to the scratch (two-operand add); alternatives 1 and 2 are tagged
;; "apx_ndd" and use the three-operand form.
9831 (define_insn "*add<mode>3_cconly_overflow_1"
9832 [(set (reg:CCC FLAGS_REG)
9833 (compare:CCC
9834 (plus:SWI
9835 (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
9836 (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
9837 (match_dup 1)))
9838 (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
9839 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9840 "@
9841 add{<imodesuffix>}\t{%2, %0|%0, %2}
9842 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9843 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9844 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9845 (set_attr "type" "alu")
9846 (set_attr "mode" "<MODE>")])
9847
;; Add that produces both the sum and the overflow flags: sets the CCCmode
;; flags from comparing (op1 + op2) with op1, and sets op0 = op1 + op2.
;; Subject to ix86_binary_operator_ok.  Alternatives 0 and 1 tie operand 1
;; to the destination (two-operand add); alternatives 2-4 are tagged
;; "apx_ndd" and use the three-operand form.  The usadd<mode>3 expander
;; calls the generator for this pattern.
9848 (define_insn "@add<mode>3_cc_overflow_1"
9849 [(set (reg:CCC FLAGS_REG)
9850 (compare:CCC
9851 (plus:SWI
9852 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,rjM,r")
9853 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r,<i>,<m>"))
9854 (match_dup 1)))
9855 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r")
9856 (plus:SWI (match_dup 1) (match_dup 2)))]
9857 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9858 "@
9859 add{<imodesuffix>}\t{%2, %0|%0, %2}
9860 add{<imodesuffix>}\t{%2, %0|%0, %2}
9861 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9862 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9863 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9864 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")
9865 (set_attr "type" "alu")
9866 (set_attr "mode" "<MODE>")])
9867
;; Turn
;;   {flags, reg0} = add-with-overflow (reg0, mem1);  mem1 = reg0
;; into a single add-with-overflow performed directly on mem1 with reg0
;; as the other addend.  Note that the flags comparison changes from
;; "sum vs. reg0" to "sum vs. mem1".  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size, reg0
;; is dead after the second matched insn, and reg0 is not mentioned in
;; mem1.
9868 (define_peephole2
9869 [(parallel [(set (reg:CCC FLAGS_REG)
9870 (compare:CCC
9871 (plus:SWI (match_operand:SWI 0 "general_reg_operand")
9872 (match_operand:SWI 1 "memory_operand"))
9873 (match_dup 0)))
9874 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
9875 (set (match_dup 1) (match_dup 0))]
9876 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9877 && peep2_reg_dead_p (2, operands[0])
9878 && !reg_overlap_mentioned_p (operands[0], operands[1])"
9879 [(parallel [(set (reg:CCC FLAGS_REG)
9880 (compare:CCC
9881 (plus:SWI (match_dup 1) (match_dup 0))
9882 (match_dup 1)))
9883 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
9884
;; Turn
;;   reg0 = mem1;  {flags, reg0} = add-with-overflow (reg0, mem2);
;;   mem1 = reg0
;; into
;;   reg0 = mem2;  {flags, mem1} = add-with-overflow (mem1, reg0)
;; i.e. a read-modify-write add directly on mem1.  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size, reg0
;; is dead after the third matched insn, and reg0 is not mentioned in
;; either memory operand.
9885 (define_peephole2
9886 [(set (match_operand:SWI 0 "general_reg_operand")
9887 (match_operand:SWI 1 "memory_operand"))
9888 (parallel [(set (reg:CCC FLAGS_REG)
9889 (compare:CCC
9890 (plus:SWI (match_dup 0)
9891 (match_operand:SWI 2 "memory_operand"))
9892 (match_dup 0)))
9893 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 2)))])
9894 (set (match_dup 1) (match_dup 0))]
9895 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9896 && peep2_reg_dead_p (3, operands[0])
9897 && !reg_overlap_mentioned_p (operands[0], operands[1])
9898 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9899 [(set (match_dup 0) (match_dup 2))
9900 (parallel [(set (reg:CCC FLAGS_REG)
9901 (compare:CCC
9902 (plus:SWI (match_dup 1) (match_dup 0))
9903 (match_dup 1)))
9904 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
9905
;; SImode add-with-overflow whose sum is zero-extended to DImode: sets the
;; CCCmode flags from comparing (op1 + op2) with op1, and sets
;; op0 = zero_extend:DI (op1 + op2).  Requires TARGET_64BIT and
;; ix86_binary_operator_ok.  Alternative 0 is the two-operand add{l};
;; alternatives 1 and 2 are tagged "apx_ndd" (three-operand form).
9906 (define_insn "*addsi3_zext_cc_overflow_1"
9907 [(set (reg:CCC FLAGS_REG)
9908 (compare:CCC
9909 (plus:SI
9910 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
9911 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
9912 (match_dup 1)))
9913 (set (match_operand:DI 0 "register_operand" "=r,r,r")
9914 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
9915 "TARGET_64BIT
9916 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
9917 "@
9918 add{l}\t{%2, %k0|%k0, %2}
9919 add{l}\t{%2, %1, %k0|%k0, %1, %2}
9920 add{l}\t{%2, %1, %k0|%k0, %1, %2}"
9921 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9922 (set_attr "type" "alu")
9923 (set_attr "mode" "SI")])
9924
;; Same as *add<mode>3_cconly_overflow_1 except that the sum is compared
;; against op2 (the second addend) instead of op1.  The sum goes to
;; scratch operand 0; the two sources may not both be memory.
;; Alternatives 1 and 2 are tagged "apx_ndd" (three-operand form).
9925 (define_insn "*add<mode>3_cconly_overflow_2"
9926 [(set (reg:CCC FLAGS_REG)
9927 (compare:CCC
9928 (plus:SWI
9929 (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
9930 (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
9931 (match_dup 2)))
9932 (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
9933 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9934 "@
9935 add{<imodesuffix>}\t{%2, %0|%0, %2}
9936 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9937 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9938 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9939 (set_attr "type" "alu")
9940 (set_attr "mode" "<MODE>")])
9941
;; Same as add<mode>3_cc_overflow_1 except that the sum is compared
;; against op2 (the second addend) instead of op1.  Sets the CCCmode flags
;; and op0 = op1 + op2, subject to ix86_binary_operator_ok.
;; Alternatives 0 and 1 are the two-operand add; alternatives 2 and 3 are
;; tagged "apx_ndd" (three-operand form).
9942 (define_insn "*add<mode>3_cc_overflow_2"
9943 [(set (reg:CCC FLAGS_REG)
9944 (compare:CCC
9945 (plus:SWI
9946 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
9947 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
9948 (match_dup 2)))
9949 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
9950 (plus:SWI (match_dup 1) (match_dup 2)))]
9951 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9952 "@
9953 add{<imodesuffix>}\t{%2, %0|%0, %2}
9954 add{<imodesuffix>}\t{%2, %0|%0, %2}
9955 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9956 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9957 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9958 (set_attr "type" "alu")
9959 (set_attr "mode" "<MODE>")])
9960
;; Same as *addsi3_zext_cc_overflow_1 except that the SImode sum is
;; compared against op2 (the second addend) instead of op1.  Sets the
;; CCCmode flags and op0 = zero_extend:DI (op1 + op2).  Requires
;; TARGET_64BIT and ix86_binary_operator_ok; alternatives 1 and 2 are
;; tagged "apx_ndd" (three-operand form).
9961 (define_insn "*addsi3_zext_cc_overflow_2"
9962 [(set (reg:CCC FLAGS_REG)
9963 (compare:CCC
9964 (plus:SI
9965 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
9966 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
9967 (match_dup 2)))
9968 (set (match_operand:DI 0 "register_operand" "=r,r,r")
9969 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
9970 "TARGET_64BIT
9971 && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
9972 "@
9973 add{l}\t{%2, %k0|%k0, %2}
9974 add{l}\t{%2, %1, %k0|%k0, %1, %2}
9975 add{l}\t{%2, %1, %k0|%k0, %1, %2}"
9976 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9977 (set_attr "type" "alu")
9978 (set_attr "mode" "SI")])
9979
;; Double-word add that also sets the CCCmode overflow flags (sum compared
;; with op1).  It is never output directly ("#"); after reload it is split
;; into a low-half add-with-overflow followed by a high-half
;; add-with-carry that produces the final flags.
;; The preparation code calls split_double_mode on operands 0-2, passing
;; &operands[0] for the low parts and &operands[3] for the high parts, so
;; after it operands[0..2] are the low halves and operands[3..5] the high
;; halves.  If the low half of the second addend is constant zero, the low
;; half is just copied (when source and destination differ) and only
;; addcarry<mode>_0 is emitted for the high halves.  Otherwise operands[6]
;; becomes the zero-extension of the high second addend to <DWI> mode
;; (folded to a constant when that half is a CONST_INT) for the compare in
;; the second parallel.
9980 (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
9981 [(set (reg:CCC FLAGS_REG)
9982 (compare:CCC
9983 (plus:<DWI>
9984 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
9985 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,K,<di>,o"))
9986 (match_dup 1)))
9987 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
9988 (plus:<DWI> (match_dup 1) (match_dup 2)))]
9989 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
9990 "#"
9991 "&& reload_completed"
9992 [(parallel [(set (reg:CCC FLAGS_REG)
9993 (compare:CCC
9994 (plus:DWIH (match_dup 1) (match_dup 2))
9995 (match_dup 1)))
9996 (set (match_dup 0)
9997 (plus:DWIH (match_dup 1) (match_dup 2)))])
9998 (parallel [(set (reg:CCC FLAGS_REG)
9999 (compare:CCC
10000 (zero_extend:<DWI>
10001 (plus:DWIH
10002 (plus:DWIH
10003 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
10004 (match_dup 4))
10005 (match_dup 5)))
10006 (plus:<DWI>
10007 (match_dup 6)
10008 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))))
10009 (set (match_dup 3)
10010 (plus:DWIH
10011 (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
10012 (match_dup 4))
10013 (match_dup 5)))])]
10014 {
10015 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
10016 if (operands[2] == const0_rtx)
10017 {
10018 if (!rtx_equal_p (operands[0], operands[1]))
10019 emit_move_insn (operands[0], operands[1]);
10020 emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
10021 DONE;
10022 }
10023 if (CONST_INT_P (operands[5]))
10024 operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode,
10025 operands[5], <MODE>mode);
10026 else
10027 operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
10028 }
10029 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
10030
10031 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
10032 ;; test, where the latter is preferable if we have some carry consuming
10033 ;; instruction.
10034 ;; For x != 0, we need to use x < 1U with negation of carry, i.e.
10035 ;; + (1 - CF).
;; This pattern handles op0 = (op3 == 0) + op1 + op2.  Before reload it is
;; split into a compare of op3 with 1, followed by an add of
;; (carry + op1 + op2), so the equality test is consumed as a carry-in.
10036 (define_insn_and_split "*add<mode>3_eq"
10037 [(set (match_operand:SWI 0 "nonimmediate_operand")
10038 (plus:SWI
10039 (plus:SWI
10040 (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
10041 (match_operand:SWI 1 "nonimmediate_operand"))
10042 (match_operand:SWI 2 "<general_operand>")))
10043 (clobber (reg:CC FLAGS_REG))]
10044 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
10045 && ix86_pre_reload_split ()"
10046 "#"
10047 "&& 1"
10048 [(set (reg:CC FLAGS_REG)
10049 (compare:CC (match_dup 3) (const_int 1)))
10050 (parallel [(set (match_dup 0)
10051 (plus:SWI
10052 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
10053 (match_dup 1))
10054 (match_dup 2)))
10055 (clobber (reg:CC FLAGS_REG))])])
10056
;; op0 = (op3 != 0) + op1 + C, with C a CONST_INT.  Split before reload
;; into a compare of op3 with 1 followed by (op1 - carry) - ~C.
;; After the compare the carry is set exactly when op3 == 0, so
;; (op3 != 0) == 1 - carry, and op1 + 1 - carry + C == op1 - carry - ~C
;; because ~C == -C - 1.  The preparation code replaces operand 2 with ~C,
;; generated in SImode when the operation mode is DImode.  For DImode the
;; constant -0x80000000 is rejected by the condition.
10057 (define_insn_and_split "*add<mode>3_ne"
10058 [(set (match_operand:SWI 0 "nonimmediate_operand")
10059 (plus:SWI
10060 (plus:SWI
10061 (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
10062 (match_operand:SWI 1 "nonimmediate_operand"))
10063 (match_operand:SWI 2 "<immediate_operand>")))
10064 (clobber (reg:CC FLAGS_REG))]
10065 "CONST_INT_P (operands[2])
10066 && (<MODE>mode != DImode
10067 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
10068 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
10069 && ix86_pre_reload_split ()"
10070 "#"
10071 "&& 1"
10072 [(set (reg:CC FLAGS_REG)
10073 (compare:CC (match_dup 3) (const_int 1)))
10074 (parallel [(set (match_dup 0)
10075 (minus:SWI
10076 (minus:SWI (match_dup 1)
10077 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
10078 (match_dup 2)))
10079 (clobber (reg:CC FLAGS_REG))])]
10080 {
10081 operands[2] = gen_int_mode (~INTVAL (operands[2]),
10082 <MODE>mode == DImode ? SImode : <MODE>mode);
10083 })
10084
;; op0 = (op2 == 0) + op1.  Split before reload into a compare of op2
;; with 1 followed by an add of (carry + op1).  Operand 1 is accepted as
;; a general operand; the preparation code forces it into a register when
;; it is not a nonimmediate_operand.
10085 (define_insn_and_split "*add<mode>3_eq_0"
10086 [(set (match_operand:SWI 0 "nonimmediate_operand")
10087 (plus:SWI
10088 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
10089 (match_operand:SWI 1 "<general_operand>")))
10090 (clobber (reg:CC FLAGS_REG))]
10091 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
10092 && ix86_pre_reload_split ()"
10093 "#"
10094 "&& 1"
10095 [(set (reg:CC FLAGS_REG)
10096 (compare:CC (match_dup 2) (const_int 1)))
10097 (parallel [(set (match_dup 0)
10098 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
10099 (match_dup 1)))
10100 (clobber (reg:CC FLAGS_REG))])]
10101 {
10102 if (!nonimmediate_operand (operands[1], <MODE>mode))
10103 operands[1] = force_reg (<MODE>mode, operands[1]);
10104 })
10105
;; op0 = (op2 != 0) + op1.  Split before reload into a compare of op2
;; with 1 followed by (op1 - carry) - (-1): with carry set exactly when
;; op2 == 0, (op2 != 0) == 1 - carry.  The preparation code forces
;; operand 1 into a register when it is not a nonimmediate_operand.
10106 (define_insn_and_split "*add<mode>3_ne_0"
10107 [(set (match_operand:SWI 0 "nonimmediate_operand")
10108 (plus:SWI
10109 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
10110 (match_operand:SWI 1 "<general_operand>")))
10111 (clobber (reg:CC FLAGS_REG))]
10112 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
10113 && ix86_pre_reload_split ()"
10114 "#"
10115 "&& 1"
10116 [(set (reg:CC FLAGS_REG)
10117 (compare:CC (match_dup 2) (const_int 1)))
10118 (parallel [(set (match_dup 0)
10119 (minus:SWI (minus:SWI
10120 (match_dup 1)
10121 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
10122 (const_int -1)))
10123 (clobber (reg:CC FLAGS_REG))])]
10124 {
10125 if (!nonimmediate_operand (operands[1], <MODE>mode))
10126 operands[1] = force_reg (<MODE>mode, operands[1]);
10127 })
10128
;; op0 = (op1 - (op3 == 0)) - op2.  Split before reload into a compare of
;; op3 with 1 followed by (op1 - carry) - op2, so the equality test is
;; consumed as a borrow-in.
10129 (define_insn_and_split "*sub<mode>3_eq"
10130 [(set (match_operand:SWI 0 "nonimmediate_operand")
10131 (minus:SWI
10132 (minus:SWI
10133 (match_operand:SWI 1 "nonimmediate_operand")
10134 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
10135 (const_int 0)))
10136 (match_operand:SWI 2 "<general_operand>")))
10137 (clobber (reg:CC FLAGS_REG))]
10138 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
10139 && ix86_pre_reload_split ()"
10140 "#"
10141 "&& 1"
10142 [(set (reg:CC FLAGS_REG)
10143 (compare:CC (match_dup 3) (const_int 1)))
10144 (parallel [(set (match_dup 0)
10145 (minus:SWI
10146 (minus:SWI (match_dup 1)
10147 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
10148 (match_dup 2)))
10149 (clobber (reg:CC FLAGS_REG))])])
10150
;; op0 = (op1 - (op3 != 0)) + C, with C a CONST_INT.  Split before reload
;; into a compare of op3 with 1 followed by carry + op1 + (C - 1).
;; With carry set exactly when op3 == 0, (op3 != 0) == 1 - carry, hence
;; op1 - (1 - carry) + C == carry + op1 + (C - 1).  The preparation code
;; replaces operand 2 with C - 1, generated in SImode when the operation
;; mode is DImode.  For DImode the constant -0x80000000 is rejected by the
;; condition.
10151 (define_insn_and_split "*sub<mode>3_ne"
10152 [(set (match_operand:SWI 0 "nonimmediate_operand")
10153 (plus:SWI
10154 (minus:SWI
10155 (match_operand:SWI 1 "nonimmediate_operand")
10156 (ne:SWI (match_operand 3 "int_nonimmediate_operand")
10157 (const_int 0)))
10158 (match_operand:SWI 2 "<immediate_operand>")))
10159 (clobber (reg:CC FLAGS_REG))]
10160 "CONST_INT_P (operands[2])
10161 && (<MODE>mode != DImode
10162 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
10163 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
10164 && ix86_pre_reload_split ()"
10165 "#"
10166 "&& 1"
10167 [(set (reg:CC FLAGS_REG)
10168 (compare:CC (match_dup 3) (const_int 1)))
10169 (parallel [(set (match_dup 0)
10170 (plus:SWI
10171 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
10172 (match_dup 1))
10173 (match_dup 2)))
10174 (clobber (reg:CC FLAGS_REG))])]
10175 {
10176 operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
10177 <MODE>mode == DImode ? SImode : <MODE>mode);
10178 })
10179
;; op0 = (op1 - (op3 == 0)) + C, with C a CONST_INT.  Split before reload
;; into a compare of op3 with 1 followed by (op1 - carry) - (-C).
;; The preparation code replaces operand 2 with -C, generated in SImode
;; when the operation mode is DImode.  For DImode the constant
;; -0x80000000 is rejected by the condition.
10180 (define_insn_and_split "*sub<mode>3_eq_1"
10181 [(set (match_operand:SWI 0 "nonimmediate_operand")
10182 (plus:SWI
10183 (minus:SWI
10184 (match_operand:SWI 1 "nonimmediate_operand")
10185 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
10186 (const_int 0)))
10187 (match_operand:SWI 2 "<immediate_operand>")))
10188 (clobber (reg:CC FLAGS_REG))]
10189 "CONST_INT_P (operands[2])
10190 && (<MODE>mode != DImode
10191 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
10192 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
10193 && ix86_pre_reload_split ()"
10194 "#"
10195 "&& 1"
10196 [(set (reg:CC FLAGS_REG)
10197 (compare:CC (match_dup 3) (const_int 1)))
10198 (parallel [(set (match_dup 0)
10199 (minus:SWI
10200 (minus:SWI (match_dup 1)
10201 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
10202 (match_dup 2)))
10203 (clobber (reg:CC FLAGS_REG))])]
10204 {
10205 operands[2] = gen_int_mode (-INTVAL (operands[2]),
10206 <MODE>mode == DImode ? SImode : <MODE>mode);
10207 })
10208
;; op0 = op1 - (op2 == 0).  Split before reload into a compare of op2
;; with 1 followed by op1 - carry.  The preparation code forces operand 1
;; into a register when it is not a nonimmediate_operand.
10209 (define_insn_and_split "*sub<mode>3_eq_0"
10210 [(set (match_operand:SWI 0 "nonimmediate_operand")
10211 (minus:SWI
10212 (match_operand:SWI 1 "<general_operand>")
10213 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
10214 (clobber (reg:CC FLAGS_REG))]
10215 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
10216 && ix86_pre_reload_split ()"
10217 "#"
10218 "&& 1"
10219 [(set (reg:CC FLAGS_REG)
10220 (compare:CC (match_dup 2) (const_int 1)))
10221 (parallel [(set (match_dup 0)
10222 (minus:SWI (match_dup 1)
10223 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
10224 (clobber (reg:CC FLAGS_REG))])]
10225 {
10226 if (!nonimmediate_operand (operands[1], <MODE>mode))
10227 operands[1] = force_reg (<MODE>mode, operands[1]);
10228 })
10229
;; op0 = op1 - (op2 != 0).  Split before reload into a compare of op2
;; with 1 followed by (carry + op1) + (-1): with carry set exactly when
;; op2 == 0, (op2 != 0) == 1 - carry.  The preparation code forces
;; operand 1 into a register when it is not a nonimmediate_operand.
10230 (define_insn_and_split "*sub<mode>3_ne_0"
10231 [(set (match_operand:SWI 0 "nonimmediate_operand")
10232 (minus:SWI
10233 (match_operand:SWI 1 "<general_operand>")
10234 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
10235 (clobber (reg:CC FLAGS_REG))]
10236 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
10237 && ix86_pre_reload_split ()"
10238 "#"
10239 "&& 1"
10240 [(set (reg:CC FLAGS_REG)
10241 (compare:CC (match_dup 2) (const_int 1)))
10242 (parallel [(set (match_dup 0)
10243 (plus:SWI (plus:SWI
10244 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
10245 (match_dup 1))
10246 (const_int -1)))
10247 (clobber (reg:CC FLAGS_REG))])]
10248 {
10249 if (!nonimmediate_operand (operands[1], <MODE>mode))
10250 operands[1] = force_reg (<MODE>mode, operands[1]);
10251 })
10252
;; Unsigned saturating addition.  The sum is computed into a fresh
;; pseudo by add<mode>3_cc_overflow_1; the flags it leaves behind then
;; select between that sum and all-ones.
(define_expand "usadd<mode>3"
  [(set (match_operand:SWI 0 "register_operand")
        (us_plus:SWI (match_operand:SWI 1 "register_operand")
                     (match_operand:SWI 2 "<general_operand>")))]
  ""
{
  rtx sum = gen_reg_rtx (<MODE>mode);
  rtx result;

  emit_insn (gen_add<mode>3_cc_overflow_1 (sum, operands[1], operands[2]));

  if (!TARGET_CMOVE)
    {
      /* No conditional move: OR the sum with a mask taken from the flags.
         NOTE(review): x86_mov<mode>cc_0_m1_neg presumably yields -1 when
         the carry flag is set and 0 otherwise -- confirm at its
         definition.  */
      rtx mask = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_x86_mov<mode>cc_0_m1_neg (mask));
      result = expand_simple_binop (<MODE>mode, IOR, sum, mask,
                                    operands[0], 1, OPTAB_WIDEN);
    }
  else
    {
      /* Keep the sum when the CCC-mode flags test GEU, otherwise
         substitute -1.  */
      rtx keep_sum = gen_rtx_GEU (VOIDmode,
                                  gen_rtx_REG (CCCmode, FLAGS_REG),
                                  const0_rtx);

      if (<MODE_SIZE> >= GET_MODE_SIZE (SImode))
        {
          result = operands[0];
          emit_insn (gen_mov<mode>cc (result, keep_sum, sum, constm1_rtx));
        }
      else
        {
          /* Modes narrower than SImode go through movsicc on the SImode
             low parts.  */
          result = force_reg (<MODE>mode, operands[0]);
          emit_insn (gen_movsicc (gen_lowpart (SImode, result), keep_sum,
                                  gen_lowpart (SImode, sum), constm1_rtx));
        }
    }

  if (!rtx_equal_p (result, operands[0]))
    emit_move_insn (operands[0], result);
  DONE;
})
10294
;; Unsigned saturating subtraction.  The difference is computed into a
;; fresh pseudo by sub<mode>_3; the flags it leaves behind then select
;; between that difference and zero.
(define_expand "ussub<mode>3"
  [(set (match_operand:SWI 0 "register_operand")
        (us_minus:SWI (match_operand:SWI 1 "register_operand")
                      (match_operand:SWI 2 "<general_operand>")))]
  ""
{
  rtx diff = gen_reg_rtx (<MODE>mode);
  rtx result;

  emit_insn (gen_sub<mode>_3 (diff, operands[1], operands[2]));

  if (!TARGET_CMOVE)
    {
      /* No conditional move: AND the difference with the complement of a
         mask taken from the flags.
         NOTE(review): x86_mov<mode>cc_0_m1_neg presumably yields -1 when
         the carry flag is set and 0 otherwise -- confirm at its
         definition.  */
      rtx mask = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_x86_mov<mode>cc_0_m1_neg (mask));
      mask = expand_simple_unop (<MODE>mode, NOT, mask, NULL, 1);
      result = expand_simple_binop (<MODE>mode, AND, diff, mask,
                                    operands[0], 1, OPTAB_WIDEN);
    }
  else
    {
      /* Keep the difference when the CCC-mode flags test GEU, otherwise
         substitute 0.  */
      rtx keep_diff = gen_rtx_GEU (VOIDmode,
                                   gen_rtx_REG (CCCmode, FLAGS_REG),
                                   const0_rtx);

      if (<MODE_SIZE> >= GET_MODE_SIZE (SImode))
        {
          result = operands[0];
          emit_insn (gen_mov<mode>cc (result, keep_diff, diff, const0_rtx));
        }
      else
        {
          /* Modes narrower than SImode go through movsicc on the SImode
             low parts.  */
          result = force_reg (<MODE>mode, operands[0]);
          emit_insn (gen_movsicc (gen_lowpart (SImode, result), keep_diff,
                                  gen_lowpart (SImode, diff), const0_rtx));
        }
    }

  if (!rtx_equal_p (result, operands[0]))
    emit_move_insn (operands[0], result);
  DONE;
})
10337
;; Unsigned saturating truncation from DImode to a narrower integer mode.
;; The us_truncate in the template carries the destination mode: it names
;; the mode truncated *to*, not the source mode.  The template is never
;; emitted (the preparation code always ends in DONE), so this only keeps
;; the RTL well formed.
(define_expand "ustruncdi<mode>2"
  [(set (match_operand:SWI124 0 "register_operand")
        (us_truncate:SWI124 (match_operand:DI 1 "nonimmediate_operand")))]
  "TARGET_64BIT"
{
  rtx op1 = force_reg (DImode, operands[1]);
  /* Saturation value: GET_MODE_MASK of the destination mode, held in a
     DImode register so it can be compared against the source.  */
  rtx sat = force_reg (DImode, GEN_INT (GET_MODE_MASK (<MODE>mode)));
  rtx dst;

  /* Set the flags from comparing SAT with OP1.  */
  emit_insn (gen_cmpdi_1 (sat, op1));

  if (TARGET_CMOVE)
    {
      /* Select OP1 when the CCC-mode flags test GEU, otherwise SAT.
         The selection is done by movsicc on SImode low parts.  */
      rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                             const0_rtx);

      dst = force_reg (<MODE>mode, operands[0]);
      emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
                              gen_lowpart (SImode, op1),
                              gen_lowpart (SImode, sat)));
    }
  else
    {
      /* No conditional move: OR the low part of OP1 with a mask taken
         from the flags.
         NOTE(review): x86_mov<mode>cc_0_m1_neg presumably yields -1 when
         the carry flag is set and 0 otherwise -- confirm at its
         definition.  */
      rtx msk = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk));
      dst = expand_simple_binop (<MODE>mode, IOR,
                                 gen_lowpart (<MODE>mode, op1), msk,
                                 operands[0], 1, OPTAB_WIDEN);
    }

  if (!rtx_equal_p (dst, operands[0]))
    emit_move_insn (operands[0], dst);
  DONE;
})
10373
;; Unsigned saturating truncation from SImode to QImode or HImode.
;; The us_truncate in the template carries the destination mode: it names
;; the mode truncated *to*, not the source mode.  The template is never
;; emitted (the preparation code always ends in DONE), so this only keeps
;; the RTL well formed.
(define_expand "ustruncsi<mode>2"
  [(set (match_operand:SWI12 0 "register_operand")
        (us_truncate:SWI12 (match_operand:SI 1 "nonimmediate_operand")))]
  ""
{
  rtx op1 = force_reg (SImode, operands[1]);
  /* Saturation value: GET_MODE_MASK of the destination mode, held in an
     SImode register so it can be compared against the source.  */
  rtx sat = force_reg (SImode, GEN_INT (GET_MODE_MASK (<MODE>mode)));
  rtx dst;

  /* Set the flags from comparing SAT with OP1.  */
  emit_insn (gen_cmpsi_1 (sat, op1));

  if (TARGET_CMOVE)
    {
      /* Select OP1 when the CCC-mode flags test GEU, otherwise SAT.
         The selection is done by movsicc on SImode low parts.  */
      rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                             const0_rtx);

      dst = force_reg (<MODE>mode, operands[0]);
      emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
                              gen_lowpart (SImode, op1),
                              gen_lowpart (SImode, sat)));
    }
  else
    {
      /* No conditional move: OR the low part of OP1 with a mask taken
         from the flags.
         NOTE(review): x86_mov<mode>cc_0_m1_neg presumably yields -1 when
         the carry flag is set and 0 otherwise -- confirm at its
         definition.  */
      rtx msk = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk));
      dst = expand_simple_binop (<MODE>mode, IOR,
                                 gen_lowpart (<MODE>mode, op1), msk,
                                 operands[0], 1, OPTAB_WIDEN);
    }

  if (!rtx_equal_p (dst, operands[0]))
    emit_move_insn (operands[0], dst);
  DONE;
})
10409
;; Unsigned saturating truncation from HImode to QImode.
;; The us_truncate in the template carries the destination mode: it names
;; the mode truncated *to*, not the source mode.  The template is never
;; emitted (the preparation code always ends in DONE), so this only keeps
;; the RTL well formed.
(define_expand "ustrunchiqi2"
  [(set (match_operand:QI 0 "register_operand")
        (us_truncate:QI (match_operand:HI 1 "nonimmediate_operand")))]
  ""
{
  rtx op1 = force_reg (HImode, operands[1]);
  /* Saturation value: GET_MODE_MASK (QImode), held in an HImode register
     so it can be compared against the source.  */
  rtx sat = force_reg (HImode, GEN_INT (GET_MODE_MASK (QImode)));
  rtx dst;

  /* Set the flags from comparing SAT with OP1.  */
  emit_insn (gen_cmphi_1 (sat, op1));

  if (TARGET_CMOVE)
    {
      /* Select OP1 when the CCC-mode flags test GEU, otherwise SAT.
         The selection is done by movsicc on SImode low parts.  */
      rtx cmp = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                             const0_rtx);

      dst = force_reg (QImode, operands[0]);
      emit_insn (gen_movsicc (gen_lowpart (SImode, dst), cmp,
                              gen_lowpart (SImode, op1),
                              gen_lowpart (SImode, sat)));
    }
  else
    {
      /* No conditional move: OR the low part of OP1 with a mask taken
         from the flags.
         NOTE(review): x86_movqicc_0_m1_neg presumably yields -1 when the
         carry flag is set and 0 otherwise -- confirm at its definition.  */
      rtx msk = gen_reg_rtx (QImode);

      emit_insn (gen_x86_movqicc_0_m1_neg (msk));
      dst = expand_simple_binop (QImode, IOR,
                                 gen_lowpart (QImode, op1), msk,
                                 operands[0], 1, OPTAB_WIDEN);
    }

  if (!rtx_equal_p (dst, operands[0]))
    emit_move_insn (operands[0], dst);
  DONE;
})
10445
10446 ;; The patterns that match these are at the end of this file.
10447
;; XFmode add/subtract expander (one per plusminus code); all three
;; operands are registers.  Enabled under TARGET_80387.
(define_expand "<insn>xf3"
  [(set (match_operand:XF 0 "register_operand")
        (plusminus:XF (match_operand:XF 1 "register_operand")
                      (match_operand:XF 2 "register_operand")))]
  "TARGET_80387")
10454
;; HFmode add/subtract expander (one per plusminus code); the second
;; source may be a register or memory.  Enabled under TARGET_AVX512FP16.
(define_expand "<insn>hf3"
  [(set (match_operand:HF 0 "register_operand")
        (plusminus:HF (match_operand:HF 1 "register_operand")
                      (match_operand:HF 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16")
10461
;; MODEF add/subtract expander (one per plusminus code and MODEF mode).
;; Enabled when either x87 arithmetic is enabled for the mode or the mode
;; is an SSE float mode and TARGET_SSE_MATH is set.
(define_expand "<insn><mode>3"
  [(set (match_operand:MODEF 0 "register_operand")
        (plusminus:MODEF (match_operand:MODEF 1 "register_operand")
                         (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
10469 \f
10470 ;; Multiply instructions
10471
;; Integer multiply expander for the SWIM248 modes.  The generated
;; pattern is a parallel of the multiply and a clobber of the flags.
(define_expand "mul<mode>3"
  [(parallel
     [(set (match_operand:SWIM248 0 "register_operand")
           (mult:SWIM248 (match_operand:SWIM248 1 "register_operand")
                         (match_operand:SWIM248 2 "<general_operand>")))
      (clobber (reg:CC FLAGS_REG))])])
10478
;; QImode multiply expander, available only under TARGET_QIMODE_MATH.
;; The generated pattern is a parallel of the multiply and a clobber of
;; the flags.
(define_expand "mulqi3"
  [(parallel
     [(set (match_operand:QI 0 "register_operand")
           (mult:QI (match_operand:QI 1 "register_operand")
                    (match_operand:QI 2 "nonimmediate_operand")))
      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH")
10486
10487 ;; On AMDFAM10
10488 ;; IMUL reg32/64, reg32/64, imm8 Direct
10489 ;; IMUL reg32/64, mem32/64, imm8 VectorPath
10490 ;; IMUL reg32/64, reg32/64, imm32 Direct
10491 ;; IMUL reg32/64, mem32/64, imm32 VectorPath
10492 ;; IMUL reg32/64, reg32/64 Direct
10493 ;; IMUL reg32/64, mem32/64 Direct
10494 ;;
10495 ;; On BDVER1, all above IMULs use DirectPath
10496 ;;
10497 ;; On AMDFAM10
10498 ;; IMUL reg16, reg16, imm8 VectorPath
10499 ;; IMUL reg16, mem16, imm8 VectorPath
10500 ;; IMUL reg16, reg16, imm16 VectorPath
10501 ;; IMUL reg16, mem16, imm16 VectorPath
10502 ;; IMUL reg16, reg16 Direct
10503 ;; IMUL reg16, mem16 Direct
10504 ;;
10505 ;; On BDVER1, all HI MULs use DoublePath
10506
;; imul for the SWIM248 modes.  Alternatives:
;;   0  three-operand form, "K" immediate
;;   1  three-operand form, "<i>" immediate
;;   2  two-operand form, destination tied to operand 1
;;   3  three-operand register/memory form (isa apx_ndd)
(define_insn "*mul<mode>3_1<nf_name>"
  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r,r")
        (mult:SWIM248
          (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0,r")
          (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r,<m>r")))]
  "(!MEM_P (operands[1]) || !MEM_P (operands[2]))
   && <nf_condition>"
  "@
   <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}
   <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "imul")
   (set_attr "isa" "*,*,*,apx_ndd")
   (set_attr "prefix_0f" "0,0,1,1")
   (set_attr "has_nf" "1")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")
               (eq_attr "alternative" "1")
                 (const_string "vector")
               (and (eq_attr "alternative" "2,3")
                    (ior (match_test "<MODE>mode == HImode")
                         (match_operand 1 "memory_operand")))
                 (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(and (eq_attr "alternative" "0,1")
                    (ior (match_test "<MODE>mode == HImode")
                         (match_operand 1 "memory_operand")))
                 (const_string "vector")]
              (const_string "direct")))
   (set (attr "bdver1_decode")
        (cond [(match_test "<MODE>mode == HImode")
                 (const_string "double")]
              (const_string "direct")))
   (set_attr "mode" "<MODE>")])
10545
;; HImode multiply by an immediate whose result is zero-extended into a
;; SWI48x register, emitted as imulzu.  Requires TARGET_APX_ZU.
;; Alternative 0 takes a "K" immediate, alternative 1 an "n" immediate.
(define_insn "*imulhi<mode>zu<nf_name>"
  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
        (zero_extend:SWI48x
          (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
                   (match_operand:HI 2 "immediate_operand" "K,n"))))]
  "TARGET_APX_ZU && <nf_condition>"
  "@
   <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
   <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
  [(set_attr "mode" "HI")
   (set_attr "type" "imul")])
10557
;; SImode imul whose result is zero-extended to DImode (64-bit only).
;; The destination is printed with %k0.  Alternatives:
;;   0  three-operand form, "K" immediate
;;   1  three-operand form, "e" immediate
;;   2  two-operand form, destination tied to operand 1
;;   3  three-operand register/memory form (isa apx_ndd)
(define_insn "*mulsi3_1_zext<nf_name>"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
          (mult:SI
            (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0,r")
            (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr,BMr"))))]
  "TARGET_64BIT
   && (!MEM_P (operands[1]) || !MEM_P (operands[2]))
   && <nf_condition>"
  "@
   <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
   <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
   <nf_prefix>imul{l}\t{%2, %k0|%k0, %2}
   <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "imul")
   (set_attr "isa" "*,*,*,apx_ndd")
   (set_attr "prefix_0f" "0,0,1,1")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")
               (eq_attr "alternative" "1")
                 (const_string "vector")
               (and (eq_attr "alternative" "2")
                    (match_operand 1 "memory_operand"))
                 (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(and (eq_attr "alternative" "0,1")
                    (match_operand 1 "memory_operand"))
                 (const_string "vector")]
              (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])
10590
;; On AMDFAM10 and BDVER1
;; MUL reg8 Direct
;; MUL mem8 Direct

;; QImode multiply via one-operand mul{b}.  Operand 1 is tied to the
;; destination, which is constrained to the "a" register class; only
;; operand 2 appears in the assembly output.
(define_insn "*mulqi3_1<nf_name>"
  [(set (match_operand:QI 0 "register_operand" "=a")
        (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
                 (match_operand:QI 2 "nonimmediate_operand" "qm")))]
  "TARGET_QIMODE_MATH
   && (!MEM_P (operands[1]) || !MEM_P (operands[2]))
   && <nf_condition>"
  "<nf_prefix>mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "has_nf" "1")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])
10613
;; Multiply with jump on overflow.
;; The first insn sets the CCO flags by comparing the double-width
;; product of the sign-extended inputs with the sign-extended narrow
;; product, and also stores the narrow product in operand 0.  The second
;; insn is a conditional jump to the label in operand 3 on that flags
;; value.  Operand 4 is the double-width form of operand 2 and is filled
;; in by the preparation code below.
(define_expand "mulv<mode>4"
  [(parallel
     [(set (reg:CCO FLAGS_REG)
           (eq:CCO
             (mult:<DWI>
               (sign_extend:<DWI>
                 (match_operand:SWI248 1 "register_operand"))
               (match_dup 4))
             (sign_extend:<DWI>
               (mult:SWI248 (match_dup 1)
                            (match_operand:SWI248 2 "<general_operand>")))))
      (set (match_operand:SWI248 0 "register_operand")
           (mult:SWI248 (match_dup 1) (match_dup 2)))])
   (set (pc)
        (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0))
                      (label_ref (match_operand 3))
                      (pc)))]
  ""
{
  /* A CONST_INT is used unchanged as the wide multiplier; any other
     operand is wrapped in a SIGN_EXTEND to the double-width mode.  */
  operands[4] = (CONST_INT_P (operands[2])
                 ? operands[2]
                 : gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]));
})
10638
;; Overflow-checking signed multiply for SWI48: sets the CCO flags and
;; stores the product.  Alternatives:
;;   0  three-operand form, "We" second operand
;;   1  two-operand form, destination tied to operand 1
;;   2  three-operand register/memory form (isa apx_ndd)
(define_insn "*mulv<mode>4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (mult:<DWI>
            (sign_extend:<DWI>
              (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0,r"))
            (sign_extend:<DWI>
              (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr,mr")))
          (sign_extend:<DWI>
            (mult:SWI48 (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI48 0 "register_operand" "=r,r,r")
        (mult:SWI48 (match_dup 1) (match_dup 2)))]
  "!MEM_P (operands[1]) || !MEM_P (operands[2])"
  "@
   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
   imul{<imodesuffix>}\t{%2, %0|%0, %2}
   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "imul")
   (set_attr "isa" "*,*,apx_ndd")
   (set_attr "prefix_0f" "0,1,1")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")
               (eq_attr "alternative" "0")
                 (const_string "vector")
               (and (eq_attr "alternative" "1,2")
                    (match_operand 1 "memory_operand"))
                 (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(and (eq_attr "alternative" "1,2")
                    (match_operand 1 "memory_operand"))
                 (const_string "vector")]
              (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "<MODE>")])
10674
;; Overflow-checking signed HImode multiply: sets the CCO flags and
;; stores the product.  Alternative 0 is the two-operand form with the
;; destination tied to operand 1; alternative 1 is the three-operand
;; form (isa apx_ndd).
(define_insn "*mulvhi4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (mult:SI
            (sign_extend:SI
              (match_operand:HI 1 "nonimmediate_operand" "%0,r"))
            (sign_extend:SI
              (match_operand:HI 2 "nonimmediate_operand" "mr,mr")))
          (sign_extend:SI
            (mult:HI (match_dup 1) (match_dup 2)))))
   (set (match_operand:HI 0 "register_operand" "=r,r")
        (mult:HI (match_dup 1) (match_dup 2)))]
  "!MEM_P (operands[1]) || !MEM_P (operands[2])"
  "@
   imul{w}\t{%2, %0|%0, %2}
   imul{w}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "imul")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "prefix_0f" "1")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "mode" "HI")])
10697
;; Overflow-checking signed multiply by a constant.  Operand 3 is the
;; double-width constant and operand 2 the narrow one; the insn condition
;; requires both to be the same integer value.  Alternative 0 takes a "K"
;; immediate, alternative 1 an "<i>" immediate; both print the same
;; three-operand imul.
(define_insn "*mulv<mode>4_1"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (mult:<DWI>
            (sign_extend:<DWI>
              (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
            (match_operand:<DWI> 3 "const_int_operand" "K,i"))
          (sign_extend:<DWI>
            (mult:SWI248
              (match_dup 1)
              (match_operand:SWI248 2 "<immediate_operand>" "K,<i>")))))
   (set (match_operand:SWI248 0 "register_operand" "=r,r")
        (mult:SWI248 (match_dup 1) (match_dup 2)))]
  "(!MEM_P (operands[1]) || !MEM_P (operands[2]))
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[3])"
  "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "imul")
   (set_attr "mode" "<MODE>")
   (set (attr "prefix_0f")
        (cond [(match_test "<MODE>mode == HImode")
                 (const_string "0")]
              (const_string "*")))
   ;; Immediate length: 1 for alternative 0, otherwise 4 for an 8-byte
   ;; mode, otherwise the mode size.
   (set (attr "length_immediate")
        (cond [(eq_attr "alternative" "0")
                 (const_string "1")
               (match_test "<MODE_SIZE> == 8")
                 (const_string "4")]
              (const_string "<MODE_SIZE>")))
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")
               (eq_attr "alternative" "1")
                 (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(ior (match_test "<MODE>mode == HImode")
                    (match_operand 1 "memory_operand"))
                 (const_string "vector")]
              (const_string "direct")))
   (set (attr "bdver1_decode")
        (cond [(match_test "<MODE>mode == HImode")
                 (const_string "double")]
              (const_string "direct")))])
10743
;; Unsigned multiply with jump on overflow.  The first insn sets the CCO
;; flags by comparing the double-width product of the zero-extended
;; inputs with the zero-extended narrow product, stores the narrow
;; product in operand 0 and clobbers a scratch.  The second insn is a
;; conditional jump to the label in operand 3 on that flags value.
(define_expand "umulv<mode>4"
  [(parallel
     [(set (reg:CCO FLAGS_REG)
           (eq:CCO
             (mult:<DWI>
               (zero_extend:<DWI>
                 (match_operand:SWI248 1 "nonimmediate_operand"))
               (zero_extend:<DWI>
                 (match_operand:SWI248 2 "nonimmediate_operand")))
             (zero_extend:<DWI>
               (mult:SWI248 (match_dup 1) (match_dup 2)))))
      (set (match_operand:SWI248 0 "register_operand")
           (mult:SWI248 (match_dup 1) (match_dup 2)))
      (clobber (scratch:SWI248))])
   (set (pc)
        (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0))
                      (label_ref (match_operand 3))
                      (pc)))]
  ""
{
  /* Both inputs may not be memory at once; in that case load the first
     one into a register.  */
  if (MEM_P (operands[1]))
    {
      if (MEM_P (operands[2]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
10767
;; Overflow-checking unsigned multiply via one-operand mul.  Operand 1 is
;; tied to the destination in the "a" register class; the scratch in the
;; "d" register class is clobbered.
(define_insn "*umulv<mode>4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (mult:<DWI>
            (zero_extend:<DWI>
              (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
            (zero_extend:<DWI>
              (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
          (zero_extend:<DWI>
            (mult:SWI248 (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI248 0 "register_operand" "=a")
        (mult:SWI248 (match_dup 1) (match_dup 2)))
   (clobber (match_scratch:SWI248 3 "=d"))]
  "!MEM_P (operands[1]) || !MEM_P (operands[2])"
  "mul{<imodesuffix>}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
10791
;; QImode multiply with jump on overflow, one expander per any_extend
;; code.  Only available under TARGET_QIMODE_MATH.  The first insn sets
;; the CCO flags and stores the QImode product; the second is a
;; conditional jump to the label in operand 3 on that flags value.
(define_expand "<u>mulvqi4"
  [(parallel
     [(set (reg:CCO FLAGS_REG)
           (eq:CCO
             (mult:HI
               (any_extend:HI
                 (match_operand:QI 1 "nonimmediate_operand"))
               (any_extend:HI
                 (match_operand:QI 2 "nonimmediate_operand")))
             (any_extend:HI
               (mult:QI (match_dup 1) (match_dup 2)))))
      (set (match_operand:QI 0 "register_operand")
           (mult:QI (match_dup 1) (match_dup 2)))])
   (set (pc)
        (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0))
                      (label_ref (match_operand 3))
                      (pc)))]
  "TARGET_QIMODE_MATH"
{
  /* Both inputs may not be memory at once; in that case load the first
     one into a register.  */
  if (MEM_P (operands[1]))
    {
      if (MEM_P (operands[2]))
        operands[1] = force_reg (QImode, operands[1]);
    }
})
10812
;; Overflow-checking QImode multiply via one-operand <sgnprefix>mul{b}.
;; Operand 1 is tied to the destination in the "a" register class.
(define_insn "*<u>mulvqi4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (mult:HI
            (any_extend:HI
              (match_operand:QI 1 "nonimmediate_operand" "%0"))
            (any_extend:HI
              (match_operand:QI 2 "nonimmediate_operand" "qm")))
          (any_extend:HI
            (mult:QI (match_dup 1) (match_dup 2)))))
   (set (match_operand:QI 0 "register_operand" "=a")
        (mult:QI (match_dup 1) (match_dup 2)))]
  "TARGET_QIMODE_MATH
   && (!MEM_P (operands[1]) || !MEM_P (operands[2]))"
  "<sgnprefix>mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "QI")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")])
10836
;; Widening multiply expander: two DWIH inputs, each extended with
;; any_extend, produce a <DWI> product.  The generated pattern clobbers
;; the flags.
(define_expand "<u>mul<mode><dwi>3"
  [(parallel
     [(set (match_operand:<DWI> 0 "register_operand")
           (mult:<DWI>
             (any_extend:<DWI>
               (match_operand:DWIH 1 "register_operand"))
             (any_extend:<DWI>
               (match_operand:DWIH 2 "nonimmediate_operand"))))
      (clobber (reg:CC FLAGS_REG))])])
10845
;; Widening QImode -> HImode multiply expander, one per any_extend code.
;; Only available under TARGET_QIMODE_MATH.  The generated pattern
;; clobbers the flags.
(define_expand "<u>mulqihi3"
  [(parallel
     [(set (match_operand:HI 0 "register_operand")
           (mult:HI
             (any_extend:HI
               (match_operand:QI 1 "register_operand"))
             (any_extend:HI
               (match_operand:QI 2 "nonimmediate_operand"))))
      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH")
10855
;; BMI2 mulx: operand 0 receives the product (mult) and operand 1 the
;; unsigned high part (umul_highpart) of operands 2 and 3.  Operand 2 is
;; constrained to the "d" register class and is not printed in the
;; assembly output.  No flags clobber appears in the pattern.
(define_insn "*bmi2_umul<mode><dwi>3_1"
  [(set (match_operand:DWIH 0 "register_operand" "=r")
        (mult:DWIH (match_operand:DWIH 2 "register_operand" "%d")
                   (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
   (set (match_operand:DWIH 1 "register_operand" "=r")
        (umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
  "TARGET_BMI2"
  "mulx\t{%3, %0, %1|%1, %0, %3}"
  [(set_attr "mode" "<MODE>")
   (set_attr "type" "imulx")
   (set_attr "prefix" "vex")])
10868
;; Tweak *bmi2_umul<mode><dwi>3_1 to eliminate following mov.
;; When the mov copies one of the two mulx results (operand 5) into
;; operand 4, operand 5 is dead afterwards, and operand 4 is not the
;; other mulx result, make the mulx write operand 4 directly.
(define_peephole2
  [(parallel
     [(set (match_operand:DWIH 0 "general_reg_operand")
           (mult:DWIH (match_operand:DWIH 2 "register_operand")
                      (match_operand:DWIH 3 "nonimmediate_operand")))
      (set (match_operand:DWIH 1 "general_reg_operand")
           (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])
   (set (match_operand:DWIH 4 "general_reg_operand")
        (match_operand:DWIH 5 "general_reg_operand"))]
  "TARGET_BMI2
   && ((REGNO (operands[5]) == REGNO (operands[0])
        && REGNO (operands[1]) != REGNO (operands[4]))
       || (REGNO (operands[5]) == REGNO (operands[1])
           && REGNO (operands[0]) != REGNO (operands[4])))
   && peep2_reg_dead_p (2, operands[5])"
  [(parallel
     [(set (match_dup 0)
           (mult:DWIH (match_dup 2) (match_dup 3)))
      (set (match_dup 1)
           (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])]
{
  /* Redirect whichever result the mov was copying: index 0 when the mov
     source is the first result, index 1 otherwise.  */
  int copied = (REGNO (operands[5]) == REGNO (operands[0])) ? 0 : 1;

  operands[copied] = operands[4];
})
10893
;; Unsigned widening multiply.  Alternative 0 (isa bmi2) outputs "#",
;; i.e. it is always split; alternative 1 is the one-operand mul with
;; the "a" input and the "A" destination.
(define_insn "*umul<mode><dwi>3_1"
  [(set (match_operand:<DWI> 0 "register_operand" "=r,A")
        (mult:<DWI>
          (zero_extend:<DWI>
            (match_operand:DWIH 1 "register_operand" "%d,a"))
          (zero_extend:<DWI>
            (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!MEM_P (operands[1]) || !MEM_P (operands[2])"
  "@
   #
   mul{<imodesuffix>}\t%2"
  [(set_attr "isa" "bmi2,*")
   (set_attr "type" "imulx,imul")
   (set_attr "prefix" "vex,orig")
   (set_attr "length_immediate" "*,0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "alternative" "1")
                 (if_then_else (eq_attr "cpu" "athlon")
                   (const_string "vector")
                   (const_string "double"))]
              (const_string "*")))
   (set_attr "amdfam10_decode" "*,double")
   (set_attr "bdver1_decode" "*,direct")
   (set_attr "mode" "<MODE>")])
10919
;; Convert mul to the mulx pattern to avoid flags dependency.
;; Applies after reload, under TARGET_BMI2, when operand 1 is in DX_REG.
;; The double-width destination is split into two DWIH halves: operand 3
;; receives the mult result and operand 4 the umul_highpart result.
(define_split
  [(set (match_operand:<DWI> 0 "register_operand")
        (mult:<DWI>
          (zero_extend:<DWI>
            (match_operand:DWIH 1 "register_operand"))
          (zero_extend:<DWI>
            (match_operand:DWIH 2 "nonimmediate_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed
   && REGNO (operands[1]) == DX_REG"
  [(parallel
     [(set (match_dup 3)
           (mult:DWIH (match_dup 1) (match_dup 2)))
      (set (match_dup 4)
           (umul_highpart:DWIH (match_dup 1) (match_dup 2)))])]
{
  /* Only operands 1-4 are referenced by the replacement pattern, so no
     further operand needs to be set up here.  */
  split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
})
10940
;; Signed widening multiply via one-operand imul: the "a" input times
;; operand 2 gives a double-width result in the "A" destination.
(define_insn "*mul<mode><dwi>3_1<nf_name>"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
        (mult:<DWI>
          (sign_extend:<DWI>
            (match_operand:DWIH 1 "register_operand" "%a"))
          (sign_extend:<DWI>
            (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))]
  "(!MEM_P (operands[1]) || !MEM_P (operands[2]))
   && <nf_condition>"
  "<nf_prefix>imul{<imodesuffix>}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
10960
;; Widening QImode -> HImode multiply via one-operand
;; <sgnprefix>mul{b}.  Operand 1 is tied to the HImode destination in
;; the "a" register class.  Requires TARGET_QIMODE_MATH.
(define_insn "*<u>mulqihi3_1<nf_name>"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (mult:HI
          (any_extend:HI
            (match_operand:QI 1 "register_operand" "%0"))
          (any_extend:HI
            (match_operand:QI 2 "nonimmediate_operand" "qm"))))]
  "TARGET_QIMODE_MATH
   && (!MEM_P (operands[1]) || !MEM_P (operands[2]))
   && <nf_condition>"
  "<nf_prefix><sgnprefix>mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "QI")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")])
10981
;; Widening multiplication peephole2s to tweak register allocation.
;; mov imm,%rdx; mov %rdi,%rax; mulq %rdx -> mov imm,%rax; mulq %rdi
;; The immediate is loaded straight into operand 2 and the multiply then
;; reads operand 3 in place of operand 0, dropping the register copy.
;; Operand 0 must therefore differ from operands 2 and 3, and must be
;; the multiply destination, DX_REG, or dead after the sequence.
(define_peephole2
  [(set (match_operand:DWIH 0 "general_reg_operand")
        (match_operand:DWIH 1 "immediate_operand"))
   (set (match_operand:DWIH 2 "general_reg_operand")
        (match_operand:DWIH 3 "general_reg_operand"))
   (parallel
     [(set (match_operand:<DWI> 4 "general_reg_operand")
           (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
                       (zero_extend:<DWI> (match_dup 0))))
      (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[3]) != AX_REG
   && REGNO (operands[0]) != REGNO (operands[2])
   && REGNO (operands[0]) != REGNO (operands[3])
   && (REGNO (operands[0]) == REGNO (operands[4])
       || REGNO (operands[0]) == DX_REG
       || peep2_reg_dead_p (3, operands[0]))"
  [(set (match_dup 2) (match_dup 1))
   (parallel
     [(set (match_dup 4)
           (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
                       (zero_extend:<DWI> (match_dup 3))))
      (clobber (reg:CC FLAGS_REG))])])
11004
;; mov imm,%rax; mov %rdi,%rdx; mulx %rax -> mov imm,%rdx; mulx %rdi
;; Same idea for the two-result mulx pattern: the immediate is loaded
;; straight into operand 2 and the multiply reads operand 3 in place of
;; operand 0.  Operands 0 and 2 must each be one of the two results or
;; be dead after the sequence.
(define_peephole2
  [(set (match_operand:DWIH 0 "general_reg_operand")
        (match_operand:DWIH 1 "immediate_operand"))
   (set (match_operand:DWIH 2 "general_reg_operand")
        (match_operand:DWIH 3 "general_reg_operand"))
   (parallel
     [(set (match_operand:DWIH 4 "general_reg_operand")
           (mult:DWIH (match_dup 2) (match_dup 0)))
      (set (match_operand:DWIH 5 "general_reg_operand")
           (umul_highpart:DWIH (match_dup 2) (match_dup 0)))])]
  "REGNO (operands[3]) != DX_REG
   && REGNO (operands[0]) != REGNO (operands[2])
   && REGNO (operands[0]) != REGNO (operands[3])
   && (REGNO (operands[0]) == REGNO (operands[4])
       || REGNO (operands[0]) == REGNO (operands[5])
       || peep2_reg_dead_p (3, operands[0]))
   && (REGNO (operands[2]) == REGNO (operands[4])
       || REGNO (operands[2]) == REGNO (operands[5])
       || peep2_reg_dead_p (3, operands[2]))"
  [(set (match_dup 2) (match_dup 1))
   (parallel
     [(set (match_dup 4)
           (mult:DWIH (match_dup 2) (match_dup 3)))
      (set (match_dup 5)
           (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])
11029
11030 ;; Highpart multiplication patterns
;; High part of a DWIH multiply via one-operand <sgnprefix>mul.  The
;; result goes to the "d" register class; operand 1 in the "a" register
;; class is also clobbered (scratch tied to operand 1), as are the flags.
(define_insn "<s>mul<mode>3_highpart"
  [(set (match_operand:DWIH 0 "register_operand" "=d")
        (any_mul_highpart:DWIH
          (match_operand:DWIH 1 "register_operand" "%a")
          (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
   (clobber (match_scratch:DWIH 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<sgnprefix>mul{<imodesuffix>}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
11049
;; High part of an SImode multiply, zero-extended into a DImode
;; destination in the "d" register class (64-bit only).  Operand 1 in
;; the "a" register class and the flags are clobbered.
(define_insn "*<s>mulsi3_highpart_zext"
  [(set (match_operand:DI 0 "register_operand" "=d")
        (zero_extend:DI
          (any_mul_highpart:SI
            (match_operand:SI 1 "register_operand" "%a")
            (match_operand:SI 2 "nonimmediate_operand" "rm"))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "SI")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
11069
;; High part of a DImode multiply written out explicitly: the TImode
;; product of the two extended inputs is shifted right by 64 and
;; truncated to DImode (64-bit only).  Result in the "d" register class;
;; operand 1 in the "a" register class and the flags are clobbered.
(define_insn "*<s>muldi3_highpart_1"
  [(set (match_operand:DI 0 "register_operand" "=d")
        (truncate:DI
          (lshiftrt:TI
            (mult:TI
              (any_extend:TI
                (match_operand:DI 1 "nonimmediate_operand" "%a"))
              (any_extend:TI
                (match_operand:DI 2 "nonimmediate_operand" "rm")))
            (const_int 64))))
   (clobber (match_scratch:DI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && (!MEM_P (operands[1]) || !MEM_P (operands[2]))"
  "<sgnprefix>mul{q}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "DI")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
11094
;; High part of an SImode multiply written out explicitly (DImode
;; product shifted right by 32 and truncated to SImode), then
;; zero-extended to DImode (64-bit only).  Result in the "d" register
;; class; operand 1 in the "a" register class and the flags are
;; clobbered.
(define_insn "*<s>mulsi3_highpart_zext"
  [(set (match_operand:DI 0 "register_operand" "=d")
        (zero_extend:DI
          (truncate:SI
            (lshiftrt:DI
              (mult:DI
                (any_extend:DI
                  (match_operand:SI 1 "nonimmediate_operand" "%a"))
                (any_extend:DI
                  (match_operand:SI 2 "nonimmediate_operand" "rm")))
              (const_int 32)))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && (!MEM_P (operands[1]) || !MEM_P (operands[2]))"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "SI")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
11118
;; High part of an SImode multiply written out explicitly: the DImode
;; product of the two extended inputs is shifted right by 32 and
;; truncated to SImode.  Result in the "d" register class; operand 1 in
;; the "a" register class and the flags are clobbered.
(define_insn "*<s>mulsi3_highpart_1"
  [(set (match_operand:SI 0 "register_operand" "=d")
        (truncate:SI
          (lshiftrt:DI
            (mult:DI
              (any_extend:DI
                (match_operand:SI 1 "nonimmediate_operand" "%a"))
              (any_extend:DI
                (match_operand:SI 2 "nonimmediate_operand" "rm")))
            (const_int 32))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "!MEM_P (operands[1]) || !MEM_P (operands[2])"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "mode" "SI")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                 (const_string "vector")]
              (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")])
11142
11143 ;; Highpart multiplication peephole2s to tweak register allocation.
11144 ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
;; Matched sequence:
;;   op0 = immediate op1
;;   op2 = op3                      (register copy)
;;   op4 = highpart (op2 * op0), clobbering op2 and the flags
;; Replacement:
;;   op2 = immediate op1
;;   op4 = highpart (op2 * op3), clobbering op2 and the flags
;; i.e. the immediate is loaded straight into the multiply's input register
;; and the register copy disappears.  The condition requires that op3 is
;; not AX, that op0 differs from both op2 and op3, and that op0 is either
;; the result register op4 or reported dead by peep2_reg_dead_p (3, ...),
;; since the replacement no longer sets op0.
;; NOTE(review): unlike the SImode zero-extending peephole below, this one
;; has no explicit REGNO (operands[2]) != REGNO (operands[3]) test --
;; confirm that it is implied by the multiply pattern's constraints.
11145 (define_peephole2
11146 [(set (match_operand:SWI48 0 "general_reg_operand")
11147 (match_operand:SWI48 1 "immediate_operand"))
11148 (set (match_operand:SWI48 2 "general_reg_operand")
11149 (match_operand:SWI48 3 "general_reg_operand"))
11150 (parallel [(set (match_operand:SWI48 4 "general_reg_operand")
11151 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
11152 (clobber (match_dup 2))
11153 (clobber (reg:CC FLAGS_REG))])]
11154 "REGNO (operands[3]) != AX_REG
11155 && REGNO (operands[0]) != REGNO (operands[2])
11156 && REGNO (operands[0]) != REGNO (operands[3])
11157 && (REGNO (operands[0]) == REGNO (operands[4])
11158 || peep2_reg_dead_p (3, operands[0]))"
11159 [(set (match_dup 2) (match_dup 1))
11160 (parallel [(set (match_dup 4)
11161 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
11162 (clobber (match_dup 2))
11163 (clobber (reg:CC FLAGS_REG))])])
11164
;; Zero-extending SImode counterpart of the high-part multiply peephole:
;; the multiply result op4 is DImode and wraps the SImode high-part
;; multiply in zero_extend:DI.  The sequence "op0 = imm; op2 = op3;
;; op4 = zext (highpart (op2 * op0))" becomes "op2 = imm;
;; op4 = zext (highpart (op2 * op3))".  Only for TARGET_64BIT; op3 must not
;; be AX, op0/op2/op3 must be pairwise distinct, and op0 must be either the
;; result register or reported dead by peep2_reg_dead_p (3, ...).
11165 (define_peephole2
11166 [(set (match_operand:SI 0 "general_reg_operand")
11167 (match_operand:SI 1 "immediate_operand"))
11168 (set (match_operand:SI 2 "general_reg_operand")
11169 (match_operand:SI 3 "general_reg_operand"))
11170 (parallel [(set (match_operand:DI 4 "general_reg_operand")
11171 (zero_extend:DI
11172 (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
11173 (clobber (match_dup 2))
11174 (clobber (reg:CC FLAGS_REG))])]
11175 "TARGET_64BIT
11176 && REGNO (operands[3]) != AX_REG
11177 && REGNO (operands[0]) != REGNO (operands[2])
11178 && REGNO (operands[2]) != REGNO (operands[3])
11179 && REGNO (operands[0]) != REGNO (operands[3])
11180 && (REGNO (operands[0]) == REGNO (operands[4])
11181 || peep2_reg_dead_p (3, operands[0]))"
11182 [(set (match_dup 2) (match_dup 1))
11183 (parallel [(set (match_dup 4)
11184 (zero_extend:DI
11185 (any_mul_highpart:SI (match_dup 2) (match_dup 3))))
11186 (clobber (match_dup 2))
11187 (clobber (reg:CC FLAGS_REG))])])
11188
11189 ;; The patterns that match these are at the end of this file.
11190
;; XFmode multiply expander: both inputs and the output are registers.
;; Available only when TARGET_80387 is set.
11191 (define_expand "mulxf3"
11192 [(set (match_operand:XF 0 "register_operand")
11193 (mult:XF (match_operand:XF 1 "register_operand")
11194 (match_operand:XF 2 "register_operand")))]
11195 "TARGET_80387")
11196
;; HFmode multiply expander; the second source may be a register or
;; memory.  Available only when TARGET_AVX512FP16 is set.
11197 (define_expand "mulhf3"
11198 [(set (match_operand:HF 0 "register_operand")
11199 (mult:HF (match_operand:HF 1 "register_operand")
11200 (match_operand:HF 2 "nonimmediate_operand")))]
11201 "TARGET_AVX512FP16")
11202
;; Multiply expander for the MODEF modes.  Enabled either when x87
;; arithmetic is allowed for the mode (TARGET_80387 && X87_ENABLE_ARITH)
;; or when the mode is an SSE float mode and TARGET_SSE_MATH is set.
11203 (define_expand "mul<mode>3"
11204 [(set (match_operand:MODEF 0 "register_operand")
11205 (mult:MODEF (match_operand:MODEF 1 "register_operand")
11206 (match_operand:MODEF 2 "nonimmediate_operand")))]
11207 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
11208 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
11209 \f
11210 ;; Divide instructions
11211
11212 ;; The patterns that match these are at the end of this file.
11213
;; XFmode divide expander: register operands only, requires TARGET_80387.
11214 (define_expand "divxf3"
11215 [(set (match_operand:XF 0 "register_operand")
11216 (div:XF (match_operand:XF 1 "register_operand")
11217 (match_operand:XF 2 "register_operand")))]
11218 "TARGET_80387")
11219
11220 /* There is no more precision loss than Newton-Raphson approximation
11221 when using HFmode rcp/rsqrt, so do the transformation directly under
11222 TARGET_RECIP_DIV and fast-math. */
;; HFmode divide expander (TARGET_AVX512FP16 only).
;; When TARGET_RECIP_DIV is set, the insn is optimized for speed, and
;; -ffinite-math-only, -fno-trapping-math and -funsafe-math-optimizations
;; are all in effect, the division is expanded as a reciprocal (rcphf2 of
;; operand 2, forced into a register) followed by a multiply (mulhf3).
;; Otherwise the preparation code falls through and the default div:HF
;; pattern is emitted.
11223 (define_expand "divhf3"
11224 [(set (match_operand:HF 0 "register_operand")
11225 (div:HF (match_operand:HF 1 "register_operand")
11226 (match_operand:HF 2 "nonimmediate_operand")))]
11227 "TARGET_AVX512FP16"
11228 {
11229 if (TARGET_RECIP_DIV
11230 && optimize_insn_for_speed_p ()
11231 && flag_finite_math_only && !flag_trapping_math
11232 && flag_unsafe_math_optimizations)
11233 {
11234 rtx op = gen_reg_rtx (HFmode);
11235 operands[2] = force_reg (HFmode, operands[2]);
11236 emit_insn (gen_rcphf2 (op, operands[2]));
11237 emit_insn (gen_mulhf3 (operands[0], operands[1], op));
11238 DONE;
11239 }
11240 })
11241
;; Divide expander for the MODEF modes, enabled under the same x87/SSE
;; conditions as mul<mode>3.  For SFmode with SSE math, TARGET_RECIP_DIV,
;; speed optimization, finite-math-only, no trapping math and unsafe math
;; optimizations, the division is delegated to ix86_emit_swdivsf instead
;; of emitting the div:MODEF pattern; every other case falls through to
;; the default pattern.
11242 (define_expand "div<mode>3"
11243 [(set (match_operand:MODEF 0 "register_operand")
11244 (div:MODEF (match_operand:MODEF 1 "register_operand")
11245 (match_operand:MODEF 2 "nonimmediate_operand")))]
11246 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
11247 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
11248 {
11249 if (<MODE>mode == SFmode
11250 && TARGET_SSE && TARGET_SSE_MATH
11251 && TARGET_RECIP_DIV
11252 && optimize_insn_for_speed_p ()
11253 && flag_finite_math_only && !flag_trapping_math
11254 && flag_unsafe_math_optimizations)
11255 {
11256 ix86_emit_swdivsf (operands[0], operands[1],
11257 operands[2], SFmode);
11258 DONE;
11259 }
11260 })
11261 \f
11262 ;; Divmod instructions.
11263
;; any_div iterates over signed (div) and unsigned (udiv) division;
;; paired_mod maps each division code to the matching remainder code so a
;; single template can describe both the quotient and the remainder.
11264 (define_code_iterator any_div [div udiv])
11265 (define_code_attr paired_mod [(div "mod") (udiv "umod")])
11266
;; Named signed/unsigned divmod expander for the SWIM248 modes.
;; Produces one parallel that sets operand 0 to the quotient and operand 3
;; to the remainder of operand 1 by operand 2, and clobbers the flags.
;; It has no condition string and no preparation statements.
11267 (define_expand "<u>divmod<mode>4"
11268 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
11269 (any_div:SWIM248
11270 (match_operand:SWIM248 1 "register_operand")
11271 (match_operand:SWIM248 2 "nonimmediate_operand")))
11272 (set (match_operand:SWIM248 3 "register_operand")
11273 (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
11274 (clobber (reg:CC FLAGS_REG))])])
11275
11276 ;; Split with 8bit unsigned divide:
11277 ;; if (dividend and divisor are in [0-255])
11278 ;; use 8bit unsigned integer divide
11279 ;; else
11280 ;; use original integer divide
;; SWI48 form of the 8-bit divide split: operand 0 is the quotient,
;; operand 1 the remainder, operands 2 and 3 the dividend and divisor.
;; Applies only while pseudos can still be created, when both
;; TARGET_USE_8BIT_IDIV and TARGET_QIMODE_MATH are set, and when the insn
;; is not being optimized for size.  All code generation is delegated to
;; ix86_split_idivmod.
11281 (define_split
11282 [(set (match_operand:SWI48 0 "register_operand")
11283 (any_div:SWI48 (match_operand:SWI48 2 "register_operand")
11284 (match_operand:SWI48 3 "nonimmediate_operand")))
11285 (set (match_operand:SWI48 1 "register_operand")
11286 (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
11287 (clobber (reg:CC FLAGS_REG))]
11288 "TARGET_USE_8BIT_IDIV
11289 && TARGET_QIMODE_MATH
11290 && can_create_pseudo_p ()
11291 && !optimize_insn_for_size_p ()"
11292 [(const_int 0)]
11293 "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")
11294
;; 8-bit divide split for the form whose SImode quotient is zero-extended
;; into a DImode operand 0 (the remainder, operand 1, stays SImode).
;; Same enabling conditions as the SWI48 split plus TARGET_64BIT; the work
;; is done by ix86_split_idivmod called with SImode.
11295 (define_split
11296 [(set (match_operand:DI 0 "register_operand")
11297 (zero_extend:DI
11298 (any_div:SI (match_operand:SI 2 "register_operand")
11299 (match_operand:SI 3 "nonimmediate_operand"))))
11300 (set (match_operand:SI 1 "register_operand")
11301 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
11302 (clobber (reg:CC FLAGS_REG))]
11303 "TARGET_64BIT
11304 && TARGET_USE_8BIT_IDIV
11305 && TARGET_QIMODE_MATH
11306 && can_create_pseudo_p ()
11307 && !optimize_insn_for_size_p ()"
11308 [(const_int 0)]
11309 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
11310
;; 8-bit divide split for the form whose SImode remainder is zero-extended
;; into a DImode operand 1 (the quotient, operand 0, stays SImode).
;; Operand numbering is kept the same as in the other splits (0 = quotient,
;; 1 = remainder) even though the remainder set comes first in the pattern.
11311 (define_split
11312 [(set (match_operand:DI 1 "register_operand")
11313 (zero_extend:DI
11314 (<paired_mod>:SI (match_operand:SI 2 "register_operand")
11315 (match_operand:SI 3 "nonimmediate_operand"))))
11316 (set (match_operand:SI 0 "register_operand")
11317 (any_div:SI (match_dup 2) (match_dup 3)))
11318 (clobber (reg:CC FLAGS_REG))]
11319 "TARGET_64BIT
11320 && TARGET_USE_8BIT_IDIV
11321 && TARGET_QIMODE_MATH
11322 && can_create_pseudo_p ()
11323 && !optimize_insn_for_size_p ()"
11324 [(const_int 0)]
11325 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
11326
;; Signed divmod (SWI48) tagged with UNSPEC_DIV_ALREADY_SPLIT.
;; NOTE(review): the unspec presumably marks insns produced by the 8-bit
;; divide split so that split does not match them again -- confirm in
;; ix86_split_idivmod.
;; Quotient goes to the "a" register (tied to the dividend, operand 2),
;; remainder to the earlyclobbered "d" register.  After reload the insn is
;; split into:
;;   1. an arithmetic right shift by (bitsize - 1) that fills operand 1
;;      with the sign of the dividend;
;;   2. the real div/mod parallel, which "use"s operand 1 as the high half.
;; When optimizing for size or TARGET_USE_CLTD is set, the shift reads
;; operand 2 directly; otherwise operand 2 is first copied into operand 1
;; and shifted in place (mov+shift instead of cltd).
11327 (define_insn_and_split "divmod<mode>4_1"
11328 [(set (match_operand:SWI48 0 "register_operand" "=a")
11329 (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
11330 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
11331 (set (match_operand:SWI48 1 "register_operand" "=&d")
11332 (mod:SWI48 (match_dup 2) (match_dup 3)))
11333 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
11334 (clobber (reg:CC FLAGS_REG))]
11335 ""
11336 "#"
11337 "reload_completed"
11338 [(parallel [(set (match_dup 1)
11339 (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
11340 (clobber (reg:CC FLAGS_REG))])
11341 (parallel [(set (match_dup 0)
11342 (div:SWI48 (match_dup 2) (match_dup 3)))
11343 (set (match_dup 1)
11344 (mod:SWI48 (match_dup 2) (match_dup 3)))
11345 (use (match_dup 1))
11346 (clobber (reg:CC FLAGS_REG))])]
11347 {
11348 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
11349
11350 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11351 operands[4] = operands[2];
11352 else
11353 {
11354 /* Avoid use of cltd in favor of a mov+shift. */
11355 emit_move_insn (operands[1], operands[2]);
11356 operands[4] = operands[1];
11357 }
11358 }
11359 [(set_attr "type" "multi")
11360 (set_attr "mode" "<MODE>")])
11361
;; Unsigned counterpart of divmod<mode>4_1 (also tagged with
;; UNSPEC_DIV_ALREADY_SPLIT).  After reload it is split into a move of
;; zero into operand 1 (the "d" register, i.e. the high half of the
;; dividend) followed by the real udiv/umod parallel that "use"s
;; operand 1.  No preparation code is needed.
11362 (define_insn_and_split "udivmod<mode>4_1"
11363 [(set (match_operand:SWI48 0 "register_operand" "=a")
11364 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
11365 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
11366 (set (match_operand:SWI48 1 "register_operand" "=&d")
11367 (umod:SWI48 (match_dup 2) (match_dup 3)))
11368 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
11369 (clobber (reg:CC FLAGS_REG))]
11370 ""
11371 "#"
11372 "reload_completed"
11373 [(set (match_dup 1) (const_int 0))
11374 (parallel [(set (match_dup 0)
11375 (udiv:SWI48 (match_dup 2) (match_dup 3)))
11376 (set (match_dup 1)
11377 (umod:SWI48 (match_dup 2) (match_dup 3)))
11378 (use (match_dup 1))
11379 (clobber (reg:CC FLAGS_REG))])]
11380 ""
11381 [(set_attr "type" "multi")
11382 (set_attr "mode" "<MODE>")])
11383
;; Signed SImode divmod, tagged UNSPEC_DIV_ALREADY_SPLIT, whose quotient is
;; zero-extended into a DImode "a" register; the remainder is an SImode
;; value in the earlyclobbered "d" register.  TARGET_64BIT only.
;; Split after reload into a sign-fill of operand 1 (arithmetic shift
;; right by 31) followed by the real div/mod parallel using operand 1.
;; The shift source is operand 2 when optimizing for size or when
;; TARGET_USE_CLTD is set, otherwise a copy of operand 2 made in operand 1.
11384 (define_insn_and_split "divmodsi4_zext_1"
11385 [(set (match_operand:DI 0 "register_operand" "=a")
11386 (zero_extend:DI
11387 (div:SI (match_operand:SI 2 "register_operand" "0")
11388 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11389 (set (match_operand:SI 1 "register_operand" "=&d")
11390 (mod:SI (match_dup 2) (match_dup 3)))
11391 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
11392 (clobber (reg:CC FLAGS_REG))]
11393 "TARGET_64BIT"
11394 "#"
11395 "&& reload_completed"
11396 [(parallel [(set (match_dup 1)
11397 (ashiftrt:SI (match_dup 4) (match_dup 5)))
11398 (clobber (reg:CC FLAGS_REG))])
11399 (parallel [(set (match_dup 0)
11400 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
11401 (set (match_dup 1)
11402 (mod:SI (match_dup 2) (match_dup 3)))
11403 (use (match_dup 1))
11404 (clobber (reg:CC FLAGS_REG))])]
11405 {
11406 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
11407
11408 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11409 operands[4] = operands[2];
11410 else
11411 {
11412 /* Avoid use of cltd in favor of a mov+shift. */
11413 emit_move_insn (operands[1], operands[2]);
11414 operands[4] = operands[1];
11415 }
11416 }
11417 [(set_attr "type" "multi")
11418 (set_attr "mode" "SI")])
11419
;; Unsigned SImode divmod, tagged UNSPEC_DIV_ALREADY_SPLIT, with the
;; quotient zero-extended into a DImode "a" register.  TARGET_64BIT only.
;; Split after reload into "operand 1 = 0" followed by the real udiv/umod
;; parallel that "use"s operand 1.
11420 (define_insn_and_split "udivmodsi4_zext_1"
11421 [(set (match_operand:DI 0 "register_operand" "=a")
11422 (zero_extend:DI
11423 (udiv:SI (match_operand:SI 2 "register_operand" "0")
11424 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11425 (set (match_operand:SI 1 "register_operand" "=&d")
11426 (umod:SI (match_dup 2) (match_dup 3)))
11427 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
11428 (clobber (reg:CC FLAGS_REG))]
11429 "TARGET_64BIT"
11430 "#"
11431 "&& reload_completed"
11432 [(set (match_dup 1) (const_int 0))
11433 (parallel [(set (match_dup 0)
11434 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
11435 (set (match_dup 1)
11436 (umod:SI (match_dup 2) (match_dup 3)))
11437 (use (match_dup 1))
11438 (clobber (reg:CC FLAGS_REG))])]
11439 ""
11440 [(set_attr "type" "multi")
11441 (set_attr "mode" "SI")])
11442
;; Signed SImode divmod, tagged UNSPEC_DIV_ALREADY_SPLIT, whose remainder
;; is zero-extended into a DImode "d" register (operand 1); the quotient is
;; an SImode value in the "a" register.  TARGET_64BIT only.
;; Because operand 1 is DImode here, the split works on operand 6, the
;; SImode low part of operand 1: operand 6 receives the sign fill
;; (arithmetic shift right by 31) and is the register "use"d by the real
;; div/mod parallel.  The size/TARGET_USE_CLTD choice is the same as in
;; the other signed variants.
11443 (define_insn_and_split "divmodsi4_zext_2"
11444 [(set (match_operand:DI 1 "register_operand" "=&d")
11445 (zero_extend:DI
11446 (mod:SI (match_operand:SI 2 "register_operand" "0")
11447 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11448 (set (match_operand:SI 0 "register_operand" "=a")
11449 (div:SI (match_dup 2) (match_dup 3)))
11450 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
11451 (clobber (reg:CC FLAGS_REG))]
11452 "TARGET_64BIT"
11453 "#"
11454 "&& reload_completed"
11455 [(parallel [(set (match_dup 6)
11456 (ashiftrt:SI (match_dup 4) (match_dup 5)))
11457 (clobber (reg:CC FLAGS_REG))])
11458 (parallel [(set (match_dup 1)
11459 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
11460 (set (match_dup 0)
11461 (div:SI (match_dup 2) (match_dup 3)))
11462 (use (match_dup 6))
11463 (clobber (reg:CC FLAGS_REG))])]
11464 {
11465 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
11466 operands[6] = gen_lowpart (SImode, operands[1]);
11467
11468 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11469 operands[4] = operands[2];
11470 else
11471 {
11472 /* Avoid use of cltd in favor of a mov+shift. */
11473 emit_move_insn (operands[6], operands[2]);
11474 operands[4] = operands[6];
11475 }
11476 }
11477 [(set_attr "type" "multi")
11478 (set_attr "mode" "SI")])
11479
;; Unsigned SImode divmod, tagged UNSPEC_DIV_ALREADY_SPLIT, with the
;; remainder zero-extended into a DImode "d" register (operand 1).
;; TARGET_64BIT only.  After reload, operand 4 (the SImode low part of
;; operand 1) is cleared and then "use"d by the real udiv/umod parallel.
11480 (define_insn_and_split "udivmodsi4_zext_2"
11481 [(set (match_operand:DI 1 "register_operand" "=&d")
11482 (zero_extend:DI
11483 (umod:SI (match_operand:SI 2 "register_operand" "0")
11484 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11485 (set (match_operand:SI 0 "register_operand" "=a")
11486 (udiv:SI (match_dup 2) (match_dup 3)))
11487 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
11488 (clobber (reg:CC FLAGS_REG))]
11489 "TARGET_64BIT"
11490 "#"
11491 "&& reload_completed"
11492 [(set (match_dup 4) (const_int 0))
11493 (parallel [(set (match_dup 1)
11494 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
11495 (set (match_dup 0)
11496 (udiv:SI (match_dup 2) (match_dup 3)))
11497 (use (match_dup 4))
11498 (clobber (reg:CC FLAGS_REG))])]
11499 "operands[4] = gen_lowpart (SImode, operands[1]);"
11500 [(set_attr "type" "multi")
11501 (set_attr "mode" "SI")])
11502
;; Generic signed divmod for the SWIM248 modes (no ALREADY_SPLIT marker).
;; Quotient in the "a" register (tied to the dividend), remainder in the
;; earlyclobbered "d" register.  Split after reload into a sign fill of
;; operand 1 (arithmetic shift right by bitsize - 1) followed by the real
;; div/mod parallel that "use"s operand 1.
;; For HImode the mov+shift path is always taken; for the other modes the
;; shift reads operand 2 directly when optimizing for size or when
;; TARGET_USE_CLTD is set.
11503 (define_insn_and_split "*divmod<mode>4"
11504 [(set (match_operand:SWIM248 0 "register_operand" "=a")
11505 (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
11506 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
11507 (set (match_operand:SWIM248 1 "register_operand" "=&d")
11508 (mod:SWIM248 (match_dup 2) (match_dup 3)))
11509 (clobber (reg:CC FLAGS_REG))]
11510 ""
11511 "#"
11512 "reload_completed"
11513 [(parallel [(set (match_dup 1)
11514 (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
11515 (clobber (reg:CC FLAGS_REG))])
11516 (parallel [(set (match_dup 0)
11517 (div:SWIM248 (match_dup 2) (match_dup 3)))
11518 (set (match_dup 1)
11519 (mod:SWIM248 (match_dup 2) (match_dup 3)))
11520 (use (match_dup 1))
11521 (clobber (reg:CC FLAGS_REG))])]
11522 {
11523 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
11524
11525 if (<MODE>mode != HImode
11526 && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
11527 operands[4] = operands[2];
11528 else
11529 {
11530 /* Avoid use of cltd in favor of a mov+shift. */
11531 emit_move_insn (operands[1], operands[2]);
11532 operands[4] = operands[1];
11533 }
11534 }
11535 [(set_attr "type" "multi")
11536 (set_attr "mode" "<MODE>")])
11537
;; Generic unsigned divmod for the SWIM248 modes.  Split after reload into
;; "operand 1 = 0" (clearing the "d" register that holds the high half of
;; the dividend) followed by the real udiv/umod parallel that "use"s
;; operand 1.
11538 (define_insn_and_split "*udivmod<mode>4"
11539 [(set (match_operand:SWIM248 0 "register_operand" "=a")
11540 (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
11541 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
11542 (set (match_operand:SWIM248 1 "register_operand" "=&d")
11543 (umod:SWIM248 (match_dup 2) (match_dup 3)))
11544 (clobber (reg:CC FLAGS_REG))]
11545 ""
11546 "#"
11547 "reload_completed"
11548 [(set (match_dup 1) (const_int 0))
11549 (parallel [(set (match_dup 0)
11550 (udiv:SWIM248 (match_dup 2) (match_dup 3)))
11551 (set (match_dup 1)
11552 (umod:SWIM248 (match_dup 2) (match_dup 3)))
11553 (use (match_dup 1))
11554 (clobber (reg:CC FLAGS_REG))])]
11555 ""
11556 [(set_attr "type" "multi")
11557 (set_attr "mode" "<MODE>")])
11558
11559 ;; Optimize division or modulo by constant power of 2, if the constant
11560 ;; materializes only after expansion.
;; Unsigned div/mod by a constant 2^v, accepted when v (exact_log2 of the
;; divisor) is in the range 1..31.  Split after reload into three steps:
;;   operand 1 = operand 2                 (save the dividend)
;;   operand 0 = operand 2 >> v            (logical shift: the quotient)
;;   operand 1 = operand 1 & (2^v - 1)     (mask: the remainder)
;; Operand 4 holds v and operand 5 holds the mask; both are computed in
;; the preparation code.
11561 (define_insn_and_split "*udivmod<mode>4_pow2"
11562 [(set (match_operand:SWI48 0 "register_operand" "=r")
11563 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
11564 (match_operand:SWI48 3 "const_int_operand")))
11565 (set (match_operand:SWI48 1 "register_operand" "=r")
11566 (umod:SWI48 (match_dup 2) (match_dup 3)))
11567 (clobber (reg:CC FLAGS_REG))]
11568 "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
11569 "#"
11570 "&& reload_completed"
11571 [(set (match_dup 1) (match_dup 2))
11572 (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
11573 (clobber (reg:CC FLAGS_REG))])
11574 (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
11575 (clobber (reg:CC FLAGS_REG))])]
11576 {
11577 int v = exact_log2 (UINTVAL (operands[3]));
11578 operands[4] = GEN_INT (v);
11579 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
11580 }
11581 [(set_attr "type" "multi")
11582 (set_attr "mode" "<MODE>")])
11583
;; Signed SImode divmod with the quotient zero-extended to DImode
;; (TARGET_64BIT only); same RTL as divmodsi4_zext_1 but without the
;; UNSPEC_DIV_ALREADY_SPLIT marker.  Split after reload into a sign fill
;; of operand 1 (arithmetic shift right by 31) followed by the real
;; div/mod parallel that "use"s operand 1.  The shift source is operand 2
;; when optimizing for size or TARGET_USE_CLTD, else a copy in operand 1.
11584 (define_insn_and_split "*divmodsi4_zext_1"
11585 [(set (match_operand:DI 0 "register_operand" "=a")
11586 (zero_extend:DI
11587 (div:SI (match_operand:SI 2 "register_operand" "0")
11588 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11589 (set (match_operand:SI 1 "register_operand" "=&d")
11590 (mod:SI (match_dup 2) (match_dup 3)))
11591 (clobber (reg:CC FLAGS_REG))]
11592 "TARGET_64BIT"
11593 "#"
11594 "&& reload_completed"
11595 [(parallel [(set (match_dup 1)
11596 (ashiftrt:SI (match_dup 4) (match_dup 5)))
11597 (clobber (reg:CC FLAGS_REG))])
11598 (parallel [(set (match_dup 0)
11599 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
11600 (set (match_dup 1)
11601 (mod:SI (match_dup 2) (match_dup 3)))
11602 (use (match_dup 1))
11603 (clobber (reg:CC FLAGS_REG))])]
11604 {
11605 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
11606
11607 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11608 operands[4] = operands[2];
11609 else
11610 {
11611 /* Avoid use of cltd in favor of a mov+shift. */
11612 emit_move_insn (operands[1], operands[2]);
11613 operands[4] = operands[1];
11614 }
11615 }
11616 [(set_attr "type" "multi")
11617 (set_attr "mode" "SI")])
11618
;; Unsigned SImode divmod with the quotient zero-extended to DImode
;; (TARGET_64BIT only, no ALREADY_SPLIT marker).  Split after reload into
;; "operand 1 = 0" followed by the real udiv/umod parallel that "use"s
;; operand 1.
11619 (define_insn_and_split "*udivmodsi4_zext_1"
11620 [(set (match_operand:DI 0 "register_operand" "=a")
11621 (zero_extend:DI
11622 (udiv:SI (match_operand:SI 2 "register_operand" "0")
11623 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11624 (set (match_operand:SI 1 "register_operand" "=&d")
11625 (umod:SI (match_dup 2) (match_dup 3)))
11626 (clobber (reg:CC FLAGS_REG))]
11627 "TARGET_64BIT"
11628 "#"
11629 "&& reload_completed"
11630 [(set (match_dup 1) (const_int 0))
11631 (parallel [(set (match_dup 0)
11632 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
11633 (set (match_dup 1)
11634 (umod:SI (match_dup 2) (match_dup 3)))
11635 (use (match_dup 1))
11636 (clobber (reg:CC FLAGS_REG))])]
11637 ""
11638 [(set_attr "type" "multi")
11639 (set_attr "mode" "SI")])
11640
;; Unsigned SImode div/mod by a constant 2^v (v in 1..31) where the
;; quotient is zero-extended to DImode.  TARGET_64BIT only.  Split after
;; reload into: copy the dividend to operand 1, set operand 0 to the
;; zero-extended logical right shift of operand 2 by v, then mask
;; operand 1 with 2^v - 1 to form the remainder.
11641 (define_insn_and_split "*udivmodsi4_pow2_zext_1"
11642 [(set (match_operand:DI 0 "register_operand" "=r")
11643 (zero_extend:DI
11644 (udiv:SI (match_operand:SI 2 "register_operand" "0")
11645 (match_operand:SI 3 "const_int_operand"))))
11646 (set (match_operand:SI 1 "register_operand" "=r")
11647 (umod:SI (match_dup 2) (match_dup 3)))
11648 (clobber (reg:CC FLAGS_REG))]
11649 "TARGET_64BIT
11650 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
11651 "#"
11652 "&& reload_completed"
11653 [(set (match_dup 1) (match_dup 2))
11654 (parallel [(set (match_dup 0)
11655 (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
11656 (clobber (reg:CC FLAGS_REG))])
11657 (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
11658 (clobber (reg:CC FLAGS_REG))])]
11659 {
11660 int v = exact_log2 (UINTVAL (operands[3]));
11661 operands[4] = GEN_INT (v);
11662 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
11663 }
11664 [(set_attr "type" "multi")
11665 (set_attr "mode" "SI")])
11666
;; Signed SImode divmod with the remainder zero-extended into a DImode "d"
;; register (TARGET_64BIT only, no ALREADY_SPLIT marker).  The split works
;; on operand 6, the SImode low part of operand 1: it receives the sign
;; fill (arithmetic shift right by 31) and is "use"d by the real div/mod
;; parallel.  The shift source is operand 2 when optimizing for size or
;; TARGET_USE_CLTD, else a copy of operand 2 made in operand 6.
11667 (define_insn_and_split "*divmodsi4_zext_2"
11668 [(set (match_operand:DI 1 "register_operand" "=&d")
11669 (zero_extend:DI
11670 (mod:SI (match_operand:SI 2 "register_operand" "0")
11671 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11672 (set (match_operand:SI 0 "register_operand" "=a")
11673 (div:SI (match_dup 2) (match_dup 3)))
11674 (clobber (reg:CC FLAGS_REG))]
11675 "TARGET_64BIT"
11676 "#"
11677 "&& reload_completed"
11678 [(parallel [(set (match_dup 6)
11679 (ashiftrt:SI (match_dup 4) (match_dup 5)))
11680 (clobber (reg:CC FLAGS_REG))])
11681 (parallel [(set (match_dup 1)
11682 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
11683 (set (match_dup 0)
11684 (div:SI (match_dup 2) (match_dup 3)))
11685 (use (match_dup 6))
11686 (clobber (reg:CC FLAGS_REG))])]
11687 {
11688 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
11689 operands[6] = gen_lowpart (SImode, operands[1]);
11690
11691 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11692 operands[4] = operands[2];
11693 else
11694 {
11695 /* Avoid use of cltd in favor of a mov+shift. */
11696 emit_move_insn (operands[6], operands[2]);
11697 operands[4] = operands[6];
11698 }
11699 }
11700 [(set_attr "type" "multi")
11701 (set_attr "mode" "SI")])
11702
;; Unsigned SImode divmod with the remainder zero-extended into a DImode
;; "d" register (TARGET_64BIT only, no ALREADY_SPLIT marker).  After
;; reload, operand 4 (the SImode low part of operand 1) is cleared and then
;; "use"d by the real udiv/umod parallel.
11703 (define_insn_and_split "*udivmodsi4_zext_2"
11704 [(set (match_operand:DI 1 "register_operand" "=&d")
11705 (zero_extend:DI
11706 (umod:SI (match_operand:SI 2 "register_operand" "0")
11707 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11708 (set (match_operand:SI 0 "register_operand" "=a")
11709 (udiv:SI (match_dup 2) (match_dup 3)))
11710 (clobber (reg:CC FLAGS_REG))]
11711 "TARGET_64BIT"
11712 "#"
11713 "&& reload_completed"
11714 [(set (match_dup 4) (const_int 0))
11715 (parallel [(set (match_dup 1)
11716 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
11717 (set (match_dup 0)
11718 (udiv:SI (match_dup 2) (match_dup 3)))
11719 (use (match_dup 4))
11720 (clobber (reg:CC FLAGS_REG))])]
11721 "operands[4] = gen_lowpart (SImode, operands[1]);"
11722 [(set_attr "type" "multi")
11723 (set_attr "mode" "SI")])
11724
;; Unsigned SImode div/mod by a constant 2^v (v in 1..31) where the
;; remainder is zero-extended to DImode.  TARGET_64BIT only.  Split after
;; reload into: copy the dividend to operand 1, set operand 0 to the
;; logical right shift of operand 2 by v, then set operand 1 to the
;; zero-extended AND of operand 1 with 2^v - 1.
;; NOTE(review): operand 1 is declared DImode in the insn pattern but is
;; reused unchanged in the SImode move and the SImode AND of the split
;; template -- confirm this mode mix is intended.
11725 (define_insn_and_split "*udivmodsi4_pow2_zext_2"
11726 [(set (match_operand:DI 1 "register_operand" "=r")
11727 (zero_extend:DI
11728 (umod:SI (match_operand:SI 2 "register_operand" "0")
11729 (match_operand:SI 3 "const_int_operand"))))
11730 (set (match_operand:SI 0 "register_operand" "=r")
11731 (udiv:SI (match_dup 2) (match_dup 3)))
11732 (clobber (reg:CC FLAGS_REG))]
11733 "TARGET_64BIT
11734 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
11735 "#"
11736 "&& reload_completed"
11737 [(set (match_dup 1) (match_dup 2))
11738 (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
11739 (clobber (reg:CC FLAGS_REG))])
11740 (parallel [(set (match_dup 1)
11741 (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
11742 (clobber (reg:CC FLAGS_REG))])]
11743 {
11744 int v = exact_log2 (UINTVAL (operands[3]));
11745 operands[4] = GEN_INT (v);
11746 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
11747 }
11748 [(set_attr "type" "multi")
11749 (set_attr "mode" "SI")])
11750
;; The actual divide instruction in its APX "no flags" form: identical to
;; *<u>divmod<mode>4_noext except that there is no FLAGS_REG clobber, the
;; condition is TARGET_APX_NF, and the mnemonic carries the {nf} prefix.
;; Operand 4 is the already-prepared high half of the dividend; it is tied
;; to operand 1 (the "d" register) and only appears in a "use".
11751 (define_insn "*<u>divmod<mode>4_noext_nf"
11752 [(set (match_operand:SWIM248 0 "register_operand" "=a")
11753 (any_div:SWIM248
11754 (match_operand:SWIM248 2 "register_operand" "0")
11755 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
11756 (set (match_operand:SWIM248 1 "register_operand" "=d")
11757 (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
11758 (use (match_operand:SWIM248 4 "register_operand" "1"))]
11759 "TARGET_APX_NF"
11760 "%{nf%} <sgnprefix>div{<imodesuffix>}\t%3"
11761 [(set_attr "type" "idiv")
11762 (set_attr "mode" "<MODE>")])
11763
;; The actual signed/unsigned divide instruction for the SWIM248 modes,
;; i.e. the second half of the divmod splits.  The dividend's low half
;; is operand 2 (tied to the quotient in the "a" register); its high half
;; is operand 4, tied to the remainder in the "d" register and expressed
;; as a "use".  Emits <sgnprefix>div on operand 3 and clobbers the flags.
;; The "has_nf" attribute is set to "1" on this pattern.
11764 (define_insn "*<u>divmod<mode>4_noext"
11765 [(set (match_operand:SWIM248 0 "register_operand" "=a")
11766 (any_div:SWIM248
11767 (match_operand:SWIM248 2 "register_operand" "0")
11768 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
11769 (set (match_operand:SWIM248 1 "register_operand" "=d")
11770 (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
11771 (use (match_operand:SWIM248 4 "register_operand" "1"))
11772 (clobber (reg:CC FLAGS_REG))]
11773 ""
11774 "<sgnprefix>div{<imodesuffix>}\t%3"
11775 [(set_attr "type" "idiv")
11776 (set_attr "has_nf" "1")
11777 (set_attr "mode" "<MODE>")])
11778
;; The actual 32-bit divide instruction for the variants whose quotient is
;; zero-extended into a DImode "a" register.  TARGET_64BIT only.  Operand 4
;; (tied to the "d" register) is the prepared high half of the dividend.
11779 (define_insn "*<u>divmodsi4_noext_zext_1"
11780 [(set (match_operand:DI 0 "register_operand" "=a")
11781 (zero_extend:DI
11782 (any_div:SI (match_operand:SI 2 "register_operand" "0")
11783 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11784 (set (match_operand:SI 1 "register_operand" "=d")
11785 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
11786 (use (match_operand:SI 4 "register_operand" "1"))
11787 (clobber (reg:CC FLAGS_REG))]
11788 "TARGET_64BIT"
11789 "<sgnprefix>div{l}\t%3"
11790 [(set_attr "type" "idiv")
11791 (set_attr "mode" "SI")])
11792
;; The actual 32-bit divide instruction for the variants whose remainder is
;; zero-extended into a DImode "d" register.  TARGET_64BIT only.  Operand 4
;; (tied to operand 1) is the prepared high half of the dividend.
11793 (define_insn "*<u>divmodsi4_noext_zext_2"
11794 [(set (match_operand:DI 1 "register_operand" "=d")
11795 (zero_extend:DI
11796 (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
11797 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11798 (set (match_operand:SI 0 "register_operand" "=a")
11799 (any_div:SI (match_dup 2) (match_dup 3)))
11800 (use (match_operand:SI 4 "register_operand" "1"))
11801 (clobber (reg:CC FLAGS_REG))]
11802 "TARGET_64BIT"
11803 "<sgnprefix>div{l}\t%3"
11804 [(set_attr "type" "idiv")
11805 (set_attr "mode" "SI")])
11806
11807 ;; Avoid sign-extension (using cdq) for constant numerators.
;; Signed SImode divmod whose dividend (operand 2) is a compile-time
;; constant.  Not used when optimizing the function for size.  After
;; reload it is split into:
;;   operand 0 = constant dividend
;;   operand 1 = -1 if the constant is negative, else 0  (its sign extension)
;;   the real div/mod parallel on operand 0, "use"ing operand 1
;; so no run-time sign extension of the dividend is needed.
11808 (define_insn_and_split "*divmodsi4_const"
11809 [(set (match_operand:SI 0 "register_operand" "=&a")
11810 (div:SI (match_operand:SI 2 "const_int_operand")
11811 (match_operand:SI 3 "nonimmediate_operand" "rm")))
11812 (set (match_operand:SI 1 "register_operand" "=&d")
11813 (mod:SI (match_dup 2) (match_dup 3)))
11814 (clobber (reg:CC FLAGS_REG))]
11815 "!optimize_function_for_size_p (cfun)"
11816 "#"
11817 "&& reload_completed"
11818 [(set (match_dup 0) (match_dup 2))
11819 (set (match_dup 1) (match_dup 4))
11820 (parallel [(set (match_dup 0)
11821 (div:SI (match_dup 0) (match_dup 3)))
11822 (set (match_dup 1)
11823 (mod:SI (match_dup 0) (match_dup 3)))
11824 (use (match_dup 1))
11825 (clobber (reg:CC FLAGS_REG))])]
11826 {
11827 operands[4] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
11828 }
11829 [(set_attr "type" "multi")
11830 (set_attr "mode" "SI")])
11831
;; Signed QImode divmod expander (requires TARGET_QIMODE_MATH).
;; The RTL template is never emitted as-is; the preparation code ends in
;; DONE.  It sign-extends the dividend to HImode, emits divmodhiqi3 (which
;; leaves quotient and remainder packed in one HImode register), then
;; moves bits 8..15 into operand 3 (remainder) and the low byte into
;; operand 0 (quotient).  Each extracting move gets a REG_EQUAL note with
;; the plain QImode MOD / DIV expression.
11832 (define_expand "divmodqi4"
11833 [(parallel [(set (match_operand:QI 0 "register_operand")
11834 (div:QI
11835 (match_operand:QI 1 "register_operand")
11836 (match_operand:QI 2 "nonimmediate_operand")))
11837 (set (match_operand:QI 3 "register_operand")
11838 (mod:QI (match_dup 1) (match_dup 2)))
11839 (clobber (reg:CC FLAGS_REG))])]
11840 "TARGET_QIMODE_MATH"
11841 {
11842 rtx div, mod;
11843 rtx tmp0, tmp1;
11844
11845 tmp0 = gen_reg_rtx (HImode);
11846 tmp1 = gen_reg_rtx (HImode);
11847
11848 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
11849 emit_insn (gen_extendqihi2 (tmp1, operands[1]));
11850 emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
11851
11852 /* Extract remainder from AH. */
11853 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
11854 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
11855 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
11856
11857 mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
11858 set_unique_reg_note (insn, REG_EQUAL, mod);
11859
11860 /* Extract quotient from AL. */
11861 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
11862
11863 div = gen_rtx_DIV (QImode, operands[1], operands[2]);
11864 set_unique_reg_note (insn, REG_EQUAL, div);
11865
11866 DONE;
11867 })
11868
;; Unsigned QImode divmod expander (requires TARGET_QIMODE_MATH).
;; Mirrors divmodqi4: the dividend is zero-extended to HImode,
;; udivmodhiqi3 produces quotient and remainder packed in one HImode
;; register, and the two bytes are then moved out to operand 3
;; (remainder, bits 8..15) and operand 0 (quotient, low byte) with
;; REG_EQUAL notes holding the QImode UMOD / UDIV expressions.
11869 (define_expand "udivmodqi4"
11870 [(parallel [(set (match_operand:QI 0 "register_operand")
11871 (udiv:QI
11872 (match_operand:QI 1 "register_operand")
11873 (match_operand:QI 2 "nonimmediate_operand")))
11874 (set (match_operand:QI 3 "register_operand")
11875 (umod:QI (match_dup 1) (match_dup 2)))
11876 (clobber (reg:CC FLAGS_REG))])]
11877 "TARGET_QIMODE_MATH"
11878 {
11879 rtx div, mod;
11880 rtx tmp0, tmp1;
11881
11882 tmp0 = gen_reg_rtx (HImode);
11883 tmp1 = gen_reg_rtx (HImode);
11884
11885 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
11886 emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
11887 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
11888
11889 /* Extract remainder from AH. */
11890 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
11891 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
11892 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
11893
11894 mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
11895 set_unique_reg_note (insn, REG_EQUAL, mod);
11896
11897 /* Extract quotient from AL. */
11898 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
11899
11900 div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
11901 set_unique_reg_note (insn, REG_EQUAL, div);
11902
11903 DONE;
11904 })
11905
11906 ;; Divide AX by r/m8, with result stored in
11907 ;; AL <- Quotient
11908 ;; AH <- Remainder
11909 ;; Change div/mod to HImode and extend the second argument to HImode
11910 ;; so that mode of div/mod matches with mode of arguments. Otherwise
11911 ;; combine may fail.
;; 8-bit divide instruction.  The HImode result in the "a" register is
;; described as (remainder << 8) | quotient, each truncated to QImode and
;; zero-extended back to HImode.  The HImode dividend is operand 1 (tied
;; to the result register); the QImode divisor is operand 2, extended to
;; HImode by any_extend.  The pattern text itself has no flags clobber;
;; <nf_name>, <nf_condition> and <nf_prefix> are substitution attributes.
;; NOTE(review): the template uses mod:HI / div:HI for both the signed and
;; the unsigned (<u>) instance rather than <paired_mod> / any_div --
;; confirm this is intentional.
11912 (define_insn "<u>divmodhiqi3<nf_name>"
11913 [(set (match_operand:HI 0 "register_operand" "=a")
11914 (ior:HI
11915 (ashift:HI
11916 (zero_extend:HI
11917 (truncate:QI
11918 (mod:HI (match_operand:HI 1 "register_operand" "0")
11919 (any_extend:HI
11920 (match_operand:QI 2 "nonimmediate_operand" "qm")))))
11921 (const_int 8))
11922 (zero_extend:HI
11923 (truncate:QI
11924 (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))]
11925 "TARGET_QIMODE_MATH
11926 && <nf_condition>"
11927 "<nf_prefix><sgnprefix>div{b}\t%2"
11928 [(set_attr "type" "idiv")
11929 (set_attr "has_nf" "1")
11930 (set_attr "mode" "QI")])
11931
11932 ;; We cannot use div/idiv for double division, because it causes
11933 ;; "division by zero" on the overflow and that's not what we expect
11934 ;; from truncate. Because true (non truncating) double division is
11935 ;; never generated, we can't create this insn anyway.
11936 ;
11937 ;(define_insn ""
11938 ; [(set (match_operand:SI 0 "register_operand" "=a")
11939 ; (truncate:SI
11940 ; (udiv:DI (match_operand:DI 1 "register_operand" "A")
11941 ; (zero_extend:DI
11942 ; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
11943 ; (set (match_operand:SI 3 "register_operand" "=d")
11944 ; (truncate:SI
11945 ; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
11946 ; (clobber (reg:CC FLAGS_REG))]
11947 ; ""
11948 ; "div{l}\t{%2, %0|%0, %2}"
11949 ; [(set_attr "type" "idiv")])
11950 \f
11951 ;;- Logical AND instructions
11952
11953 ;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
11954 ;; Note that this excludes ah.
11955
;; Expander for a SWI48 "test": sets FLAGS_REG in CCNOmode from the
;; comparison of (operand 0 AND operand 1) against zero.  No condition and
;; no preparation code; the leading "@" in the name requests a
;; mode-parameterized generator function.
11956 (define_expand "@test<mode>_ccno_1"
11957 [(set (reg:CCNO FLAGS_REG)
11958 (compare:CCNO
11959 (and:SWI48
11960 (match_operand:SWI48 0 "nonimmediate_operand")
11961 (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
11962 (const_int 0)))])
11963
;; Expander for a QImode "test" that sets FLAGS_REG in CCZmode from the
;; comparison of (operand 0 AND operand 1) against zero.
11964 (define_expand "testqi_ccz_1"
11965 [(set (reg:CCZ FLAGS_REG)
11966 (compare:CCZ
11967 (and:QI
11968 (match_operand:QI 0 "nonimmediate_operand")
11969 (match_operand:QI 1 "nonmemory_operand"))
11970 (const_int 0)))])
11971
;; DImode "test" (TARGET_64BIT only) with two alternatives:
;;   0: operand 1 satisfies "Z" -> emitted as test{l} on the 32-bit views
;;      (%k) of both operands;
;;   1: general register/immediate form -> emitted as test{q}.
;; The flags mode accepted by ix86_match_ccmode depends on operand 1: when
;; it satisfies "Z" and is either not a CONST_INT or has the SImode sign
;; bit known set, only CCZmode is accepted (see the inline comment on the
;; sign flag); otherwise CCNOmode is accepted.
11972 (define_insn "*testdi_1"
11973 [(set (reg FLAGS_REG)
11974 (compare
11975 (and:DI
11976 (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
11977 (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
11978 (const_int 0)))]
11979 "TARGET_64BIT
11980 && ix86_match_ccmode
11981 (insn,
11982 /* If we are going to emit testl instead of testq, and the operands[1]
11983 constant might have the SImode sign bit set, make sure the sign
11984 flag isn't tested, because the instruction will set the sign flag
11985 based on bit 31 rather than bit 63. If it isn't CONST_INT,
11986 conservatively assume it might have bit 31 set. */
11987 (satisfies_constraint_Z (operands[1])
11988 && (!CONST_INT_P (operands[1])
11989 || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
11990 ? CCZmode : CCNOmode)"
11991 "@
11992 test{l}\t{%k1, %k0|%k0, %k1}
11993 test{q}\t{%1, %0|%0, %1}"
11994 [(set_attr "type" "test")
11995 (set_attr "mode" "SI,DI")])
11996
;; QImode "test" that may be emitted as a 32-bit test.
;; The flags mode accepted is CCNOmode when operand 1 is a non-negative
;; CONST_INT and CCZmode otherwise.  The "mode" attribute is SI for
;; alternative 2, and also when optimizing for size with operand 0 an
;; ext_QIreg_operand and operand 1 a constant in 0..127; otherwise QI.
;; When the mode attribute is SI the output code emits test{l} on the
;; 32-bit view of operand 0, first masking a negative constant to its low
;; 8 bits; otherwise it emits test{b}.
11997 (define_insn "*testqi_1_maybe_si"
11998 [(set (reg FLAGS_REG)
11999 (compare
12000 (and:QI
12001 (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
12002 (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
12003 (const_int 0)))]
12004 "ix86_match_ccmode (insn,
12005 CONST_INT_P (operands[1])
12006 && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
12007 {
12008 if (get_attr_mode (insn) == MODE_SI)
12009 {
12010 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
12011 operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
12012 return "test{l}\t{%1, %k0|%k0, %1}";
12013 }
12014 return "test{b}\t{%1, %0|%0, %1}";
12015 }
12016 [(set_attr "type" "test")
12017 (set (attr "mode")
12018 (cond [(eq_attr "alternative" "2")
12019 (const_string "SI")
12020 (and (match_test "optimize_insn_for_size_p ()")
12021 (and (match_operand 0 "ext_QIreg_operand")
12022 (match_operand 1 "const_0_to_127_operand")))
12023 (const_string "SI")
12024 ]
12025 (const_string "QI")))
12026 (set_attr "pent_pair" "uv,np,np")])
12027
;; Generic AND-and-compare-with-zero for the SWI124 modes; only FLAGS_REG
;; is set.  The insn must satisfy ix86_match_ccmode (insn, CCNOmode).
;; All three alternatives use the same test{<imodesuffix>} template; they
;; differ in constraints and in the "pent_pair" attribute (uv,uv,np).
12028 (define_insn "*test<mode>_1"
12029 [(set (reg FLAGS_REG)
12030 (compare
12031 (and:SWI124
12032 (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
12033 (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
12034 (const_int 0)))]
12035 "ix86_match_ccmode (insn, CCNOmode)"
12036 "test{<imodesuffix>}\t{%1, %0|%0, %1}"
12037 [(set_attr "type" "test")
12038 (set_attr "mode" "<MODE>")
12039 (set_attr "pent_pair" "uv,uv,np")])
12040
;; Named expander with only a name and an RTL template: FLAGS_REG, in
;; CCNOmode, is set to the comparison against zero of the QImode AND of
;;   - the QImode subreg of an 8-bit zero_extract at bit position 8 of
;;     HImode register operand 0, and
;;   - the constant operand 1 (const_int_operand).
12041 (define_expand "testqi_ext_1_ccno"
12042 [(set (reg:CCNO FLAGS_REG)
12043 (compare:CCNO
12044 (and:QI
12045 (subreg:QI
12046 (zero_extract:HI
12047 (match_operand:HI 0 "register_operand")
12048 (const_int 8)
12049 (const_int 8)) 0)
12050 (match_operand:QI 1 "const_int_operand"))
12051 (const_int 0)))])
12052
;; QImode test of an 8-bit field extracted at bit position 8 of operand 0
;; (operator 2 must satisfy extract_operator, iterated over SWI248) against
;; the general operand 1.  Requires ix86_match_ccmode (insn, CCNOmode).
;; Emitted as test{b} with operand 0 printed through the %h modifier.
12053 (define_insn "*testqi_ext<mode>_1"
12054 [(set (reg FLAGS_REG)
12055 (compare
12056 (and:QI
12057 (subreg:QI
12058 (match_operator:SWI248 2 "extract_operator"
12059 [(match_operand 0 "int248_register_operand" "Q")
12060 (const_int 8)
12061 (const_int 8)]) 0)
12062 (match_operand:QI 1 "general_operand" "QnBn"))
12063 (const_int 0)))]
12064 "ix86_match_ccmode (insn, CCNOmode)"
12065 "test{b}\t{%1, %h0|%h0, %1}"
12066 [(set_attr "addr" "gpr8")
12067 (set_attr "type" "test")
12068 (set_attr "mode" "QI")])
12069
;; QImode test where both inputs are 8-bit fields extracted at bit
;; position 8: operator 2 extracts from operand 0 and operator 3 from
;; operand 1 (both operands use constraint "Q").  Requires
;; ix86_match_ccmode (insn, CCNOmode).  Emitted as test{b} with both
;; operands printed through the %h modifier.
12070 (define_insn "*testqi_ext<mode>_2"
12071 [(set (reg FLAGS_REG)
12072 (compare
12073 (and:QI
12074 (subreg:QI
12075 (match_operator:SWI248 2 "extract_operator"
12076 [(match_operand 0 "int248_register_operand" "Q")
12077 (const_int 8)
12078 (const_int 8)]) 0)
12079 (subreg:QI
12080 (match_operator:SWI248 3 "extract_operator"
12081 [(match_operand 1 "int248_register_operand" "Q")
12082 (const_int 8)
12083 (const_int 8)]) 0))
12084 (const_int 0)))]
12085 "ix86_match_ccmode (insn, CCNOmode)"
12086 "test{b}\t{%h1, %h0|%h0, %h1}"
12087 [(set_attr "type" "test")
12088 (set_attr "mode" "QI")])
12089
12090 ;; Provide a *testti instruction that STV can implement using ptest.
12091 ;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
;; Matches only when TARGET_64BIT and ix86_pre_reload_split () hold, and is
;; then split unconditionally ("&& 1") into two insns:
;;   1. operand 2 = operand 0 AND operand 1 (TImode, clobbering FLAGS_REG),
;;      where operand 2 is a fresh TImode pseudo created in the
;;      preparation code;
;;   2. a CCZmode comparison of operand 2 against zero.
;; Operand 1 is forced into a register first unless it already satisfies
;; x86_64_hilo_general_operand.
12092 (define_insn_and_split "*testti_doubleword"
12093 [(set (reg:CCZ FLAGS_REG)
12094 (compare:CCZ
12095 (and:TI (match_operand:TI 0 "register_operand")
12096 (match_operand:TI 1 "general_operand"))
12097 (const_int 0)))]
12098 "TARGET_64BIT
12099 && ix86_pre_reload_split ()"
12100 "#"
12101 "&& 1"
12102 [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
12103 (clobber (reg:CC FLAGS_REG))])
12104 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
12105 {
12106 operands[2] = gen_reg_rtx (TImode);
12107 if (!x86_64_hilo_general_operand (operands[1], TImode))
12108 operands[1] = force_reg (TImode, operands[1]);
12109 })
12110
12111 ;; Combine likes to form bit extractions for some tests. Humor it.
;; Test of a constant-position, constant-length bit field against zero.
;; Operands: 0 is the flags register, 1 the comparison operator, 2 the
;; value being tested, 3 the field length and 4 the field position (the
;; split code reads operands 3 and 4 into "len" and "pos").
;;
;; Insn condition: the position must be non-negative and either
;; (len > 0 and len + pos <= 32) or (<MODE>mode is DImode, len > 32 and
;; len + pos == 64); in addition ix86_match_ccmode must accept the CC mode
;; selected by the expression documented in the embedded C comments.
;;
;; The insn is always split ("&& 1").  The zero_extract is replaced by an
;; AND of the value with a mask built by wi::shifted_mask (pos, len, ...).
;; Before building the mask the split code may
;;   - replace a paradoxical integer subreg by its inner value,
;;   - narrow an HImode register test with pos + len <= 8 to QImode,
;;   - widen to SImode when the mask would include the sign bit of the
;;     chosen mode and the flags mode is not CCZmode.
12112 (define_insn_and_split "*testqi_ext_3"
12113 [(set (match_operand 0 "flags_reg_operand")
12114 (match_operator 1 "compare_operator"
12115 [(zero_extract:SWI248
12116 (match_operand 2 "int_nonimmediate_operand" "rm")
12117 (match_operand:QI 3 "const_int_operand")
12118 (match_operand:QI 4 "const_int_operand"))
12119 (const_int 0)]))]
12120 "/* Ensure that resulting mask is zero or sign extended operand. */
12121 INTVAL (operands[4]) >= 0
12122 && ((INTVAL (operands[3]) > 0
12123 && INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
12124 || (<MODE>mode == DImode
12125 && INTVAL (operands[3]) > 32
12126 && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
12127 && ix86_match_ccmode (insn,
12128 /* If zero_extract mode precision is the same
12129 as len, the SF of the zero_extract
12130 comparison will be the most significant
12131 extracted bit, but this could be matched
12132 after splitting only for pos 0 len all bits
12133 trivial extractions. Require CCZmode. */
12134 (GET_MODE_PRECISION (<MODE>mode)
12135 == INTVAL (operands[3]))
12136 /* Otherwise, require CCZmode if we'd use a mask
12137 with the most significant bit set and can't
12138 widen it to wider mode. *testdi_1 also
12139 requires CCZmode if the mask has bit
12140 31 set and all bits above it clear. */
12141 || (INTVAL (operands[3]) + INTVAL (operands[4])
12142 >= 32)
12143 /* We can't widen also if val is not a REG. */
12144 || (INTVAL (operands[3]) + INTVAL (operands[4])
12145 == GET_MODE_PRECISION (GET_MODE (operands[2]))
12146 && !register_operand (operands[2],
12147 GET_MODE (operands[2])))
12148 /* And we shouldn't widen if
12149 TARGET_PARTIAL_REG_STALL. */
12150 || (TARGET_PARTIAL_REG_STALL
12151 && (INTVAL (operands[3]) + INTVAL (operands[4])
12152 >= (paradoxical_subreg_p (operands[2])
12153 && (GET_MODE_CLASS
12154 (GET_MODE (SUBREG_REG (operands[2])))
12155 == MODE_INT)
12156 ? GET_MODE_PRECISION
12157 (GET_MODE (SUBREG_REG (operands[2])))
12158 : GET_MODE_PRECISION
12159 (GET_MODE (operands[2])))))
12160 ? CCZmode : CCNOmode)"
12161 "#"
12162 "&& 1"
12163 [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
12164 {
12165 rtx val = operands[2];
12166 HOST_WIDE_INT len = INTVAL (operands[3]);
12167 HOST_WIDE_INT pos = INTVAL (operands[4]);
12168 machine_mode mode = GET_MODE (val);
12169
12170 if (SUBREG_P (val))
12171 {
12172 machine_mode submode = GET_MODE (SUBREG_REG (val));
12173
12174 /* Narrow paradoxical subregs to prevent partial register stalls. */
12175 if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
12176 && GET_MODE_CLASS (submode) == MODE_INT
12177 && (GET_MODE (operands[0]) == CCZmode
12178 || pos + len < GET_MODE_PRECISION (submode)
12179 || REG_P (SUBREG_REG (val))))
12180 {
12181 val = SUBREG_REG (val);
12182 mode = submode;
12183 }
12184 }
12185
12186 /* Small HImode tests can be converted to QImode. */
12187 if (pos + len <= 8
12188 && register_operand (val, HImode))
12189 {
12190 rtx nval = gen_lowpart (QImode, val);
12191 if (!MEM_P (nval)
12192 || GET_MODE (operands[0]) == CCZmode
12193 || pos + len < 8)
12194 {
12195 val = nval;
12196 mode = QImode;
12197 }
12198 }
12199
12200 gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
12201
12202 /* If the mask is going to have the sign bit set in the mode
12203 we want to do the comparison in and user isn't interested just
12204 in the zero flag, then we must widen the target mode. */
12205 if (pos + len == GET_MODE_PRECISION (mode)
12206 && GET_MODE (operands[0]) != CCZmode)
12207 {
12208 gcc_assert (pos + len < 32 && !MEM_P (val));
12209 mode = SImode;
12210 val = gen_lowpart (mode, val);
12211 }
12212
12213 wide_int mask
12214 = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
12215
12216 operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
12217 })
12218
12219 ;; Split and;cmp (as optimized by combine) into not;test
12220 ;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
;; CCZmode test of ((NOT operand 0) AND operand 1) for the SWI modes.
;; Matches only while ix86_pre_reload_split () holds and, when TARGET_BMI is
;; enabled, only if operand 1 is not a REG.  It is split unconditionally
;; ("&& 1") into
;;   1. operand 2 = NOT operand 0, where operand 2 is a fresh <MODE>mode
;;      pseudo, and
;;   2. a CCZmode test of (operand 2 AND operand 1) against zero.
12221 (define_insn_and_split "*test<mode>_not"
12222 [(set (reg:CCZ FLAGS_REG)
12223 (compare:CCZ
12224 (and:SWI
12225 (not:SWI (match_operand:SWI 0 "register_operand"))
12226 (match_operand:SWI 1 "<nonmemory_szext_operand>"))
12227 (const_int 0)))]
12228 "ix86_pre_reload_split ()
12229 && (!TARGET_BMI || !REG_P (operands[1]))"
12230 "#"
12231 "&& 1"
12232 [(set (match_dup 2) (not:SWI (match_dup 0)))
12233 (set (reg:CCZ FLAGS_REG)
12234 (compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
12235 (const_int 0)))]
12236 "operands[2] = gen_reg_rtx (<MODE>mode);")
12237
12238 ;; Split and;cmp (as optimized by combine) into andn;cmp $0
;; Double-word (DWI modes) variant of the and-not test.  Matches only while
;; ix86_pre_reload_split () holds and is split unconditionally ("&& 1") into
;;   1. operand 2 = (NOT operand 0) AND operand 1, clobbering FLAGS_REG;
;;   2. a CCZmode comparison of operand 2 against zero.
;; The preparation code forces operand 0 into a register and creates
;; operand 2 as a fresh <MODE>mode pseudo.
12239 (define_insn_and_split "*test<mode>_not_doubleword"
12240 [(set (reg:CCZ FLAGS_REG)
12241 (compare:CCZ
12242 (and:DWI
12243 (not:DWI (match_operand:DWI 0 "nonimmediate_operand"))
12244 (match_operand:DWI 1 "nonimmediate_operand"))
12245 (const_int 0)))]
12246 "ix86_pre_reload_split ()"
12247 "#"
12248 "&& 1"
12249 [(parallel
12250 [(set (match_dup 2) (and:DWI (not:DWI (match_dup 0)) (match_dup 1)))
12251 (clobber (reg:CC FLAGS_REG))])
12252 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
12253 {
12254 operands[0] = force_reg (<MODE>mode, operands[0]);
12255 operands[2] = gen_reg_rtx (<MODE>mode);
12256 })
12257
12258 ;; Convert HImode/SImode test instructions with immediate to QImode ones.
12259 ;; i386 cannot encode test with an 8-bit sign-extended immediate, so
12260 ;; this is a relatively important trick.
12261 ;; Do the conversion only post-reload to avoid limiting the register class
12262 ;; to QI regs.
;; High-byte case.  After reload, a compare-with-zero of
;; (operand 2 AND constant operand 3), where operand 2 is a non-QImode
;; QIreg_operand, is rewritten when the constant has no bits set outside
;; 255 << 8 (insn matching CCZmode) or outside 127 << 8 (insn matching
;; CCNOmode).  The replacement is a QImode AND of the 8-bit field at bit
;; position 8 of the HImode lowpart of operand 2 with the constant shifted
;; right by 8 and converted to QImode.
12263 (define_split
12264 [(set (match_operand 0 "flags_reg_operand")
12265 (match_operator 1 "compare_operator"
12266 [(and (match_operand 2 "QIreg_operand")
12267 (match_operand 3 "const_int_operand"))
12268 (const_int 0)]))]
12269 "reload_completed
12270 && GET_MODE (operands[2]) != QImode
12271 && ((ix86_match_ccmode (insn, CCZmode)
12272 && !(INTVAL (operands[3]) & ~(255 << 8)))
12273 || (ix86_match_ccmode (insn, CCNOmode)
12274 && !(INTVAL (operands[3]) & ~(127 << 8))))"
12275 [(set (match_dup 0)
12276 (match_op_dup 1
12277 [(and:QI
12278 (subreg:QI
12279 (zero_extract:HI (match_dup 2)
12280 (const_int 8)
12281 (const_int 8)) 0)
12282 (match_dup 3))
12283 (const_int 0)]))]
12284 {
12285 operands[2] = gen_lowpart (HImode, operands[2]);
12286 operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
12287 })
12288
;; Low-byte case.  After reload, a compare-with-zero of
;; (operand 2 AND constant operand 3), where operand 2 is a non-QImode
;; nonimmediate operand that is either not a REG or satisfies
;; ANY_QI_REG_P, is rewritten when the constant has no bits set outside
;; 255 (insn matching CCZmode) or outside 127 (insn matching CCNOmode).
;; The replacement is a QImode AND of the QImode lowpart of operand 2 with
;; the constant converted to QImode.
12289 (define_split
12290 [(set (match_operand 0 "flags_reg_operand")
12291 (match_operator 1 "compare_operator"
12292 [(and (match_operand 2 "nonimmediate_operand")
12293 (match_operand 3 "const_int_operand"))
12294 (const_int 0)]))]
12295 "reload_completed
12296 && GET_MODE (operands[2]) != QImode
12297 && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
12298 && ((ix86_match_ccmode (insn, CCZmode)
12299 && !(INTVAL (operands[3]) & ~255))
12300 || (ix86_match_ccmode (insn, CCNOmode)
12301 && !(INTVAL (operands[3]) & ~127)))"
12302 [(set (match_dup 0)
12303 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
12304 (const_int 0)]))]
12305 {
12306 operands[2] = gen_lowpart (QImode, operands[2]);
12307 operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
12308 })
12309
12310 ;; Narrow test instructions with immediate operands that test
12311 ;; memory locations for zero. E.g. testl $0x00aa0000, mem can be
12312 ;; converted to testb $0xaa, mem+2. Reject volatile locations and
12313 ;; targets where reading (possibly unaligned) part of memory
12314 ;; location after a large write to the same address causes
12315 ;; store-to-load forwarding stall.
;; Peephole for CCZmode tests of a SWI248 memory operand against a constant
;; mask.  It is disabled when TARGET_PARTIAL_MEMORY_READ_STALL is set or
;; the memory operand is volatile.  The preparation code
;;   - fails for a zero mask;
;;   - clears mask bits outside <MODE>mode;
;;   - computes first_nonzero_byte, the number of all-zero low bytes of the
;;     mask, and shifts the mask right by that many bytes;
;;   - picks the narrowest of QImode/HImode/SImode/DImode that holds the
;;     remaining significant bits, and fails unless that mode is strictly
;;     smaller than <MODE>mode;
;;   - builds operand 2 as an AND in the narrow mode of the memory operand
;;     re-addressed at byte offset first_nonzero_byte with the shifted
;;     mask.
12316 (define_peephole2
12317 [(set (reg:CCZ FLAGS_REG)
12318 (compare:CCZ
12319 (and:SWI248 (match_operand:SWI248 0 "memory_operand")
12320 (match_operand 1 "const_int_operand"))
12321 (const_int 0)))]
12322 "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
12323 [(set (reg:CCZ FLAGS_REG)
12324 (compare:CCZ (match_dup 2) (const_int 0)))]
12325 {
12326 unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
12327 int first_nonzero_byte, bitsize;
12328 rtx new_addr, new_const;
12329 machine_mode new_mode;
12330
12331 if (ival == 0)
12332 FAIL;
12333
12334 /* Clear bits outside mode width. */
12335 ival &= GET_MODE_MASK (<MODE>mode);
12336
12337 first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
12338
12339 ival >>= first_nonzero_byte * BITS_PER_UNIT;
12340
12341 bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
12342
12343 if (bitsize <= GET_MODE_BITSIZE (QImode))
12344 new_mode = QImode;
12345 else if (bitsize <= GET_MODE_BITSIZE (HImode))
12346 new_mode = HImode;
12347 else if (bitsize <= GET_MODE_BITSIZE (SImode))
12348 new_mode = SImode;
12349 else
12350 new_mode = DImode;
12351
12352 if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
12353 FAIL;
12354
12355 new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
12356 new_const = gen_int_mode (ival, new_mode);
12357
12358 operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
12359 })
12360
12361 ;; %%% This used to optimize known byte-wide and operations to memory,
12362 ;; and sometimes to QImode registers. If this is considered useful,
12363 ;; it should be done with splitters.
12364
;; Expander for "and<mode>3" over the SDWIM modes.
;;  - For modes wider than a word, operand 2 is forced into a register
;;    unless it already satisfies x86_64_hilo_general_operand.
;;  - For modes that fit in a word, an AND of a register destination with a
;;    constant equal to the QImode, HImode or SImode mask is emitted as an
;;    unsigned extension from that narrower mode, unless
;;    TARGET_ZERO_EXTEND_WITH_AND is set while optimizing for speed.
;;  - Everything else goes through ix86_expand_binary_operator.
(define_expand "and<mode>3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (and:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                   (match_operand:SDWIM 2 "<general_szext_operand>")))]
  ""
{
  const bool wider_than_word = GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD;

  /* Mode to zero-extend from; it stays <MODE>mode when a plain AND is
     emitted.  */
  machine_mode ext_mode = <MODE>mode;

  if (wider_than_word)
    {
      if (!x86_64_hilo_general_operand (operands[2], <MODE>mode))
        operands[2] = force_reg (<MODE>mode, operands[2]);
    }
  else if (const_int_operand (operands[2], <MODE>mode)
           && register_operand (operands[0], <MODE>mode)
           && (!TARGET_ZERO_EXTEND_WITH_AND
               || !optimize_function_for_speed_p (cfun)))
    {
      const unsigned HOST_WIDE_INT mask = UINTVAL (operands[2]);

      /* The three masks are distinct, so the order of the checks does not
         matter.  */
      if (mask == GET_MODE_MASK (QImode))
        ext_mode = QImode;
      else if (mask == GET_MODE_MASK (HImode))
        ext_mode = HImode;
      else if (mask == GET_MODE_MASK (SImode))
        ext_mode = SImode;
    }

  if (ext_mode == <MODE>mode)
    ix86_expand_binary_operator (AND, <MODE>mode, operands, TARGET_APX_NDD);
  else
    {
      rtx narrow_src = gen_lowpart (ext_mode, operands[1]);

      /* Final argument 1 requests an unsigned (zero) extension.  */
      emit_insn (gen_extend_insn (operands[0], narrow_src,
                                  <MODE>mode, ext_mode, 1));
    }

  DONE;
})
12402
;; Double-word AND, kept as a single insn ("#") until after reload and then
;; split by the preparation code, which ends with DONE.
;; split_double_mode rewrites the three operands into operands[0..2] and
;; operands[3..5] (presumably the low and high halves; the helper is
;; defined elsewhere).  For each half:
;;   - constant 0        -> move 0 into the destination;
;;   - constant -1       -> move the source into the destination, or emit
;;                          nothing when they are already equal;
;;   - anything else     -> ix86_expand_binary_operator (AND, ...).
;; If both halves turn out to need no instruction, a NOTE_INSN_DELETED note
;; is emitted instead.
12403 (define_insn_and_split "*and<dwi>3_doubleword"
12404 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
12405 (and:<DWI>
12406 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
12407 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,K,<di>,o")))
12408 (clobber (reg:CC FLAGS_REG))]
12409 "ix86_binary_operator_ok (AND, <DWI>mode, operands, TARGET_APX_NDD)"
12410 "#"
12411 "&& reload_completed"
12412 [(const_int:DWIH 0)]
12413 {
12414 bool emit_insn_deleted_note_p = false;
12415
12416 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
12417
12418 if (operands[2] == const0_rtx)
12419 emit_move_insn (operands[0], const0_rtx);
12420 else if (operands[2] == constm1_rtx)
12421 {
12422 if (!rtx_equal_p (operands[0], operands[1]))
12423 emit_move_insn (operands[0], operands[1]);
12424 else
12425 emit_insn_deleted_note_p = true;
12426 }
12427 else
12428 ix86_expand_binary_operator (AND, <MODE>mode, &operands[0], TARGET_APX_NDD);
12429
12430 if (operands[5] == const0_rtx)
12431 emit_move_insn (operands[3], const0_rtx);
12432 else if (operands[5] == constm1_rtx)
12433 {
12434 if (!rtx_equal_p (operands[3], operands[4]))
12435 emit_move_insn (operands[3], operands[4]);
12436 else if (emit_insn_deleted_note_p)
12437 emit_note (NOTE_INSN_DELETED);
12438 }
12439 else
12440 ix86_expand_binary_operator (AND, <MODE>mode, &operands[3], TARGET_APX_NDD);
12441
12442 DONE;
12443 }
12444 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
12445
;; DImode AND for 64-bit targets; the <nf_name>, <nf_prefix>,
;; <nf_condition> and related placeholders are substitutions defined
;; elsewhere in the file.  Ten alternatives, per the output templates:
;;   0-1  and{l} on %k-modified operands (operand 2 constraint "Z"),
;;        two-operand and three-operand form respectively;
;;   2-4  two-operand and{q};
;;   5-7  three-operand and{q} (isa apx_ndd);
;;   8-9  "#", i.e. no assembler template is given here (types imovx and
;;        msklog).
;; "prefix_rex" is forced to 1 for the imovx alternative when operand 2 is
;; 0xff and operand 1 matches ext_QIreg_operand.
12446 (define_insn "*anddi_1<nf_name>"
12447 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r<nf_mem_constraint>,r,r,r,r,r,?k")
12448 (and:DI
12449 (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,0,rm,rjM,r,qm,k")
12450 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,r,e,m,r,e,m,L,k")))]
12451 "TARGET_64BIT
12452 && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)
12453 && <nf_condition>"
12454 "@
12455 <nf_prefix>and{l}\t{%k2, %k0|%k0, %k2}
12456 <nf_prefix>and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
12457 <nf_prefix>and{q}\t{%2, %0|%0, %2}
12458 <nf_prefix>and{q}\t{%2, %0|%0, %2}
12459 <nf_prefix>and{q}\t{%2, %0|%0, %2}
12460 <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2}
12461 <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2}
12462 <nf_prefix>and{q}\t{%2, %1, %0|%0, %1, %2}
12463 #
12464 #"
12465 [(set_attr "isa" "x64,apx_ndd,x64,x64,x64,apx_ndd,apx_ndd,apx_ndd,<nf_nonf_x64_attr>,avx512bw")
12466 (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,imovx,msklog")
12467 (set_attr "length_immediate" "*,*,*,*,*,*,*,*,0,*")
12468 (set (attr "prefix_rex")
12469 (if_then_else
12470 (and (eq_attr "type" "imovx")
12471 (and (match_test "INTVAL (operands[2]) == 0xff")
12472 (match_operand 1 "ext_QIreg_operand")))
12473 (const_string "1")
12474 (const_string "*")))
12475 (set_attr "has_nf" "1")
12476 (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,SI,DI")])
12477
;; DImode AND with a constant whose complement is a single bit at a
;; position in the range 31..63 (exact_log2 (~operand 2) within [31, 63]).
;; Requires TARGET_64BIT and TARGET_USE_BT.  After reload the insn is split
;; into a store of zero to the 1-bit zero_extract of operand 0 at that bit
;; position (operand 3), clobbering FLAGS_REG.
;; NOTE(review): the name suggests the split result is matched by a btr
;; pattern defined elsewhere - confirm.
12478 (define_insn_and_split "*anddi_1_btr"
12479 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
12480 (and:DI
12481 (match_operand:DI 1 "nonimmediate_operand" "%0")
12482 (match_operand:DI 2 "const_int_operand" "n")))
12483 (clobber (reg:CC FLAGS_REG))]
12484 "TARGET_64BIT && TARGET_USE_BT
12485 && ix86_binary_operator_ok (AND, DImode, operands)
12486 && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
12487 "#"
12488 "&& reload_completed"
12489 [(parallel [(set (zero_extract:DI (match_dup 0)
12490 (const_int 1)
12491 (match_dup 3))
12492 (const_int 0))
12493 (clobber (reg:CC FLAGS_REG))])]
12494 "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
12495 [(set_attr "type" "alu1")
12496 (set_attr "prefix_0f" "1")
12497 (set_attr "znver1_decode" "double")
12498 (set_attr "mode" "DI")])
12499
12500 ;; Turn *anddi_1 into *andsi_1_zext if possible.
;; On TARGET_64BIT, a DImode AND of (subreg:DI of SImode register operand 1)
;; with an x86_64_zext_immediate_operand is rewritten as the DImode
;; zero-extension of an SImode AND (FLAGS_REG clobber kept).
;; The preparation code converts operand 2 to SImode:
;;   - SYMBOL_REF / LABEL_REF: shallow copy with the mode set to SImode;
;;   - CONST: full copy, with SImode set on the CONST, on its operand and
;;     on that operand's first operand;
;;   - otherwise: gen_lowpart (SImode, ...).
12501 (define_split
12502 [(set (match_operand:DI 0 "register_operand")
12503 (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
12504 (match_operand:DI 2 "x86_64_zext_immediate_operand")))
12505 (clobber (reg:CC FLAGS_REG))]
12506 "TARGET_64BIT"
12507 [(parallel [(set (match_dup 0)
12508 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
12509 (clobber (reg:CC FLAGS_REG))])]
12510 {
12511 if (GET_CODE (operands[2]) == SYMBOL_REF
12512 || GET_CODE (operands[2]) == LABEL_REF)
12513 {
12514 operands[2] = shallow_copy_rtx (operands[2]);
12515 PUT_MODE (operands[2], SImode);
12516 }
12517 else if (GET_CODE (operands[2]) == CONST)
12518 {
12519 /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
12520 operands[2] = copy_rtx (operands[2]);
12521 PUT_MODE (operands[2], SImode);
12522 PUT_MODE (XEXP (operands[2], 0), SImode);
12523 PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
12524 }
12525 else
12526 operands[2] = gen_lowpart (SImode, operands[2]);
12527 })
12528
;; Zero-extension (to the SWI248x modes) of a QImode AND, written to a
;; register.  Requires TARGET_APX_NDD and <nf_condition>, and rejects the
;; case where operands 1 and 2 are both memory.  Both alternatives emit the
;; three-operand and{b} form with the destination printed through %b.
12529 (define_insn "*andqi_1_zext<mode><nf_name>"
12530 [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
12531 (zero_extend:SWI248x
12532 (and:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
12533 (match_operand:QI 2 "x86_64_general_operand" "rn,m"))))]
12534 "TARGET_APX_NDD && <nf_condition>
12535 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12536 "@
12537 <nf_prefix>and{b}\t{%2, %1, %b0|%b0, %1, %2}
12538 <nf_prefix>and{b}\t{%2, %1, %b0|%b0, %1, %2}"
12539 [(set_attr "type" "alu")
12540 (set_attr "has_nf" "1")
12541 (set_attr "mode" "QI")])
12542
;; Zero-extension (to the SWI48x modes) of an HImode AND, written to a
;; register.  Requires TARGET_APX_NDD and <nf_condition>, and rejects the
;; case where operands 1 and 2 are both memory.  Both alternatives emit the
;; three-operand and{w} form with the destination printed through %w.
12543 (define_insn "*andhi_1_zext<mode><nf_name>"
12544 [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
12545 (zero_extend:SWI48x
12546 (and:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
12547 (match_operand:HI 2 "x86_64_general_operand" "rn,m"))))]
12548 "TARGET_APX_NDD && <nf_condition>
12549 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12550 "@
12551 <nf_prefix>and{w}\t{%2, %1, %w0|%w0, %1, %2}
12552 <nf_prefix>and{w}\t{%2, %1, %w0|%w0, %1, %2}"
12553 [(set_attr "type" "alu")
12554 (set_attr "has_nf" "1")
12555 (set_attr "mode" "HI")])
12556
12557 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand
;; DImode zero-extension of an SImode AND, clobbering FLAGS_REG.  Requires
;; TARGET_64BIT and ix86_binary_operator_ok.  Alternative 0 ties operand 1
;; to the destination and emits the two-operand and{l}; alternatives 1-3
;; (isa apx_ndd) emit the three-operand form.  The destination is printed
;; through %k in every template.
12558 (define_insn "*andsi_1_zext"
12559 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
12560 (zero_extend:DI
12561 (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,rjM,r")
12562 (match_operand:SI 2 "x86_64_general_operand" "rBMe,r,e,BM"))))
12563 (clobber (reg:CC FLAGS_REG))]
12564 "TARGET_64BIT
12565 && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
12566 "@
12567 and{l}\t{%2, %k0|%k0, %2}
12568 and{l}\t{%2, %1, %k0|%k0, %1, %2}
12569 and{l}\t{%2, %1, %k0|%k0, %1, %2}
12570 and{l}\t{%2, %1, %k0|%k0, %1, %2}"
12571 [(set_attr "type" "alu")
12572 (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
12573 (set_attr "mode" "SI")])
12574
;; AND for the SWI24 modes; the <nf_*> placeholders are substitutions
;; defined elsewhere in the file.  Eight alternatives, per the templates
;; and attributes:
;;   0-2  two-operand and{<imodesuffix>} (operand 1 tied to operand 0);
;;   3-5  three-operand form (isa apx_ndd);
;;   6    "#", type imovx, mode SI, isa <nf_nonf_attr>;
;;   7    "#", type msklog on "k" operands, isa avx512bw for SImode and
;;        avx512f otherwise.
;; "prefix_rex" is forced to 1 for the imovx alternative when operand 2 is
;; 0xff and operand 1 matches ext_QIreg_operand.
12575 (define_insn "*and<mode>_1<nf_name>"
12576 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,Ya,?k")
12577 (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,qm,k")
12578 (match_operand:SWI24 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,L,k")))]
12579 "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)
12580 && <nf_condition>"
12581 "@
12582 <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2}
12583 <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2}
12584 <nf_prefix>and{<imodesuffix>}\t{%2, %0|%0, %2}
12585 <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12586 <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12587 <nf_prefix>and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12588 #
12589 #"
12590 [(set (attr "isa")
12591 (cond [(eq_attr "alternative" "3,4,5")
12592 (const_string "apx_ndd")
12593 (eq_attr "alternative" "6")
12594 (const_string "<nf_nonf_attr>")
12595 (eq_attr "alternative" "7")
12596 (if_then_else (eq_attr "mode" "SI")
12597 (const_string "avx512bw")
12598 (const_string "avx512f"))
12599 ]
12600 (const_string "*")))
12601 (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
12602 (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
12603 (set (attr "prefix_rex")
12604 (if_then_else
12605 (and (eq_attr "type" "imovx")
12606 (and (match_test "INTVAL (operands[2]) == 0xff")
12607 (match_operand 1 "ext_QIreg_operand")))
12608 (const_string "1")
12609 (const_string "*")))
12610 (set_attr "has_nf" "1")
12611 (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
12612
;; QImode AND; the <nf_*> placeholders are substitutions defined elsewhere
;; in the file.  Six alternatives, per the templates and attributes:
;;   0-1  two-operand and{b};
;;   2    and{l} on the %k-modified operands (mode SI); preferred for speed
;;        only when !TARGET_PARTIAL_REG_STALL;
;;   3-4  three-operand and{b} (isa apx_ndd);
;;   5    "#", type msklog on "k" operands; its mode is HI when
;;        !TARGET_AVX512DQ and QI otherwise.
12613 (define_insn "*andqi_1<nf_name>"
12614 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
12615 (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
12616 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))]
12617 "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
12618 && <nf_condition>"
12619 "@
12620 <nf_prefix>and{b}\t{%2, %0|%0, %2}
12621 <nf_prefix>and{b}\t{%2, %0|%0, %2}
12622 <nf_prefix>and{l}\t{%k2, %k0|%k0, %k2}
12623 <nf_prefix>and{b}\t{%2, %1, %0|%0, %1, %2}
12624 <nf_prefix>and{b}\t{%2, %1, %0|%0, %1, %2}
12625 #"
12626 [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
12627 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
12628 (set_attr "has_nf" "1")
12629 (set (attr "mode")
12630 (cond [(eq_attr "alternative" "2")
12631 (const_string "SI")
12632 (and (eq_attr "alternative" "5")
12633 (match_test "!TARGET_AVX512DQ"))
12634 (const_string "HI")
12635 ]
12636 (const_string "QI")))
12637 ;; Potential partial reg stall on alternative 2.
12638 (set (attr "preferred_for_speed")
12639 (cond [(eq_attr "alternative" "2")
12640 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12641 (symbol_ref "true")))])
12642
12643 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation (any_logic, SWI12 modes) whose result is written through
;; strict_low_part of register operand 0, clobbering FLAGS_REG.  Enabled
;; when !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Alternative 0 ties operand 1 to operand 0 and emits the instruction
;; directly.  Alternative 1 ("#") is split after reload, provided operand 0
;; equals neither operand 1 nor operand 2, into
;;   1. a strict_low_part copy of operand 1 into operand 0, and
;;   2. the logic operation on operand 0 and operand 2.
12644 (define_insn_and_split "*<code><mode>_1_slp"
12645 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
12646 (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
12647 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
12648 (clobber (reg:CC FLAGS_REG))]
12649 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
12650 "@
12651 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
12652 #"
12653 "&& reload_completed
12654 && !(rtx_equal_p (operands[0], operands[1])
12655 || rtx_equal_p (operands[0], operands[2]))"
12656 [(set (strict_low_part (match_dup 0)) (match_dup 1))
12657 (parallel
12658 [(set (strict_low_part (match_dup 0))
12659 (any_logic:SWI12 (match_dup 0) (match_dup 2)))
12660 (clobber (reg:CC FLAGS_REG))])]
12661 ""
12662 [(set_attr "type" "alu")
12663 (set_attr "mode" "<MODE>")])
12664
12665 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode logic operation between the 8-bit field extracted at bit
;; position 8 of operand 2 (operator 3) and operand 1, written through
;; strict_low_part of register operand 0 and clobbering FLAGS_REG.  Enabled
;; when !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Alternative 0 ties operand 1 to operand 0 and emits the instruction
;; directly with operand 2 printed through %h.  Alternative 1 ("#") is
;; split after reload, when operand 0 differs from operand 1, into
;;   1. a strict_low_part copy of operand 1 into operand 0, and
;;   2. the logic operation of the extracted field with operand 0.
12666 (define_insn_and_split "*<code>qi_ext<mode>_1_slp"
12667 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
12668 (any_logic:QI
12669 (subreg:QI
12670 (match_operator:SWI248 3 "extract_operator"
12671 [(match_operand 2 "int248_register_operand" "Q,Q")
12672 (const_int 8)
12673 (const_int 8)]) 0)
12674 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
12675 (clobber (reg:CC FLAGS_REG))]
12676 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
12677 "@
12678 <logic>{b}\t{%h2, %0|%0, %h2}
12679 #"
12680 "&& reload_completed
12681 && !rtx_equal_p (operands[0], operands[1])"
12682 [(set (strict_low_part (match_dup 0)) (match_dup 1))
12683 (parallel
12684 [(set (strict_low_part (match_dup 0))
12685 (any_logic:QI
12686 (subreg:QI
12687 (match_op_dup 3
12688 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
12689 (match_dup 0)))
12690 (clobber (reg:CC FLAGS_REG))])]
12691 ""
12692 [(set_attr "type" "alu")
12693 (set_attr "mode" "QI")])
12694
;; QImode logic operation between two 8-bit fields extracted at bit
;; position 8 (operator 3 from operand 1, operator 4 from operand 2),
;; written through strict_low_part of register operand 0 and clobbering
;; FLAGS_REG.  Enabled when !TARGET_PARTIAL_REG_STALL or when optimizing the
;; function for size.  There is no direct assembler template ("#"); after
;; reload the insn is split into
;;   1. a strict_low_part copy of the field of operand 2 into operand 0,
;;   2. the logic operation of the field of operand 1 with operand 0.
12695 (define_insn_and_split "*<code>qi_ext<mode>_2_slp"
12696 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
12697 (any_logic:QI
12698 (subreg:QI
12699 (match_operator:SWI248 3 "extract_operator"
12700 [(match_operand 1 "int248_register_operand" "Q")
12701 (const_int 8)
12702 (const_int 8)]) 0)
12703 (subreg:QI
12704 (match_operator:SWI248 4 "extract_operator"
12705 [(match_operand 2 "int248_register_operand" "Q")
12706 (const_int 8)
12707 (const_int 8)]) 0)))
12708 (clobber (reg:CC FLAGS_REG))]
12709 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
12710 "#"
12711 "&& reload_completed"
12712 [(set (strict_low_part (match_dup 0))
12713 (subreg:QI
12714 (match_op_dup 4
12715 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
12716 (parallel
12717 [(set (strict_low_part (match_dup 0))
12718 (any_logic:QI
12719 (subreg:QI
12720 (match_op_dup 3
12721 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12722 (match_dup 0)))
12723 (clobber (reg:CC FLAGS_REG))])]
12724 ""
12725 [(set_attr "type" "alu")
12726 (set_attr "mode" "QI")])
12727
;; After reload, turn an AND of a register destination with the SImode,
;; HImode or QImode all-ones mask into an unsigned extension from that
;; mode, provided the source is not the destination register itself.
;; Destinations narrower than SImode are widened to SImode first.
(define_split
  [(set (match_operand:SWI248 0 "register_operand")
        (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
                    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && (!REG_P (operands[1])
       || REGNO (operands[0]) != REGNO (operands[1]))
   && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
       || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
       || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
  [(const_int 0)]
{
  const unsigned HOST_WIDE_INT mask = UINTVAL (operands[2]);
  machine_mode narrow_mode = VOIDmode;

  /* The split condition guarantees that exactly one of these matches.  */
  if (mask == GET_MODE_MASK (QImode))
    narrow_mode = QImode;
  else if (mask == GET_MODE_MASK (HImode))
    narrow_mode = HImode;
  else if (mask == GET_MODE_MASK (SImode))
    narrow_mode = SImode;

  if (narrow_mode == VOIDmode)
    gcc_unreachable ();

  /* Zero extend to SImode to avoid partial register stalls.  */
  rtx dest = operands[0];
  if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
    dest = gen_lowpart (SImode, dest);

  rtx src = gen_lowpart (narrow_mode, operands[1]);

  /* Final argument 1 requests an unsigned (zero) extension.  */
  emit_insn (gen_extend_insn (dest, src, GET_MODE (dest), narrow_mode, 1));
  DONE;
})
12762
;; An in-place AND of a SWI48 register with -65536 (all bits set except
;; the low 16) is replaced by a store of zero to the HImode low part of
;; that register via strict_low_part.  Enabled when TARGET_FAST_PREFIX is
;; set and TARGET_PARTIAL_REG_STALL is not, or when optimizing the function
;; for size.
12763 (define_split
12764 [(set (match_operand:SWI48 0 "register_operand")
12765 (and:SWI48 (match_dup 0)
12766 (const_int -65536)))
12767 (clobber (reg:CC FLAGS_REG))]
12768 "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
12769 || optimize_function_for_size_p (cfun)"
12770 [(set (strict_low_part (match_dup 1)) (const_int 0))]
12771 "operands[1] = gen_lowpart (HImode, operands[0]);")
12772
;; After reload, an in-place AND of an any_QIreg_operand register with -256
;; (all bits set except the low 8) is replaced by a store of zero to the
;; QImode low part of that register via strict_low_part.  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
12773 (define_split
12774 [(set (match_operand:SWI248 0 "any_QIreg_operand")
12775 (and:SWI248 (match_dup 0)
12776 (const_int -256)))
12777 (clobber (reg:CC FLAGS_REG))]
12778 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12779 && reload_completed"
12780 [(set (strict_low_part (match_dup 1)) (const_int 0))]
12781 "operands[1] = gen_lowpart (QImode, operands[0]);")
12782
;; After reload, an in-place AND of a QIreg_operand register with -65281
;; (all bits set except bits 8..15) is replaced by an XOR of the 8-bit
;; field at bit position 8 with itself, stored back into that same field of
;; the HImode lowpart of the register (FLAGS_REG clobbered).  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
12783 (define_split
12784 [(set (match_operand:SWI248 0 "QIreg_operand")
12785 (and:SWI248 (match_dup 0)
12786 (const_int -65281)))
12787 (clobber (reg:CC FLAGS_REG))]
12788 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12789 && reload_completed"
12790 [(parallel
12791 [(set (zero_extract:HI (match_dup 0)
12792 (const_int 8)
12793 (const_int 8))
12794 (subreg:HI
12795 (xor:QI
12796 (subreg:QI
12797 (zero_extract:HI (match_dup 0)
12798 (const_int 8)
12799 (const_int 8)) 0)
12800 (subreg:QI
12801 (zero_extract:HI (match_dup 0)
12802 (const_int 8)
12803 (const_int 8)) 0)) 0))
12804 (clobber (reg:CC FLAGS_REG))])]
12805 "operands[0] = gen_lowpart (HImode, operands[0]);")
12806
;; DImode AND that both sets FLAGS_REG from the comparison of the result
;; with zero and stores the result in operand 0.
;; Alternatives 0 and 3 (operand 2 constraint "Z") emit and{l} on the
;; %k-modified operands (mode attribute SI); the others emit and{q}.
;; Alternatives 3-5 use the three-operand form (isa apx_ndd).
;; The condition requires TARGET_64BIT, ix86_binary_operator_ok, and a CC
;; mode accepted by ix86_match_ccmode: CCZmode when operand 2 satisfies
;; constraint Z and is either not a CONST_INT or has the SImode sign bit
;; known set, CCNOmode otherwise (see the embedded C comment).
12807 (define_insn "*anddi_2"
12808 [(set (reg FLAGS_REG)
12809 (compare
12810 (and:DI
12811 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
12812 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
12813 (const_int 0)))
12814 (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
12815 (and:DI (match_dup 1) (match_dup 2)))]
12816 "TARGET_64BIT
12817 && ix86_match_ccmode
12818 (insn,
12819 /* If we are going to emit andl instead of andq, and the operands[2]
12820 constant might have the SImode sign bit set, make sure the sign
12821 flag isn't tested, because the instruction will set the sign flag
12822 based on bit 31 rather than bit 63. If it isn't CONST_INT,
12823 conservatively assume it might have bit 31 set. */
12824 (satisfies_constraint_Z (operands[2])
12825 && (!CONST_INT_P (operands[2])
12826 || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
12827 ? CCZmode : CCNOmode)
12828 && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
12829 "@
12830 and{l}\t{%k2, %k0|%k0, %k2}
12831 and{q}\t{%2, %0|%0, %2}
12832 and{q}\t{%2, %0|%0, %2}
12833 and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
12834 and{q}\t{%2, %1, %0|%0, %1, %2}
12835 and{q}\t{%2, %1, %0|%0, %1, %2}"
12836 [(set_attr "type" "alu")
12837 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
12838 (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
12839
12840 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand
;; SImode AND that sets FLAGS_REG from the comparison of the result with
;; zero and stores the DImode zero-extension of the result in register
;; operand 0.  Requires TARGET_64BIT, ix86_match_ccmode (insn, CCNOmode)
;; and ix86_binary_operator_ok.  Alternative 0 emits the two-operand
;; and{l}; alternatives 1-2 (isa apx_ndd) emit the three-operand form.
;; The destination is printed through %k in every template.
12841 (define_insn "*andsi_2_zext"
12842 [(set (reg FLAGS_REG)
12843 (compare (and:SI
12844 (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
12845 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
12846 (const_int 0)))
12847 (set (match_operand:DI 0 "register_operand" "=r,r,r")
12848 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
12849 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
12850 && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
12851 "@
12852 and{l}\t{%2, %k0|%k0, %2}
12853 and{l}\t{%2, %1, %k0|%k0, %1, %2}
12854 and{l}\t{%2, %1, %k0|%k0, %1, %2}"
12855 [(set_attr "type" "alu")
12856 (set_attr "isa" "*,apx_ndd,apx_ndd")
12857 (set_attr "mode" "SI")])
12858
;; QImode AND that sets FLAGS_REG from the comparison of the result with
;; zero and also stores the result; it may be output as a 32-bit AND.
;; CC mode passed to ix86_match_ccmode: CCNOmode when operand 2 is a
;; non-negative CONST_INT, CCZmode otherwise.
;; Output: when the insn's "mode" attribute is SI, a negative CONST_INT
;; operand 2 is first reduced to its low 8 bits and and{l} is emitted on
;; %k0; otherwise and{b} is emitted, in three-operand form for
;; alternatives above 2 (isa apx_ndd) and two-operand form for the rest.
;; The "mode" attribute is QI for alternatives 3-4, SI for alternative 2,
;; SI when optimizing the insn for size with operand 0 matching
;; ext_QIreg_operand and operand 2 matching const_0_to_127_operand, and QI
;; otherwise.
12859 (define_insn "*andqi_2_maybe_si"
12860 [(set (reg FLAGS_REG)
12861 (compare (and:QI
12862 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
12863 (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
12864 (const_int 0)))
12865 (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
12866 (and:QI (match_dup 1) (match_dup 2)))]
12867 "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
12868 && ix86_match_ccmode (insn,
12869 CONST_INT_P (operands[2])
12870 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
12871 {
12872 if (get_attr_mode (insn) == MODE_SI)
12873 {
12874 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
12875 operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
12876 return "and{l}\t{%2, %k0|%k0, %2}";
12877 }
12878 if (which_alternative > 2)
12879 return "and{b}\t{%2, %1, %0|%0, %1, %2}";
12880 return "and{b}\t{%2, %0|%0, %2}";
12881 }
12882 [(set_attr "type" "alu")
12883 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
12884 (set (attr "mode")
12885 (cond [(eq_attr "alternative" "3,4")
12886 (const_string "QI")
12887 (eq_attr "alternative" "2")
12888 (const_string "SI")
12889 (and (match_test "optimize_insn_for_size_p ()")
12890 (and (match_operand 0 "ext_QIreg_operand")
12891 (match_operand 2 "const_0_to_127_operand")))
12892 (const_string "SI")
12893 ]
12894 (const_string "QI")))
12895 ;; Potential partial reg stall on alternative 2.
12896 (set (attr "preferred_for_speed")
12897 (cond [(eq_attr "alternative" "2")
12898 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12899 (symbol_ref "true")))])
12900
;; AND for the SWI124 modes that sets the flags from comparing the result
;; with zero (flags user must be compatible with CCNOmode) and stores the
;; result in operand 0.  Alternatives 0-1 are two-operand forms with
;; operand 1 tied to operand 0; alternatives 2-3 are three-operand forms
;; enabled only by the "apx_ndd" isa attribute.
12901 (define_insn "*and<mode>_2"
12902 [(set (reg FLAGS_REG)
12903 (compare (and:SWI124
12904 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
12905 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
12906 (const_int 0)))
12907 (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
12908 (and:SWI124 (match_dup 1) (match_dup 2)))]
12909 "ix86_match_ccmode (insn, CCNOmode)
12910 && ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
12911 "@
12912 and{<imodesuffix>}\t{%2, %0|%0, %2}
12913 and{<imodesuffix>}\t{%2, %0|%0, %2}
12914 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12915 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
12916 [(set_attr "type" "alu")
12917 (set_attr "isa" "*,*,apx_ndd,apx_ndd")
12918 (set_attr "mode" "<MODE>")])
12919
;; QImode logic operation (any_logic) between operand 1, which is tied to
;; the destination operand 0, and the 8-bit field (size 8, position 8)
;; extracted from operand 2.  Operand 2 is printed with the %h modifier.
;; The flags register is clobbered.
12920 (define_insn "*<code>qi_ext<mode>_0"
12921 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
12922 (any_logic:QI
12923 (subreg:QI
12924 (match_operator:SWI248 3 "extract_operator"
12925 [(match_operand 2 "int248_register_operand" "Q")
12926 (const_int 8)
12927 (const_int 8)]) 0)
12928 (match_operand:QI 1 "nonimmediate_operand" "0")))
12929 (clobber (reg:CC FLAGS_REG))]
12930 ""
12931 "<logic>{b}\t{%h2, %0|%0, %h2}"
12932 [(set_attr "addr" "gpr8")
12933 (set_attr "type" "alu")
12934 (set_attr "mode" "QI")])
12935
;; QImode logic operation (any_logic) between two 8-bit fields (size 8,
;; position 8) extracted from operands 1 and 2, stored in an earlyclobber
;; QImode register.  Always emitted as "#" and split after reload into:
;;   1. a move of operand 2's extracted field into operand 0, and
;;   2. the logic operation of operand 1's extracted field with operand 0
;;      (with a flags clobber).
12936 (define_insn_and_split "*<code>qi_ext2<mode>_0"
12937 [(set (match_operand:QI 0 "register_operand" "=&Q")
12938 (any_logic:QI
12939 (subreg:QI
12940 (match_operator:SWI248 3 "extract_operator"
12941 [(match_operand 1 "int248_register_operand" "Q")
12942 (const_int 8)
12943 (const_int 8)]) 0)
12944 (subreg:QI
12945 (match_operator:SWI248 4 "extract_operator"
12946 [(match_operand 2 "int248_register_operand" "Q")
12947 (const_int 8)
12948 (const_int 8)]) 0)))
12949 (clobber (reg:CC FLAGS_REG))]
12950 ""
12951 "#"
12952 "&& reload_completed"
12953 [(set (match_dup 0)
12954 (subreg:QI
12955 (match_op_dup 4
12956 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
12957 (parallel
12958 [(set (match_dup 0)
12959 (any_logic:QI
12960 (subreg:QI
12961 (match_op_dup 3
12962 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12963 (match_dup 0)))
12964 (clobber (reg:CC FLAGS_REG))])]
12965 ""
12966 [(set_attr "type" "alu")
12967 (set_attr "mode" "QI")])
12968
;; Named expander: AND the 8-bit field (size 8, position 8) of HImode
;; register operand 1 with the constant operand 2 and store the result into
;; the same field of HImode register operand 0.  The flags register is
;; clobbered.
12969 (define_expand "andqi_ext_1"
12970 [(parallel
12971 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
12972 (const_int 8)
12973 (const_int 8))
12974 (subreg:HI
12975 (and:QI
12976 (subreg:QI
12977 (zero_extract:HI (match_operand:HI 1 "register_operand")
12978 (const_int 8)
12979 (const_int 8)) 0)
12980 (match_operand:QI 2 "const_int_operand")) 0))
12981 (clobber (reg:CC FLAGS_REG))])])
12982
12983 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation (any_logic) between the 8-bit field (size 8, position 8)
;; of operand 1 and QImode operand 2, stored into the same field of
;; operand 0; the flags register is clobbered.
;; Alternative 0 ties operand 1 to operand 0 and is emitted directly.
;; Alternative 1 is emitted as "#"; after reload, when operands 0 and 1
;; differ, it is split into a copy of operand 1's field into operand 0
;; followed by the operation performed in place on operand 0.
12984 (define_insn_and_split "*<code>qi_ext<mode>_1"
12985 [(set (zero_extract:SWI248
12986 (match_operand 0 "int248_register_operand" "+Q,&Q")
12987 (const_int 8)
12988 (const_int 8))
12989 (subreg:SWI248
12990 (any_logic:QI
12991 (subreg:QI
12992 (match_operator:SWI248 3 "extract_operator"
12993 [(match_operand 1 "int248_register_operand" "0,!Q")
12994 (const_int 8)
12995 (const_int 8)]) 0)
12996 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
12997 (clobber (reg:CC FLAGS_REG))]
12998 ""
12999 "@
13000 <logic>{b}\t{%2, %h0|%h0, %2}
13001 #"
13002 "reload_completed
13003 && !(rtx_equal_p (operands[0], operands[1]))"
13004 [(set (zero_extract:SWI248
13005 (match_dup 0) (const_int 8) (const_int 8))
13006 (zero_extract:SWI248
13007 (match_dup 1) (const_int 8) (const_int 8)))
13008 (parallel
13009 [(set (zero_extract:SWI248
13010 (match_dup 0) (const_int 8) (const_int 8))
13011 (subreg:SWI248
13012 (any_logic:QI
13013 (subreg:QI
13014 (match_op_dup 3
13015 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
13016 (match_dup 2)) 0))
13017 (clobber (reg:CC FLAGS_REG))])]
13018 ""
13019 [(set_attr "addr" "gpr8")
13020 (set_attr "type" "alu")
13021 (set_attr "mode" "QI")])
13022
13023 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Flag-setting variant of the pattern above: the flags are set from
;; comparing (any_logic (8-bit field of operand 1) operand 2) with zero, and
;; the same value is stored into the 8-bit field (size 8, position 8) of
;; operand 0.  The flags user must be compatible with CCNOmode.
;; Alternative 0 ties operand 1 to operand 0 and is emitted directly.
;; Alternative 1 is emitted as "#"; after reload, when operands 0 and 1
;; differ, it is split into a copy of operand 1's field into operand 0
;; followed by the in-place operation on operand 0.
;; Both sets of the insn pattern must read operand 1, and both sets of the
;; split result must read operand 0: otherwise the stored value and the
;; compared value describe different inputs in alternative 1, and the split
;; result no longer matches this pattern's alternative 0.
13024 (define_insn_and_split "*<code>qi_ext<mode>_1_cc"
13025 [(set (match_operand 4 "flags_reg_operand")
13026 (match_operator 5 "compare_operator"
13027 [(any_logic:QI
13028 (subreg:QI
13029 (match_operator:SWI248 3 "extract_operator"
13030 [(match_operand 1 "int248_register_operand" "0,!Q")
13031 (const_int 8)
13032 (const_int 8)]) 0)
13033 (match_operand:QI 2 "general_operand" "QnBn,QnBn"))
13034 (const_int 0)]))
13035 (set (zero_extract:SWI248
13036 (match_operand 0 "int248_register_operand" "+Q,&Q")
13037 (const_int 8)
13038 (const_int 8))
13039 (subreg:SWI248
13040 (any_logic:QI
13041 (subreg:QI
13042 (match_op_dup 3
13043 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
13044 (match_dup 2)) 0))]
13045 "ix86_match_ccmode (insn, CCNOmode)"
13046 "@
13047 <logic>{b}\t{%2, %h0|%h0, %2}
13048 #"
13049 "&& reload_completed
13050 && !(rtx_equal_p (operands[0], operands[1]))"
13051 [(set (zero_extract:SWI248
13052 (match_dup 0) (const_int 8) (const_int 8))
13053 (zero_extract:SWI248
13054 (match_dup 1) (const_int 8) (const_int 8)))
13055 (parallel
13056 [(set (match_dup 4)
13057 (match_op_dup 5
13058 [(any_logic:QI
13059 (subreg:QI
13060 (match_op_dup 3
13061 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
13062 (match_dup 2))
13063 (const_int 0)]))
13064 (set (zero_extract:SWI248
13065 (match_dup 0) (const_int 8) (const_int 8))
13066 (subreg:SWI248
13067 (any_logic:QI
13068 (subreg:QI
13069 (match_op_dup 3
13070 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
13071 (match_dup 2)) 0))])]
13072 ""
13073 [(set_attr "addr" "gpr8")
13074 (set_attr "type" "alu")
13075 (set_attr "mode" "QI")])
13076
13077 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation (any_logic) between the 8-bit fields (size 8, position 8)
;; of operands 1 and 2, stored into the same field of operand 0; the flags
;; register is clobbered.
;; Alternative 0 ties operand 1 to operand 0 and is emitted directly.
;; Alternative 1 is emitted as "#"; after reload, when operand 0 equals
;; neither operand 1 nor operand 2, it is split into a copy of operand 1's
;; field into operand 0 followed by the operation performed in place on
;; operand 0 with operand 2's field.
13078 (define_insn_and_split "*<code>qi_ext<mode>_2"
13079 [(set (zero_extract:SWI248
13080 (match_operand 0 "int248_register_operand" "+Q,&Q")
13081 (const_int 8)
13082 (const_int 8))
13083 (subreg:SWI248
13084 (any_logic:QI
13085 (subreg:QI
13086 (match_operator:SWI248 3 "extract_operator"
13087 [(match_operand 1 "int248_register_operand" "%0,!Q")
13088 (const_int 8)
13089 (const_int 8)]) 0)
13090 (subreg:QI
13091 (match_operator:SWI248 4 "extract_operator"
13092 [(match_operand 2 "int248_register_operand" "Q,Q")
13093 (const_int 8)
13094 (const_int 8)]) 0)) 0))
13095 (clobber (reg:CC FLAGS_REG))]
13096 ""
13097 "@
13098 <logic>{b}\t{%h2, %h0|%h0, %h2}
13099 #"
13100 "reload_completed
13101 && !(rtx_equal_p (operands[0], operands[1])
13102 || rtx_equal_p (operands[0], operands[2]))"
13103 [(set (zero_extract:SWI248
13104 (match_dup 0) (const_int 8) (const_int 8))
13105 (zero_extract:SWI248
13106 (match_dup 1) (const_int 8) (const_int 8)))
13107 (parallel
13108 [(set (zero_extract:SWI248
13109 (match_dup 0) (const_int 8) (const_int 8))
13110 (subreg:SWI248
13111 (any_logic:QI
13112 (subreg:QI
13113 (match_op_dup 3
13114 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
13115 (subreg:QI
13116 (match_op_dup 4
13117 [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
13118 (clobber (reg:CC FLAGS_REG))])]
13119 ""
13120 [(set_attr "type" "alu")
13121 (set_attr "mode" "QI")])
13122
13123 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Like the pattern above, but here the logic operation (any_logic) is
;; applied to the whole registers operand 1 and operand 2 and the 8-bit field
;; (size 8, position 8) is extracted from the result; that field is stored
;; into the same field of operand 0.  Operands 1 and 2 must have the same
;; mode.  The flags register is clobbered.
;; Alternative 1 is emitted as "#"; after reload, when operand 0 equals
;; neither operand 1 nor operand 2, it is split into a copy of operand 1's
;; field into operand 0 followed by the operation on operand 0 itself;
;; operands[4] is operand 0 viewed in operand 1's mode via gen_lowpart.
13124 (define_insn_and_split "*<code>qi_ext<mode>_3"
13125 [(set (zero_extract:SWI248
13126 (match_operand 0 "int248_register_operand" "+Q,&Q")
13127 (const_int 8)
13128 (const_int 8))
13129 (match_operator:SWI248 3 "extract_operator"
13130 [(any_logic
13131 (match_operand 1 "int248_register_operand" "%0,!Q")
13132 (match_operand 2 "int248_register_operand" "Q,Q"))
13133 (const_int 8)
13134 (const_int 8)]))
13135 (clobber (reg:CC FLAGS_REG))]
13136 "GET_MODE (operands[1]) == GET_MODE (operands[2])"
13137 "@
13138 <logic>{b}\t{%h2, %h0|%h0, %h2}
13139 #"
13140 "&& reload_completed
13141 && !(rtx_equal_p (operands[0], operands[1])
13142 || rtx_equal_p (operands[0], operands[2]))"
13143 [(set (zero_extract:SWI248
13144 (match_dup 0) (const_int 8) (const_int 8))
13145 (zero_extract:SWI248
13146 (match_dup 1) (const_int 8) (const_int 8)))
13147 (parallel
13148 [(set (zero_extract:SWI248
13149 (match_dup 0) (const_int 8) (const_int 8))
13150 (match_op_dup 3
13151 [(any_logic (match_dup 4) (match_dup 2))
13152 (const_int 8) (const_int 8)]))
13153 (clobber (reg:CC FLAGS_REG))])]
13154 "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
13155 [(set_attr "type" "alu")
13156 (set_attr "mode" "QI")])
13157
13158 ;; Convert wide AND instructions with immediate operand to shorter QImode
13159 ;; equivalents when possible.
13160 ;; Don't do the splitting with memory operands, since it introduces risk
13161 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
13162 ;; for size, but that can (should?) be handled by generic code instead.
13163 ;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; Matches, after reload, a register AND with a constant whose bits outside
;; bits 8..15 are all ones (!(~INTVAL & ~(255 << 8))).  The insn is rewritten
;; as a QImode AND on the 8-bit field (size 8, position 8) of the HImode
;; low parts of the registers, with the immediate replaced by
;; INTVAL >> 8 truncated to QImode.  Not done when TARGET_PARTIAL_REG_STALL
;; is set unless the function is optimized for size, nor under
;; TARGET_APX_NDD when destination and source registers differ.
13164 (define_split
13165 [(set (match_operand:SWI248 0 "QIreg_operand")
13166 (and:SWI248 (match_operand:SWI248 1 "register_operand")
13167 (match_operand:SWI248 2 "const_int_operand")))
13168 (clobber (reg:CC FLAGS_REG))]
13169 "reload_completed
13170 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13171 && !(~INTVAL (operands[2]) & ~(255 << 8))
13172 && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
13173 [(parallel
13174 [(set (zero_extract:HI (match_dup 0)
13175 (const_int 8)
13176 (const_int 8))
13177 (subreg:HI
13178 (and:QI
13179 (subreg:QI
13180 (zero_extract:HI (match_dup 1)
13181 (const_int 8)
13182 (const_int 8)) 0)
13183 (match_dup 2)) 0))
13184 (clobber (reg:CC FLAGS_REG))])]
13185 {
13186 operands[0] = gen_lowpart (HImode, operands[0]);
13187 operands[1] = gen_lowpart (HImode, operands[1]);
13188 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
13189 })
13190
13191 ;; Since AND can be encoded with sign extended immediate, this is only
13192 ;; profitable when 7th bit is not set.
;; Matches, after reload, an AND with a constant whose bits above the low
;; eight are all ones (!(~INTVAL & ~255)) and whose bit 7 (mask 128) is
;; clear.  The insn is rewritten as a QImode AND on the strict_low_part of
;; operand 0, using the QImode low parts of the operands and the immediate
;; truncated to QImode.  Not done when TARGET_PARTIAL_REG_STALL is set
;; unless the function is optimized for size, nor under TARGET_APX_NDD when
;; operands 0 and 1 differ.
13193 (define_split
13194 [(set (match_operand:SWI248 0 "any_QIreg_operand")
13195 (and:SWI248 (match_operand:SWI248 1 "general_operand")
13196 (match_operand:SWI248 2 "const_int_operand")))
13197 (clobber (reg:CC FLAGS_REG))]
13198 "reload_completed
13199 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13200 && !(~INTVAL (operands[2]) & ~255)
13201 && !(INTVAL (operands[2]) & 128)
13202 && !(TARGET_APX_NDD
13203 && !rtx_equal_p (operands[0], operands[1]))"
13204 [(parallel [(set (strict_low_part (match_dup 0))
13205 (and:QI (match_dup 1)
13206 (match_dup 2)))
13207 (clobber (reg:CC FLAGS_REG))])]
13208 {
13209 operands[0] = gen_lowpart (QImode, operands[0]);
13210 operands[1] = gen_lowpart (QImode, operands[1]);
13211 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
13212 })
13213
;; Double-word and-not ((~operand 1) & operand 2) for TARGET_BMI.
;; Always emitted as "#" and split after reload into two DWIH-mode and-not
;; operations, one per half; split_double_mode fills operands 0-2 with one
;; half of each operand and operands 3-5 with the other half.
;; The first alternative is restricted to the "x64" isa.
13214 (define_insn_and_split "*andn<dwi>3_doubleword_bmi"
13215 [(set (match_operand:<DWI> 0 "register_operand" "=&r,&r,r,r")
13216 (and:<DWI>
13217 (not:<DWI> (match_operand:<DWI> 1 "register_operand" "r,r,0,r"))
13218 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,ro,0")))
13219 (clobber (reg:CC FLAGS_REG))]
13220 "TARGET_BMI"
13221 "#"
13222 "&& reload_completed"
13223 [(parallel [(set (match_dup 0)
13224 (and:DWIH (not:DWIH (match_dup 1)) (match_dup 2)))
13225 (clobber (reg:CC FLAGS_REG))])
13226 (parallel [(set (match_dup 3)
13227 (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
13228 (clobber (reg:CC FLAGS_REG))])]
13229 "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);"
13230 [(set_attr "isa" "x64,*,*,*")])
13231
;; Double-word and-not ((~operand 1) & operand 2) when TARGET_BMI is not
;; available.  Only matches while ix86_pre_reload_split () holds and is
;; always split: a NOT of operand 1 into a fresh pseudo (operands[3]),
;; followed by an AND of that pseudo with operand 2.
13232 (define_insn_and_split "*andn<mode>3_doubleword"
13233 [(set (match_operand:DWI 0 "register_operand")
13234 (and:DWI
13235 (not:DWI (match_operand:DWI 1 "register_operand"))
13236 (match_operand:DWI 2 "nonimmediate_operand")))
13237 (clobber (reg:CC FLAGS_REG))]
13238 "!TARGET_BMI
13239 && ix86_pre_reload_split ()"
13240 "#"
13241 "&& 1"
13242 [(set (match_dup 3) (not:DWI (match_dup 1)))
13243 (parallel [(set (match_dup 0)
13244 (and:DWI (match_dup 3) (match_dup 2)))
13245 (clobber (reg:CC FLAGS_REG))])]
13246 "operands[3] = gen_reg_rtx (<MODE>mode);")
13247
;; And-not ((~operand 1) & operand 2) for the SWI48 modes, with a flags
;; clobber.  Alternatives 0 and 1 emit the "andn" instruction (isa "bmi")
;; with a register or memory second source.  Alternative 2 uses the "k"
;; constraints (type "msklog", isa "avx512bw") and is emitted as "#";
;; presumably it is handled by a splitter outside this part of the file.
13248 (define_insn "*andn<mode>_1"
13249 [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
13250 (and:SWI48
13251 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
13252 (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
13253 (clobber (reg:CC FLAGS_REG))]
13254 "TARGET_BMI || TARGET_AVX512BW"
13255 "@
13256 andn\t{%2, %1, %0|%0, %1, %2}
13257 andn\t{%2, %1, %0|%0, %1, %2}
13258 #"
13259 [(set_attr "isa" "bmi,bmi,avx512bw")
13260 (set_attr "type" "bitmanip,bitmanip,msklog")
13261 (set_attr "btver2_decode" "direct, double,*")
13262 (set_attr "mode" "<MODE>")])
13263
;; And-not ((~operand 1) & operand 2) for the SWI12 modes, register operands
;; only, with a flags clobber.  Alternative 0 emits "andn" with every operand
;; printed through the %k modifier and has mode attribute SI.  Alternative 1
;; uses the "k" constraints (type "msklog", isa "avx512f") and is emitted as
;; "#"; its mode attribute is HI when TARGET_AVX512DQ is not enabled and
;; <MODE> otherwise.
13264 (define_insn "*andn<mode>_1"
13265 [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
13266 (and:SWI12
13267 (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
13268 (match_operand:SWI12 2 "register_operand" "r,k")))
13269 (clobber (reg:CC FLAGS_REG))]
13270 "TARGET_BMI || TARGET_AVX512BW"
13271 "@
13272 andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
13273 #"
13274 [(set_attr "isa" "bmi,avx512f")
13275 (set_attr "type" "bitmanip,msklog")
13276 (set_attr "btver2_decode" "direct,*")
13277 (set (attr "mode")
13278 (cond [(eq_attr "alternative" "0")
13279 (const_string "SI")
13280 (and (eq_attr "alternative" "1")
13281 (match_test "!TARGET_AVX512DQ"))
13282 (const_string "HI")
13283 ]
13284 (const_string "<MODE>")))])
13285
;; And-not used only for its flags: the flags are set from comparing
;; ((~operand 1) & operand 2) with zero, and the computed value goes to a
;; scratch register (operand 0).  Requires TARGET_BMI and a flags user
;; compatible with CCNOmode.
13286 (define_insn "*andn_<mode>_ccno"
13287 [(set (reg FLAGS_REG)
13288 (compare
13289 (and:SWI48
13290 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
13291 (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
13292 (const_int 0)))
13293 (clobber (match_scratch:SWI48 0 "=r,r"))]
13294 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
13295 "andn\t{%2, %1, %0|%0, %1, %2}"
13296 [(set_attr "type" "bitmanip")
13297 (set_attr "btver2_decode" "direct, double")
13298 (set_attr "mode" "<MODE>")])
13299
13300 ;; Split *andnsi_1 after reload with -Oz when not;and is shorter.
;; Applies only after reload, when optimizing the insn for size with
;; optimize_size > 1, when the destination is the same register as operand 1
;; and is a legacy integer register, and when operand 2 is neither a REX
;; integer register nor overlapping the destination.  The and-not is then
;; replaced by a NOT of operand 1 into operand 0 followed by an AND of
;; operand 0 with operand 2.
13301 (define_split
13302 [(set (match_operand:SI 0 "register_operand")
13303 (and:SI (not:SI (match_operand:SI 1 "register_operand"))
13304 (match_operand:SI 2 "nonimmediate_operand")))
13305 (clobber (reg:CC FLAGS_REG))]
13306 "reload_completed
13307 && optimize_insn_for_size_p () && optimize_size > 1
13308 && REGNO (operands[0]) == REGNO (operands[1])
13309 && LEGACY_INT_REG_P (operands[0])
13310 && !REX_INT_REG_P (operands[2])
13311 && !reg_overlap_mentioned_p (operands[0], operands[2])"
13312 [(set (match_dup 0) (not:SI (match_dup 1)))
13313 (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
13314 (clobber (reg:CC FLAGS_REG))])])
13315
13316 ;; Split *andn_si_ccno with -Oz when not;test is shorter.
;; Matches the flags-only and-not whose clobbered register is the inverted
;; input itself (operand 2).  Applies only after reload, when optimizing the
;; insn for size with optimize_size > 1, when operand 2 is a legacy integer
;; register, and when operand 3 is neither a REX integer register nor
;; overlapping operand 2.  Operand 2 is then inverted in place and the flags
;; are set from comparing (operand 3 & operand 2) with zero.
13317 (define_split
13318 [(set (match_operand 0 "flags_reg_operand")
13319 (match_operator 1 "compare_operator"
13320 [(and:SI (not:SI (match_operand:SI 2 "general_reg_operand"))
13321 (match_operand:SI 3 "nonimmediate_operand"))
13322 (const_int 0)]))
13323 (clobber (match_dup 2))]
13324 "reload_completed
13325 && optimize_insn_for_size_p () && optimize_size > 1
13326 && LEGACY_INT_REG_P (operands[2])
13327 && !REX_INT_REG_P (operands[3])
13328 && !reg_overlap_mentioned_p (operands[2], operands[3])"
13329 [(set (match_dup 2) (not:SI (match_dup 2)))
13330 (set (match_dup 0) (match_op_dup 1
13331 [(and:SI (match_dup 3) (match_dup 2))
13332 (const_int 0)]))])
13333
13334 ;; Variant 1 of 4: Split ((A | B) ^ A) ^ C as (B & ~A) ^ C.
;; Here A is operand 1, B is operand 2 and C is operand 3.  Under TARGET_BMI
;; the and-not (~A & B) is computed into a fresh pseudo (operands[4]), which
;; is then XORed with C into operand 0.
13335 (define_split
13336 [(set (match_operand:SWI48 0 "register_operand")
13337 (xor:SWI48
13338 (xor:SWI48
13339 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
13340 (match_operand:SWI48 2 "nonimmediate_operand"))
13341 (match_dup 1))
13342 (match_operand:SWI48 3 "nonimmediate_operand")))
13343 (clobber (reg:CC FLAGS_REG))]
13344 "TARGET_BMI"
13345 [(parallel
13346 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
13347 (clobber (reg:CC FLAGS_REG))])
13348 (parallel
13349 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
13350 (clobber (reg:CC FLAGS_REG))])]
13351 "operands[4] = gen_reg_rtx (<MODE>mode);")
13352
13353 ;; Variant 2 of 4: Split ((A | B) ^ B) ^ C as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (which must be a register in this
;; variant) and C is operand 3.  Under TARGET_BMI the and-not (~B & A) is
;; computed into a fresh pseudo (operands[4]), which is then XORed with C
;; into operand 0.
13354 (define_split
13355 [(set (match_operand:SWI48 0 "register_operand")
13356 (xor:SWI48
13357 (xor:SWI48
13358 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
13359 (match_operand:SWI48 2 "register_operand"))
13360 (match_dup 2))
13361 (match_operand:SWI48 3 "nonimmediate_operand")))
13362 (clobber (reg:CC FLAGS_REG))]
13363 "TARGET_BMI"
13364 [(parallel
13365 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
13366 (clobber (reg:CC FLAGS_REG))])
13367 (parallel
13368 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
13369 (clobber (reg:CC FLAGS_REG))])]
13370 "operands[4] = gen_reg_rtx (<MODE>mode);")
13371
13372 ;; Variant 3 of 4: Split ((A | B) ^ C) ^ A as (B & ~A) ^ C.
;; Here A is operand 1, B is operand 2 and C is operand 3; the repeated A is
;; the outermost XOR operand.  Under TARGET_BMI the and-not (~A & B) is
;; computed into a fresh pseudo (operands[4]), which is then XORed with C
;; into operand 0.
13373 (define_split
13374 [(set (match_operand:SWI48 0 "register_operand")
13375 (xor:SWI48
13376 (xor:SWI48
13377 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
13378 (match_operand:SWI48 2 "nonimmediate_operand"))
13379 (match_operand:SWI48 3 "nonimmediate_operand"))
13380 (match_dup 1)))
13381 (clobber (reg:CC FLAGS_REG))]
13382 "TARGET_BMI"
13383 [(parallel
13384 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
13385 (clobber (reg:CC FLAGS_REG))])
13386 (parallel
13387 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
13388 (clobber (reg:CC FLAGS_REG))])]
13389 "operands[4] = gen_reg_rtx (<MODE>mode);")
13390
13391 ;; Variant 4 of 4: Split ((A | B) ^ C) ^ B as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (which must be a register in this
;; variant) and C is operand 3; the repeated B is the outermost XOR operand.
;; Under TARGET_BMI the and-not (~B & A) is computed into a fresh pseudo
;; (operands[4]), which is then XORed with C into operand 0.
13392 (define_split
13393 [(set (match_operand:SWI48 0 "register_operand")
13394 (xor:SWI48
13395 (xor:SWI48
13396 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
13397 (match_operand:SWI48 2 "register_operand"))
13398 (match_operand:SWI48 3 "nonimmediate_operand"))
13399 (match_dup 2)))
13400 (clobber (reg:CC FLAGS_REG))]
13401 "TARGET_BMI"
13402 [(parallel
13403 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
13404 (clobber (reg:CC FLAGS_REG))])
13405 (parallel
13406 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
13407 (clobber (reg:CC FLAGS_REG))])]
13408 "operands[4] = gen_reg_rtx (<MODE>mode);")
13409 \f
13410 ;; Logical inclusive and exclusive OR instructions
13411
13412 ;; %%% This used to optimize known byte-wide and operations to memory.
13413 ;; If this is considered useful, it should be done with splitters.
13414
;; Named expander for the any_or operations in the SDWIM modes.
;; For modes wider than UNITS_PER_WORD, operand 2 is forced into a register
;; unless it satisfies x86_64_hilo_general_operand.  All RTL generation is
;; then delegated to ix86_expand_binary_operator, so the template above the
;; C block is never emitted directly (the block ends with DONE).
13415 (define_expand "<code><mode>3"
13416 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
13417 (any_or:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
13418 (match_operand:SDWIM 2 "<general_operand>")))]
13419 ""
13420 {
13421 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
13422 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
13423 operands[2] = force_reg (<MODE>mode, operands[2]);
13424
13425 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
13426 DONE;
13427 })
13428
;; Double-word any_or operation.  Always emitted as "#" and split after
;; reload by the C block below, which handles the two halves produced by
;; split_double_mode independently (operands 0-2 for one half, operands 3-5
;; for the other).  For each half:
;;   - a constant 0 second operand becomes a plain move, or nothing at all
;;     when destination and source are already equal;
;;   - a constant -1 second operand becomes a move of -1 for IOR, and a NOT
;;     otherwise;
;;   - anything else is expanded through ix86_expand_binary_operator.
;; When neither half emits anything, a NOTE_INSN_DELETED note is emitted.
;; Alternatives 2-6 are enabled only by the apx_ndd / apx_ndd_64 isa
;; attributes.
13429 (define_insn_and_split "*<code><dwi>3_doubleword"
13430 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
13431 (any_or:<DWI>
13432 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
13433 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,K,<di>,o")))
13434 (clobber (reg:CC FLAGS_REG))]
13435 "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands, TARGET_APX_NDD)"
13436 "#"
13437 "&& reload_completed"
13438 [(const_int:DWIH 0)]
13439 {
13440 /* This insn may disappear completely when operands[2] == const0_rtx
13441 and operands[0] == operands[1], which requires a NOTE_INSN_DELETED. */
13442 bool emit_insn_deleted_note_p = false;
13443
13444 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
13445
13446 if (operands[2] == const0_rtx)
13447 {
13448 if (!rtx_equal_p (operands[0], operands[1]))
13449 emit_move_insn (operands[0], operands[1]);
13450 else
13451 emit_insn_deleted_note_p = true;
13452 }
13453 else if (operands[2] == constm1_rtx)
13454 {
13455 if (<CODE> == IOR)
13456 emit_move_insn (operands[0], constm1_rtx);
13457 else
13458 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0],
13459 TARGET_APX_NDD);
13460 }
13461 else
13462 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0],
13463 TARGET_APX_NDD);
13464
13465 if (operands[5] == const0_rtx)
13466 {
13467 if (!rtx_equal_p (operands[3], operands[4]))
13468 emit_move_insn (operands[3], operands[4]);
13469 else if (emit_insn_deleted_note_p)
13470 emit_note (NOTE_INSN_DELETED);
13471 }
13472 else if (operands[5] == constm1_rtx)
13473 {
13474 if (<CODE> == IOR)
13475 emit_move_insn (operands[3], constm1_rtx);
13476 else
13477 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3],
13478 TARGET_APX_NDD);
13479 }
13480 else
13481 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3],
13482 TARGET_APX_NDD);
13483
13484 DONE;
13485 }
13486 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
13487
;; any_or operation for the SWI248 modes.  The pattern is parameterized by
;; the <nf_name>/<nf_condition>/<nf_prefix> substitutions and carries the
;; "has_nf" attribute; the flags clobber is not written in this template
;; (presumably supplied by a define_subst outside this part of the file).
;; Alternatives 0-2 are two-operand forms with operand 1 tied to operand 0,
;; alternatives 3-5 are three-operand forms enabled by "apx_ndd", and
;; alternative 6 uses the "k" constraints (type "msklog") and is emitted
;; as "#".
13488 (define_insn "*<code><mode>_1<nf_name>"
13489 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,?k")
13490 (any_or:SWI248
13491 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,0,rm,rjM,r,k")
13492 (match_operand:SWI248 2 "<general_operand>" "r,<i>,<m>,r,<i>,<m>,k")))]
13493 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
13494 && <nf_condition>"
13495 "@
13496 <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13497 <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13498 <nf_prefix><logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13499 <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
13500 <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
13501 <nf_prefix><logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
13502 #"
13503 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
13504 (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog")
13505 (set_attr "has_nf" "1")
13506 (set_attr "mode" "<MODE>")])
13507
;; NOT of an XOR for the SWI248 modes, with a flags clobber.  Always emitted
;; as "#" and split after reload.  When operand 0 is a mask register
;; (MASK_REG_P) the split emits gen_kxnor<mode> instead; otherwise it
;; produces an XOR into operand 0 followed by a NOT of operand 0.
13508 (define_insn_and_split "*notxor<mode>_1"
13509 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
13510 (not:SWI248
13511 (xor:SWI248
13512 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
13513 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k"))))
13514 (clobber (reg:CC FLAGS_REG))]
13515 "ix86_binary_operator_ok (XOR, <MODE>mode, operands, TARGET_APX_NDD)"
13516 "#"
13517 "&& reload_completed"
13518 [(parallel
13519 [(set (match_dup 0)
13520 (xor:SWI248 (match_dup 1) (match_dup 2)))
13521 (clobber (reg:CC FLAGS_REG))])
13522 (set (match_dup 0)
13523 (not:SWI248 (match_dup 0)))]
13524 {
13525 if (MASK_REG_P (operands[0]))
13526 {
13527 emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
13528 DONE;
13529 }
13530 }
13531 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
13532 (set_attr "type" "alu, alu, alu, alu, msklog")
13533 (set_attr "mode" "<MODE>")])
13534
;; DImode IOR with a constant for which exact_log2 is in the range 31..63.
;; Requires TARGET_64BIT and TARGET_USE_BT.  Always emitted as "#" and split
;; after reload into a store of 1 to the one-bit zero_extract of operand 0 at
;; that bit position (operands[3] holds the exact_log2 value).
13535 (define_insn_and_split "*iordi_1_bts"
13536 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
13537 (ior:DI
13538 (match_operand:DI 1 "nonimmediate_operand" "%0")
13539 (match_operand:DI 2 "const_int_operand" "n")))
13540 (clobber (reg:CC FLAGS_REG))]
13541 "TARGET_64BIT && TARGET_USE_BT
13542 && ix86_binary_operator_ok (IOR, DImode, operands)
13543 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
13544 "#"
13545 "&& reload_completed"
13546 [(parallel [(set (zero_extract:DI (match_dup 0)
13547 (const_int 1)
13548 (match_dup 3))
13549 (const_int 1))
13550 (clobber (reg:CC FLAGS_REG))])]
13551 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
13552 [(set_attr "type" "alu1")
13553 (set_attr "prefix_0f" "1")
13554 (set_attr "znver1_decode" "double")
13555 (set_attr "mode" "DI")])
13556
;; DImode XOR with a constant for which exact_log2 is in the range 31..63.
;; Requires TARGET_64BIT and TARGET_USE_BT.  Always emitted as "#" and split
;; after reload into an inversion of the one-bit zero_extract of operand 0 at
;; that bit position (operands[3] holds the exact_log2 value).
13557 (define_insn_and_split "*xordi_1_btc"
13558 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
13559 (xor:DI
13560 (match_operand:DI 1 "nonimmediate_operand" "%0")
13561 (match_operand:DI 2 "const_int_operand" "n")))
13562 (clobber (reg:CC FLAGS_REG))]
13563 "TARGET_64BIT && TARGET_USE_BT
13564 && ix86_binary_operator_ok (XOR, DImode, operands)
13565 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
13566 "#"
13567 "&& reload_completed"
13568 [(parallel [(set (zero_extract:DI (match_dup 0)
13569 (const_int 1)
13570 (match_dup 3))
13571 (not:DI (zero_extract:DI (match_dup 0)
13572 (const_int 1)
13573 (match_dup 3))))
13574 (clobber (reg:CC FLAGS_REG))])]
13575 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
13576 [(set_attr "type" "alu1")
13577 (set_attr "prefix_0f" "1")
13578 (set_attr "znver1_decode" "double")
13579 (set_attr "mode" "DI")])
13580
13581 ;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
;; In the pattern, a is operand 1, b is operand 2 and mask is operand 3.
;; Matches only under TARGET_BMI while ix86_pre_reload_split () holds and is
;; always split into three insns:
;;   operands[4] = ~mask & a
;;   operands[5] = mask & b
;;   operand 0   = operands[4] | operands[5]
;; Operands 1 and 3 are first forced into registers; operands 4 and 5 are
;; fresh pseudos.
13582 (define_insn_and_split "*xor2andn"
13583 [(set (match_operand:SWI248 0 "register_operand")
13584 (xor:SWI248
13585 (and:SWI248
13586 (xor:SWI248
13587 (match_operand:SWI248 1 "nonimmediate_operand")
13588 (match_operand:SWI248 2 "nonimmediate_operand"))
13589 (match_operand:SWI248 3 "nonimmediate_operand"))
13590 (match_dup 1)))
13591 (clobber (reg:CC FLAGS_REG))]
13592 "TARGET_BMI && ix86_pre_reload_split ()"
13593 "#"
13594 "&& 1"
13595 [(parallel [(set (match_dup 4)
13596 (and:SWI248
13597 (not:SWI248
13598 (match_dup 3))
13599 (match_dup 1)))
13600 (clobber (reg:CC FLAGS_REG))])
13601 (parallel [(set (match_dup 5)
13602 (and:SWI248
13603 (match_dup 3)
13604 (match_dup 2)))
13605 (clobber (reg:CC FLAGS_REG))])
13606 (parallel [(set (match_dup 0)
13607 (ior:SWI248
13608 (match_dup 4)
13609 (match_dup 5)))
13610 (clobber (reg:CC FLAGS_REG))])]
13611 {
13612 operands[1] = force_reg (<MODE>mode, operands[1]);
13613 operands[3] = force_reg (<MODE>mode, operands[3]);
13614 operands[4] = gen_reg_rtx (<MODE>mode);
13615 operands[5] = gen_reg_rtx (<MODE>mode);
13616 })
13617
;; QImode any_or operation whose result is zero-extended into a SWI248x
;; register.  Only available with TARGET_APX_NDD (and <nf_condition>), and
;; rejected when both inputs are memory operands.  Emitted as a
;; three-operand byte instruction with the destination printed through the
;; %b modifier.
13618 (define_insn "*<code>qi_1_zext<mode><nf_name>"
13619 [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
13620 (zero_extend:SWI248x
13621 (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
13622 (match_operand:QI 2 "x86_64_general_operand" "rn,m"))))]
13623 "TARGET_APX_NDD && <nf_condition>
13624 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13625 "@
13626 <nf_prefix><logic>{b}\t{%2, %1, %b0|%b0, %1, %2}
13627 <nf_prefix><logic>{b}\t{%2, %1, %b0|%b0, %1, %2}"
13628 [(set_attr "type" "alu")
13629 (set_attr "has_nf" "1")
13630 (set_attr "mode" "QI")])
13631
;; HImode any_or operation whose result is zero-extended into a SWI48x
;; register.  Only available with TARGET_APX_NDD (and <nf_condition>), and
;; rejected when both inputs are memory operands.  Emitted as a
;; three-operand word instruction with the destination printed through the
;; %w modifier.
13632 (define_insn "*<code>hi_1_zext<mode><nf_name>"
13633 [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
13634 (zero_extend:SWI48x
13635 (any_or:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
13636 (match_operand:HI 2 "x86_64_general_operand" "rn,m"))))]
13637 "TARGET_APX_NDD && <nf_condition>
13638 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13639 "@
13640 <nf_prefix><logic>{w}\t{%2, %1, %w0|%w0, %1, %2}
13641 <nf_prefix><logic>{w}\t{%2, %1, %w0|%w0, %1, %2}"
13642 [(set_attr "type" "alu")
13643 (set_attr "has_nf" "1")
13644 (set_attr "mode" "HI")])
13645
13646 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode any_or operation whose result is zero-extended into a DImode
;; register, with a flags clobber.  Requires TARGET_64BIT.  Alternative 0 is
;; the two-operand form with operand 1 tied to operand 0; alternatives 1-3
;; are three-operand forms enabled only by the "apx_ndd" isa attribute.
13647 (define_insn "*<code>si_1_zext"
13648 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
13649 (zero_extend:DI
13650 (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,rjM,r")
13651 (match_operand:SI 2 "x86_64_general_operand" "rBMe,r,e,BM"))))
13652 (clobber (reg:CC FLAGS_REG))]
13653 "TARGET_64BIT
13654 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
13655 "@
13656 <logic>{l}\t{%2, %k0|%k0, %2}
13657 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
13658 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
13659 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13660 [(set_attr "type" "alu")
13661 (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
13662 (set_attr "mode" "SI")])
13663
;; DImode any_or operation of a zero-extended SImode operand with an
;; immediate accepted by x86_64_zext_immediate_operand (constraint "Z"),
;; with a flags clobber.  Emitted as a 32-bit instruction writing the
;; destination through the %k modifier.  Requires TARGET_64BIT; alternative 1
;; is the three-operand form enabled only by the "apx_ndd" isa attribute.
13664 (define_insn "*<code>si_1_zext_imm"
13665 [(set (match_operand:DI 0 "register_operand" "=r,r")
13666 (any_or:DI
13667 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0,rm"))
13668 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
13669 (clobber (reg:CC FLAGS_REG))]
13670 "TARGET_64BIT
13671 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
13672 "@
13673 <logic>{l}\t{%2, %k0|%k0, %2}
13674 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13675 [(set_attr "type" "alu")
13676 (set_attr "isa" "*,apx_ndd")
13677 (set_attr "mode" "SI")])
13678
;; QImode any_or operation, parameterized by the <nf_name>/<nf_condition>/
;; <nf_prefix> substitutions (attribute "has_nf").
;; Alternatives 0-1 emit a byte instruction in place; alternative 2 emits a
;; 32-bit instruction with both operands printed through %k (mode attribute
;; SI) and is not preferred for speed when TARGET_PARTIAL_REG_STALL is set;
;; alternatives 3-4 are three-operand byte forms enabled by "apx_ndd";
;; alternative 5 uses the "k" constraints (type "msklog") and is emitted as
;; "#", with mode attribute HI when TARGET_AVX512DQ is not enabled.
13679 (define_insn "*<code>qi_1<nf_name>"
13680 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
13681 (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
13682 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))]
13683 "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)
13684 && <nf_condition>"
13685 "@
13686 <nf_prefix><logic>{b}\t{%2, %0|%0, %2}
13687 <nf_prefix><logic>{b}\t{%2, %0|%0, %2}
13688 <nf_prefix><logic>{l}\t{%k2, %k0|%k0, %k2}
13689 <nf_prefix><logic>{b}\t{%2, %1, %0|%0, %1, %2}
13690 <nf_prefix><logic>{b}\t{%2, %1, %0|%0, %1, %2}
13691 #"
13692 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
13693 (set_attr "type" "alu,alu,alu,alu,alu,msklog")
13694 (set_attr "has_nf" "1")
13695 (set (attr "mode")
13696 (cond [(eq_attr "alternative" "2")
13697 (const_string "SI")
13698 (and (eq_attr "alternative" "5")
13699 (match_test "!TARGET_AVX512DQ"))
13700 (const_string "HI")
13701 ]
13702 (const_string "QI")))
13703 ;; Potential partial reg stall on alternative 2.
13704 (set (attr "preferred_for_speed")
13705 (cond [(eq_attr "alternative" "2")
13706 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
13707 (symbol_ref "true")))])
13708
;; QImode NOT of an XOR, with a flags clobber.  Always emitted as "#" and
;; split after reload.  When operand 0 satisfies mask_reg_operand the split
;; emits gen_kxnorqi instead; otherwise it produces an XOR into operand 0
;; followed by a NOT of operand 0.  The attribute layout mirrors the QImode
;; any_or pattern: alternative 2 has mode SI and is not preferred for speed
;; under TARGET_PARTIAL_REG_STALL, and alternative 5 (mask registers) has
;; mode HI when TARGET_AVX512DQ is not enabled.
13709 (define_insn_and_split "*notxorqi_1"
13710 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
13711 (not:QI
13712 (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
13713 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
13714 (clobber (reg:CC FLAGS_REG))]
13715 "ix86_binary_operator_ok (XOR, QImode, operands, TARGET_APX_NDD)"
13716 "#"
13717 "&& reload_completed"
13718 [(parallel
13719 [(set (match_dup 0)
13720 (xor:QI (match_dup 1) (match_dup 2)))
13721 (clobber (reg:CC FLAGS_REG))])
13722 (set (match_dup 0)
13723 (not:QI (match_dup 0)))]
13724 {
13725 if (mask_reg_operand (operands[0], QImode))
13726 {
13727 emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
13728 DONE;
13729 }
13730 }
13731 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
13732 (set_attr "type" "alu,alu,alu,alu,alu,msklog")
13733 (set (attr "mode")
13734 (cond [(eq_attr "alternative" "2")
13735 (const_string "SI")
13736 (and (eq_attr "alternative" "5")
13737 (match_test "!TARGET_AVX512DQ"))
13738 (const_string "HI")
13739 ]
13740 (const_string "QI")))
13741 ;; Potential partial reg stall on alternative 2.
13742 (set (attr "preferred_for_speed")
13743 (cond [(eq_attr "alternative" "2")
13744 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
13745 (symbol_ref "true")))])
13746
13747 ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
13748 ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
13749 ;; This eliminates sign extension after logic operation.
13750
;; QImode case: the logic op combines a QImode memory operand with a
;; constant and the result is sign-extended to the SWI248 mode.  The split
;; sign-extends the memory operand into a fresh pseudo (operand 3) and then
;; performs the logic op in the wide mode.  The split condition is empty.
13751 (define_split
13752 [(set (match_operand:SWI248 0 "register_operand")
13753 (sign_extend:SWI248
13754 (any_logic:QI (match_operand:QI 1 "memory_operand")
13755 (match_operand:QI 2 "const_int_operand"))))]
13756 ""
13757 [(set (match_dup 3) (sign_extend:SWI248 (match_dup 1)))
13758 (set (match_dup 0) (any_logic:SWI248 (match_dup 3) (match_dup 2)))]
13759 "operands[3] = gen_reg_rtx (<MODE>mode);")
13760
;; HImode case: a logic op on an HImode memory operand and a constant,
;; sign-extended to the SWI48 mode.  The memory operand is sign-extended
;; into a fresh pseudo (operand 3) and the logic op is done in the wide
;; mode.  The split condition is empty.
13761 (define_split
13762 [(set (match_operand:SWI48 0 "register_operand")
13763 (sign_extend:SWI48
13764 (any_logic:HI (match_operand:HI 1 "memory_operand")
13765 (match_operand:HI 2 "const_int_operand"))))]
13766 ""
13767 [(set (match_dup 3) (sign_extend:SWI48 (match_dup 1)))
13768 (set (match_dup 0) (any_logic:SWI48 (match_dup 3) (match_dup 2)))]
13769 "operands[3] = gen_reg_rtx (<MODE>mode);")
13770
;; SImode case: a logic op on an SImode memory operand and a constant,
;; sign-extended to DImode.  Only enabled for TARGET_64BIT.  The memory
;; operand is sign-extended into a fresh DImode pseudo (operand 3) and the
;; logic op is done in DImode.
13771 (define_split
13772 [(set (match_operand:DI 0 "register_operand")
13773 (sign_extend:DI
13774 (any_logic:SI (match_operand:SI 1 "memory_operand")
13775 (match_operand:SI 2 "const_int_operand"))))]
13776 "TARGET_64BIT"
13777 [(set (match_dup 3) (sign_extend:DI (match_dup 1)))
13778 (set (match_dup 0) (any_logic:DI (match_dup 3) (match_dup 2)))]
13779 "operands[3] = gen_reg_rtx (DImode);")
13780
;; any_or operation that stores its result in operand 0 and also sets
;; FLAGS_REG from comparing that result with zero.  The insn condition
;; requires ix86_match_ccmode (insn, CCNOmode).  Alternatives 0 and 1 are
;; the two-operand forms (operand 1 tied to operand 0); alternatives 2 and
;; 3 are three-operand forms enabled only for the "apx_ndd" isa.
13781 (define_insn "*<code><mode>_2"
13782 [(set (reg FLAGS_REG)
13783 (compare (any_or:SWI
13784 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
13785 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
13786 (const_int 0)))
13787 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
13788 (any_or:SWI (match_dup 1) (match_dup 2)))]
13789 "ix86_match_ccmode (insn, CCNOmode)
13790 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
13791 "@
13792 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13793 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13794 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
13795 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
13796 [(set_attr "type" "alu")
13797 (set_attr "isa" "*,*,apx_ndd,apx_ndd")
13798 (set_attr "mode" "<MODE>")])
13799
13800 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
13801 ;; ??? Special case for immediate operand is missing - it is tricky.
;; SImode any_or whose result is zero-extended into a DImode register,
;; while FLAGS_REG is set from comparing the SImode result with zero.
;; Requires TARGET_64BIT and ix86_match_ccmode (insn, CCNOmode).
;; Alternative 0 is the two-operand form; alternatives 1 and 2 are
;; three-operand forms enabled only for the "apx_ndd" isa.  The destination
;; is printed with the %k modifier.
13802 (define_insn "*<code>si_2_zext"
13803 [(set (reg FLAGS_REG)
13804 (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
13805 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
13806 (const_int 0)))
13807 (set (match_operand:DI 0 "register_operand" "=r,r,r")
13808 (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
13809 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
13810 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
13811 "@
13812 <logic>{l}\t{%2, %k0|%k0, %2}
13813 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
13814 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13815 [(set_attr "type" "alu")
13816 (set_attr "isa" "*,apx_ndd,apx_ndd")
13817 (set_attr "mode" "SI")])
13818
;; Immediate variant of the flag-setting, zero-extending SImode any_or.
;; Operand 2 must satisfy x86_64_zext_immediate_operand (constraint "Z").
;; Here the DImode result is written as any_or:DI of the zero-extended
;; operand 1 and the immediate, while the flags compare uses the SImode
;; operation.  Requires TARGET_64BIT and CCNOmode-compatible flags use.
;; Alternative 1 is the three-operand form for the "apx_ndd" isa.
13819 (define_insn "*<code>si_2_zext_imm"
13820 [(set (reg FLAGS_REG)
13821 (compare (any_or:SI
13822 (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
13823 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
13824 (const_int 0)))
13825 (set (match_operand:DI 0 "register_operand" "=r,r")
13826 (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
13827 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
13828 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
13829 "@
13830 <logic>{l}\t{%2, %k0|%k0, %2}
13831 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13832 [(set_attr "type" "alu")
13833 (set_attr "isa" "*,apx_ndd")
13834 (set_attr "mode" "SI")])
13835
;; any_or executed only for its effect on FLAGS_REG: the flags are set
;; from comparing the result with zero, and the value itself goes to a
;; clobbered scratch (operand 0), to which operand 1 is tied by the "%0"
;; constraint.  Requires ix86_match_ccmode (insn, CCNOmode) and rejects the
;; case where operands 1 and 2 are both memory.
13836 (define_insn "*<code><mode>_3"
13837 [(set (reg FLAGS_REG)
13838 (compare (any_or:SWI
13839 (match_operand:SWI 1 "nonimmediate_operand" "%0")
13840 (match_operand:SWI 2 "<general_operand>" "<g>"))
13841 (const_int 0)))
13842 (clobber (match_scratch:SWI 0 "=<r>"))]
13843 "ix86_match_ccmode (insn, CCNOmode)
13844 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13845 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
13846 [(set_attr "type" "alu")
13847 (set_attr "mode" "<MODE>")])
13848
13849 ;; Convert wide OR instructions with immediate operand to shorter QImode
13850 ;; equivalents when possible.
13851 ;; Don't do the splitting with memory operands, since it introduces risk
13852 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
13853 ;; for size, but that can (should?) be handled by generic code instead.
13854 ;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; High-byte case.  Applies after reload, when partial register stalls are
;; not a concern (or the function is optimized for size), and only when the
;; constant has no bits set outside bits 8..15 (mask 255 << 8).  With
;; TARGET_APX_NDD the source and destination must be the same register.
;; The operation is rewritten as a QImode any_or on the 8-bit field at bit
;; offset 8 of the HImode lowparts, using the constant shifted right by 8.
;; A zero constant is handled in the preparation code: it becomes a plain
;; move when the registers differ, or a deleted-insn note otherwise.
13855 (define_split
13856 [(set (match_operand:SWI248 0 "QIreg_operand")
13857 (any_or:SWI248 (match_operand:SWI248 1 "register_operand")
13858 (match_operand:SWI248 2 "const_int_operand")))
13859 (clobber (reg:CC FLAGS_REG))]
13860 "reload_completed
13861 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13862 && !(INTVAL (operands[2]) & ~(255 << 8))
13863 && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
13864 [(parallel
13865 [(set (zero_extract:HI (match_dup 0)
13866 (const_int 8)
13867 (const_int 8))
13868 (subreg:HI
13869 (any_or:QI
13870 (subreg:QI
13871 (zero_extract:HI (match_dup 1)
13872 (const_int 8)
13873 (const_int 8)) 0)
13874 (match_dup 2)) 0))
13875 (clobber (reg:CC FLAGS_REG))])]
13876 {
13877 /* Handle the case where INTVAL (operands[2]) == 0. */
13878 if (operands[2] == const0_rtx)
13879 {
13880 if (!rtx_equal_p (operands[0], operands[1]))
13881 emit_move_insn (operands[0], operands[1]);
13882 else
13883 emit_note (NOTE_INSN_DELETED);
13884 DONE;
13885 }
13886 operands[0] = gen_lowpart (HImode, operands[0]);
13887 operands[1] = gen_lowpart (HImode, operands[1]);
13888 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
13889 })
13890
13891 ;; Since OR can be encoded with sign extended immediate, this is only
13892 ;; profitable when 7th bit is set.
;; Low-byte case.  Applies after reload, when partial register stalls are
;; not a concern (or the function is optimized for size), and only when the
;; constant fits in the low 8 bits (no bits outside 255) and has the bit
;; with value 128 set.  With TARGET_APX_NDD operands 0 and 1 must be equal.
;; The operation is rewritten as a QImode any_or on the strict_low_part of
;; the destination, using QImode lowparts and the constant in QImode.
13893 (define_split
13894 [(set (match_operand:SWI248 0 "any_QIreg_operand")
13895 (any_or:SWI248 (match_operand:SWI248 1 "general_operand")
13896 (match_operand:SWI248 2 "const_int_operand")))
13897 (clobber (reg:CC FLAGS_REG))]
13898 "reload_completed
13899 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13900 && !(INTVAL (operands[2]) & ~255)
13901 && (INTVAL (operands[2]) & 128)
13902 && !(TARGET_APX_NDD
13903 && !rtx_equal_p (operands[0], operands[1]))"
13904 [(parallel [(set (strict_low_part (match_dup 0))
13905 (any_or:QI (match_dup 1)
13906 (match_dup 2)))
13907 (clobber (reg:CC FLAGS_REG))])]
13908 {
13909 operands[0] = gen_lowpart (QImode, operands[0]);
13910 operands[1] = gen_lowpart (QImode, operands[1]);
13911 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
13912 })
13913
;; Expander for a flag-setting XOR on the 8-bit field at bit offset 8 of an
;; HImode register.  The field of operand 1 is XORed with the constant
;; operand 2; FLAGS_REG (CCNOmode) is set from comparing that QImode result
;; with zero, and the result is stored into the same field of operand 0.
;; The expander has no condition and no preparation code.
13914 (define_expand "xorqi_ext_1_cc"
13915 [(parallel
13916 [(set (reg:CCNO FLAGS_REG)
13917 (compare:CCNO
13918 (xor:QI
13919 (subreg:QI
13920 (zero_extract:HI (match_operand:HI 1 "register_operand")
13921 (const_int 8)
13922 (const_int 8)) 0)
13923 (match_operand:QI 2 "const_int_operand"))
13924 (const_int 0)))
13925 (set (zero_extract:HI (match_operand:HI 0 "register_operand")
13926 (const_int 8)
13927 (const_int 8))
13928 (subreg:HI
13929 (xor:QI
13930 (subreg:QI
13931 (zero_extract:HI (match_dup 1)
13932 (const_int 8)
13933 (const_int 8)) 0)
13934 (match_dup 2)) 0))])])
13935
13936 ;; Peephole2 rega = 0; rega op= regb into rega = regb.
;; Matches a register being set to zero (with flags clobber) immediately
;; followed by an any_or_plus of that register with operand 1 (also with
;; flags clobber).  Only applies when operand 1 does not mention operand 0.
;; The replacement is a single set of operand 0 from operand 1, without a
;; FLAGS_REG clobber.
13937 (define_peephole2
13938 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
13939 (const_int 0))
13940 (clobber (reg:CC FLAGS_REG))])
13941 (parallel [(set (match_dup 0)
13942 (any_or_plus:SWI (match_dup 0)
13943 (match_operand:SWI 1 "<general_operand>")))
13944 (clobber (reg:CC FLAGS_REG))])]
13945 "!reg_mentioned_p (operands[0], operands[1])"
13946 [(set (match_dup 0) (match_dup 1))])
13947
13948 ;; Peephole2 dead instruction in rega = 0; rega op= rega.
;; Matches a register being set to zero followed by an any_or_plus of that
;; register with itself.  The replacement keeps only the zeroing insn
;; (including its FLAGS_REG clobber).  There is no extra condition.
13949 (define_peephole2
13950 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
13951 (const_int 0))
13952 (clobber (reg:CC FLAGS_REG))])
13953 (parallel [(set (match_dup 0)
13954 (any_or_plus:SWI (match_dup 0) (match_dup 0)))
13955 (clobber (reg:CC FLAGS_REG))])]
13956 ""
13957 [(parallel [(set (match_dup 0) (const_int 0))
13958 (clobber (reg:CC FLAGS_REG))])])
13959
13960 ;; Split DST = (HI<<32)|LO early to minimize register usage.
;; Double-word value built from operand 1 (a <DWI> register) shifted left
;; by exactly <MODE_SIZE> * BITS_PER_UNIT bits, combined via any_or_plus
;; with the zero-extended <MODE> operand 3.  Always output as "#"; after
;; reload the work is handed to split_double_concat, which receives the
;; destination, operand 3 and the <MODE> lowpart of operand 1.
13961 (define_insn_and_split "*concat<mode><dwi>3_1"
13962 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
13963 (any_or_plus:<DWI>
13964 (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r,r")
13965 (match_operand:QI 2 "const_int_operand"))
13966 (zero_extend:<DWI>
13967 (match_operand:DWIH 3 "nonimmediate_operand" "r,m"))))]
13968 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
13969 "#"
13970 "&& reload_completed"
13971 [(const_int 0)]
13972 {
13973 split_double_concat (<DWI>mode, operands[0], operands[3],
13974 gen_lowpart (<MODE>mode, operands[1]));
13975 DONE;
13976 })
13977
;; Same combination as *concat<mode><dwi>3_1 with the two any_or_plus
;; operands in the opposite order: the zero-extended <MODE> operand 1
;; comes first and the shifted <DWI> register (operand 2, shifted by
;; operand 3) second.  The shift count must equal
;; <MODE_SIZE> * BITS_PER_UNIT.  Split after reload via split_double_concat
;; with operand 1 and the <MODE> lowpart of operand 2.
13978 (define_insn_and_split "*concat<mode><dwi>3_2"
13979 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
13980 (any_or_plus:<DWI>
13981 (zero_extend:<DWI>
13982 (match_operand:DWIH 1 "nonimmediate_operand" "r,m"))
13983 (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r,r")
13984 (match_operand:QI 3 "const_int_operand"))))]
13985 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
13986 "#"
13987 "&& reload_completed"
13988 [(const_int 0)]
13989 {
13990 split_double_concat (<DWI>mode, operands[0], operands[1],
13991 gen_lowpart (<MODE>mode, operands[2]));
13992 DONE;
13993 })
13994
;; Double-word value built from two <MODE> operands: operand 1 is extended
;; (any_extend) and shifted left by <MODE_SIZE> * BITS_PER_UNIT, then
;; combined via any_or_plus with the zero-extended operand 3.  Split after
;; reload.  When the destination is an SSE register (alternative 4, "x")
;; the split emits vec_concatv2di on the V2DImode view of that register;
;; otherwise it calls split_double_concat with operands 3 and 1.
;; Alternatives 3 and 4 are restricted to the "x64" isa.
13995 (define_insn_and_split "*concat<mode><dwi>3_3"
13996 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r,x")
13997 (any_or_plus:<DWI>
13998 (ashift:<DWI>
13999 (any_extend:<DWI>
14000 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x"))
14001 (match_operand:QI 2 "const_int_operand"))
14002 (zero_extend:<DWI>
14003 (match_operand:DWIH 3 "nonimmediate_operand" "r,r,m,m,0"))))]
14004 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
14005 "#"
14006 "&& reload_completed"
14007 [(const_int 0)]
14008 {
14009 if (SSE_REG_P (operands[0]))
14010 {
14011 rtx tmp = gen_rtx_REG (V2DImode, REGNO (operands[0]));
14012 emit_insn (gen_vec_concatv2di (tmp, operands[3], operands[1]));
14013 }
14014 else
14015 split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
14016 DONE;
14017 }
14018 [(set_attr "isa" "*,*,*,x64,x64")])
14019
;; Same combination as *concat<mode><dwi>3_3 with the any_or_plus operands
;; in the opposite order and without an SSE alternative: zero-extended
;; operand 1 comes first, and the extended operand 2 shifted left by
;; <MODE_SIZE> * BITS_PER_UNIT second.  Split after reload via
;; split_double_concat with operands 1 and 2.  Alternative 3 is restricted
;; to the "x64" isa.
14020 (define_insn_and_split "*concat<mode><dwi>3_4"
14021 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r")
14022 (any_or_plus:<DWI>
14023 (zero_extend:<DWI>
14024 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
14025 (ashift:<DWI>
14026 (any_extend:<DWI>
14027 (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
14028 (match_operand:QI 3 "const_int_operand"))))]
14029 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
14030 "#"
14031 "&& reload_completed"
14032 [(const_int 0)]
14033 {
14034 split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
14035 DONE;
14036 }
14037 [(set_attr "isa" "*,*,*,x64")])
14038
;; Double-word register (operand 1) shifted left by half the bit width of
;; the mode, combined via any_or_plus with a constant (operand 3).
;; The insn condition restricts the constant:
;;   - for DImode it must be a CONST_INT with no bits set above the SImode
;;     mask;
;;   - otherwise it must be a non-negative CONST_INT, or a two-element
;;     CONST_WIDE_INT whose element 1 is zero;
;;   - the CONST_INT (or element 0 of the CONST_WIDE_INT) must not satisfy
;;     ix86_endbr_immediate_operand.
;; Split after reload: the constant is narrowed to <HALF>mode with
;; simplify_subreg (byte offset 0) and passed to split_double_concat
;; together with the <HALF>mode lowpart of operand 1.
14039 (define_insn_and_split "*concat<half><mode>3_5"
14040 [(set (match_operand:DWI 0 "nonimmediate_operand" "=r,o,o")
14041 (any_or_plus:DWI
14042 (ashift:DWI (match_operand:DWI 1 "register_operand" "r,r,r")
14043 (match_operand:QI 2 "const_int_operand"))
14044 (match_operand:DWI 3 "const_scalar_int_operand" "n,n,Wd")))]
14045 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT / 2
14046 && (<MODE>mode == DImode
14047 ? CONST_INT_P (operands[3])
14048 && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
14049 : CONST_INT_P (operands[3])
14050 ? INTVAL (operands[3]) >= 0
14051 : CONST_WIDE_INT_NUNITS (operands[3]) == 2
14052 && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
14053 && !(CONST_INT_P (operands[3])
14054 ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
14055 : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
14056 0)),
14057 VOIDmode))"
14058 "#"
14059 "&& reload_completed"
14060 [(const_int 0)]
14061 {
14062 rtx op3 = simplify_subreg (<HALF>mode, operands[3], <MODE>mode, 0);
14063 split_double_concat (<MODE>mode, operands[0], op3,
14064 gen_lowpart (<HALF>mode, operands[1]));
14065 DONE;
14066 }
14067 [(set_attr "isa" "*,nox64,x64")])
14068
;; <MODE> operand 1, extended (any_extend) to <DWI> and shifted left by
;; <MODE_SIZE> * BITS_PER_UNIT, combined via any_or_plus with a constant
;; (operand 3).  The insn condition restricts the constant:
;;   - when <DWI> is DImode it must be a CONST_INT with no bits set above
;;     the SImode mask;
;;   - otherwise it must be a non-negative CONST_INT, or a two-element
;;     CONST_WIDE_INT whose element 1 is zero;
;;   - the CONST_INT (or element 0 of the CONST_WIDE_INT) must not satisfy
;;     ix86_endbr_immediate_operand.
;; Split after reload: the constant is narrowed to <MODE>mode with
;; simplify_subreg (byte offset 0) and passed to split_double_concat
;; together with operand 1.
14069 (define_insn_and_split "*concat<mode><dwi>3_6"
14070 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
14071 (any_or_plus:<DWI>
14072 (ashift:<DWI>
14073 (any_extend:<DWI>
14074 (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
14075 (match_operand:QI 2 "const_int_operand"))
14076 (match_operand:<DWI> 3 "const_scalar_int_operand" "n,n,Wd,n")))]
14077 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT
14078 && (<DWI>mode == DImode
14079 ? CONST_INT_P (operands[3])
14080 && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
14081 : CONST_INT_P (operands[3])
14082 ? INTVAL (operands[3]) >= 0
14083 : CONST_WIDE_INT_NUNITS (operands[3]) == 2
14084 && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
14085 && !(CONST_INT_P (operands[3])
14086 ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
14087 : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
14088 0)),
14089 VOIDmode))"
14090 "#"
14091 "&& reload_completed"
14092 [(const_int 0)]
14093 {
14094 rtx op3 = simplify_subreg (<MODE>mode, operands[3], <DWI>mode, 0);
14095 split_double_concat (<DWI>mode, operands[0], op3, operands[1]);
14096 DONE;
14097 }
14098 [(set_attr "isa" "*,nox64,x64,*")])
14099
;; Zero-extended <MODE> operand 1 combined via any_or_plus with a constant
;; (operand 2) whose low part is zero.  The insn condition requires:
;;   - when <DWI> is DImode: a CONST_INT whose bits under the SImode mask
;;     are all zero and which does not satisfy ix86_endbr_immediate_operand;
;;   - otherwise: a two-element CONST_WIDE_INT whose element 0 is zero and
;;     whose element 1 does not satisfy ix86_endbr_immediate_operand.
;; Split after reload: the upper part of the constant (value >> 32 for
;; DImode, element 1 otherwise) is converted to <MODE>mode and passed to
;; split_double_concat together with operand 1.
14100 (define_insn_and_split "*concat<mode><dwi>3_7"
14101 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
14102 (any_or_plus:<DWI>
14103 (zero_extend:<DWI>
14104 (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
14105 (match_operand:<DWI> 2 "const_scalar_int_operand" "n,n,Wd,n")))]
14106 "<DWI>mode == DImode
14107 ? CONST_INT_P (operands[2])
14108 && (UINTVAL (operands[2]) & GET_MODE_MASK (SImode)) == 0
14109 && !ix86_endbr_immediate_operand (operands[2], VOIDmode)
14110 : CONST_WIDE_INT_P (operands[2])
14111 && CONST_WIDE_INT_NUNITS (operands[2]) == 2
14112 && CONST_WIDE_INT_ELT (operands[2], 0) == 0
14113 && !ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2],
14114 1)),
14115 VOIDmode)"
14116 "#"
14117 "&& reload_completed"
14118 [(const_int 0)]
14119 {
14120 rtx op2;
14121 if (<DWI>mode == DImode)
14122 op2 = gen_int_mode (INTVAL (operands[2]) >> 32, <MODE>mode);
14123 else
14124 op2 = gen_int_mode (CONST_WIDE_INT_ELT (operands[2], 1), <MODE>mode);
14125 split_double_concat (<DWI>mode, operands[0], operands[1], op2);
14126 DONE;
14127 }
14128 [(set_attr "isa" "*,nox64,x64,*")])
14129 \f
14130 ;; Negation instructions
14131
;; Expander for integer negation over the SDWIM modes.  It has no
;; condition; all expansion work is delegated to
;; ix86_expand_unary_operator (passing TARGET_APX_NDD), after which the
;; expander is DONE.
14132 (define_expand "neg<mode>2"
14133 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
14134 (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
14135 ""
14136 {
14137 ix86_expand_unary_operator (NEG, <MODE>mode, operands, TARGET_APX_NDD);
14138 DONE;
14139 })
14140
;; Double-word negate with FLAGS_REG clobber, output as "#" and split after
;; reload into three insns on the halves produced by split_double_mode:
;;   1. negate the first half (operand 1 -> operand 0) while setting
;;      FLAGS_REG in CCCmode via UNSPEC_CC_NE;
;;   2. set operand 2 to operand 3 plus the (ltu FLAGS_REG 0) carry term
;;      plus 0;
;;   3. negate operand 2 in place.
;; NOTE(review): presumably operands 0/1 are the low halves and 2/3 the
;; high halves, following split_double_mode's lo/hi output arguments --
;; confirm against its definition.
;; Alternative 1 (separate, early-clobbered destination) is restricted to
;; the "apx_ndd" isa.
14141 (define_insn_and_split "*neg<dwi>2_doubleword"
14142 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
14143 (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))
14144 (clobber (reg:CC FLAGS_REG))]
14145 "ix86_unary_operator_ok (NEG, <DWI>mode, operands, TARGET_APX_NDD)"
14146 "#"
14147 "&& reload_completed"
14148 [(parallel
14149 [(set (reg:CCC FLAGS_REG)
14150 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
14151 (set (match_dup 0) (neg:DWIH (match_dup 1)))])
14152 (parallel
14153 [(set (match_dup 2)
14154 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
14155 (match_dup 3))
14156 (const_int 0)))
14157 (clobber (reg:CC FLAGS_REG))])
14158 (parallel
14159 [(set (match_dup 2)
14160 (neg:DWIH (match_dup 2)))
14161 (clobber (reg:CC FLAGS_REG))])]
14162 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
14163 [(set_attr "isa" "*,apx_ndd")])
14164
14165 ;; Convert:
14166 ;; mov %esi, %edx
14167 ;; negl %eax
14168 ;; adcl $0, %edx
14169 ;; negl %edx
14170 ;; to:
14171 ;; xorl %edx, %edx
14172 ;; negl %eax
14173 ;; sbbl %esi, %edx
14174
;; Matches four insns: a move of operand 1 into register operand 0, a
;; CCC-setting negate of register operand 2, an add-with-carry of zero
;; into operand 0, and a negate of operand 0.
;; Conditions: operands 0 and 2 are different registers, and operand 1
;; mentions neither of them.
;; Replacement: the preparation statement calls ix86_expand_clear on
;; operand 0; the template keeps the CCC-setting negate of operand 2 and
;; then sets operand 0 to (operand 0 - carry) - operand 1, with a
;; FLAGS_REG clobber.
14175 (define_peephole2
14176 [(set (match_operand:SWI48 0 "general_reg_operand")
14177 (match_operand:SWI48 1 "nonimmediate_gr_operand"))
14178 (parallel
14179 [(set (reg:CCC FLAGS_REG)
14180 (unspec:CCC [(match_operand:SWI48 2 "general_reg_operand")
14181 (const_int 0)] UNSPEC_CC_NE))
14182 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
14183 (parallel
14184 [(set (match_dup 0)
14185 (plus:SWI48 (plus:SWI48
14186 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
14187 (match_dup 0))
14188 (const_int 0)))
14189 (clobber (reg:CC FLAGS_REG))])
14190 (parallel
14191 [(set (match_dup 0)
14192 (neg:SWI48 (match_dup 0)))
14193 (clobber (reg:CC FLAGS_REG))])]
14194 "REGNO (operands[0]) != REGNO (operands[2])
14195 && !reg_mentioned_p (operands[0], operands[1])
14196 && !reg_mentioned_p (operands[2], operands[1])"
14197 [(parallel
14198 [(set (reg:CCC FLAGS_REG)
14199 (unspec:CCC [(match_dup 2) (const_int 0)] UNSPEC_CC_NE))
14200 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
14201 (parallel
14202 [(set (match_dup 0)
14203 (minus:SWI48 (minus:SWI48
14204 (match_dup 0)
14205 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)))
14206 (match_dup 1)))
14207 (clobber (reg:CC FLAGS_REG))])]
14208 "ix86_expand_clear (operands[0]);")
14209
14210 ;; Convert:
14211 ;; xorl %edx, %edx
14212 ;; negl %eax
14213 ;; adcl $0, %edx
14214 ;; negl %edx
14215 ;; to:
14216 ;; negl %eax
14217 ;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
14218
;; Matches four insns: a clear of register operand 0, a CCC-setting negate
;; of register operand 1, an add-with-carry of zero into operand 0, and a
;; negate of operand 0.
;; Condition: operands 0 and 1 are different registers.
;; Replacement: the clear is dropped; the CCC-setting negate of operand 1
;; is kept, and operand 0 is then set to -1 when (ltu FLAGS_REG 0) holds
;; and to 0 otherwise, with a FLAGS_REG clobber.
14219 (define_peephole2
14220 [(parallel
14221 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
14222 (clobber (reg:CC FLAGS_REG))])
14223 (parallel
14224 [(set (reg:CCC FLAGS_REG)
14225 (unspec:CCC [(match_operand:SWI48 1 "general_reg_operand")
14226 (const_int 0)] UNSPEC_CC_NE))
14227 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
14228 (parallel
14229 [(set (match_dup 0)
14230 (plus:SWI48 (plus:SWI48
14231 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
14232 (match_dup 0))
14233 (const_int 0)))
14234 (clobber (reg:CC FLAGS_REG))])
14235 (parallel
14236 [(set (match_dup 0)
14237 (neg:SWI48 (match_dup 0)))
14238 (clobber (reg:CC FLAGS_REG))])]
14239 "REGNO (operands[0]) != REGNO (operands[1])"
14240 [(parallel
14241 [(set (reg:CCC FLAGS_REG)
14242 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
14243 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
14244 (parallel
14245 [(set (match_dup 0)
14246 (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
14247 (const_int -1)
14248 (const_int 0)))
14249 (clobber (reg:CC FLAGS_REG))])])
14250
;; Integer negate for the SWI modes.  Alternative 0 negates in place
;; (operand 1 tied to operand 0); alternative 1 takes a separate source
;; and is enabled only for the "apx_ndd" isa.  The <nf_name>, <nf_prefix>
;; and <nf_condition> placeholders are defined outside this part of the
;; file; the insn is marked with the "has_nf" attribute.
14251 (define_insn "*neg<mode>_1<nf_name>"
14252 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
14253 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))]
14254 "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)
14255 && <nf_condition>"
14256 "@
14257 <nf_prefix>neg{<imodesuffix>}\t%0
14258 <nf_prefix>neg{<imodesuffix>}\t{%1, %0|%0, %1}"
14259 [(set_attr "type" "negnot")
14260 (set_attr "isa" "*,apx_ndd")
14261 (set_attr "has_nf" "1")
14262 (set_attr "mode" "<MODE>")])
14263
;; QImode negate whose result is zero-extended into a wider (SWI248x)
;; register.  Only available with TARGET_APX_NDD (and <nf_condition>).
;; Emits a byte-sized neg with both operands printed via the %b modifier.
14264 (define_insn "*negqi_1_zext<mode><nf_name>"
14265 [(set (match_operand:SWI248x 0 "register_operand" "=r")
14266 (zero_extend:SWI248x
14267 (neg:QI (match_operand:QI 1 "nonimmediate_operand" "rm"))))]
14268 "TARGET_APX_NDD && <nf_condition>"
14269 "<nf_prefix>neg{b}\t{%b1, %b0|%b0, %b1}"
14270 [(set_attr "type" "negnot")
14271 (set_attr "has_nf" "1")
14272 (set_attr "mode" "QI")])
14273
;; HImode negate whose result is zero-extended into a wider (SWI48x)
;; register.  Only available with TARGET_APX_NDD (and <nf_condition>).
;; Emits a word-sized neg with both operands printed via the %w modifier.
14274 (define_insn "*neghi_1_zext<mode><nf_name>"
14275 [(set (match_operand:SWI48x 0 "register_operand" "=r")
14276 (zero_extend:SWI48x
14277 (neg:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))]
14278 "TARGET_APX_NDD && <nf_condition>"
14279 "<nf_prefix>neg{w}\t{%w1, %w0|%w0, %w1}"
14280 [(set_attr "type" "negnot")
14281 (set_attr "has_nf" "1")
14282 (set_attr "mode" "HI")])
14283
;; SImode negate whose result is zero-extended into a DImode register,
;; with FLAGS_REG clobbered.  Requires TARGET_64BIT.  Alternative 0
;; negates in place; alternative 1 takes a separate source and is enabled
;; only for the "apx_ndd" isa.  Operands are printed with the %k modifier.
14284 (define_insn "*negsi_1_zext"
14285 [(set (match_operand:DI 0 "register_operand" "=r,r")
14286 (zero_extend:DI
14287 (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
14288 (clobber (reg:CC FLAGS_REG))]
14289 "TARGET_64BIT
14290 && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
14291 "@
14292 neg{l}\t%k0
14293 neg{l}\t{%k1, %k0|%k0, %k1}"
14294 [(set_attr "type" "negnot")
14295 (set_attr "isa" "*,apx_ndd")
14296 (set_attr "mode" "SI")])
14297
14298 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negate into the strict_low_part of a register (SWI12 modes), with
;; FLAGS_REG clobbered.  Enabled when partial register stalls are not a
;; concern or the function is optimized for size.  Alternative 0 emits an
;; in-place neg.  Alternative 1 is output as "#" and, after reload and
;; only when operands 0 and 1 differ, is split into a strict_low_part move
;; of operand 1 into operand 0 followed by an in-place negate.
14299 (define_insn_and_split "*neg<mode>_1_slp"
14300 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14301 (neg:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))
14302 (clobber (reg:CC FLAGS_REG))]
14303 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14304 "@
14305 neg{<imodesuffix>}\t%0
14306 #"
14307 "&& reload_completed
14308 && !(rtx_equal_p (operands[0], operands[1]))"
14309 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14310 (parallel
14311 [(set (strict_low_part (match_dup 0))
14312 (neg:SWI12 (match_dup 0)))
14313 (clobber (reg:CC FLAGS_REG))])]
14314 ""
14315 [(set_attr "type" "negnot")
14316 (set_attr "mode" "<MODE>")])
14317
;; Negate that stores its result in operand 0 and also sets FLAGS_REG from
;; comparing the negated value with zero.  Requires
;; ix86_match_ccmode (insn, CCGOCmode).  Alternative 0 negates in place;
;; alternative 1 takes a separate source and is enabled only for the
;; "apx_ndd" isa.
14318 (define_insn "*neg<mode>_2"
14319 [(set (reg FLAGS_REG)
14320 (compare
14321 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
14322 (const_int 0)))
14323 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
14324 (neg:SWI (match_dup 1)))]
14325 "ix86_match_ccmode (insn, CCGOCmode)
14326 && ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
14327 "@
14328 neg{<imodesuffix>}\t%0
14329 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
14330 [(set_attr "type" "negnot")
14331 (set_attr "isa" "*,apx_ndd")
14332 (set_attr "mode" "<MODE>")])
14333
;; SImode negate that sets FLAGS_REG from comparing the negated value with
;; zero and stores the result zero-extended into a DImode register.
;; Requires TARGET_64BIT and ix86_match_ccmode (insn, CCGOCmode).
;; Alternative 0 negates in place; alternative 1 takes a separate source
;; and is enabled only for the "apx_ndd" isa.
14334 (define_insn "*negsi_2_zext"
14335 [(set (reg FLAGS_REG)
14336 (compare
14337 (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
14338 (const_int 0)))
14339 (set (match_operand:DI 0 "register_operand" "=r,r")
14340 (zero_extend:DI
14341 (neg:SI (match_dup 1))))]
14342 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
14343 && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
14344 "@
14345 neg{l}\t%k0
14346 neg{l}\t{%1, %k0|%k0, %1}"
14347 [(set_attr "type" "negnot")
14348 (set_attr "isa" "*,apx_ndd")
14349 (set_attr "mode" "SI")])
14350
;; Negate that sets FLAGS_REG in CCCmode, expressed as UNSPEC_CC_NE of the
;; source and zero, and stores the negated value in operand 0.  The insn
;; has no condition.  Alternative 0 negates in place; alternative 1 takes
;; a separate source and is enabled only for the "apx_ndd" isa.
14351 (define_insn "*neg<mode>_ccc_1"
14352 [(set (reg:CCC FLAGS_REG)
14353 (unspec:CCC
14354 [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
14355 (const_int 0)] UNSPEC_CC_NE))
14356 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
14357 (neg:SWI (match_dup 1)))]
14358 ""
14359 "@
14360 neg{<imodesuffix>}\t%0
14361 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
14362 [(set_attr "type" "negnot")
14363 (set_attr "isa" "*,apx_ndd")
14364 (set_attr "mode" "<MODE>")])
14365
;; Flags-only variant of the CCC-setting negate: FLAGS_REG is set in
;; CCCmode via UNSPEC_CC_NE of the source and zero, while the negated
;; value goes to a clobbered scratch register (operand 0).  The insn has
;; no condition.  Alternative 1 is enabled only for the "apx_ndd" isa.
14366 (define_insn "*neg<mode>_ccc_2"
14367 [(set (reg:CCC FLAGS_REG)
14368 (unspec:CCC
14369 [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
14370 (const_int 0)] UNSPEC_CC_NE))
14371 (clobber (match_scratch:SWI 0 "=<r>,r"))]
14372 ""
14373 "@
14374 neg{<imodesuffix>}\t%0
14375 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
14376 [(set_attr "type" "negnot")
14377 (set_attr "isa" "*,apx_ndd")
14378 (set_attr "mode" "<MODE>")])
14379
;; Named expander (SWI48 modes, register operands only) that generates the
;; CCC-setting negate: FLAGS_REG is set in CCCmode via UNSPEC_CC_NE of
;; operand 1 and zero, and operand 0 receives the negation of operand 1.
;; It has no condition and no preparation code.
14380 (define_expand "x86_neg<mode>_ccc"
14381 [(parallel
14382 [(set (reg:CCC FLAGS_REG)
14383 (unspec:CCC [(match_operand:SWI48 1 "register_operand")
14384 (const_int 0)] UNSPEC_CC_NE))
14385 (set (match_operand:SWI48 0 "register_operand")
14386 (neg:SWI48 (match_dup 1)))])])
14387
14388 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negate the 8-bit field at bit offset 8 of operand 1 (matched through
;; the "extract_operator" match_operator) and store it into the same field
;; of operand 0, with FLAGS_REG clobbered.  Alternative 0 emits "neg{b}"
;; on %h0.  Alternative 1 is output as "#" and, after reload and only when
;; operands 0 and 1 differ, is split into a copy of the field from
;; operand 1 to operand 0 followed by an in-place negate of that field.
14389 (define_insn_and_split "*negqi_ext<mode>_1"
14390 [(set (zero_extract:SWI248
14391 (match_operand 0 "int248_register_operand" "+Q,&Q")
14392 (const_int 8)
14393 (const_int 8))
14394 (subreg:SWI248
14395 (neg:QI
14396 (subreg:QI
14397 (match_operator:SWI248 2 "extract_operator"
14398 [(match_operand 1 "int248_register_operand" "0,!Q")
14399 (const_int 8)
14400 (const_int 8)]) 0)) 0))
14401 (clobber (reg:CC FLAGS_REG))]
14402 ""
14403 "@
14404 neg{b}\t%h0
14405 #"
14406 "reload_completed
14407 && !(rtx_equal_p (operands[0], operands[1]))"
14408 [(set (zero_extract:SWI248
14409 (match_dup 0) (const_int 8) (const_int 8))
14410 (zero_extract:SWI248
14411 (match_dup 1) (const_int 8) (const_int 8)))
14412 (parallel
14413 [(set (zero_extract:SWI248
14414 (match_dup 0) (const_int 8) (const_int 8))
14415 (subreg:SWI248
14416 (neg:QI
14417 (subreg:QI
14418 (match_op_dup 2
14419 [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
14420 (clobber (reg:CC FLAGS_REG))])]
14421 ""
14422 [(set_attr "type" "negnot")
14423 (set_attr "mode" "QI")])
14424
14425 ;; Negate with jump on overflow.
;; Expands to two insns: a negate of operand 1 into operand 0 that sets
;; FLAGS_REG in CCOmode (UNSPEC_CC_NE of operand 1 and operand 3), followed
;; by a conditional jump to the label in operand 2 taken when
;; (eq FLAGS_REG 0) holds.  Operand 3 is created in the preparation code as
;; the constant with only the top bit of <MODE> set
;; (1 << (GET_MODE_BITSIZE - 1)).
14426 (define_expand "negv<mode>3"
14427 [(parallel [(set (reg:CCO FLAGS_REG)
14428 (unspec:CCO
14429 [(match_operand:SWI 1 "register_operand")
14430 (match_dup 3)] UNSPEC_CC_NE))
14431 (set (match_operand:SWI 0 "register_operand")
14432 (neg:SWI (match_dup 1)))])
14433 (set (pc) (if_then_else
14434 (eq (reg:CCO FLAGS_REG) (const_int 0))
14435 (label_ref (match_operand 2))
14436 (pc)))]
14437 ""
14438 {
14439 operands[3]
14440 = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
14441 <MODE>mode);
14442 })
14443
;; In-place negate that sets FLAGS_REG in CCOmode via UNSPEC_CC_NE of the
;; source and constant operand 2.  Matches only when operand 2 is the
;; sign-bit constant of <MODE> (checked with mode_signbit_p).  Operand 1 is
;; tied to operand 0; there is no separate-source alternative.
14444 (define_insn "*negv<mode>3"
14445 [(set (reg:CCO FLAGS_REG)
14446 (unspec:CCO [(match_operand:SWI 1 "nonimmediate_operand" "0")
14447 (match_operand:SWI 2 "const_int_operand")]
14448 UNSPEC_CC_NE))
14449 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
14450 (neg:SWI (match_dup 1)))]
14451 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
14452 && mode_signbit_p (<MODE>mode, operands[2])"
14453 "neg{<imodesuffix>}\t%0"
14454 [(set_attr "type" "negnot")
14455 (set_attr "mode" "<MODE>")])
14456
14457 ;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384)
;; Matches three insns: a register copy of operand 1 into operand 0, an
;; in-place negate of operand 0 (with flags clobber), and a CCZmode
;; compare of operand 1 with zero.  The replacement keeps the copy and
;; uses a single negate of operand 0 that also sets FLAGS_REG in CCZmode
;; from comparing the negated value with zero, so the separate compare
;; disappears.  There is no extra condition.
14458 (define_peephole2
14459 [(set (match_operand:SWI 0 "general_reg_operand")
14460 (match_operand:SWI 1 "general_reg_operand"))
14461 (parallel [(set (match_dup 0) (neg:SWI (match_dup 0)))
14462 (clobber (reg:CC FLAGS_REG))])
14463 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
14464 ""
14465 [(set (match_dup 0) (match_dup 1))
14466 (parallel [(set (reg:CCZ FLAGS_REG)
14467 (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
14468 (set (match_dup 0) (neg:SWI (match_dup 0)))])])
14469
14470 ;; Special expand pattern to handle integer mode abs
14471
;; Expander for integer abs over the SDWIM modes.  Enabled with
;; TARGET_CMOVE, and for QImode only when !TARGET_PARTIAL_REG_STALL.
;; When TARGET_EXPAND_ABS is set, the preparation code forces operand 1
;; into a register and emits the shift/xor/subtract sequence described in
;; the comment inside the body, copying the result to operand 0 if needed,
;; and finishes with DONE.  Otherwise the preparation code does nothing
;; and the abs pattern with its FLAGS_REG clobber is emitted as written.
14472 (define_expand "abs<mode>2"
14473 [(parallel
14474 [(set (match_operand:SDWIM 0 "register_operand")
14475 (abs:SDWIM
14476 (match_operand:SDWIM 1 "general_operand")))
14477 (clobber (reg:CC FLAGS_REG))])]
14478 "TARGET_CMOVE
14479 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)"
14480 {
14481 if (TARGET_EXPAND_ABS)
14482 {
14483 machine_mode mode = <MODE>mode;
14484 operands[1] = force_reg (mode, operands[1]);
14485
14486 /* Generate rtx abs using:
14487 abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */
14488
14489 rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
14490 rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
14491 shift_amount, NULL_RTX,
14492 0, OPTAB_DIRECT);
14493 rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
14494 operands[0], 0, OPTAB_DIRECT);
14495 rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
14496 operands[0], 0, OPTAB_DIRECT);
14497 if (!rtx_equal_p (minus_dst, operands[0]))
14498 emit_move_insn (operands[0], minus_dst);
14499 DONE;
14500 }
14501 })
14502
;; Double-word abs, matched only while ix86_pre_reload_split () holds and
;; with TARGET_CMOVE; always split ("&& 1").  The preparation code forces
;; operand 1 into a register, allocates a fresh <DWI> pseudo as operand 2,
;; and splits operands 0..2 into halves: one set of halves stays in
;; operands 0..2, the other goes to operands 3..5.
;; The emitted sequence is:
;;   1. operand 2 = -operand 1, setting FLAGS_REG in CCCmode;
;;   2. operand 5 = operand 4 + (ltu FLAGS_REG 0) + 0;
;;   3. operand 5 = -operand 5, setting FLAGS_REG in CCGOCmode;
;;   4. operand 0 = (ge FLAGS_REG 0) ? operand 2 : operand 1;
;;   5. operand 3 = (ge FLAGS_REG 0) ? operand 5 : operand 4.
;; NOTE(review): presumably operands 0..2 are the low halves and 3..5 the
;; high halves, following split_double_mode's lo/hi output arguments --
;; confirm against its definition.
14503 (define_insn_and_split "*abs<dwi>2_doubleword"
14504 [(set (match_operand:<DWI> 0 "register_operand")
14505 (abs:<DWI>
14506 (match_operand:<DWI> 1 "general_operand")))
14507 (clobber (reg:CC FLAGS_REG))]
14508 "TARGET_CMOVE
14509 && ix86_pre_reload_split ()"
14510 "#"
14511 "&& 1"
14512 [(parallel
14513 [(set (reg:CCC FLAGS_REG)
14514 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
14515 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
14516 (parallel
14517 [(set (match_dup 5)
14518 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
14519 (match_dup 4))
14520 (const_int 0)))
14521 (clobber (reg:CC FLAGS_REG))])
14522 (parallel
14523 [(set (reg:CCGOC FLAGS_REG)
14524 (compare:CCGOC
14525 (neg:DWIH (match_dup 5))
14526 (const_int 0)))
14527 (set (match_dup 5)
14528 (neg:DWIH (match_dup 5)))])
14529 (set (match_dup 0)
14530 (if_then_else:DWIH
14531 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
14532 (match_dup 2)
14533 (match_dup 1)))
14534 (set (match_dup 3)
14535 (if_then_else:DWIH
14536 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
14537 (match_dup 5)
14538 (match_dup 4)))]
14539 {
14540 operands[1] = force_reg (<DWI>mode, operands[1]);
14541 operands[2] = gen_reg_rtx (<DWI>mode);
14542
14543 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
14544 })
14545
;; Double-word negated abs, -(abs (x)).  Matched only while
;; ix86_pre_reload_split () holds and with TARGET_CMOVE; always split
;; ("&& 1").  The preparation code forces operand 1 into a register,
;; allocates a fresh <DWI> pseudo as operand 2, and splits operands 0..2
;; into halves (operands 0..2 and operands 3..5).
;; The emitted sequence is the same as for *abs<dwi>2_doubleword except
;; that both final conditional selects test (lt FLAGS_REG 0) instead of
;; (ge FLAGS_REG 0):
;;   1. operand 2 = -operand 1, setting FLAGS_REG in CCCmode;
;;   2. operand 5 = operand 4 + (ltu FLAGS_REG 0) + 0;
;;   3. operand 5 = -operand 5, setting FLAGS_REG in CCGOCmode;
;;   4. operand 0 = (lt FLAGS_REG 0) ? operand 2 : operand 1;
;;   5. operand 3 = (lt FLAGS_REG 0) ? operand 5 : operand 4.
14546 (define_insn_and_split "*nabs<dwi>2_doubleword"
14547 [(set (match_operand:<DWI> 0 "register_operand")
14548 (neg:<DWI>
14549 (abs:<DWI>
14550 (match_operand:<DWI> 1 "general_operand"))))
14551 (clobber (reg:CC FLAGS_REG))]
14552 "TARGET_CMOVE
14553 && ix86_pre_reload_split ()"
14554 "#"
14555 "&& 1"
14556 [(parallel
14557 [(set (reg:CCC FLAGS_REG)
14558 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
14559 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
14560 (parallel
14561 [(set (match_dup 5)
14562 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
14563 (match_dup 4))
14564 (const_int 0)))
14565 (clobber (reg:CC FLAGS_REG))])
14566 (parallel
14567 [(set (reg:CCGOC FLAGS_REG)
14568 (compare:CCGOC
14569 (neg:DWIH (match_dup 5))
14570 (const_int 0)))
14571 (set (match_dup 5)
14572 (neg:DWIH (match_dup 5)))])
14573 (set (match_dup 0)
14574 (if_then_else:DWIH
14575 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
14576 (match_dup 2)
14577 (match_dup 1)))
14578 (set (match_dup 3)
14579 (if_then_else:DWIH
14580 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
14581 (match_dup 5)
14582 (match_dup 4)))]
14583 {
14584 operands[1] = force_reg (<DWI>mode, operands[1]);
14585 operands[2] = gen_reg_rtx (<DWI>mode);
14586
14587 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
14588 })
14589
;; Single-word abs for the SWI modes.  Matched only while
;; ix86_pre_reload_split () holds, with TARGET_CMOVE, and for QImode only
;; when !TARGET_PARTIAL_REG_STALL; always split ("&& 1").  The preparation
;; code forces operand 1 into a register and allocates a fresh pseudo as
;; operand 2.  The split emits a negate of operand 1 into operand 2 that
;; sets FLAGS_REG in CCGOCmode, then a conditional select:
;; operand 0 = (ge FLAGS_REG 0) ? operand 2 : operand 1.
14590 (define_insn_and_split "*abs<mode>2_1"
14591 [(set (match_operand:SWI 0 "register_operand")
14592 (abs:SWI
14593 (match_operand:SWI 1 "general_operand")))
14594 (clobber (reg:CC FLAGS_REG))]
14595 "TARGET_CMOVE
14596 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
14597 && ix86_pre_reload_split ()"
14598 "#"
14599 "&& 1"
14600 [(parallel
14601 [(set (reg:CCGOC FLAGS_REG)
14602 (compare:CCGOC
14603 (neg:SWI (match_dup 1))
14604 (const_int 0)))
14605 (set (match_dup 2)
14606 (neg:SWI (match_dup 1)))])
14607 (set (match_dup 0)
14608 (if_then_else:SWI
14609 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
14610 (match_dup 2)
14611 (match_dup 1)))]
14612 {
14613 operands[1] = force_reg (<MODE>mode, operands[1]);
14614 operands[2] = gen_reg_rtx (<MODE>mode);
14615 })
14616
;; Single-word negated abs, -(abs (x)), for the SWI modes.  Matched only
;; while ix86_pre_reload_split () holds, with TARGET_CMOVE, and for QImode
;; only when !TARGET_PARTIAL_REG_STALL; always split ("&& 1").  The
;; preparation code forces operand 1 into a register and allocates a fresh
;; pseudo as operand 2.  The split emits a negate of operand 1 into
;; operand 2 that sets FLAGS_REG in CCGOCmode, then a conditional select:
;; operand 0 = (lt FLAGS_REG 0) ? operand 2 : operand 1.
14617 (define_insn_and_split "*nabs<mode>2_1"
14618 [(set (match_operand:SWI 0 "register_operand")
14619 (neg:SWI
14620 (abs:SWI
14621 (match_operand:SWI 1 "general_operand"))))
14622 (clobber (reg:CC FLAGS_REG))]
14623 "TARGET_CMOVE
14624 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
14625 && ix86_pre_reload_split ()"
14626 "#"
14627 "&& 1"
14628 [(parallel
14629 [(set (reg:CCGOC FLAGS_REG)
14630 (compare:CCGOC
14631 (neg:SWI (match_dup 1))
14632 (const_int 0)))
14633 (set (match_dup 2)
14634 (neg:SWI (match_dup 1)))])
14635 (set (match_dup 0)
14636 (if_then_else:SWI
14637 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
14638 (match_dup 2)
14639 (match_dup 1)))]
14640 {
14641 operands[1] = force_reg (<MODE>mode, operands[1]);
14642 operands[2] = gen_reg_rtx (<MODE>mode);
14643 })
14644
;; Expander for the TFmode operations of the absneg code iterator
;; (presumably abs and neg -- the iterator is defined elsewhere).
;; Available with TARGET_SSE; all RTL is generated by
;; ix86_expand_fp_absneg_operator, so the template above is not emitted
;; (DONE).
14645 (define_expand "<code>tf2"
14646 [(set (match_operand:TF 0 "register_operand")
14647 (absneg:TF (match_operand:TF 1 "register_operand")))]
14648 "TARGET_SSE"
14649 "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
14650
;; TFmode absneg carrying an extra TFmode operand 2 in a (use ...)
;; (presumably the sign-bit mask built by the expander -- confirm in
;; ix86_expand_fp_absneg_operator).  Never output directly ("#"); after
;; reload it becomes a single <absneg_op>:TF of operands 1 and 2.
;; Alternatives 0-1 are non-AVX (one input tied to the destination),
;; alternatives 2-3 are AVX (see the "isa" attribute).
;; The preparation code canonicalizes the operand order:
;;   - TARGET_AVX: if operand 1 is a MEM, swap operands 1 and 2;
;;   - otherwise: if operand 2 is the operand matching the destination,
;;     swap so that operand 1 is the one equal to operand 0.
14651 (define_insn_and_split "*<code>tf2_1"
14652 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
14653 (absneg:TF
14654 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
14655 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
14656 "TARGET_SSE"
14657 "#"
14658 "&& reload_completed"
14659 [(set (match_dup 0)
14660 (<absneg_op>:TF (match_dup 1) (match_dup 2)))]
14661 {
14662 if (TARGET_AVX)
14663 {
14664 if (MEM_P (operands[1]))
14665 std::swap (operands[1], operands[2]);
14666 }
14667 else
14668 {
14669 if (operands_match_p (operands[0], operands[2]))
14670 std::swap (operands[1], operands[2]);
14671 }
14672 }
14673 [(set_attr "isa" "noavx,noavx,avx,avx")])
14674
;; TFmode (neg (abs x)) with an extra TFmode operand 2 in a (use ...)
;; (presumably the sign-bit mask -- confirm at the point where this
;; pattern is generated).  After reload it is split into a single
;; ior:TF of operands 1 and 2.  Constraints, "isa" alternatives and the
;; operand-order canonicalization in the preparation code are the same as
;; in the absneg TFmode pattern: swap 1/2 when operand 1 is a MEM under
;; TARGET_AVX, or when operand 2 matches the destination otherwise.
14675 (define_insn_and_split "*nabstf2_1"
14676 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
14677 (neg:TF
14678 (abs:TF
14679 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
14680 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
14681 "TARGET_SSE"
14682 "#"
14683 "&& reload_completed"
14684 [(set (match_dup 0)
14685 (ior:TF (match_dup 1) (match_dup 2)))]
14686 {
14687 if (TARGET_AVX)
14688 {
14689 if (MEM_P (operands[1]))
14690 std::swap (operands[1], operands[2]);
14691 }
14692 else
14693 {
14694 if (operands_match_p (operands[0], operands[2]))
14695 std::swap (operands[1], operands[2]);
14696 }
14697 }
14698 [(set_attr "isa" "noavx,noavx,avx,avx")])
14699
;; Expander for the HFmode absneg operations.  Requires
;; TARGET_AVX512FP16; all RTL is generated by
;; ix86_expand_fp_absneg_operator (DONE).
14700 (define_expand "<code>hf2"
14701 [(set (match_operand:HF 0 "register_operand")
14702 (absneg:HF (match_operand:HF 1 "register_operand")))]
14703 "TARGET_AVX512FP16"
14704 "ix86_expand_fp_absneg_operator (<CODE>, HFmode, operands); DONE;")
14705
;; Expander for the absneg operations in the X87MODEF modes.  Enabled
;; when the x87 is available, or when the mode is an SSE float mode and
;; SSE math is in use.  All RTL is generated by
;; ix86_expand_fp_absneg_operator (DONE).
14706 (define_expand "<code><mode>2"
14707 [(set (match_operand:X87MODEF 0 "register_operand")
14708 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
14709 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
14710 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
14711
14712 ;; Changing of sign for FP values is doable using integer unit too.
;; Placeholder insn (output template "#", so it must be split) for absneg
;; when the x87 is enabled and the mode is NOT handled by SSE math.
;; Alternative 0 keeps the value in an x87 register ("f"); alternative 1
;; uses a general register and is marked "!" in its constraint.  In both
;; alternatives operand 1 is tied to operand 0.  The FLAGS_REG clobber is
;; part of the pattern; the two define_splits that follow pick the x87 or
;; the integer implementation after reload.
14713 (define_insn "*<code><mode>2_i387_1"
14714 [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
14715 (absneg:X87MODEF
14716 (match_operand:X87MODEF 1 "register_operand" "0,0")))
14717 (clobber (reg:CC FLAGS_REG))]
14718 "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
14719 "#")
14720
;; After reload, when both operands are x87 registers
;; (fp_register_operand), drop the FLAGS_REG clobber and leave the plain
;; (set (absneg ...)) form.
14721 (define_split
14722 [(set (match_operand:X87MODEF 0 "fp_register_operand")
14723 (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
14724 (clobber (reg:CC FLAGS_REG))]
14725 "TARGET_80387 && reload_completed"
14726 [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])
14727
;; After reload, when both operands ended up in general registers
;; (general_reg_operand), hand the whole job to
;; ix86_split_fp_absneg_operator; the (const_int 0) template is a
;; placeholder because the C code ends with DONE.
14728 (define_split
14729 [(set (match_operand:X87MODEF 0 "general_reg_operand")
14730 (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
14731 (clobber (reg:CC FLAGS_REG))]
14732 "TARGET_80387 && reload_completed"
14733 [(const_int 0)]
14734 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
14735
;; HFmode absneg with a V8HFmode operand 2 in a (use ...) (presumably the
;; sign-bit mask vector -- confirm in ix86_expand_fp_absneg_operator) and
;; a FLAGS_REG clobber.  Requires TARGET_AVX512FP16.  After reload it is
;; split into a single <absneg_op>:V8HF; the preparation code rewrites
;; operands 0 and 1 as V8HFmode lowpart subregs of the HFmode registers so
;; that all three operands have the vector mode.
14736 (define_insn_and_split "*<code>hf2_1"
14737 [(set (match_operand:HF 0 "register_operand" "=Yv")
14738 (absneg:HF
14739 (match_operand:HF 1 "register_operand" "Yv")))
14740 (use (match_operand:V8HF 2 "vector_operand" "Yvm"))
14741 (clobber (reg:CC FLAGS_REG))]
14742 "TARGET_AVX512FP16"
14743 "#"
14744 "&& reload_completed"
14745 [(set (match_dup 0)
14746 (<absneg_op>:V8HF (match_dup 1) (match_dup 2)))]
14747 {
14748 operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode);
14749 operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode);
14750 })
14751
;; Placeholder insn ("#") for MODEF absneg with a vector-mode operand 2 in
;; a (use ...) and a FLAGS_REG clobber; it is resolved by the three
;; define_splits that follow.  Alternatives:
;;   0,1  SSE register, non-AVX (one input tied to the destination);
;;   2    SSE register, AVX;
;;   3    x87 register ("f"), operand 2 unconstrained ("X");
;;   4    general register (marked "!"), operand 2 unconstrained ("X").
;; The "enabled" attribute selects the usable set:
;;   - mode handled by SSE math: alternatives 3 and 4 are enabled only
;;     with TARGET_MIX_SSE_I387, the others keep the default ("*");
;;   - otherwise: only alternatives 3 and 4 are enabled.
14752 (define_insn "*<code><mode>2_1"
14753 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
14754 (absneg:MODEF
14755 (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
14756 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
14757 (clobber (reg:CC FLAGS_REG))]
14758 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
14759 "#"
14760 [(set_attr "isa" "noavx,noavx,avx,*,*")
14761 (set (attr "enabled")
14762 (if_then_else
14763 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
14764 (if_then_else
14765 (eq_attr "alternative" "3,4")
14766 (symbol_ref "TARGET_MIX_SSE_I387")
14767 (const_string "*"))
14768 (if_then_else
14769 (eq_attr "alternative" "3,4")
14770 (symbol_ref "true")
14771 (symbol_ref "false"))))])
14772
;; SSE-register case of MODEF absneg, split after reload when SSE math
;; handles the mode.  The scalar operation becomes a single vector
;; <absneg_op> of operand 1 with operand 2 in mode <ssevecmodef>.
;; Operands 0 and 1 are rewritten as vector-mode lowpart subregs of the
;; scalar registers.  Without TARGET_AVX, if operand 2 is the one that
;; matches the destination, operands 1 and 2 are swapped so that operand 1
;; is the one equal to operand 0.
14773 (define_split
14774 [(set (match_operand:MODEF 0 "sse_reg_operand")
14775 (absneg:MODEF
14776 (match_operand:MODEF 1 "sse_reg_operand")))
14777 (use (match_operand:<ssevecmodef> 2 "vector_operand"))
14778 (clobber (reg:CC FLAGS_REG))]
14779 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
14780 && reload_completed"
14781 [(set (match_dup 0)
14782 (<absneg_op>:<ssevecmodef> (match_dup 1) (match_dup 2)))]
14783 {
14784 machine_mode mode = <MODE>mode;
14785 machine_mode vmode = <ssevecmodef>mode;
14786
14787 operands[0] = lowpart_subreg (vmode, operands[0], mode);
14788 operands[1] = lowpart_subreg (vmode, operands[1], mode);
14789
14790 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
14791 std::swap (operands[1], operands[2]);
14792 })
14793
;; x87-register case of MODEF absneg: after reload, drop both the
;; (use ...) of operand 2 and the FLAGS_REG clobber, leaving the plain
;; (set (absneg ...)) form.
14794 (define_split
14795 [(set (match_operand:MODEF 0 "fp_register_operand")
14796 (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
14797 (use (match_operand 2))
14798 (clobber (reg:CC FLAGS_REG))]
14799 "TARGET_80387 && reload_completed"
14800 [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])
14801
;; General-register case of MODEF absneg: after reload the replacement
;; sequence is produced entirely by ix86_split_fp_absneg_operator (the
;; (const_int 0) template is a placeholder because the C code ends with
;; DONE).
14802 (define_split
14803 [(set (match_operand:MODEF 0 "general_reg_operand")
14804 (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
14805 (use (match_operand 2))
14806 (clobber (reg:CC FLAGS_REG))]
14807 "TARGET_80387 && reload_completed"
14808 [(const_int 0)]
14809 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
14810
;; MODEF (neg (abs x)) in SSE registers, with a vector-mode operand 2 in a
;; (use ...) (presumably the sign-bit mask -- confirm at the point where
;; this pattern is generated).  Only available when SSE math handles the
;; mode; note that, unlike the absneg MODEF pattern, there is no FLAGS_REG
;; clobber and no x87/general-register alternative.  After reload it is
;; split into a single vector ior of operands 1 and 2; operands 0 and 1
;; are rewritten as vector-mode lowpart subregs, and without TARGET_AVX
;; operands 1 and 2 are swapped when operand 2 matches the destination.
14811 (define_insn_and_split "*nabs<mode>2_1"
14812 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
14813 (neg:MODEF
14814 (abs:MODEF
14815 (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
14816 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
14817 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
14818 "#"
14819 "&& reload_completed"
14820 [(set (match_dup 0)
14821 (ior:<ssevecmodef> (match_dup 1) (match_dup 2)))]
14822 {
14823 machine_mode mode = <MODE>mode;
14824 machine_mode vmode = <ssevecmodef>mode;
14825
14826 operands[0] = lowpart_subreg (vmode, operands[0], mode);
14827 operands[1] = lowpart_subreg (vmode, operands[1], mode);
14828
14829 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
14830 std::swap (operands[1], operands[2]);
14831 }
14832 [(set_attr "isa" "noavx,noavx,avx")])
14833
14834 ;; Conditionalize these after reload. If they match before reload, we
14835 ;; lose the clobber and ability to use integer instructions.
14836
;; The real x87 absneg instruction: operand 1 is tied to operand 0, both
;; in x87 registers, and the mnemonic comes from <absneg_mnemonic>.
;; The condition requires reload_completed, so this clobber-free form can
;; only be matched after reload.
14837 (define_insn "*<code><mode>2_i387"
14838 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
14839 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
14840 "TARGET_80387 && reload_completed"
14841 "<absneg_mnemonic>"
14842 [(set_attr "type" "fsgn")
14843 (set_attr "mode" "<MODE>")])
14844
14845 ;; Copysign instructions
14846
;; copysign expander for the SSEMODEF modes.  Operand 1 may be a register
;; or a constant (nonmemory_operand); operands 0 and 2 must be registers.
;; Enabled when SSE math handles the mode, or for TFmode with TARGET_SSE,
;; or for HFmode with TARGET_AVX512FP16.  All RTL is generated by
;; ix86_expand_copysign (DONE).
14847 (define_expand "copysign<mode>3"
14848 [(match_operand:SSEMODEF 0 "register_operand")
14849 (match_operand:SSEMODEF 1 "nonmemory_operand")
14850 (match_operand:SSEMODEF 2 "register_operand")]
14851 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
14852 || (TARGET_SSE && (<MODE>mode == TFmode))
14853 || (TARGET_AVX512FP16 && (<MODE>mode ==HFmode))"
14854 "ix86_expand_copysign (operands); DONE;")
14855
;; xorsign expander for the MODEFH modes.  Enabled when SSE math handles
;; the mode, or when the mode is HFmode.  If both inputs are the same rtx
;; the operation is emitted as a plain abs of operand 1 (gen_abs<mode>2);
;; otherwise ix86_expand_xorsign generates the sequence.
14856 (define_expand "xorsign<mode>3"
14857 [(match_operand:MODEFH 0 "register_operand")
14858 (match_operand:MODEFH 1 "register_operand")
14859 (match_operand:MODEFH 2 "register_operand")]
14860 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
14861 || <MODE>mode == HFmode"
14862 {
14863 if (rtx_equal_p (operands[1], operands[2]))
14864 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
14865 else
14866 ix86_expand_xorsign (operands);
14867 DONE;
14868 })
14869 \f
14870 ;; One's complement instructions
14871
;; Bitwise NOT expander for the SDWIM modes.  Always enabled (empty
;; condition); the RTL is generated by ix86_expand_unary_operator, which
;; is passed TARGET_APX_NDD as its last argument.
14872 (define_expand "one_cmpl<mode>2"
14873 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
14874 (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
14875 ""
14876 {
14877 ix86_expand_unary_operator (NOT, <MODE>mode, operands, TARGET_APX_NDD);
14878 DONE;
14879 })
14880
;; Double-word bitwise NOT.  Alternative 0 operates in place (operand 1
;; tied to operand 0); alternative 1 is the APX NDD form with an
;; early-clobbered register destination and a separate source.
;; After reload split_double_mode fills operands 0-3 with the half-word
;; pieces of the two operands, and the template complements each half
;; independently: operand 0 = ~operand 1 and operand 2 = ~operand 3
;; (presumably 0/1 are the low halves and 2/3 the high halves -- confirm
;; against split_double_mode).
14881 (define_insn_and_split "*one_cmpl<dwi>2_doubleword"
14882 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
14883 (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))]
14884 "ix86_unary_operator_ok (NOT, <DWI>mode, operands, TARGET_APX_NDD)"
14885 "#"
14886 "&& reload_completed"
14887 [(set (match_dup 0)
14888 (not:DWIH (match_dup 1)))
14889 (set (match_dup 2)
14890 (not:DWIH (match_dup 3)))]
14891 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
14892 [(set_attr "isa" "*,apx_ndd")])
14893
;; Bitwise NOT for the SWI248 modes.  Alternatives:
;;   0  in-place NOT of a register or memory operand;
;;   1  APX NDD form with separate source and register destination;
;;   2  mask-register ("k") form, output as "#" (i.e. handled by a split
;;      defined elsewhere) and typed msklog; its isa is <kmov_isa>.
14894 (define_insn "*one_cmpl<mode>2_1"
14895 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
14896 (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))]
14897 "ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
14898 "@
14899 not{<imodesuffix>}\t%0
14900 not{<imodesuffix>}\t{%1, %0|%0, %1}
14901 #"
14902 [(set_attr "isa" "*,apx_ndd,<kmov_isa>")
14903 (set_attr "type" "negnot,negnot,msklog")
14904 (set_attr "mode" "<MODE>")])
14905
;; QImode NOT whose result is zero-extended into a wider (SWI248x)
;; register.  Only available with TARGET_APX_NDD; emitted as a two-operand
;; not{b} writing the byte view (%b0) of the destination.
14906 (define_insn "*one_cmplqi2_1_zext<mode>"
14907 [(set (match_operand:SWI248x 0 "register_operand" "=r")
14908 (zero_extend:SWI248x
14909 (not:QI (match_operand:QI 1 "nonimmediate_operand" "rm"))))]
14910 "TARGET_APX_NDD"
14911 "not{b}\t{%1, %b0|%b0, %1}"
14912 [(set_attr "type" "negnot")
14913 (set_attr "mode" "QI")])
14914
;; HImode NOT whose result is zero-extended into a wider (SWI48x)
;; register.  Only available with TARGET_APX_NDD; emitted as a two-operand
;; not{w} writing the word view (%w0) of the destination.
14915 (define_insn "*one_cmplhi2_1_zext<mode>"
14916 [(set (match_operand:SWI48x 0 "register_operand" "=r")
14917 (zero_extend:SWI48x
14918 (not:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))]
14919 "TARGET_APX_NDD"
14920 "not{w}\t{%1, %w0|%w0, %1}"
14921 [(set_attr "type" "negnot")
14922 (set_attr "mode" "HI")])
14923
;; SImode NOT whose result is zero-extended to DImode (64-bit only).
;; Alternatives:
;;   0  in-place not{l} on the 32-bit view (%k0) of the destination;
;;   1  APX NDD form with a separate register/memory source;
;;   2  mask-register ("k") form, output as "#" and typed msklog
;;      (isa avx512bw).
14924 (define_insn "*one_cmplsi2_1_zext"
14925 [(set (match_operand:DI 0 "register_operand" "=r,r,?k")
14926 (zero_extend:DI
14927 (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,k"))))]
14928 "TARGET_64BIT
14929 && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
14930 "@
14931 not{l}\t%k0
14932 not{l}\t{%1, %k0|%k0, %1}
14933 #"
14934 [(set_attr "isa" "x64,apx_ndd,avx512bw")
14935 (set_attr "type" "negnot,negnot,msklog")
14936 (set_attr "mode" "SI,SI,SI")])
14937
;; QImode bitwise NOT.  Alternatives:
;;   0  in-place not{b} on a byte register or memory;
;;   1  in-place not{l} on the 32-bit view (%k0) of a register; its mode
;;      attribute is SI and it is preferred for speed only when
;;      !TARGET_PARTIAL_REG_STALL;
;;   2  APX NDD not{b} with a separate source;
;;   3  mask-register ("k") form, output as "#" and typed msklog; its
;;      mode attribute is HI when !TARGET_AVX512DQ and QI otherwise.
14938 (define_insn "*one_cmplqi2_1"
14939 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k")
14940 (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))]
14941 "ix86_unary_operator_ok (NOT, QImode, operands, TARGET_APX_NDD)"
14942 "@
14943 not{b}\t%0
14944 not{l}\t%k0
14945 not{b}\t{%1, %0|%0, %1}
14946 #"
14947 [(set_attr "isa" "*,*,apx_ndd,avx512f")
14948 (set_attr "type" "negnot,negnot,negnot,msklog")
14949 (set (attr "mode")
14950 (cond [(eq_attr "alternative" "1")
14951 (const_string "SI")
14952 (and (eq_attr "alternative" "3")
14953 (match_test "!TARGET_AVX512DQ"))
14954 (const_string "HI")
14955 ]
14956 (const_string "QI")))
14957 ;; Potential partial reg stall on alternative 1.
14958 (set (attr "preferred_for_speed")
14959 (cond [(eq_attr "alternative" "1")
14960 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
14961 (symbol_ref "true")))])
14962
14963 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; NOT into the strict low part of a register (SWI12 modes), i.e. only
;; the low part of operand 0 is written.  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Alternative 0 is the in-place instruction.  Alternative 1 ("#") has a
;; distinct source register; after reload, when operands 0 and 1 differ,
;; it is split into a strict_low_part copy of operand 1 into operand 0
;; followed by an in-place NOT of operand 0.
14964 (define_insn_and_split "*one_cmpl<mode>_1_slp"
14965 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14966 (not:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))]
14967 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14968 "@
14969 not{<imodesuffix>}\t%0
14970 #"
14971 "&& reload_completed
14972 && !(rtx_equal_p (operands[0], operands[1]))"
14973 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14974 (set (strict_low_part (match_dup 0))
14975 (not:SWI12 (match_dup 0)))]
14976 ""
14977 [(set_attr "type" "negnot")
14978 (set_attr "mode" "<MODE>")])
14979
;; NOT that also sets FLAGS_REG from comparing the complemented value
;; with zero (the flags mode must satisfy ix86_match_ccmode for
;; CCNOmode).  The output template is "#": the insn is never emitted as
;; is, and the define_split that follows rewrites it as XOR with -1
;; (presumably because the NOT instruction itself does not set the flags
;; -- confirm in the ISA manual).  Alternative 1 is the APX NDD form.
14980 (define_insn "*one_cmpl<mode>2_2"
14981 [(set (reg FLAGS_REG)
14982 (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
14983 (const_int 0)))
14984 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
14985 (not:SWI (match_dup 1)))]
14986 "ix86_match_ccmode (insn, CCNOmode)
14987 && ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
14988 "#"
14989 [(set_attr "type" "alu1")
14990 (set_attr "isa" "*,apx_ndd")
14991 (set_attr "mode" "<MODE>")])
14992
;; Rewrite a flag-setting NOT as a flag-setting XOR with -1.
;; Operand 0 is the flags register, operand 2 the compare operator,
;; operand 3 the source and operand 1 the destination.  The replacement
;; is a single parallel that keeps the same compare operator
;; (match_op_dup 2) but applies it to (xor operand3 -1), and stores the
;; same xor into operand 1.
14993 (define_split
14994 [(set (match_operand 0 "flags_reg_operand")
14995 (match_operator 2 "compare_operator"
14996 [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
14997 (const_int 0)]))
14998 (set (match_operand:SWI 1 "nonimmediate_operand")
14999 (not:SWI (match_dup 3)))]
15000 "ix86_match_ccmode (insn, CCNOmode)"
15001 [(parallel [(set (match_dup 0)
15002 (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
15003 (const_int 0)]))
15004 (set (match_dup 1)
15005 (xor:SWI (match_dup 3) (const_int -1)))])])
15006
;; 64-bit only: SImode NOT that sets FLAGS_REG from comparing the
;; complemented value with zero and stores the zero-extended result in a
;; DImode register.  Output template is "#"; the define_split that
;; follows rewrites it as XOR with -1.  Alternative 1 is the APX NDD
;; form.
15007 (define_insn "*one_cmplsi2_2_zext"
15008 [(set (reg FLAGS_REG)
15009 (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
15010 (const_int 0)))
15011 (set (match_operand:DI 0 "register_operand" "=r,r")
15012 (zero_extend:DI (not:SI (match_dup 1))))]
15013 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
15014 && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
15015 "#"
15016 [(set_attr "type" "alu1")
15017 (set_attr "isa" "*,apx_ndd")
15018 (set_attr "mode" "SI")])
15019
;; Zero-extending variant of the NOT -> XOR rewrite: the flag-setting
;; SImode NOT with a DImode zero-extended destination becomes one
;; parallel that compares (xor:SI operand3 -1) with zero using the same
;; compare operator and stores the zero-extended xor into operand 1.
15020 (define_split
15021 [(set (match_operand 0 "flags_reg_operand")
15022 (match_operator 2 "compare_operator"
15023 [(not:SI (match_operand:SI 3 "nonimmediate_operand"))
15024 (const_int 0)]))
15025 (set (match_operand:DI 1 "register_operand")
15026 (zero_extend:DI (not:SI (match_dup 3))))]
15027 "ix86_match_ccmode (insn, CCNOmode)"
15028 [(parallel [(set (match_dup 0)
15029 (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
15030 (const_int 0)]))
15031 (set (match_dup 1)
15032 (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
15033
15034 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; NOT of the 8-bit field at bit position 8 of a register: the
;; destination is (zero_extract op0 8 8) and the source is the same
;; field of operand 1, taken through an extract_operator.
;; Alternative 0 (operand 1 tied to operand 0) is emitted as not{b} on
;; "%h0".  Alternative 1 ("#") has a distinct source register; after
;; reload, when operands 0 and 1 differ, it is split into a copy of the
;; field from operand 1 to operand 0 followed by the in-place NOT of that
;; field in operand 0.
15035 (define_insn_and_split "*one_cmplqi_ext<mode>_1"
15036 [(set (zero_extract:SWI248
15037 (match_operand 0 "int248_register_operand" "+Q,&Q")
15038 (const_int 8)
15039 (const_int 8))
15040 (subreg:SWI248
15041 (not:QI
15042 (subreg:QI
15043 (match_operator:SWI248 2 "extract_operator"
15044 [(match_operand 1 "int248_register_operand" "0,!Q")
15045 (const_int 8)
15046 (const_int 8)]) 0)) 0))]
15047 ""
15048 "@
15049 not{b}\t%h0
15050 #"
15051 "reload_completed
15052 && !(rtx_equal_p (operands[0], operands[1]))"
15053 [(set (zero_extract:SWI248
15054 (match_dup 0) (const_int 8) (const_int 8))
15055 (zero_extract:SWI248
15056 (match_dup 1) (const_int 8) (const_int 8)))
15057 (set (zero_extract:SWI248
15058 (match_dup 0) (const_int 8) (const_int 8))
15059 (subreg:SWI248
15060 (not:QI
15061 (subreg:QI
15062 (match_op_dup 2
15063 [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
15064 ""
15065 [(set_attr "type" "negnot")
15066 (set_attr "mode" "QI")])
15067 \f
15068 ;; Shift instructions
15069
15070 ;; DImode shifts are implemented using the i386 "shift double" opcode,
15071 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
15072 ;; is variable, then the count is in %cl and the "imm" operand is dropped
15073 ;; from the assembler input.
15074 ;;
15075 ;; This instruction shifts the target reg/mem as usual, but instead of
15076 ;; shifting in zeros, bits are shifted in from reg operand. If the insn
15077 ;; is a left shift double, bits are taken from the high order bits of
15078 ;; reg, else if the insn is a shift right double, bits are taken from the
15079 ;; low order bits of reg. So if %eax is "1234" and %edx is "5678",
15080 ;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
15081 ;;
15082 ;; Since sh[lr]d does not change the `reg' operand, that is done
15083 ;; separately, making all shifts emit pairs of shift double and normal
15084 ;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
15085 ;; support a 63 bit shift, each shift where the count is in a reg expands
15086 ;; to a pair of shifts, a branch, a shift by 32 and a label.
15087 ;;
15088 ;; If the shift count is a constant, we need never emit more than one
15089 ;; shift pair, instead using moves and sign extension for counts greater
15090 ;; than 31.
15091
;; Left-shift expander for the SDWIM modes.  The shift count (operand 2)
;; is a QImode register or constant.  Always enabled; the RTL is
;; generated by ix86_expand_binary_operator, which is passed
;; TARGET_APX_NDD as its last argument.
15092 (define_expand "ashl<mode>3"
15093 [(set (match_operand:SDWIM 0 "<shift_operand>")
15094 (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
15095 (match_operand:QI 2 "nonmemory_operand")))]
15096 ""
15097 {
15098 ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD);
15099 DONE;
15100 })
15101
;; Double-word left shift whose count is (subreg:QI (and op2 op3) 0),
;; i.e. a wider register masked with the constant operand 3.
;; Let N = <MODE_SIZE> * BITS_PER_UNIT (the half-word width in bits).
;; The insn matches when either bit N of the mask is clear, or all of the
;; low 2N-1 mask bits are set, and ix86_pre_reload_split () is true.
;; It is always split ("#", "&& 1"):
;;   - If bit N of the mask is set, the count (operand 2 forced into a
;;     register and taken as its QImode lowpart) is handed to the generic
;;     ashl<dwi>3_doubleword pattern without the AND, and we are DONE.
;;   - Otherwise split_double_mode puts the halves of operands 0 and 1
;;     into operands 4/5 and 6/7, and the template emits a pair of
;;     half-word operations: operand 6 = (operand 6 << cnt) combined with
;;     the bits shifted out of operand 5 (the shld form, with
;;     operands 8 = N-1 and 9 = N), then operand 4 = operand 5 << cnt.
;;     If the mask does not cover all of the low N-1 bits, an explicit AND
;;     of the count with (mask & (N-1)) is emitted first.  Finally
;;     operand 7 is copied into operand 6 when they are different rtxes,
;;     so that the first instruction can work in place.
15102 (define_insn_and_split "*ashl<dwi>3_doubleword_mask"
15103 [(set (match_operand:<DWI> 0 "register_operand")
15104 (ashift:<DWI>
15105 (match_operand:<DWI> 1 "register_operand")
15106 (subreg:QI
15107 (and
15108 (match_operand 2 "int248_register_operand" "c")
15109 (match_operand 3 "const_int_operand")) 0)))
15110 (clobber (reg:CC FLAGS_REG))]
15111 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
15112 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
15113 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
15114 && ix86_pre_reload_split ()"
15115 "#"
15116 "&& 1"
15117 [(parallel
15118 [(set (match_dup 6)
15119 (ior:DWIH (ashift:DWIH (match_dup 6)
15120 (and:QI (match_dup 2) (match_dup 8)))
15121 (subreg:DWIH
15122 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
15123 (minus:QI (match_dup 9)
15124 (and:QI (match_dup 2) (match_dup 8)))) 0)))
15125 (clobber (reg:CC FLAGS_REG))])
15126 (parallel
15127 [(set (match_dup 4)
15128 (ashift:DWIH (match_dup 5) (match_dup 2)))
15129 (clobber (reg:CC FLAGS_REG))])]
15130 {
15131 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
15132 {
15133 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15134 operands[2] = gen_lowpart (QImode, operands[2]);
15135 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
15136 operands[2]));
15137 DONE;
15138 }
15139
15140 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
15141
15142 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
15143 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
15144
15145 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15146 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15147 {
15148 rtx xops[3];
15149 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
15150 xops[1] = operands[2];
15151 xops[2] = GEN_INT (INTVAL (operands[3])
15152 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
15153 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
15154 operands[2] = xops[0];
15155 }
15156
15157 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15158 operands[2] = gen_lowpart (QImode, operands[2]);
15159
15160 if (!rtx_equal_p (operands[6], operands[7]))
15161 emit_move_insn (operands[6], operands[7]);
15162 })
15163
;; Variant of the masked double-word left shift in which the count is
;; already a QImode (and:QI op2 op3), so no subreg/lowpart handling is
;; needed.  Let N = <MODE_SIZE> * BITS_PER_UNIT.  Match condition and
;; split template are the same as in the subreg variant:
;;   - If bit N of the mask is set, emit the generic
;;     ashl<dwi>3_doubleword with the unmasked count and finish (DONE).
;;   - Otherwise split the operands into halves (operands 4/5 and 6/7),
;;     set operands 8 = N-1 and 9 = N for the template, and, if the mask
;;     does not cover all of the low N-1 bits, compute the masked count
;;     into a fresh QImode pseudo with gen_andqi3 (using the original mask
;;     operand 3).  Finally copy operand 7 into operand 6 when they
;;     differ.
15164 (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
15165 [(set (match_operand:<DWI> 0 "register_operand")
15166 (ashift:<DWI>
15167 (match_operand:<DWI> 1 "register_operand")
15168 (and:QI
15169 (match_operand:QI 2 "register_operand" "c")
15170 (match_operand:QI 3 "const_int_operand"))))
15171 (clobber (reg:CC FLAGS_REG))]
15172 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
15173 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
15174 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
15175 && ix86_pre_reload_split ()"
15176 "#"
15177 "&& 1"
15178 [(parallel
15179 [(set (match_dup 6)
15180 (ior:DWIH (ashift:DWIH (match_dup 6)
15181 (and:QI (match_dup 2) (match_dup 8)))
15182 (subreg:DWIH
15183 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
15184 (minus:QI (match_dup 9)
15185 (and:QI (match_dup 2) (match_dup 8)))) 0)))
15186 (clobber (reg:CC FLAGS_REG))])
15187 (parallel
15188 [(set (match_dup 4)
15189 (ashift:DWIH (match_dup 5) (match_dup 2)))
15190 (clobber (reg:CC FLAGS_REG))])]
15191 {
15192 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
15193 {
15194 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
15195 operands[2]));
15196 DONE;
15197 }
15198
15199 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
15200
15201 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
15202 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
15203
15204 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15205 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
15206 {
15207 rtx tem = gen_reg_rtx (QImode);
15208 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
15209 operands[2] = tem;
15210 }
15211
15212 if (!rtx_equal_p (operands[6], operands[7]))
15213 emit_move_insn (operands[6], operands[7]);
15214 })
15215
;; Generic double-word left shift placeholder (type "multi", output "#").
;; The destination is an early-clobbered register in both alternatives.
;; Alternative 0 ties the source to the destination (or accepts the
;; constants allowed by reg_or_pm1_operand / "Wc"); alternative 1 is the
;; APX NDD form with a separate source register.  The actual code is
;; produced by the define_split and define_peephole2 that follow.
15216 (define_insn "ashl<mode>3_doubleword"
15217 [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
15218 (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0Wc,r")
15219 (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
15220 (clobber (reg:CC FLAGS_REG))]
15221 ""
15222 "#"
15223 [(set_attr "type" "multi")
15224 (set_attr "isa" "*,apx_ndd")])
15225
;; Split a double-word left shift once epilogue_completed is set, without
;; a scratch register (NULL_RTX is passed).  ix86_split_ashl_ndd is used
;; when TARGET_APX_NDD is on and operand 1 is a register different from
;; operand 0; every other case goes through ix86_split_ashl.
15226 (define_split
15227 [(set (match_operand:DWI 0 "register_operand")
15228 (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
15229 (match_operand:QI 2 "nonmemory_operand")))
15230 (clobber (reg:CC FLAGS_REG))]
15231 "epilogue_completed"
15232 [(const_int 0)]
15233 {
15234 if (TARGET_APX_NDD
15235 && !rtx_equal_p (operands[0], operands[1])
15236 && REG_P (operands[1]))
15237 ix86_split_ashl_ndd (operands, NULL_RTX);
15238 else
15239 ix86_split_ashl (operands, NULL_RTX, <MODE>mode);
15240 DONE;
15241 })
15242
15243 ;; By default we don't ask for a scratch register, because when DWImode
15244 ;; values are manipulated, registers are already at a premium. But if
15245 ;; we have one handy, we won't turn it away.
15246
;; Scratch-register variant of the double-word left-shift split: when a
;; half-word register (operand 3) is available around the shift and
;; TARGET_CMOVE holds, the same helpers as in the define_split are called
;; but with operand 3 passed as the scratch.
15247 (define_peephole2
15248 [(match_scratch:DWIH 3 "r")
15249 (parallel [(set (match_operand:<DWI> 0 "register_operand")
15250 (ashift:<DWI>
15251 (match_operand:<DWI> 1 "nonmemory_operand")
15252 (match_operand:QI 2 "nonmemory_operand")))
15253 (clobber (reg:CC FLAGS_REG))])
15254 (match_dup 3)]
15255 "TARGET_CMOVE"
15256 [(const_int 0)]
15257 {
15258 if (TARGET_APX_NDD
15259 && !rtx_equal_p (operands[0], operands[1])
15260 && (REG_P (operands[1])))
15261 ix86_split_ashl_ndd (operands, operands[3]);
15262 else
15263 ix86_split_ashl (operands, operands[3], <DWI>mode);
15264 DONE;
15265 })
15266
;; Double-word left shift of an extended (any_extend) half-word value by
;; a constant count in [N, 2N), where N = <MODE_SIZE> * BITS_PER_UNIT.
;; After reload the destination is split into operands 0 and 3
;; (presumably low and high half respectively -- confirm against
;; split_double_mode) and bits = count - N:
;;   - bits == 0: operand 3 = operand 1 (move skipped if already equal);
;;   - bits != 0: operand 3 = operand 1 << bits.  Without TARGET_APX_NDD
;;     operand 1 is first copied into operand 3 (unless equal) and the
;;     shift is done in place; with TARGET_APX_NDD operand 1 is used
;;     directly as the shift source.
;; In both cases operand 0 is then cleared with ix86_expand_clear.
15267 (define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
15268 [(set (match_operand:<DWI> 0 "register_operand" "=r")
15269 (ashift:<DWI>
15270 (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
15271 (match_operand:QI 2 "const_int_operand")))
15272 (clobber (reg:CC FLAGS_REG))]
15273 "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT
15274 && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2"
15275 "#"
15276 "&& reload_completed"
15277 [(const_int 0)]
15278 {
15279 split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
15280 int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
15281 bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
15282 if (bits == 0)
15283 {
15284 if (!op_equal_p)
15285 emit_move_insn (operands[3], operands[1]);
15286 }
15287 else
15288 {
15289 if (!op_equal_p && !TARGET_APX_NDD)
15290 emit_move_insn (operands[3], operands[1]);
15291 rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
15292 emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
15293 }
15294 ix86_expand_clear (operands[0]);
15295 DONE;
15296 })
15297
;; 64-bit "shift left double" (shld{q}) with the count masked to 6 bits.
;; Operand 0 is read and written ("+"): it becomes
;;   (op0 << (cnt & 63)) | low DImode part of
;;                         ((zero_extend:TI op1) >> (64 - (cnt & 63))).
;; The count (operand 2) uses constraint "Jc" (presumably an immediate in
;; the valid range or %cl -- see the constraint definitions).
;; The name, prefix and condition are parameterized by the <nf_name>,
;; <nf_prefix> and <nf_condition> substitution attributes.
15298 (define_insn "x86_64_shld<nf_name>"
15299 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
15300 (ior:DI (ashift:DI (match_dup 0)
15301 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
15302 (const_int 63)))
15303 (subreg:DI
15304 (lshiftrt:TI
15305 (zero_extend:TI
15306 (match_operand:DI 1 "register_operand" "r"))
15307 (minus:QI (const_int 64)
15308 (and:QI (match_dup 2) (const_int 63)))) 0)))]
15309 "TARGET_64BIT && <nf_condition>"
15310 "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
15311 [(set_attr "type" "ishift")
15312 (set_attr "prefix_0f" "1")
15313 (set_attr "has_nf" "1")
15314 (set_attr "mode" "DI")
15315 (set_attr "athlon_decode" "vector")
15316 (set_attr "amdfam10_decode" "vector")
15317 (set_attr "bdver1_decode" "vector")])
15318
;; APX NDD form of the 64-bit shld: the destination (operand 0, register)
;; is separate from the shifted source (operand 1, register or memory).
;; Operand 2 supplies the bits shifted in and operand 3 is the count,
;; masked to 6 bits in the pattern.  Requires TARGET_APX_NDD.
15319 (define_insn "x86_64_shld_ndd<nf_name>"
15320 [(set (match_operand:DI 0 "register_operand" "=r")
15321 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
15322 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
15323 (const_int 63)))
15324 (subreg:DI
15325 (lshiftrt:TI
15326 (zero_extend:TI
15327 (match_operand:DI 2 "register_operand" "r"))
15328 (minus:QI (const_int 64)
15329 (and:QI (match_dup 3) (const_int 63)))) 0)))]
15330 "TARGET_APX_NDD && <nf_condition>"
15331 "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15332 [(set_attr "type" "ishift")
15333 (set_attr "has_nf" "1")
15334 (set_attr "mode" "DI")])
15335
;; 64-bit shld with a constant count written in its simplified form: the
;; left-shift count is operand 2 (0..63) and the right-shift count of the
;; zero-extended operand 1 is the separate constant operand 3.  The insn
;; condition requires operand 3 == 64 - operand 2, so only operand 2
;; appears in the output template.
15336 (define_insn "x86_64_shld_1<nf_name>"
15337 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
15338 (ior:DI (ashift:DI (match_dup 0)
15339 (match_operand:QI 2 "const_0_to_63_operand"))
15340 (subreg:DI
15341 (lshiftrt:TI
15342 (zero_extend:TI
15343 (match_operand:DI 1 "register_operand" "r"))
15344 (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
15345 "TARGET_64BIT
15346 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
15347 && <nf_condition>"
15348 "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
15349 [(set_attr "type" "ishift")
15350 (set_attr "prefix_0f" "1")
15351 (set_attr "has_nf" "1")
15352 (set_attr "mode" "DI")
15353 (set_attr "length_immediate" "1")
15354 (set_attr "athlon_decode" "vector")
15355 (set_attr "amdfam10_decode" "vector")
15356 (set_attr "bdver1_decode" "vector")])
15357
;; APX NDD form of the constant-count 64-bit shld: separate register
;; destination (operand 0), shifted source in operand 1, bits shifted in
;; from operand 2, left-shift count in operand 3 and the matching
;; right-shift count in operand 4.  The insn condition requires
;; operand 4 == 64 - operand 3.
15358 (define_insn "x86_64_shld_ndd_1<nf_name>"
15359 [(set (match_operand:DI 0 "register_operand" "=r")
15360 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
15361 (match_operand:QI 3 "const_0_to_63_operand"))
15362 (subreg:DI
15363 (lshiftrt:TI
15364 (zero_extend:TI
15365 (match_operand:DI 2 "register_operand" "r"))
15366 (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
15367 "TARGET_APX_NDD
15368 && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
15369 && <nf_condition>"
15370 "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15371 [(set_attr "type" "ishift")
15372 (set_attr "has_nf" "1")
15373 (set_attr "mode" "DI")
15374 (set_attr "length_immediate" "1")])
15375
;; Recognize (op4 << c2) | (op1 >> c3) in plain DImode (no zero_extend, no
;; FLAGS_REG clobber) with c3 == 64 - c2, and turn it into one of the
;; "_nf" shld/shrd insns.  Requires TARGET_64BIT and TARGET_APX_NF, and
;; ix86_pre_reload_split ().  The split ("&& 1") chooses by operand
;; identity:
;;   1. op4 is the destination: in-place shld_1_nf, op1 forced to a reg;
;;   2. op1 is the destination: in-place shrd_1_nf, op4 forced to a reg,
;;      with the two counts exchanged;
;;   3. TARGET_APX_NDD: compute into a fresh pseudo with an NDD insn --
;;      shld_ndd_1_nf when op4 is a MEM (op1 forced to a reg) or when
;;      neither is a MEM, shrd_ndd_1_nf when op1 is a MEM -- then move the
;;      pseudo into operand 0;
;;   4. otherwise: copy op4 into a fresh pseudo, shld_1_nf on it with op1
;;      forced to a reg, then move the pseudo into operand 0.
15376 (define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
15377 [(set (match_operand:DI 0 "nonimmediate_operand")
15378 (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
15379 (match_operand:QI 2 "const_0_to_63_operand"))
15380 (lshiftrt:DI
15381 (match_operand:DI 1 "nonimmediate_operand")
15382 (match_operand:QI 3 "const_0_to_63_operand"))))]
15383 "TARGET_64BIT && TARGET_APX_NF
15384 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
15385 && ix86_pre_reload_split ()"
15386 "#"
15387 "&& 1"
15388 [(const_int 0)]
15389 {
15390 if (rtx_equal_p (operands[4], operands[0]))
15391 {
15392 operands[1] = force_reg (DImode, operands[1]);
15393 emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1],
15394 operands[2], operands[3]));
15395 }
15396 else if (rtx_equal_p (operands[1], operands[0]))
15397 {
15398 operands[4] = force_reg (DImode, operands[4]);
15399 emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4],
15400 operands[3], operands[2]));
15401 }
15402 else if (TARGET_APX_NDD)
15403 {
15404 rtx tmp = gen_reg_rtx (DImode);
15405 if (MEM_P (operands[4]))
15406 {
15407 operands[1] = force_reg (DImode, operands[1]);
15408 emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
15409 operands[2], operands[3]));
15410 }
15411 else if (MEM_P (operands[1]))
15412 emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
15413 operands[3], operands[2]));
15414 else
15415 emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
15416 operands[2], operands[3]));
15417 emit_move_insn (operands[0], tmp);
15418 }
15419 else
15420 {
15421 operands[1] = force_reg (DImode, operands[1]);
15422 rtx tmp = gen_reg_rtx (DImode);
15423 emit_move_insn (tmp, operands[4]);
15424 emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1],
15425 operands[2], operands[3]));
15426 emit_move_insn (operands[0], tmp);
15427 }
15428 DONE;
15429 })
15430
;; Flag-clobbering counterpart of the "_nf" pattern: recognize
;; (op4 << c2) | (op1 >> c3) in plain DImode with c3 == 64 - c2 and a
;; FLAGS_REG clobber, and turn it into a shld/shrd insn.  Requires
;; TARGET_64BIT and ix86_pre_reload_split ().  The split ("&& 1") chooses
;; by operand identity:
;;   1. op4 is the destination: in-place shld_1, op1 forced to a reg;
;;   2. op1 is the destination: in-place shrd_1, op4 forced to a reg,
;;      with the two counts exchanged;
;;   3. TARGET_APX_NDD: compute into a fresh pseudo with an NDD insn --
;;      shld_ndd_1 when op4 is a MEM (op1 forced to a reg) or when neither
;;      is a MEM, shrd_ndd_1 when op1 is a MEM -- then move the pseudo
;;      into operand 0;
;;   4. otherwise: copy op4 into a fresh pseudo, shld_1 on it with op1
;;      forced to a reg, then move the pseudo into operand 0.
15431 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
15432 [(set (match_operand:DI 0 "nonimmediate_operand")
15433 (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
15434 (match_operand:QI 2 "const_0_to_63_operand"))
15435 (lshiftrt:DI
15436 (match_operand:DI 1 "nonimmediate_operand")
15437 (match_operand:QI 3 "const_0_to_63_operand"))))
15438 (clobber (reg:CC FLAGS_REG))]
15439 "TARGET_64BIT
15440 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
15441 && ix86_pre_reload_split ()"
15442 "#"
15443 "&& 1"
15444 [(const_int 0)]
15445 {
15446 if (rtx_equal_p (operands[4], operands[0]))
15447 {
15448 operands[1] = force_reg (DImode, operands[1]);
15449 emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3]));
15450 }
15451 else if (rtx_equal_p (operands[1], operands[0]))
15452 {
15453 operands[4] = force_reg (DImode, operands[4]);
15454 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
15455 }
15456 else if (TARGET_APX_NDD)
15457 {
15458 rtx tmp = gen_reg_rtx (DImode);
15459 if (MEM_P (operands[4]))
15460 {
15461 operands[1] = force_reg (DImode, operands[1]);
15462 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
15463 operands[2], operands[3]));
15464 }
15465 else if (MEM_P (operands[1]))
15466 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
15467 operands[3], operands[2]));
15468 else
15469 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
15470 operands[2], operands[3]));
15471 emit_move_insn (operands[0], tmp);
15472 }
15473 else
15474 {
15475 operands[1] = force_reg (DImode, operands[1]);
15476 rtx tmp = gen_reg_rtx (DImode);
15477 emit_move_insn (tmp, operands[4]);
15478 emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3]));
15479 emit_move_insn (operands[0], tmp);
15480 }
15481 DONE;
15482 }
15483 [(set_attr "has_nf" "1")])
15484
;; Recognize the unmasked form (op0 << cnt) | (op1 >> (64 - cnt)) written
;; with plain DImode shifts, and rewrite it ("#", "&& 1") into the
;; canonical shld shape used by the x86_64_shld insn: the count is masked
;; with 63 on both sides and the right shift is done on the TImode
;; zero-extension of operand 1.  Requires TARGET_64BIT and
;; ix86_pre_reload_split ().
15485 (define_insn_and_split "*x86_64_shld_2"
15486 [(set (match_operand:DI 0 "nonimmediate_operand")
15487 (ior:DI (ashift:DI (match_dup 0)
15488 (match_operand:QI 2 "nonmemory_operand"))
15489 (lshiftrt:DI (match_operand:DI 1 "register_operand")
15490 (minus:QI (const_int 64) (match_dup 2)))))
15491 (clobber (reg:CC FLAGS_REG))]
15492 "TARGET_64BIT && ix86_pre_reload_split ()"
15493 "#"
15494 "&& 1"
15495 [(parallel [(set (match_dup 0)
15496 (ior:DI (ashift:DI (match_dup 0)
15497 (and:QI (match_dup 2) (const_int 63)))
15498 (subreg:DI
15499 (lshiftrt:TI
15500 (zero_extend:TI (match_dup 1))
15501 (minus:QI (const_int 64)
15502 (and:QI (match_dup 2)
15503 (const_int 63)))) 0)))
15504 (clobber (reg:CC FLAGS_REG))])])
15505
;; APX NDD counterpart of the unmasked shld recognizer: the shifted source
;; (operand 1) is distinct from the destination (operand 0).  The split
;; ("#", "&& 1") produces the canonical masked shld shape writing a fresh
;; DImode pseudo (operand 4), with the copy of operand 4 into operand 0
;; listed inside the same parallel.  The preparation code allocates that
;; pseudo and first emits a move of operand 0 into it.
;; NOTE(review): the set of operand 0 sits inside the parallel rather than
;; after it, and the pseudo is pre-loaded from operand 0 although the
;; template overwrites it -- confirm that both are intended.
15506 (define_insn_and_split "*x86_64_shld_ndd_2"
15507 [(set (match_operand:DI 0 "nonimmediate_operand")
15508 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
15509 (match_operand:QI 3 "nonmemory_operand"))
15510 (lshiftrt:DI (match_operand:DI 2 "register_operand")
15511 (minus:QI (const_int 64) (match_dup 3)))))
15512 (clobber (reg:CC FLAGS_REG))]
15513 "TARGET_APX_NDD
15514 && ix86_pre_reload_split ()"
15515 "#"
15516 "&& 1"
15517 [(parallel [(set (match_dup 4)
15518 (ior:DI (ashift:DI (match_dup 1)
15519 (and:QI (match_dup 3) (const_int 63)))
15520 (subreg:DI
15521 (lshiftrt:TI
15522 (zero_extend:TI (match_dup 2))
15523 (minus:QI (const_int 64)
15524 (and:QI (match_dup 3)
15525 (const_int 63)))) 0)))
15526 (clobber (reg:CC FLAGS_REG))
15527 (set (match_dup 0) (match_dup 4))])]
15528 {
15529 operands[4] = gen_reg_rtx (DImode);
15530 emit_move_insn (operands[4], operands[0]);
15531 })
15532
;; SImode double-word shift left, emitted as shld{l}.  Operand 0 is read and
;; written ("+r*m"): it is shifted left by (operand 2 & 31) and IOR-ed with
;; the SImode low part of zero-extended operand 1 shifted right by
;; 32 - (operand 2 & 31).
;; <nf_name>, <nf_condition> and <nf_prefix> are substitution attributes
;; defined elsewhere in the file; has_nf is set to "1" for this insn.
15533 (define_insn "x86_shld<nf_name>"
15534 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
15535 (ior:SI (ashift:SI (match_dup 0)
15536 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
15537 (const_int 31)))
15538 (subreg:SI
15539 (lshiftrt:DI
15540 (zero_extend:DI
15541 (match_operand:SI 1 "register_operand" "r"))
15542 (minus:QI (const_int 32)
15543 (and:QI (match_dup 2) (const_int 31)))) 0)))]
15544 "<nf_condition>"
15545 "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
15546 [(set_attr "type" "ishift")
15547 (set_attr "prefix_0f" "1")
15548 (set_attr "has_nf" "1")
15549 (set_attr "mode" "SI")
15550 (set_attr "pent_pair" "np")
15551 (set_attr "athlon_decode" "vector")
15552 (set_attr "amdfam10_decode" "vector")
15553 (set_attr "bdver1_decode" "vector")])
15554
;; NDD (separate destination) variant of the SImode shld insn, available
;; only with TARGET_APX_NDD.  The result goes to write-only register
;; operand 0 ("=r"); operand 1 (register or memory) is the value shifted
;; left by (operand 3 & 31); register operand 2 supplies the bits shifted in
;; via a right shift of its zero-extension by 32 - (operand 3 & 31).
;; The output template takes four operands.
15555 (define_insn "x86_shld_ndd<nf_name>"
15556 [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
15557 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
15558 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
15559 (const_int 31)))
15560 (subreg:SI
15561 (lshiftrt:DI
15562 (zero_extend:DI
15563 (match_operand:SI 2 "register_operand" "r"))
15564 (minus:QI (const_int 32)
15565 (and:QI (match_dup 3) (const_int 31)))) 0)))]
15566 "TARGET_APX_NDD && <nf_condition>"
15567 "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15568 [(set_attr "type" "ishift")
15569 (set_attr "has_nf" "1")
15570 (set_attr "mode" "SI")])
15571
15572
;; Constant-count form of the SImode shld.  Operand 2 is the left-shift
;; count (predicate const_0_to_31_operand) applied to operand 0; operand 3
;; is the right-shift count applied to zero-extended operand 1.  The insn
;; condition requires operand 3 == 32 - operand 2.  Only operand 2 appears
;; in the output template, and length_immediate is fixed at "1".
15573 (define_insn "x86_shld_1<nf_name>"
15574 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
15575 (ior:SI (ashift:SI (match_dup 0)
15576 (match_operand:QI 2 "const_0_to_31_operand"))
15577 (subreg:SI
15578 (lshiftrt:DI
15579 (zero_extend:DI
15580 (match_operand:SI 1 "register_operand" "r"))
15581 (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
15582 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
15583 && <nf_condition>"
15584 "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
15585 [(set_attr "type" "ishift")
15586 (set_attr "prefix_0f" "1")
15587 (set_attr "length_immediate" "1")
15588 (set_attr "has_nf" "1")
15589 (set_attr "mode" "SI")
15590 (set_attr "pent_pair" "np")
15591 (set_attr "athlon_decode" "vector")
15592 (set_attr "amdfam10_decode" "vector")
15593 (set_attr "bdver1_decode" "vector")])
15594
;; NDD (separate destination) variant of the constant-count SImode shld,
;; available only with TARGET_APX_NDD.  Register operand 0 receives
;; (operand 1 << operand 3) IOR-ed with the SImode low part of zero-extended
;; operand 2 shifted right by operand 4.  The insn condition requires
;; operand 4 == 32 - operand 3; operand 4 does not appear in the output
;; template.
15595 (define_insn "x86_shld_ndd_1<nf_name>"
15596 [(set (match_operand:SI 0 "register_operand" "=r")
15597 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
15598 (match_operand:QI 3 "const_0_to_31_operand"))
15599 (subreg:SI
15600 (lshiftrt:DI
15601 (zero_extend:DI
15602 (match_operand:SI 2 "register_operand" "r"))
15603 (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
15604 "TARGET_APX_NDD
15605 && INTVAL (operands[4]) == 32 - INTVAL (operands[3])
15606 && <nf_condition>"
15607 "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15608 [(set_attr "type" "ishift")
15609 (set_attr "length_immediate" "1")
15610 (set_attr "has_nf" "1")
15611 (set_attr "mode" "SI")])
15612
;; Pre-reload splitter (TARGET_APX_NF, no FLAGS_REG clobber) for
;;   operand 0 = (operand 4 << operand 2) | (operand 1 >> operand 3)
;; with constant counts satisfying operand 3 == 32 - operand 2.
;; The C body chooses what to emit:
;;  - operand 4 equal to operand 0: in-place gen_x86_shld_1_nf, with
;;    operand 1 forced into a register;
;;  - operand 1 equal to operand 0: in-place gen_x86_shrd_1_nf, with
;;    operand 4 forced into a register and the two counts swapped;
;;  - otherwise, with TARGET_APX_NDD: an NDD insn into a fresh pseudo
;;    (shrd when only operand 1 is memory, shld in the other cases, forcing
;;    operand 1 into a register when operand 4 is memory), followed by a
;;    move of the pseudo into operand 0;
;;  - otherwise: copy operand 4 into a fresh pseudo, apply the in-place
;;    shld to it, then move the pseudo into operand 0.
15613 (define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
15614 [(set (match_operand:SI 0 "nonimmediate_operand")
15615 (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
15616 (match_operand:QI 2 "const_0_to_31_operand"))
15617 (lshiftrt:SI
15618 (match_operand:SI 1 "nonimmediate_operand")
15619 (match_operand:QI 3 "const_0_to_31_operand"))))]
15620 "TARGET_APX_NF
15621 && INTVAL (operands[3]) == 32 - INTVAL (operands[2])
15622 && ix86_pre_reload_split ()"
15623 "#"
15624 "&& 1"
15625 [(const_int 0)]
15626 {
15627 if (rtx_equal_p (operands[4], operands[0]))
15628 {
15629 operands[1] = force_reg (SImode, operands[1]);
15630 emit_insn (gen_x86_shld_1_nf (operands[0], operands[1],
15631 operands[2], operands[3]));
15632 }
15633 else if (rtx_equal_p (operands[1], operands[0]))
15634 {
15635 operands[4] = force_reg (SImode, operands[4]);
15636 emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4],
15637 operands[3], operands[2]));
15638 }
15639 else if (TARGET_APX_NDD)
15640 {
15641 rtx tmp = gen_reg_rtx (SImode);
15642 if (MEM_P (operands[4]))
15643 {
15644 operands[1] = force_reg (SImode, operands[1]);
15645 emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
15646 operands[2], operands[3]));
15647 }
15648 else if (MEM_P (operands[1]))
15649 emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
15650 operands[3], operands[2]));
15651 else
15652 emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
15653 operands[2], operands[3]));
15654 emit_move_insn (operands[0], tmp);
15655 }
15656 else
15657 {
15658 operands[1] = force_reg (SImode, operands[1]);
15659 rtx tmp = gen_reg_rtx (SImode);
15660 emit_move_insn (tmp, operands[4]);
15661 emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2],
15662 operands[3]));
15663 emit_move_insn (operands[0], tmp);
15664 }
15665 DONE;
15666 })
15667
;; Flags-clobbering pre-reload splitter for
;;   operand 0 = (operand 4 << operand 2) | (operand 1 >> operand 3)
;; with constant counts satisfying operand 3 == 32 - operand 2.
;; The C body chooses what to emit:
;;  - operand 4 equal to operand 0: in-place gen_x86_shld_1, with operand 1
;;    forced into a register;
;;  - operand 1 equal to operand 0: in-place gen_x86_shrd_1, with operand 4
;;    forced into a register and the two counts swapped;
;;  - otherwise, with TARGET_APX_NDD: an NDD insn into a fresh pseudo
;;    (shrd when only operand 1 is memory, shld in the other cases, forcing
;;    operand 1 into a register when operand 4 is memory), followed by a
;;    move of the pseudo into operand 0;
;;  - otherwise: copy operand 4 into a fresh pseudo, apply the in-place
;;    shld to it, then move the pseudo into operand 0.
15668 (define_insn_and_split "*x86_shld_shrd_1_nozext"
15669 [(set (match_operand:SI 0 "nonimmediate_operand")
15670 (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
15671 (match_operand:QI 2 "const_0_to_31_operand"))
15672 (lshiftrt:SI
15673 (match_operand:SI 1 "nonimmediate_operand")
15674 (match_operand:QI 3 "const_0_to_31_operand"))))
15675 (clobber (reg:CC FLAGS_REG))]
15676 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
15677 && ix86_pre_reload_split ()"
15678 "#"
15679 "&& 1"
15680 [(const_int 0)]
15681 {
15682 if (rtx_equal_p (operands[4], operands[0]))
15683 {
15684 operands[1] = force_reg (SImode, operands[1]);
15685 emit_insn (gen_x86_shld_1 (operands[0], operands[1], operands[2], operands[3]));
15686 }
15687 else if (rtx_equal_p (operands[1], operands[0]))
15688 {
15689 operands[4] = force_reg (SImode, operands[4]);
15690 emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
15691 }
15692 else if (TARGET_APX_NDD)
15693 {
15694 rtx tmp = gen_reg_rtx (SImode);
15695 if (MEM_P (operands[4]))
15696 {
15697 operands[1] = force_reg (SImode, operands[1]);
15698 emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
15699 operands[2], operands[3]));
15700 }
15701 else if (MEM_P (operands[1]))
15702 emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4],
15703 operands[3], operands[2]));
15704 else
15705 emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
15706 operands[2], operands[3]));
15707 emit_move_insn (operands[0], tmp);
15708 }
15709 else
15710 {
15711 operands[1] = force_reg (SImode, operands[1]);
15712 rtx tmp = gen_reg_rtx (SImode);
15713 emit_move_insn (tmp, operands[4]);
15714 emit_insn (gen_x86_shld_1 (tmp, operands[1], operands[2], operands[3]));
15715 emit_move_insn (operands[0], tmp);
15716 }
15717 DONE;
15718 }
15719 [(set_attr "has_nf" "1")])
15720
;; Pre-reload splitter for an SImode double-word left shift whose count is
;; not masked:
;;   operand 0 = (operand 0 << operand 2) | (operand 1 >> (32 - operand 2)).
;; It always splits ("&& 1") into the same operation with the count ANDed
;; with 31 and operand 1 zero-extended to DImode before the right shift,
;; keeping the FLAGS_REG clobber.
;; NOTE(review): the condition requires TARGET_64BIT although every operand
;; is SImode -- confirm this restriction is intended.
15721 (define_insn_and_split "*x86_shld_2"
15722 [(set (match_operand:SI 0 "nonimmediate_operand")
15723 (ior:SI (ashift:SI (match_dup 0)
15724 (match_operand:QI 2 "nonmemory_operand"))
15725 (lshiftrt:SI (match_operand:SI 1 "register_operand")
15726 (minus:QI (const_int 32) (match_dup 2)))))
15727 (clobber (reg:CC FLAGS_REG))]
15728 "TARGET_64BIT && ix86_pre_reload_split ()"
15729 "#"
15730 "&& 1"
15731 [(parallel [(set (match_dup 0)
15732 (ior:SI (ashift:SI (match_dup 0)
15733 (and:QI (match_dup 2) (const_int 31)))
15734 (subreg:SI
15735 (lshiftrt:DI
15736 (zero_extend:DI (match_dup 1))
15737 (minus:QI (const_int 32)
15738 (and:QI (match_dup 2)
15739 (const_int 31)))) 0)))
15740 (clobber (reg:CC FLAGS_REG))])])
15741
;; Pre-reload splitter (TARGET_APX_NDD) for an SImode double-word left shift
;; with an unmasked count and a destination distinct from the sources:
;;   operand 0 = (operand 1 << operand 3) | (operand 2 >> (32 - operand 3)).
;; It splits into two insns:
;;  1. a parallel that computes the masked form (count ANDed with 31,
;;     operand 2 zero-extended to DImode) into a fresh pseudo, operand 4,
;;     and clobbers FLAGS_REG;
;;  2. a plain move of operand 4 into operand 0.
;; The move must be a separate insn: inside the parallel it would read the
;; value operand 4 had before the shift, because all inputs of a parallel
;; are evaluated before any of its outputs are written.  Operand 4 is fully
;; overwritten by the first insn, so it needs no initialization.
;; NOTE(review): the first insn is presumably matched by the
;; flags-clobbering variant of x86_shld_ndd<nf_name> -- confirm.
15742 (define_insn_and_split "*x86_shld_ndd_2"
15743 [(set (match_operand:SI 0 "nonimmediate_operand")
15744 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
15745 (match_operand:QI 3 "nonmemory_operand"))
15746 (lshiftrt:SI (match_operand:SI 2 "register_operand")
15747 (minus:QI (const_int 32) (match_dup 3)))))
15748 (clobber (reg:CC FLAGS_REG))]
15749 "TARGET_APX_NDD
15750 && ix86_pre_reload_split ()"
15751 "#"
15752 "&& 1"
15753 [(parallel [(set (match_dup 4)
15754 (ior:SI (ashift:SI (match_dup 1)
15755 (and:QI (match_dup 3) (const_int 31)))
15756 (subreg:SI
15757 (lshiftrt:DI
15758 (zero_extend:DI (match_dup 2))
15759 (minus:QI (const_int 32)
15760 (and:QI (match_dup 3)
15761 (const_int 31)))) 0)))
15762 (clobber (reg:CC FLAGS_REG))])
15763 (set (match_dup 0) (match_dup 4))]
15764 {
15765 operands[4] = gen_reg_rtx (SImode);
15767 })
15768
;; Expander (requires TARGET_CMOVE).  The preparation statement sets
;; operand 4 to the bit size of <MODE>mode.  The expansion then
;;  1. sets the CCZ flags from (operand 2 & operand 4) compared with zero;
;;  2. if that result was nonzero, copies operand 1 into operand 0;
;;  3. if that result was nonzero, copies operand 3 into operand 1.
;; Both conditional copies are if_then_else expressions on FLAGS_REG that
;; otherwise keep the destination's current value.
15769 (define_expand "@x86_shift<mode>_adj_1"
15770 [(set (reg:CCZ FLAGS_REG)
15771 (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
15772 (match_dup 4))
15773 (const_int 0)))
15774 (set (match_operand:SWI48 0 "register_operand")
15775 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
15776 (match_operand:SWI48 1 "register_operand")
15777 (match_dup 0)))
15778 (set (match_dup 1)
15779 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
15780 (match_operand:SWI48 3 "register_operand")
15781 (match_dup 1)))]
15782 "TARGET_CMOVE"
15783 "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
15784
;; Expander with an empty condition that emits a branch-based sequence:
;;  - gen_testqi_ccz_1 on operand 2 and the bit size of <MODE>mode;
;;  - a conditional jump to a new label, taken when the CCZ flags compare
;;    equal to zero;
;;  - on the fall-through path, operand 1 is moved into operand 0 and
;;    operand 1 is then cleared with ix86_expand_clear;
;;  - the label itself, with LABEL_NUSES set to 1.
;; NOTE(review): gen_testqi_ccz_1 is defined elsewhere; presumably it sets
;; the flags from operand 2 ANDed with that constant -- confirm.
15785 (define_expand "@x86_shift<mode>_adj_2"
15786 [(use (match_operand:SWI48 0 "register_operand"))
15787 (use (match_operand:SWI48 1 "register_operand"))
15788 (use (match_operand:QI 2 "register_operand"))]
15789 ""
15790 {
15791 rtx_code_label *label = gen_label_rtx ();
15792 rtx tmp;
15793
15794 emit_insn (gen_testqi_ccz_1 (operands[2],
15795 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
15796
15797 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
15798 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15799 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15800 gen_rtx_LABEL_REF (VOIDmode, label),
15801 pc_rtx);
15802 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
15803 JUMP_LABEL (tmp) = label;
15804
15805 emit_move_insn (operands[0], operands[1]);
15806 ix86_expand_clear (operands[1]);
15807
15808 emit_label (label);
15809 LABEL_NUSES (label) = 1;
15810
15811 DONE;
15812 })
15813
15814 ;; Avoid useless masking of count operand.
;; Matches a left shift whose count is the QImode subreg of
;; (operand 2 AND operand 3), where the constant operand 3 has all of the
;; low bits GET_MODE_BITSIZE (<MODE>mode) - 1 set.  Before reload it is
;; split into a plain flags-clobbering shift whose count is the QImode low
;; part of operand 2, after forcing operand 2 into a register.
;; The "isa" attribute marks the second alternative as bmi2.
15815 (define_insn_and_split "*ashl<mode>3_mask"
15816 [(set (match_operand:SWI48 0 "nonimmediate_operand")
15817 (ashift:SWI48
15818 (match_operand:SWI48 1 "nonimmediate_operand")
15819 (subreg:QI
15820 (and
15821 (match_operand 2 "int248_register_operand" "c,r")
15822 (match_operand 3 "const_int_operand")) 0)))
15823 (clobber (reg:CC FLAGS_REG))]
15824 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
15825 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15826 == GET_MODE_BITSIZE (<MODE>mode)-1
15827 && ix86_pre_reload_split ()"
15828 "#"
15829 "&& 1"
15830 [(parallel
15831 [(set (match_dup 0)
15832 (ashift:SWI48 (match_dup 1)
15833 (match_dup 2)))
15834 (clobber (reg:CC FLAGS_REG))])]
15835 {
15836 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15837 operands[2] = gen_lowpart (QImode, operands[2]);
15838 }
15839 [(set_attr "isa" "*,bmi2")])
15840
;; Variant of the count-masking splitter in which the AND is already in
;; QImode: the count is (operand 2 AND operand 3) with QImode register
;; operand 2 and a constant operand 3 whose low bits cover
;; GET_MODE_BITSIZE (<MODE>mode) - 1.  Before reload it is split into a
;; plain flags-clobbering shift by operand 2; no preparation code is needed.
15841 (define_insn_and_split "*ashl<mode>3_mask_1"
15842 [(set (match_operand:SWI48 0 "nonimmediate_operand")
15843 (ashift:SWI48
15844 (match_operand:SWI48 1 "nonimmediate_operand")
15845 (and:QI
15846 (match_operand:QI 2 "register_operand" "c,r")
15847 (match_operand:QI 3 "const_int_operand"))))
15848 (clobber (reg:CC FLAGS_REG))]
15849 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
15850 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15851 == GET_MODE_BITSIZE (<MODE>mode)-1
15852 && ix86_pre_reload_split ()"
15853 "#"
15854 "&& 1"
15855 [(parallel
15856 [(set (match_dup 0)
15857 (ashift:SWI48 (match_dup 1)
15858 (match_dup 2)))
15859 (clobber (reg:CC FLAGS_REG))])]
15860 ""
15861 [(set_attr "isa" "*,bmi2")])
15862
;; BMI2 shlx (requires TARGET_BMI2): shifts register-or-memory operand 1
;; left by the <MODE>mode register operand 2 into register operand 0.
;; The pattern has no FLAGS_REG clobber.
15863 (define_insn "*bmi2_ashl<mode>3_1"
15864 [(set (match_operand:SWI48 0 "register_operand" "=r")
15865 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
15866 (match_operand:SWI48 2 "register_operand" "r")))]
15867 "TARGET_BMI2"
15868 "shlx\t{%2, %1, %0|%0, %1, %2}"
15869 [(set_attr "type" "ishiftx")
15870 (set_attr "mode" "<MODE>")])
15871
;; General SImode/DImode (SWI48) left shift with five alternatives; the
;; "isa" attribute tags them as *, *, bmi2, avx512bw and apx_ndd:
;;   0: in-place shift (operand 1 tied to operand 0);
;;   1: type "lea";
;;   2: type "ishiftx", count in a general register;
;;   3: mask registers ("k"); type "msklog" unless the "alu" test applies;
;;   4: NDD form with a separate destination; type "ishift".
;; Otherwise the type is "alu" when TARGET_DOUBLE_WITH_ADD holds, operand 0
;; is a register and the count is 1, else "ishift".
;; Output:
;;  - lea type: an explicit "{nf} sal" three-operand template when
;;    TARGET_APX_NDD && <nf_applied>, otherwise "#";
;;  - ishiftx and msklog types: "#";
;;  - alu type: "add %0, %0", after asserting that the count is 1 and that
;;    operand 0 equals operand 1;
;;  - default: the one-operand "sal %0" for a count of 1 when TARGET_SHIFT1
;;    or optimizing for size (not for NDD or when <nf_applied>), otherwise
;;    the two-operand form, or the three-operand form for NDD.
;; <nf_name>, <nf_condition>, <nf_prefix> and <nf_applied> are substitution
;; attributes defined elsewhere in the file.
15872 (define_insn "*ashl<mode>3_1<nf_name>"
15873 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
15874 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
15875 (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))]
15876 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)
15877 && <nf_condition>"
15878 {
15879 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15880 switch (get_attr_type (insn))
15881 {
15882 case TYPE_LEA:
15883 if (TARGET_APX_NDD && <nf_applied>)
15884 return "%{nf%} sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
15885 else
15886 return "#";
15887
15888 case TYPE_ISHIFTX:
15889 case TYPE_MSKLOG:
15890 return "#";
15891
15892 case TYPE_ALU:
15893 gcc_assert (operands[2] == const1_rtx);
15894 gcc_assert (rtx_equal_p (operands[0], operands[1]));
15895 return "<nf_prefix>add{<imodesuffix>}\t%0, %0";
15896
15897 default:
15898 if (operands[2] == const1_rtx
15899 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15900 /* For NDD form instructions related to TARGET_SHIFT1, the $1
15901 immediate do not need to be omitted as assembler will map it
15902 to use shorter encoding. */
15903 && !use_ndd && !<nf_applied>)
15904 return "sal{<imodesuffix>}\t%0";
15905 else
15906 return use_ndd ? "<nf_prefix>sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
15907 : "<nf_prefix>sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15908 }
15909 }
15910 [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
15911 (set (attr "type")
15912 (cond [(eq_attr "alternative" "1")
15913 (const_string "lea")
15914 (eq_attr "alternative" "2")
15915 (const_string "ishiftx")
15916 (eq_attr "alternative" "4")
15917 (const_string "ishift")
15918 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15919 (match_operand 0 "register_operand"))
15920 (match_operand 2 "const1_operand"))
15921 (const_string "alu")
15922 (eq_attr "alternative" "3")
15923 (const_string "msklog")
15924 ]
15925 (const_string "ishift")))
15926 (set (attr "length_immediate")
15927 (if_then_else
15928 (ior (eq_attr "type" "alu")
15929 (and (eq_attr "type" "ishift")
15930 (and (match_operand 2 "const1_operand")
15931 (ior (match_test "TARGET_SHIFT1")
15932 (match_test "optimize_function_for_size_p (cfun)")))))
15933 (const_string "0")
15934 (const_string "*")))
15935 (set_attr "has_nf" "1")
15936 (set_attr "mode" "<MODE>")])
15937
15938 ;; Convert shift to the shiftx pattern to avoid flags dependency.
15939 ;; For NF/NDD doesn't support shift count as r, it just support c<S>,
15940 ;; and it has no flag.
;; This splitter handles the form without a FLAGS_REG clobber: after reload,
;; with TARGET_BMI2, a register-destination SWI48 shift by a QImode register
;; count is rewritten so that the count is the <MODE>mode low part of
;; operand 2.
15941 (define_split
15942 [(set (match_operand:SWI48 0 "register_operand")
15943 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
15944 (match_operand:QI 2 "register_operand")))]
15945 "TARGET_BMI2 && reload_completed"
15946 [(set (match_dup 0)
15947 (ashift:SWI48 (match_dup 1) (match_dup 2)))]
15948 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
15949
;; Flags-clobbering counterpart of the shiftx conversion: after reload, with
;; TARGET_BMI2, a register-destination SWI48 shift by a QImode register
;; count is replaced by the same shift without the FLAGS_REG clobber, using
;; the <MODE>mode low part of operand 2 as the count.
15950 (define_split
15951 [(set (match_operand:SWI48 0 "register_operand")
15952 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
15953 (match_operand:QI 2 "register_operand")))
15954 (clobber (reg:CC FLAGS_REG))]
15955 "TARGET_BMI2 && reload_completed"
15956 [(set (match_dup 0)
15957 (ashift:SWI48 (match_dup 1) (match_dup 2)))]
15958 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
15959
;; BMI2 shlx producing a zero-extended result (TARGET_64BIT && TARGET_BMI2):
;; the SImode shift of operand 1 by SImode register operand 2 is
;; zero-extended into DImode register operand 0.  The template prints the
;; destination with the %k modifier; there is no FLAGS_REG clobber.
15960 (define_insn "*bmi2_ashlsi3_1_zext"
15961 [(set (match_operand:DI 0 "register_operand" "=r")
15962 (zero_extend:DI
15963 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
15964 (match_operand:SI 2 "register_operand" "r"))))]
15965 "TARGET_64BIT && TARGET_BMI2"
15966 "shlx\t{%2, %1, %k0|%k0, %1, %2}"
15967 [(set_attr "type" "ishiftx")
15968 (set_attr "mode" "SI")])
15969
;; NDD-only (TARGET_APX_NDD) QImode left shift whose result is zero-extended
;; into an SWI248x register: operand 1 (register or memory) is shifted by
;; operand 2 and written to operand 0, which the sal{b} template prints
;; with the %b modifier.
15970 (define_insn "*ashlqi3_1_zext<mode><nf_name>"
15971 [(set (match_operand:SWI248x 0 "register_operand" "=r")
15972 (zero_extend:SWI248x
15973 (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "rm")
15974 (match_operand:QI 2 "nonmemory_operand" "cI"))))]
15975 "TARGET_APX_NDD && <nf_condition>"
15976 "<nf_prefix>sal{b}\t{%2, %1, %b0|%b0, %1, %2}"
15977 [(set_attr "type" "ishiftx")
15978 (set_attr "has_nf" "1")
15979 (set_attr "mode" "QI")])
15980
;; NDD-only (TARGET_APX_NDD) HImode left shift whose result is zero-extended
;; into an SWI48x register: operand 1 (register or memory) is shifted by
;; operand 2 and written to operand 0, which the sal{w} template prints
;; with the %w modifier.
15981 (define_insn "*ashlhi3_1_zext<mode><nf_name>"
15982 [(set (match_operand:SWI48x 0 "register_operand" "=r")
15983 (zero_extend:SWI48x
15984 (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "rm")
15985 (match_operand:QI 2 "nonmemory_operand" "cI"))))]
15986 "TARGET_APX_NDD && <nf_condition>"
15987 "<nf_prefix>sal{w}\t{%2, %1, %w0|%w0, %1, %2}"
15988 [(set_attr "type" "ishiftx")
15989 (set_attr "has_nf" "1")
15990 (set_attr "mode" "HI")])
15991
;; Flags-clobbering SImode left shift with the result zero-extended to
;; DImode (TARGET_64BIT).  Four alternatives, tagged by "isa" as
;; *, *, bmi2 and apx_ndd:
;;   0: in-place shift; type "alu" when TARGET_DOUBLE_WITH_ADD holds and the
;;      count is 1, otherwise "ishift";
;;   1: type "lea", output "#";
;;   2: type "ishiftx", output "#";
;;   3: NDD form with a separate destination, type "ishift".
;; The alu type is emitted as "add %k0, %k0".  The default case uses the
;; one-operand "sal %k0" for a count of 1 when TARGET_SHIFT1 or optimizing
;; for size (never for NDD), otherwise the two-operand form, or the
;; three-operand form for NDD.
15992 (define_insn "*ashlsi3_1_zext"
15993 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
15994 (zero_extend:DI
15995 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
15996 (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
15997 (clobber (reg:CC FLAGS_REG))]
15998 "TARGET_64BIT
15999 && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
16000 {
16001 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16002 switch (get_attr_type (insn))
16003 {
16004 case TYPE_LEA:
16005 case TYPE_ISHIFTX:
16006 return "#";
16007
16008 case TYPE_ALU:
16009 gcc_assert (operands[2] == const1_rtx);
16010 return "add{l}\t%k0, %k0";
16011
16012 default:
16013 if (operands[2] == const1_rtx
16014 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16015 && !use_ndd)
16016 return "sal{l}\t%k0";
16017 else
16018 return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
16019 : "sal{l}\t{%2, %k0|%k0, %2}";
16020 }
16021 }
16022 [(set_attr "isa" "*,*,bmi2,apx_ndd")
16023 (set (attr "type")
16024 (cond [(eq_attr "alternative" "1")
16025 (const_string "lea")
16026 (eq_attr "alternative" "2")
16027 (const_string "ishiftx")
16028 (eq_attr "alternative" "3")
16029 (const_string "ishift")
16030 (and (match_test "TARGET_DOUBLE_WITH_ADD")
16031 (match_operand 2 "const1_operand"))
16032 (const_string "alu")
16033 ]
16034 (const_string "ishift")))
16035 (set (attr "length_immediate")
16036 (if_then_else
16037 (ior (eq_attr "type" "alu")
16038 (and (eq_attr "type" "ishift")
16039 (and (match_operand 2 "const1_operand")
16040 (ior (match_test "TARGET_SHIFT1")
16041 (match_test "optimize_function_for_size_p (cfun)")))))
16042 (const_string "0")
16043 (const_string "*")))
16044 (set_attr "mode" "SI")])
16045
16046 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload, with TARGET_64BIT && TARGET_BMI2, a flags-clobbering
;; zero-extended SImode shift by a QImode register count is replaced by the
;; same shift without the FLAGS_REG clobber, using the SImode low part of
;; operand 2 as the count.
16047 (define_split
16048 [(set (match_operand:DI 0 "register_operand")
16049 (zero_extend:DI
16050 (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
16051 (match_operand:QI 2 "register_operand"))))
16052 (clobber (reg:CC FLAGS_REG))]
16053 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
16054 [(set (match_dup 0)
16055 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
16056 "operands[2] = gen_lowpart (SImode, operands[2]);")
16057
;; HImode left shift with four alternatives, tagged by "isa" as
;; *, *, avx512f and apx_ndd:
;;   0: in-place shift; type "alu" when TARGET_DOUBLE_WITH_ADD holds,
;;      operand 0 is a register and the count is 1, otherwise "ishift";
;;   1: type "lea" (mode attribute SI);
;;   2: mask registers ("k"), type "msklog", output "#";
;;   3: NDD form with a separate destination, type "ishift".
;; The lea type prints an explicit "{nf} sal{w}" three-operand template when
;; TARGET_APX_NDD && <nf_applied>, otherwise "#".  The alu type is emitted
;; as "add{w} %0, %0".  The default case uses the one-operand "sal{w} %0"
;; for a count of 1 when TARGET_SHIFT1 or optimizing for size (not for NDD
;; or when <nf_applied>), otherwise the two- or three-operand form.
16058 (define_insn "*ashlhi3_1<nf_name>"
16059 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
16060 (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
16061 (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
16062 "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)
16063 && <nf_condition>"
16064 {
16065 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16066 switch (get_attr_type (insn))
16067 {
16068 case TYPE_LEA:
16069 if (TARGET_APX_NDD && <nf_applied>)
16070 return "%{nf%} sal{w}\t{%2, %1, %0|%0, %1, %2}";
16071 else
16072 return "#";
16073
16074 case TYPE_MSKLOG:
16075 return "#";
16076
16077 case TYPE_ALU:
16078 gcc_assert (operands[2] == const1_rtx);
16079 return "<nf_prefix>add{w}\t%0, %0";
16080
16081 default:
16082 if (operands[2] == const1_rtx
16083 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16084 && !use_ndd && !<nf_applied>)
16085 return "sal{w}\t%0";
16086 else
16087 return use_ndd ? "<nf_prefix>sal{w}\t{%2, %1, %0|%0, %1, %2}"
16088 : "<nf_prefix>sal{w}\t{%2, %0|%0, %2}";
16089 }
16090 }
16091 [(set_attr "isa" "*,*,avx512f,apx_ndd")
16092 (set (attr "type")
16093 (cond [(eq_attr "alternative" "1")
16094 (const_string "lea")
16095 (eq_attr "alternative" "2")
16096 (const_string "msklog")
16097 (eq_attr "alternative" "3")
16098 (const_string "ishift")
16099 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
16100 (match_operand 0 "register_operand"))
16101 (match_operand 2 "const1_operand"))
16102 (const_string "alu")
16103 ]
16104 (const_string "ishift")))
16105 (set (attr "length_immediate")
16106 (if_then_else
16107 (ior (eq_attr "type" "alu")
16108 (and (eq_attr "type" "ishift")
16109 (and (match_operand 2 "const1_operand")
16110 (ior (match_test "TARGET_SHIFT1")
16111 (match_test "optimize_function_for_size_p (cfun)")))))
16112 (const_string "0")
16113 (const_string "*")))
16114 (set_attr "has_nf" "1")
16115 (set_attr "mode" "HI,SI,HI,HI")])
16116
;; QImode left shift with five alternatives, tagged by "isa" as
;; *, *, *, avx512dq and apx_ndd, with mode attribute QI,SI,SI,QI,QI:
;;   0: in-place shift on a "q" register or memory;
;;   1: in-place shift on any general register, performed in SImode;
;;   2: type "lea";
;;   3: mask registers ("k"), type "msklog", output "#";
;;   4: NDD form with a separate destination, type "ishift".
;; Alternatives 0 and 1 get type "alu" when TARGET_DOUBLE_WITH_ADD holds,
;; operand 0 is a register and the count is 1, otherwise "ishift".
;; The alu type emits "add{l} %k0, %k0" when operand 1 is a register that is
;; not a QImode-addressable register, otherwise "add{b} %0, %0".  In the
;; default case the SI-mode alternative is printed as sal{l} on %k0 and the
;; others as sal{b}; the count is omitted for a count of 1 when TARGET_SHIFT1
;; or optimizing for size (not for NDD or when <nf_applied>).
;; preferred_for_speed restricts alternatives 1 and 4 to
;; !TARGET_PARTIAL_REG_STALL.
16117 (define_insn "*ashlqi3_1<nf_name>"
16118 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
16119 (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
16120 (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))]
16121 "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)
16122 && <nf_condition>"
16123 {
16124 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16125 switch (get_attr_type (insn))
16126 {
16127 case TYPE_LEA:
16128 if (TARGET_APX_NDD && <nf_applied>)
16129 return "%{nf%} sal{b}\t{%2, %1, %0|%0, %1, %2}";
16130 else
16131 return "#";
16132
16133 case TYPE_MSKLOG:
16134 return "#";
16135
16136 case TYPE_ALU:
16137 gcc_assert (operands[2] == const1_rtx);
16138 if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
16139 return "<nf_prefix>add{l}\t%k0, %k0";
16140 else
16141 return "<nf_prefix>add{b}\t%0, %0";
16142
16143 default:
16144 if (operands[2] == const1_rtx
16145 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16146 && !use_ndd && !<nf_applied>)
16147 {
16148 if (get_attr_mode (insn) == MODE_SI)
16149 return "sal{l}\t%k0";
16150 else
16151 return "sal{b}\t%0";
16152 }
16153 else
16154 {
16155 if (get_attr_mode (insn) == MODE_SI)
16156 return "<nf_prefix>sal{l}\t{%2, %k0|%k0, %2}";
16157 else
16158 return use_ndd ? "<nf_prefix>sal{b}\t{%2, %1, %0|%0, %1, %2}"
16159 : "<nf_prefix>sal{b}\t{%2, %0|%0, %2}";
16160 }
16161 }
16162 }
16163 [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
16164 (set (attr "type")
16165 (cond [(eq_attr "alternative" "2")
16166 (const_string "lea")
16167 (eq_attr "alternative" "3")
16168 (const_string "msklog")
16169 (eq_attr "alternative" "4")
16170 (const_string "ishift")
16171 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
16172 (match_operand 0 "register_operand"))
16173 (match_operand 2 "const1_operand"))
16174 (const_string "alu")
16175 ]
16176 (const_string "ishift")))
16177 (set (attr "length_immediate")
16178 (if_then_else
16179 (ior (eq_attr "type" "alu")
16180 (and (eq_attr "type" "ishift")
16181 (and (match_operand 2 "const1_operand")
16182 (ior (match_test "TARGET_SHIFT1")
16183 (match_test "optimize_function_for_size_p (cfun)")))))
16184 (const_string "0")
16185 (const_string "*")))
16186 (set_attr "has_nf" "1")
16187 (set_attr "mode" "QI,SI,SI,QI,QI")
16188 ;; Potential partial reg stall on alternative 1.
16189 (set (attr "preferred_for_speed")
16190 (cond [(eq_attr "alternative" "1,4")
16191 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
16192 (symbol_ref "true")))])
16193
16194 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Flags-clobbering QImode/HImode (SWI12) left shift that writes only the
;; strict_low_part of register operand 0.  It is enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing for size.
;;   Alternative 0: operand 1 is tied to operand 0; emitted as "add %0, %0"
;;     for the alu type (count 1 with TARGET_DOUBLE_WITH_ADD), otherwise as
;;     sal, omitting the count for a count of 1 when TARGET_SHIFT1 or
;;     optimizing for size.
;;   Alternative 1: operand 1 is a different register; output "#".
;; After reload, when operand 0 and operand 1 differ, the insn is split into
;; a strict_low_part copy of operand 1 into operand 0 followed by the
;; in-place shift of operand 0.
16195 (define_insn_and_split "*ashl<mode>3_1_slp"
16196 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
16197 (ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
16198 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
16199 (clobber (reg:CC FLAGS_REG))]
16200 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
16201 {
16202 if (which_alternative)
16203 return "#";
16204
16205 switch (get_attr_type (insn))
16206 {
16207 case TYPE_ALU:
16208 gcc_assert (operands[2] == const1_rtx);
16209 return "add{<imodesuffix>}\t%0, %0";
16210
16211 default:
16212 if (operands[2] == const1_rtx
16213 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
16214 return "sal{<imodesuffix>}\t%0";
16215 else
16216 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
16217 }
16218 }
16219 "&& reload_completed
16220 && !(rtx_equal_p (operands[0], operands[1]))"
16221 [(set (strict_low_part (match_dup 0)) (match_dup 1))
16222 (parallel
16223 [(set (strict_low_part (match_dup 0))
16224 (ashift:SWI12 (match_dup 0) (match_dup 2)))
16225 (clobber (reg:CC FLAGS_REG))])]
16226 ""
16227 [(set (attr "type")
16228 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
16229 (match_operand 2 "const1_operand"))
16230 (const_string "alu")
16231 ]
16232 (const_string "ishift")))
16233 (set (attr "length_immediate")
16234 (if_then_else
16235 (ior (eq_attr "type" "alu")
16236 (and (eq_attr "type" "ishift")
16237 (and (match_operand 2 "const1_operand")
16238 (ior (match_test "TARGET_SHIFT1")
16239 (match_test "optimize_function_for_size_p (cfun)")))))
16240 (const_string "0")
16241 (const_string "*")))
16242 (set_attr "mode" "<MODE>")])
16243
16244 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; After reload, a flags-clobbering shift of an index register by a constant
;; in 0..3 into a different general register is rewritten as a multiply of
;; operand 1 by (1 << count) in <LEAMODE>, without the FLAGS_REG clobber.
;; When <MODE>mode differs from <LEAMODE>mode, operands 0 and 1 are first
;; replaced by their <LEAMODE>mode low parts.
16245 (define_split
16246 [(set (match_operand:SWI 0 "general_reg_operand")
16247 (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
16248 (match_operand 2 "const_0_to_3_operand")))
16249 (clobber (reg:CC FLAGS_REG))]
16250 "reload_completed
16251 && REGNO (operands[0]) != REGNO (operands[1])"
16252 [(set (match_dup 0)
16253 (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
16254 {
16255 if (<MODE>mode != <LEAMODE>mode)
16256 {
16257 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
16258 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
16259 }
16260 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
16261 })
16262
;; Counterpart of the ashift-to-lea conversion for shifts without a
;; FLAGS_REG clobber.  After reload, and only when !TARGET_APX_NDD, a shift
;; of an index register by a constant in 0..3 into a different general
;; register is rewritten as a multiply of operand 1 by (1 << count) in
;; <LEAMODE>.  When <MODE>mode differs from <LEAMODE>mode, operands 0 and 1
;; are first replaced by their <LEAMODE>mode low parts.
16263 (define_split
16264 [(set (match_operand:SWI 0 "general_reg_operand")
16265 (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
16266 (match_operand 2 "const_0_to_3_operand")))]
16267 "reload_completed
16268 && REGNO (operands[0]) != REGNO (operands[1])
16269 && !TARGET_APX_NDD"
16270 [(set (match_dup 0)
16271 (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
16272 {
16273 if (<MODE>mode != <LEAMODE>mode)
16274 {
16275 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
16276 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
16277 }
16278 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
16279 })
16280
16281 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; Zero-extending variant (TARGET_64BIT): after reload, a flags-clobbering
;; SImode shift of an index register by a constant in 0..3, zero-extended
;; into a different DImode general register, is rewritten as the zero
;; extension of an SImode multiply of operand 1 by (1 << count), without the
;; FLAGS_REG clobber.
16282 (define_split
16283 [(set (match_operand:DI 0 "general_reg_operand")
16284 (zero_extend:DI
16285 (ashift:SI (match_operand:SI 1 "index_reg_operand")
16286 (match_operand 2 "const_0_to_3_operand"))))
16287 (clobber (reg:CC FLAGS_REG))]
16288 "TARGET_64BIT && reload_completed
16289 && REGNO (operands[0]) != REGNO (operands[1])"
16290 [(set (match_dup 0)
16291 (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
16292 {
16293 operands[1] = gen_lowpart (SImode, operands[1]);
16294 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
16295 })
16296
16297 ;; This pattern can't accept a variable shift count, since shifts by
16298 ;; zero don't affect the flags. We assume that shifts by constant
16299 ;; zero are optimized away.
;; Left shift that both stores its result in operand 0 and sets FLAGS_REG
;; from comparing that result with zero (checked with
;; ix86_match_ccmode (insn, CCGOCmode)).  Two alternatives, tagged by "isa"
;; as * and apx_ndd: in-place, and NDD with a separate register destination.
;; The insn is enabled when optimizing for size, when
;; !TARGET_PARTIAL_FLAG_REG_STALL, or for a count of 1 with TARGET_SHIFT1 or
;; with TARGET_DOUBLE_WITH_ADD and a register destination.
;; The alu type (alternative 0 only) is emitted as "add %0, %0"; otherwise
;; sal is used, omitting the count for a count of 1 when TARGET_SHIFT1 or
;; optimizing for size (never for NDD).
16300 (define_insn "*ashl<mode>3_cmp"
16301 [(set (reg FLAGS_REG)
16302 (compare
16303 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
16304 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
16305 (const_int 0)))
16306 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
16307 (ashift:SWI (match_dup 1) (match_dup 2)))]
16308 "(optimize_function_for_size_p (cfun)
16309 || !TARGET_PARTIAL_FLAG_REG_STALL
16310 || (operands[2] == const1_rtx
16311 && (TARGET_SHIFT1
16312 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
16313 && ix86_match_ccmode (insn, CCGOCmode)
16314 && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
16315 {
16316 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16317 switch (get_attr_type (insn))
16318 {
16319 case TYPE_ALU:
16320 gcc_assert (operands[2] == const1_rtx);
16321 return "add{<imodesuffix>}\t%0, %0";
16322
16323 default:
16324 if (operands[2] == const1_rtx
16325 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16326 && !use_ndd)
16327 return "sal{<imodesuffix>}\t%0";
16328 else
16329 return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16330 : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
16331 }
16332 }
16333 [(set_attr "isa" "*,apx_ndd")
16334 (set (attr "type")
16335 (cond [(eq_attr "alternative" "1")
16336 (const_string "ishift")
16337 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
16338 (match_operand 0 "register_operand"))
16339 (match_operand 2 "const1_operand"))
16340 (const_string "alu")
16341 ]
16342 (const_string "ishift")))
16343 (set (attr "length_immediate")
16344 (if_then_else
16345 (ior (eq_attr "type" "alu")
16346 (and (eq_attr "type" "ishift")
16347 (and (match_operand 2 "const1_operand")
16348 (ior (match_test "TARGET_SHIFT1")
16349 (match_test "optimize_function_for_size_p (cfun)")))))
16350 (const_string "0")
16351 (const_string "*")))
16352 (set_attr "mode" "<MODE>")])
16353
;; TARGET_64BIT variant of the flag-setting shift: FLAGS_REG is set from
;; comparing the SImode shift result with zero, and DImode register
;; operand 0 receives the zero-extended result.  The count is a constant in
;; 1..31 (predicate const_1_to_31_operand).  Two alternatives, tagged by
;; "isa" as * and apx_ndd: in-place, and NDD with a separate destination.
;; The alu type (alternative 0 only, count 1 with TARGET_DOUBLE_WITH_ADD) is
;; emitted as "add{l} %k0, %k0"; otherwise sal{l} is used on %k0, omitting
;; the count for a count of 1 when TARGET_SHIFT1 or optimizing for size
;; (never for NDD).
16354 (define_insn "*ashlsi3_cmp_zext"
16355 [(set (reg FLAGS_REG)
16356 (compare
16357 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
16358 (match_operand:QI 2 "const_1_to_31_operand"))
16359 (const_int 0)))
16360 (set (match_operand:DI 0 "register_operand" "=r,r")
16361 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
16362 "TARGET_64BIT
16363 && (optimize_function_for_size_p (cfun)
16364 || !TARGET_PARTIAL_FLAG_REG_STALL
16365 || (operands[2] == const1_rtx
16366 && (TARGET_SHIFT1
16367 || TARGET_DOUBLE_WITH_ADD)))
16368 && ix86_match_ccmode (insn, CCGOCmode)
16369 && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
16370 {
16371 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16372 switch (get_attr_type (insn))
16373 {
16374 case TYPE_ALU:
16375 gcc_assert (operands[2] == const1_rtx);
16376 return "add{l}\t%k0, %k0";
16377
16378 default:
16379 if (operands[2] == const1_rtx
16380 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16381 && !use_ndd)
16382 return "sal{l}\t%k0";
16383 else
16384 return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
16385 : "sal{l}\t{%2, %k0|%k0, %2}";
16386 }
16387 }
16388 [(set_attr "isa" "*,apx_ndd")
16389 (set (attr "type")
16390 (cond [(eq_attr "alternative" "1")
16391 (const_string "ishift")
16392 (and (match_test "TARGET_DOUBLE_WITH_ADD")
16393 (match_operand 2 "const1_operand"))
16394 (const_string "alu")
16395 ]
16396 (const_string "ishift")))
16397 (set (attr "length_immediate")
16398 (if_then_else
16399 (ior (eq_attr "type" "alu")
16400 (and (eq_attr "type" "ishift")
16401 (and (match_operand 2 "const1_operand")
16402 (ior (match_test "TARGET_SHIFT1")
16403 (match_test "optimize_function_for_size_p (cfun)")))))
16404 (const_string "0")
16405 (const_string "*")))
16406 (set_attr "mode" "SI")])
16407
;; Flags-only left shift: FLAGS_REG is set from comparing the shift result
;; with zero (checked with ix86_match_ccmode (insn, CCGOCmode)), and the
;; result itself goes to a scratch register, operand 0.  Two alternatives,
;; tagged by "isa" as * and apx_ndd: in-place on operand 1, and NDD with a
;; register-or-memory source.
;; The insn is enabled when optimizing for size, when
;; !TARGET_PARTIAL_FLAG_REG_STALL, or for a count of 1 with TARGET_SHIFT1 or
;; TARGET_DOUBLE_WITH_ADD.
;; The alu type (alternative 0 only) is emitted as "add %0, %0"; otherwise
;; sal is used, omitting the count for a count of 1 when TARGET_SHIFT1 or
;; optimizing for size (never for NDD).
16408 (define_insn "*ashl<mode>3_cconly"
16409 [(set (reg FLAGS_REG)
16410 (compare
16411 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
16412 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
16413 (const_int 0)))
16414 (clobber (match_scratch:SWI 0 "=<r>,r"))]
16415 "(optimize_function_for_size_p (cfun)
16416 || !TARGET_PARTIAL_FLAG_REG_STALL
16417 || (operands[2] == const1_rtx
16418 && (TARGET_SHIFT1
16419 || TARGET_DOUBLE_WITH_ADD)))
16420 && ix86_match_ccmode (insn, CCGOCmode)"
16421 {
16422 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
16423 switch (get_attr_type (insn))
16424 {
16425 case TYPE_ALU:
16426 gcc_assert (operands[2] == const1_rtx);
16427 return "add{<imodesuffix>}\t%0, %0";
16428
16429 default:
16430 if (operands[2] == const1_rtx
16431 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
16432 && !use_ndd)
16433 return "sal{<imodesuffix>}\t%0";
16434 else
16435 return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
16436 : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
16437 }
16438 }
16439 [(set_attr "isa" "*,apx_ndd")
16440 (set (attr "type")
16441 (cond [(eq_attr "alternative" "1")
16442 (const_string "ishift")
16443 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
16444 (match_operand 0 "register_operand"))
16445 (match_operand 2 "const1_operand"))
16446 (const_string "alu")
16447 ]
16448 (const_string "ishift")))
16449 (set (attr "length_immediate")
16450 (if_then_else
16451 (ior (eq_attr "type" "alu")
16452 (and (eq_attr "type" "ishift")
16453 (and (match_operand 2 "const1_operand")
16454 (ior (match_test "TARGET_SHIFT1")
16455 (match_test "optimize_function_for_size_p (cfun)")))))
16456 (const_string "0")
16457 (const_string "*")))
16458 (set_attr "mode" "<MODE>")])
16459
16460 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift of the 8-bit field at bit offset 8 (zero_extract ... 8 8) of
;; operand 1, stored into the same field of operand 0 and printed on the %h0
;; register name.  Alternative 0 ties operand 1 to operand 0 and emits the
;; instruction directly ("add %h0, %h0" for the "alu" type, otherwise "sal").
;; Alternative 1 outputs "#"; after reload, when operands 0 and 1 differ, it
;; is split into a copy of the field from operand 1 to operand 0 followed by
;; the same shift performed in place on operand 0.
16461 (define_insn_and_split "*ashlqi_ext<mode>_1"
16462 [(set (zero_extract:SWI248
16463 (match_operand 0 "int248_register_operand" "+Q,&Q")
16464 (const_int 8)
16465 (const_int 8))
16466 (subreg:SWI248
16467 (ashift:QI
16468 (subreg:QI
16469 (match_operator:SWI248 3 "extract_operator"
16470 [(match_operand 1 "int248_register_operand" "0,!Q")
16471 (const_int 8)
16472 (const_int 8)]) 0)
16473 (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
16474 (clobber (reg:CC FLAGS_REG))]
16475 ""
16476 {
16477 if (which_alternative)
16478 return "#";
16479
16480 switch (get_attr_type (insn))
16481 {
16482 case TYPE_ALU:
16483 gcc_assert (operands[2] == const1_rtx);
16484 return "add{b}\t%h0, %h0";
16485
16486 default:
16487 if (operands[2] == const1_rtx
16488 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
16489 return "sal{b}\t%h0";
16490 else
16491 return "sal{b}\t{%2, %h0|%h0, %2}";
16492 }
16493 }
16494 "reload_completed
16495 && !(rtx_equal_p (operands[0], operands[1]))"
16496 [(set (zero_extract:SWI248
16497 (match_dup 0) (const_int 8) (const_int 8))
16498 (zero_extract:SWI248
16499 (match_dup 1) (const_int 8) (const_int 8)))
16500 (parallel
16501 [(set (zero_extract:SWI248
16502 (match_dup 0) (const_int 8) (const_int 8))
16503 (subreg:SWI248
16504 (ashift:QI
16505 (subreg:QI
16506 (match_op_dup 3
16507 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
16508 (match_dup 2)) 0))
16509 (clobber (reg:CC FLAGS_REG))])]
16510 ""
16511 [(set (attr "type")
16512 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
16513 (match_operand 2 "const1_operand"))
16514 (const_string "alu")
16515 ]
16516 (const_string "ishift")))
16517 (set (attr "length_immediate")
16518 (if_then_else
16519 (ior (eq_attr "type" "alu")
16520 (and (eq_attr "type" "ishift")
16521 (and (match_operand 2 "const1_operand")
16522 (ior (match_test "TARGET_SHIFT1")
16523 (match_test "optimize_function_for_size_p (cfun)")))))
16524 (const_string "0")
16525 (const_string "*")))
16526 (set_attr "mode" "QI")])
16527
16528 ;; See comment above `ashl<mode>3' about how this works.
16529
;; Named expander for the right shifts covered by any_shiftrt over the SDWIM
;; modes.  It emits nothing itself: the whole expansion is delegated to
;; ix86_expand_binary_operator, with TARGET_APX_NDD passed as its last
;; argument.
16530 (define_expand "<insn><mode>3"
16531 [(set (match_operand:SDWIM 0 "<shift_operand>")
16532 (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
16533 (match_operand:QI 2 "nonmemory_operand")))]
16534 ""
16535 {
16536 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
16537 DONE;
16538 })
16539
16540 ;; Avoid useless masking of count operand.
;; Matches a right shift whose count is the QImode subreg of (and op2 op3),
;; where the constant op3 has every bit of GET_MODE_BITSIZE (<MODE>mode) - 1
;; set.  Before reload it is split into a plain shift whose count is the
;; QImode low part of operand 2 (forced into a register first), dropping the
;; AND.
16541 (define_insn_and_split "*<insn><mode>3_mask"
16542 [(set (match_operand:SWI48 0 "nonimmediate_operand")
16543 (any_shiftrt:SWI48
16544 (match_operand:SWI48 1 "nonimmediate_operand")
16545 (subreg:QI
16546 (and
16547 (match_operand 2 "int248_register_operand" "c,r")
16548 (match_operand 3 "const_int_operand")) 0)))
16549 (clobber (reg:CC FLAGS_REG))]
16550 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
16551 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
16552 == GET_MODE_BITSIZE (<MODE>mode)-1
16553 && ix86_pre_reload_split ()"
16554 "#"
16555 "&& 1"
16556 [(parallel
16557 [(set (match_dup 0)
16558 (any_shiftrt:SWI48 (match_dup 1)
16559 (match_dup 2)))
16560 (clobber (reg:CC FLAGS_REG))])]
16561 {
16562 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
16563 operands[2] = gen_lowpart (QImode, operands[2]);
16564 }
16565 [(set_attr "isa" "*,bmi2")])
16566
;; QImode-count variant of the mask-removal pattern: the shift count is
;; directly (and:QI op2 op3) rather than a subreg of a wider AND.  When op3
;; has every bit of GET_MODE_BITSIZE (<MODE>mode) - 1 set, the pattern is
;; split before reload into the same shift counted by operand 2 alone; no
;; preparation code is needed because operand 2 is already a QImode register.
16567 (define_insn_and_split "*<insn><mode>3_mask_1"
16568 [(set (match_operand:SWI48 0 "nonimmediate_operand")
16569 (any_shiftrt:SWI48
16570 (match_operand:SWI48 1 "nonimmediate_operand")
16571 (and:QI
16572 (match_operand:QI 2 "register_operand" "c,r")
16573 (match_operand:QI 3 "const_int_operand"))))
16574 (clobber (reg:CC FLAGS_REG))]
16575 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
16576 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
16577 == GET_MODE_BITSIZE (<MODE>mode)-1
16578 && ix86_pre_reload_split ()"
16579 "#"
16580 "&& 1"
16581 [(parallel
16582 [(set (match_dup 0)
16583 (any_shiftrt:SWI48 (match_dup 1)
16584 (match_dup 2)))
16585 (clobber (reg:CC FLAGS_REG))])]
16586 ""
16587 [(set_attr "isa" "*,bmi2")])
16588
;; Double-word right shift whose count is the QImode subreg of (and op2 op3)
;; with constant op3.  Accepted when the mask either clears the bit worth
;; <MODE_SIZE> * BITS_PER_UNIT or keeps every bit below
;; 2 * <MODE_SIZE> * BITS_PER_UNIT.
;; Split before reload:
;;  - if the mask keeps the <MODE_SIZE> * BITS_PER_UNIT bit, the AND is simply
;;    dropped and the generic <insn><dwi>3_doubleword insn is emitted;
;;  - otherwise the count is known to be below one word, so the shift becomes
;;    a double-precision shift into operand 4 (fed from operand 7) followed by
;;    a single-word shift of operand 7 into operand 6.  An explicit AND with
;;    the low count bits of op3 is emitted first unless op3 already keeps all
;;    of them.
;; NOTE(review): operands 4/5 are presumably the low halves and 6/7 the high
;; halves of operands 0/1 as filled in by split_double_mode -- confirm against
;; that helper.
16589 (define_insn_and_split "*<insn><dwi>3_doubleword_mask"
16590 [(set (match_operand:<DWI> 0 "register_operand")
16591 (any_shiftrt:<DWI>
16592 (match_operand:<DWI> 1 "register_operand")
16593 (subreg:QI
16594 (and
16595 (match_operand 2 "int248_register_operand" "c")
16596 (match_operand 3 "const_int_operand")) 0)))
16597 (clobber (reg:CC FLAGS_REG))]
16598 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
16599 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
16600 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
16601 && ix86_pre_reload_split ()"
16602 "#"
16603 "&& 1"
16604 [(parallel
16605 [(set (match_dup 4)
16606 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
16607 (and:QI (match_dup 2) (match_dup 8)))
16608 (subreg:DWIH
16609 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
16610 (minus:QI (match_dup 9)
16611 (and:QI (match_dup 2) (match_dup 8)))) 0)))
16612 (clobber (reg:CC FLAGS_REG))])
16613 (parallel
16614 [(set (match_dup 6)
16615 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
16616 (clobber (reg:CC FLAGS_REG))])]
16617 {
16618 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
16619 {
16620 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
16621 operands[2] = gen_lowpart (QImode, operands[2]);
16622 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
16623 operands[2]));
16624 DONE;
16625 }
16626
16627 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
16628
16629 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
16630 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
16631
16632 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
16633 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
16634 {
16635 rtx xops[3];
16636 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
16637 xops[1] = operands[2];
16638 xops[2] = GEN_INT (INTVAL (operands[3])
16639 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
16640 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
16641 operands[2] = xops[0];
16642 }
16643
16644 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
16645 operands[2] = gen_lowpart (QImode, operands[2]);
16646
16647 if (!rtx_equal_p (operands[4], operands[5]))
16648 emit_move_insn (operands[4], operands[5]);
16649 })
16650
;; QImode-count variant of the double-word masked shift: the count is
;; directly (and:QI op2 op3).  The acceptance condition and the two split
;; strategies are the same as for the subreg form: emit the generic
;; <insn><dwi>3_doubleword insn when op3 keeps the <MODE_SIZE> * BITS_PER_UNIT
;; bit, otherwise a double-precision shift plus a single-word shift.
;; When op3 does not keep all low count bits, the mask is re-applied with
;; andqi3 using op3 itself (not a narrowed constant) into a fresh QImode
;; pseudo.
;; NOTE(review): operands 4/5 are presumably the low halves and 6/7 the high
;; halves of operands 0/1 as filled in by split_double_mode -- confirm against
;; that helper.
16651 (define_insn_and_split "*<insn><dwi>3_doubleword_mask_1"
16652 [(set (match_operand:<DWI> 0 "register_operand")
16653 (any_shiftrt:<DWI>
16654 (match_operand:<DWI> 1 "register_operand")
16655 (and:QI
16656 (match_operand:QI 2 "register_operand" "c")
16657 (match_operand:QI 3 "const_int_operand"))))
16658 (clobber (reg:CC FLAGS_REG))]
16659 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
16660 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
16661 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
16662 && ix86_pre_reload_split ()"
16663 "#"
16664 "&& 1"
16665 [(parallel
16666 [(set (match_dup 4)
16667 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
16668 (and:QI (match_dup 2) (match_dup 8)))
16669 (subreg:DWIH
16670 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
16671 (minus:QI (match_dup 9)
16672 (and:QI (match_dup 2) (match_dup 8)))) 0)))
16673 (clobber (reg:CC FLAGS_REG))])
16674 (parallel
16675 [(set (match_dup 6)
16676 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
16677 (clobber (reg:CC FLAGS_REG))])]
16678 {
16679 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
16680 {
16681 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
16682 operands[2]));
16683 DONE;
16684 }
16685
16686 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
16687
16688 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
16689 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
16690
16691 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
16692 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
16693 {
16694 rtx tem = gen_reg_rtx (QImode);
16695 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
16696 operands[2] = tem;
16697 }
16698
16699 if (!rtx_equal_p (operands[4], operands[5]))
16700 emit_move_insn (operands[4], operands[5]);
16701 })
16702
;; Generic double-word right shift on registers.  It is always output as "#"
;; and split once epilogue_completed: through ix86_split_rshift_ndd when
;; TARGET_APX_NDD and the source differs from the destination, otherwise
;; through ix86_split_<insn>.  No scratch register is supplied here
;; (NULL_RTX).  Alternative 1 (source not tied to the destination) is tagged
;; apx_ndd.
16703 (define_insn_and_split "<insn><mode>3_doubleword"
16704 [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
16705 (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
16706 (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
16707 (clobber (reg:CC FLAGS_REG))]
16708 ""
16709 "#"
16710 "epilogue_completed"
16711 [(const_int 0)]
16712 {
16713 if (TARGET_APX_NDD
16714 && !rtx_equal_p (operands[0], operands[1]))
16715 ix86_split_rshift_ndd (<CODE>, operands, NULL_RTX);
16716 else
16717 ix86_split_<insn> (operands, NULL_RTX, <MODE>mode);
16718 DONE;
16719 }
16720 [(set_attr "type" "multi")
16721 (set_attr "isa" "*,apx_ndd")])
16722
16723 ;; By default we don't ask for a scratch register, because when DWImode
16724 ;; values are manipulated, registers are already at a premium. But if
16725 ;; we have one handy, we won't turn it away.
16726
;; Peephole for a double-word right shift when a DWIH scratch register
;; (operand 3) is available and TARGET_CMOVE holds.  It calls the same two
;; split helpers as the scratch-less split, but passes the scratch to them
;; instead of NULL_RTX.
16727 (define_peephole2
16728 [(match_scratch:DWIH 3 "r")
16729 (parallel [(set (match_operand:<DWI> 0 "register_operand")
16730 (any_shiftrt:<DWI>
16731 (match_operand:<DWI> 1 "register_operand")
16732 (match_operand:QI 2 "nonmemory_operand")))
16733 (clobber (reg:CC FLAGS_REG))])
16734 (match_dup 3)]
16735 "TARGET_CMOVE"
16736 [(const_int 0)]
16737 {
16738 if (TARGET_APX_NDD
16739 && !rtx_equal_p (operands[0], operands[1]))
16740 ix86_split_rshift_ndd (<CODE>, operands, operands[3]);
16741 else
16742 ix86_split_<insn> (operands, operands[3], <DWI>mode);
16743 DONE;
16744 })
16745
16746 ;; Split truncations of double word right shifts into x86_shrd_1.
;; This variant carries no FLAGS_REG clobber and requires TARGET_APX_NF.
;; It matches the low word (subreg ... 0) of a double-word right shift by a
;; constant below <MODE_SIZE> * BITS_PER_UNIT.  After reload the result is
;; built in operand 0 as (op0 >> count) | (op3 << (word bits - count)),
;; after first copying operand 1 into operand 0 when they differ.
;; NOTE(review): after split_double_mode, operands 1 and 3 are presumably the
;; low and high halves of the original operand 1 -- confirm against that
;; helper.
16747 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"
16748 [(set (match_operand:DWIH 0 "register_operand" "=&r")
16749 (subreg:DWIH
16750 (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
16751 (match_operand:QI 2 "const_int_operand")) 0))]
16752 "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
16753 "#"
16754 "&& reload_completed"
16755 [(set (match_dup 0)
16756 (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
16757 (subreg:DWIH
16758 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
16759 (match_dup 4)) 0)))]
16760 {
16761 split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
16762 operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
16763 if (!rtx_equal_p (operands[0], operands[1]))
16764 emit_move_insn (operands[0], operands[1]);
16765 })
16766
;; Flags-clobbering form of the double-word "low part of a right shift"
;; pattern.  It matches the low word (subreg ... 0) of a double-word right
;; shift by a constant below <MODE_SIZE> * BITS_PER_UNIT.  After reload the
;; result is built in operand 0 as
;; (op0 >> count) | (op3 << (word bits - count)) inside a parallel with a
;; FLAGS_REG clobber, after first copying operand 1 into operand 0 when they
;; differ.
;; NOTE(review): after split_double_mode, operands 1 and 3 are presumably the
;; low and high halves of the original operand 1 -- confirm against that
;; helper.
16767 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
16768 [(set (match_operand:DWIH 0 "register_operand" "=&r")
16769 (subreg:DWIH
16770 (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
16771 (match_operand:QI 2 "const_int_operand")) 0))
16772 (clobber (reg:CC FLAGS_REG))]
16773 "UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
16774 "#"
16775 "&& reload_completed"
16776 [(parallel
16777 [(set (match_dup 0)
16778 (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
16779 (subreg:DWIH
16780 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
16781 (match_dup 4)) 0)))
16782 (clobber (reg:CC FLAGS_REG))])]
16783 {
16784 split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
16785 operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
16786 if (!rtx_equal_p (operands[0], operands[1]))
16787 emit_move_insn (operands[0], operands[1]);
16788 }
16789 [(set_attr "has_nf" "1")])
16790
;; DImode SHRD with a register or immediate count (operand 2, masked with
;; 63).  Operand 0 is shifted right in place and ORed with the low DImode part
;; of (zero_extend:TI operand 1) shifted left by 64 - (count & 63).  Requires
;; TARGET_64BIT; printed as shrd{q} with the <nf_prefix> placeholder in front.
16791 (define_insn "x86_64_shrd<nf_name>"
16792 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
16793 (ior:DI (lshiftrt:DI (match_dup 0)
16794 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
16795 (const_int 63)))
16796 (subreg:DI
16797 (ashift:TI
16798 (zero_extend:TI
16799 (match_operand:DI 1 "register_operand" "r"))
16800 (minus:QI (const_int 64)
16801 (and:QI (match_dup 2) (const_int 63)))) 0)))]
16802 "TARGET_64BIT && <nf_condition>"
16803 "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
16804 [(set_attr "type" "ishift")
16805 (set_attr "prefix_0f" "1")
16806 (set_attr "has_nf" "1")
16807 (set_attr "mode" "DI")
16808 (set_attr "athlon_decode" "vector")
16809 (set_attr "amdfam10_decode" "vector")
16810 (set_attr "bdver1_decode" "vector")])
16811
;; Three-operand (TARGET_APX_NDD) DImode SHRD: the result goes to register
;; operand 0, the value shifted right is read from operand 1 (register or
;; memory), the bits shifted in come from register operand 2, and the count
;; is operand 3 (masked with 63).
16812 (define_insn "x86_64_shrd_ndd<nf_name>"
16813 [(set (match_operand:DI 0 "register_operand" "=r")
16814 (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
16815 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
16816 (const_int 63)))
16817 (subreg:DI
16818 (ashift:TI
16819 (zero_extend:TI
16820 (match_operand:DI 2 "register_operand" "r"))
16821 (minus:QI (const_int 64)
16822 (and:QI (match_dup 3) (const_int 63)))) 0)))]
16823 "TARGET_APX_NDD && <nf_condition>"
16824 "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16825 [(set_attr "type" "ishift")
16826 (set_attr "has_nf" "1")
16827 (set_attr "mode" "DI")])
16828
;; Constant-count DImode SHRD.  Operand 2 is the right-shift count (0..63)
;; and operand 3 the left-shift count applied to the zero-extended operand 1;
;; the insn condition requires operand 3 == 64 - operand 2.  Only operand 2
;; appears in the output template.
16829 (define_insn "x86_64_shrd_1<nf_name>"
16830 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
16831 (ior:DI (lshiftrt:DI (match_dup 0)
16832 (match_operand:QI 2 "const_0_to_63_operand"))
16833 (subreg:DI
16834 (ashift:TI
16835 (zero_extend:TI
16836 (match_operand:DI 1 "register_operand" "r"))
16837 (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
16838 "TARGET_64BIT
16839 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
16840 && <nf_condition>"
16841 "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
16842 [(set_attr "type" "ishift")
16843 (set_attr "prefix_0f" "1")
16844 (set_attr "length_immediate" "1")
16845 (set_attr "has_nf" "1")
16846 (set_attr "mode" "DI")
16847 (set_attr "athlon_decode" "vector")
16848 (set_attr "amdfam10_decode" "vector")
16849 (set_attr "bdver1_decode" "vector")])
16850
;; Constant-count, three-operand (TARGET_APX_NDD) DImode SHRD.  Operand 3 is
;; the right-shift count (0..63) and operand 4 the matching left-shift count,
;; which the insn condition requires to equal 64 - operand 3.  Only operand 3
;; appears in the output template.
16851 (define_insn "x86_64_shrd_ndd_1<nf_name>"
16852 [(set (match_operand:DI 0 "register_operand" "=r")
16853 (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
16854 (match_operand:QI 3 "const_0_to_63_operand"))
16855 (subreg:DI
16856 (ashift:TI
16857 (zero_extend:TI
16858 (match_operand:DI 2 "register_operand" "r"))
16859 (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
16860 "TARGET_APX_NDD
16861 && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
16862 && <nf_condition>"
16863 "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16864 [(set_attr "type" "ishift")
16865 (set_attr "length_immediate" "1")
16866 (set_attr "has_nf" "1")
16867 (set_attr "mode" "DI")])
16868
;; Pre-reload pattern, without a FLAGS_REG clobber (TARGET_APX_NF), for the
;; plain DImode expression (op4 >> op2) | (op1 << op3) with constant counts
;; satisfying op3 == 64 - op2.  The split chooses an instruction by operand
;; overlap:
;;  - op4 is op0: in-place shrd, with op1 forced into a register;
;;  - op1 is op0: in-place shld, with op4 forced into a register;
;;  - otherwise, with TARGET_APX_NDD: an NDD shrd into a fresh pseudo, or an
;;    NDD shld when op1 is in memory and op4 is not, then a move to op0;
;;  - otherwise: copy op4 to a fresh pseudo, shrd it in place, move to op0.
16869 (define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
16870 [(set (match_operand:DI 0 "nonimmediate_operand")
16871 (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
16872 (match_operand:QI 2 "const_0_to_63_operand"))
16873 (ashift:DI
16874 (match_operand:DI 1 "nonimmediate_operand")
16875 (match_operand:QI 3 "const_0_to_63_operand"))))]
16876 "TARGET_64BIT && TARGET_APX_NF
16877 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
16878 && ix86_pre_reload_split ()"
16879 "#"
16880 "&& 1"
16881 [(const_int 0)]
16882 {
16883 if (rtx_equal_p (operands[4], operands[0]))
16884 {
16885 operands[1] = force_reg (DImode, operands[1]);
16886 emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1],
16887 operands[2], operands[3]));
16888 }
16889 else if (rtx_equal_p (operands[1], operands[0]))
16890 {
16891 operands[4] = force_reg (DImode, operands[4]);
16892 emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4],
16893 operands[3], operands[2]));
16894 }
16895 else if (TARGET_APX_NDD)
16896 {
16897 rtx tmp = gen_reg_rtx (DImode);
16898 if (MEM_P (operands[4]))
16899 {
16900 operands[1] = force_reg (DImode, operands[1]);
16901 emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
16902 operands[2], operands[3]));
16903 }
16904 else if (MEM_P (operands[1]))
16905 emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
16906 operands[3], operands[2]));
16907 else
16908 emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
16909 operands[2], operands[3]));
16910 emit_move_insn (operands[0], tmp);
16911 }
16912 else
16913 {
16914 operands[1] = force_reg (DImode, operands[1]);
16915 rtx tmp = gen_reg_rtx (DImode);
16916 emit_move_insn (tmp, operands[4]);
16917 emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1],
16918 operands[2], operands[3]));
16919 emit_move_insn (operands[0], tmp);
16920 }
16921 DONE;
16922 })
16923
;; Flags-clobbering pre-reload pattern for the plain DImode expression
;; (op4 >> op2) | (op1 << op3) with constant counts satisfying
;; op3 == 64 - op2.  The split chooses an instruction by operand overlap:
;;  - op4 is op0: in-place shrd, with op1 forced into a register;
;;  - op1 is op0: in-place shld, with op4 forced into a register;
;;  - otherwise, with TARGET_APX_NDD: an NDD shrd into a fresh pseudo, or an
;;    NDD shld when op1 is in memory and op4 is not, then a move to op0;
;;  - otherwise: copy op4 to a fresh pseudo, shrd it in place, move to op0.
16924 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
16925 [(set (match_operand:DI 0 "nonimmediate_operand")
16926 (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
16927 (match_operand:QI 2 "const_0_to_63_operand"))
16928 (ashift:DI
16929 (match_operand:DI 1 "nonimmediate_operand")
16930 (match_operand:QI 3 "const_0_to_63_operand"))))
16931 (clobber (reg:CC FLAGS_REG))]
16932 "TARGET_64BIT
16933 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
16934 && ix86_pre_reload_split ()"
16935 "#"
16936 "&& 1"
16937 [(const_int 0)]
16938 {
16939 if (rtx_equal_p (operands[4], operands[0]))
16940 {
16941 operands[1] = force_reg (DImode, operands[1]);
16942 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
16943 }
16944 else if (rtx_equal_p (operands[1], operands[0]))
16945 {
16946 operands[4] = force_reg (DImode, operands[4]);
16947 emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2]));
16948 }
16949 else if (TARGET_APX_NDD)
16950 {
16951 rtx tmp = gen_reg_rtx (DImode);
16952 if (MEM_P (operands[4]))
16953 {
16954 operands[1] = force_reg (DImode, operands[1]);
16955 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
16956 operands[2], operands[3]));
16957 }
16958 else if (MEM_P (operands[1]))
16959 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4],
16960 operands[3], operands[2]));
16961 else
16962 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
16963 operands[2], operands[3]));
16964 emit_move_insn (operands[0], tmp);
16965 }
16966 else
16967 {
16968 operands[1] = force_reg (DImode, operands[1]);
16969 rtx tmp = gen_reg_rtx (DImode);
16970 emit_move_insn (tmp, operands[4]);
16971 emit_insn (gen_x86_64_shrd_1 (tmp, operands[1], operands[2], operands[3]));
16972 emit_move_insn (operands[0], tmp);
16973 }
16974 DONE;
16975 }
16976 [(set_attr "has_nf" "1")])
16977
;; Matches the un-masked, plain-DImode form
;; (op0 >> op2) | (op1 << (64 - op2)) with a variable or constant count, and
;; rewrites it before reload into the masked form: count ANDed with 63, and
;; the left shift done on (zero_extend:TI op1) with the low DImode subreg
;; taken.  The rewritten RTL keeps the FLAGS_REG clobber.
16978 (define_insn_and_split "*x86_64_shrd_2"
16979 [(set (match_operand:DI 0 "nonimmediate_operand")
16980 (ior:DI (lshiftrt:DI (match_dup 0)
16981 (match_operand:QI 2 "nonmemory_operand"))
16982 (ashift:DI (match_operand:DI 1 "register_operand")
16983 (minus:QI (const_int 64) (match_dup 2)))))
16984 (clobber (reg:CC FLAGS_REG))]
16985 "TARGET_64BIT && ix86_pre_reload_split ()"
16986 "#"
16987 "&& 1"
16988 [(parallel [(set (match_dup 0)
16989 (ior:DI (lshiftrt:DI (match_dup 0)
16990 (and:QI (match_dup 2) (const_int 63)))
16991 (subreg:DI
16992 (ashift:TI
16993 (zero_extend:TI (match_dup 1))
16994 (minus:QI (const_int 64)
16995 (and:QI (match_dup 2)
16996 (const_int 63)))) 0)))
16997 (clobber (reg:CC FLAGS_REG))])])
16998
;; Three-operand (TARGET_APX_NDD) counterpart of *x86_64_shrd_2.  Matches
;; (op1 >> op3) | (op2 << (64 - op3)) in plain DImode, where op3 is the QImode
;; shift count, and rewrites it before reload into the masked/TImode shape
;; computed into a fresh pseudo (operand 4) that is then stored to operand 0.
;; The left-shift amount must be 64 minus the count, i.e. a match_dup of
;; operand 3.  Operand 2 is the DImode data register, so duplicating it
;; inside a QImode minus would be mode-inconsistent and would not describe a
;; double-precision shift.  The split template and the SImode pattern
;; *x86_shrd_ndd_2 both use operand 3 here.
;; NOTE(review): the preparation statement copies operand 0 into the new
;; pseudo before the parallel overwrites it, and the final store to operand 0
;; sits inside the same parallel as the shift -- confirm this split shape is
;; intended.
16999 (define_insn_and_split "*x86_64_shrd_ndd_2"
17000 [(set (match_operand:DI 0 "nonimmediate_operand")
17001 (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
17002 (match_operand:QI 3 "nonmemory_operand"))
17003 (ashift:DI (match_operand:DI 2 "register_operand")
17004 (minus:QI (const_int 64) (match_dup 3)))))
17005 (clobber (reg:CC FLAGS_REG))]
17006 "TARGET_APX_NDD
17007 && ix86_pre_reload_split ()"
17008 "#"
17009 "&& 1"
17010 [(parallel [(set (match_dup 4)
17011 (ior:DI (lshiftrt:DI (match_dup 1)
17012 (and:QI (match_dup 3) (const_int 63)))
17013 (subreg:DI
17014 (ashift:TI
17015 (zero_extend:TI (match_dup 2))
17016 (minus:QI (const_int 64)
17017 (and:QI (match_dup 3)
17018 (const_int 63)))) 0)))
17019 (clobber (reg:CC FLAGS_REG))
17020 (set (match_dup 0) (match_dup 4))])]
17021 {
17022 operands[4] = gen_reg_rtx (DImode);
17023 emit_move_insn (operands[4], operands[0]);
17024 })
17025
;; SImode SHRD with a register or immediate count (operand 2, masked with
;; 31).  Operand 0 is shifted right in place and ORed with the low SImode part
;; of (zero_extend:DI operand 1) shifted left by 32 - (count & 31).  The only
;; enabling condition is <nf_condition>; printed as shrd{l}.
17026 (define_insn "x86_shrd<nf_name>"
17027 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
17028 (ior:SI (lshiftrt:SI (match_dup 0)
17029 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
17030 (const_int 31)))
17031 (subreg:SI
17032 (ashift:DI
17033 (zero_extend:DI
17034 (match_operand:SI 1 "register_operand" "r"))
17035 (minus:QI (const_int 32)
17036 (and:QI (match_dup 2) (const_int 31)))) 0)))]
17037 "<nf_condition>"
17038 "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
17039 [(set_attr "type" "ishift")
17040 (set_attr "prefix_0f" "1")
17041 (set_attr "has_nf" "1")
17042 (set_attr "mode" "SI")
17043 (set_attr "pent_pair" "np")
17044 (set_attr "athlon_decode" "vector")
17045 (set_attr "amdfam10_decode" "vector")
17046 (set_attr "bdver1_decode" "vector")])
17047
;; Three-operand (TARGET_APX_NDD) SImode SHRD: the result goes to register
;; operand 0, the value shifted right is read from operand 1 (register or
;; memory), the bits shifted in come from register operand 2, and the count
;; is operand 3 (masked with 31).
17048 (define_insn "x86_shrd_ndd<nf_name>"
17049 [(set (match_operand:SI 0 "register_operand" "=r")
17050 (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
17051 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
17052 (const_int 31)))
17053 (subreg:SI
17054 (ashift:DI
17055 (zero_extend:DI
17056 (match_operand:SI 2 "register_operand" "r"))
17057 (minus:QI (const_int 32)
17058 (and:QI (match_dup 3) (const_int 31)))) 0)))]
17059 "TARGET_APX_NDD && <nf_condition>"
17060 "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17061 [(set_attr "type" "ishift")
17062 (set_attr "has_nf" "1")
17063 (set_attr "mode" "SI")])
17064
;; Constant-count SImode SHRD.  Operand 2 is the right-shift count (0..31)
;; and operand 3 the left-shift count applied to the zero-extended operand 1;
;; the insn condition requires operand 3 == 32 - operand 2.  Only operand 2
;; appears in the output template.
17065 (define_insn "x86_shrd_1<nf_name>"
17066 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
17067 (ior:SI (lshiftrt:SI (match_dup 0)
17068 (match_operand:QI 2 "const_0_to_31_operand"))
17069 (subreg:SI
17070 (ashift:DI
17071 (zero_extend:DI
17072 (match_operand:SI 1 "register_operand" "r"))
17073 (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
17074 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
17075 && <nf_condition>"
17076 "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
17077 [(set_attr "type" "ishift")
17078 (set_attr "prefix_0f" "1")
17079 (set_attr "length_immediate" "1")
17080 (set_attr "has_nf" "1")
17081 (set_attr "mode" "SI")
17082 (set_attr "pent_pair" "np")
17083 (set_attr "athlon_decode" "vector")
17084 (set_attr "amdfam10_decode" "vector")
17085 (set_attr "bdver1_decode" "vector")])
17086
;; Constant-count, three-operand (TARGET_APX_NDD) SImode SHRD.  Operand 3 is
;; the right-shift count (0..31) and operand 4 the matching left-shift count,
;; which the insn condition requires to equal 32 - operand 3.  Only operand 3
;; appears in the output template.
17087 (define_insn "x86_shrd_ndd_1<nf_name>"
17088 [(set (match_operand:SI 0 "register_operand" "=r")
17089 (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
17090 (match_operand:QI 3 "const_0_to_31_operand"))
17091 (subreg:SI
17092 (ashift:DI
17093 (zero_extend:DI
17094 (match_operand:SI 2 "register_operand" "r"))
17095 (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
17096 "TARGET_APX_NDD
17097 && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))
17098 && <nf_condition>"
17099 "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17100 [(set_attr "type" "ishift")
17101 (set_attr "length_immediate" "1")
17102 (set_attr "has_nf" "1")
17103 (set_attr "mode" "SI")])
17104
;; Pre-reload pattern, without a FLAGS_REG clobber (TARGET_APX_NF), for the
;; plain SImode expression (op4 >> op2) | (op1 << op3) with constant counts
;; satisfying op3 == 32 - op2.  The split chooses an instruction by operand
;; overlap:
;;  - op4 is op0: in-place shrd, with op1 forced into a register;
;;  - op1 is op0: in-place shld, with op4 forced into a register;
;;  - otherwise, with TARGET_APX_NDD: an NDD shrd into a fresh pseudo, or an
;;    NDD shld when op1 is in memory and op4 is not, then a move to op0;
;;  - otherwise: copy op4 to a fresh pseudo, shrd it in place, move to op0.
17105 (define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
17106 [(set (match_operand:SI 0 "nonimmediate_operand")
17107 (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
17108 (match_operand:QI 2 "const_0_to_31_operand"))
17109 (ashift:SI
17110 (match_operand:SI 1 "nonimmediate_operand")
17111 (match_operand:QI 3 "const_0_to_31_operand"))))]
17112 "TARGET_APX_NF
17113 && INTVAL (operands[3]) == 32 - INTVAL (operands[2])
17114 && ix86_pre_reload_split ()"
17115 "#"
17116 "&& 1"
17117 [(const_int 0)]
17118 {
17119 if (rtx_equal_p (operands[4], operands[0]))
17120 {
17121 operands[1] = force_reg (SImode, operands[1]);
17122 emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1],
17123 operands[2], operands[3]));
17124 }
17125 else if (rtx_equal_p (operands[1], operands[0]))
17126 {
17127 operands[4] = force_reg (SImode, operands[4]);
17128 emit_insn (gen_x86_shld_1_nf (operands[0], operands[4],
17129 operands[3], operands[2]));
17130 }
17131 else if (TARGET_APX_NDD)
17132 {
17133 rtx tmp = gen_reg_rtx (SImode);
17134 if (MEM_P (operands[4]))
17135 {
17136 operands[1] = force_reg (SImode, operands[1]);
17137 emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
17138 operands[2], operands[3]));
17139 }
17140 else if (MEM_P (operands[1]))
17141 emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
17142 operands[3], operands[2]));
17143 else
17144 emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
17145 operands[2], operands[3]));
17146 emit_move_insn (operands[0], tmp);
17147 }
17148 else
17149 {
17150 operands[1] = force_reg (SImode, operands[1]);
17151 rtx tmp = gen_reg_rtx (SImode);
17152 emit_move_insn (tmp, operands[4]);
17153 emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2],
17154 operands[3]));
17155 emit_move_insn (operands[0], tmp);
17156 }
17157 DONE;
17158 })
17159
;; Flags-clobbering pre-reload pattern for the plain SImode expression
;; (op4 >> op2) | (op1 << op3) with constant counts satisfying
;; op3 == 32 - op2.  The split chooses an instruction by operand overlap:
;;  - op4 is op0: in-place shrd, with op1 forced into a register;
;;  - op1 is op0: in-place shld, with op4 forced into a register;
;;  - otherwise, with TARGET_APX_NDD: an NDD shrd into a fresh pseudo, or an
;;    NDD shld when op1 is in memory and op4 is not, then a move to op0;
;;  - otherwise: copy op4 to a fresh pseudo, shrd it in place, move to op0.
17160 (define_insn_and_split "*x86_shrd_shld_1_nozext"
17161 [(set (match_operand:SI 0 "nonimmediate_operand")
17162 (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
17163 (match_operand:QI 2 "const_0_to_31_operand"))
17164 (ashift:SI
17165 (match_operand:SI 1 "nonimmediate_operand")
17166 (match_operand:QI 3 "const_0_to_31_operand"))))
17167 (clobber (reg:CC FLAGS_REG))]
17168 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
17169 && ix86_pre_reload_split ()"
17170 "#"
17171 "&& 1"
17172 [(const_int 0)]
17173 {
17174 if (rtx_equal_p (operands[4], operands[0]))
17175 {
17176 operands[1] = force_reg (SImode, operands[1]);
17177 emit_insn (gen_x86_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
17178 }
17179 else if (rtx_equal_p (operands[1], operands[0]))
17180 {
17181 operands[4] = force_reg (SImode, operands[4]);
17182 emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2]));
17183 }
17184 else if (TARGET_APX_NDD)
17185 {
17186 rtx tmp = gen_reg_rtx (SImode);
17187 if (MEM_P (operands[4]))
17188 {
17189 operands[1] = force_reg (SImode, operands[1]);
17190 emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
17191 operands[2], operands[3]));
17192 }
17193 else if (MEM_P (operands[1]))
17194 emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4],
17195 operands[3], operands[2]));
17196 else
17197 emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
17198 operands[2], operands[3]));
17199 emit_move_insn (operands[0], tmp);
17200 }
17201 else
17202 {
17203 operands[1] = force_reg (SImode, operands[1]);
17204 rtx tmp = gen_reg_rtx (SImode);
17205 emit_move_insn (tmp, operands[4]);
17206 emit_insn (gen_x86_shrd_1 (tmp, operands[1], operands[2], operands[3]));
17207 emit_move_insn (operands[0], tmp);
17208 }
17209 DONE;
17210 }
17211 [(set_attr "has_nf" "1")])
17212
;; Matches the un-masked, plain-SImode form
;; (op0 >> op2) | (op1 << (32 - op2)) with a variable or constant count, and
;; rewrites it before reload into the masked form: count ANDed with 31, and
;; the left shift done on (zero_extend:DI op1) with the low SImode subreg
;; taken.  The rewritten RTL keeps the FLAGS_REG clobber.
;; NOTE(review): the condition requires TARGET_64BIT although the pattern is
;; SImode and the x86_shrd insn it targets has no such restriction -- confirm
;; whether the guard is intentional.
17213 (define_insn_and_split "*x86_shrd_2"
17214 [(set (match_operand:SI 0 "nonimmediate_operand")
17215 (ior:SI (lshiftrt:SI (match_dup 0)
17216 (match_operand:QI 2 "nonmemory_operand"))
17217 (ashift:SI (match_operand:SI 1 "register_operand")
17218 (minus:QI (const_int 32) (match_dup 2)))))
17219 (clobber (reg:CC FLAGS_REG))]
17220 "TARGET_64BIT && ix86_pre_reload_split ()"
17221 "#"
17222 "&& 1"
17223 [(parallel [(set (match_dup 0)
17224 (ior:SI (lshiftrt:SI (match_dup 0)
17225 (and:QI (match_dup 2) (const_int 31)))
17226 (subreg:SI
17227 (ashift:DI
17228 (zero_extend:DI (match_dup 1))
17229 (minus:QI (const_int 32)
17230 (and:QI (match_dup 2)
17231 (const_int 31)))) 0)))
17232 (clobber (reg:CC FLAGS_REG))])])
17233
;; Three-operand (TARGET_APX_NDD) counterpart of *x86_shrd_2.  Matches
;; (op1 >> op3) | (op2 << (32 - op3)) in plain SImode, where op3 is the QImode
;; shift count, and rewrites it before reload into the masked/DImode shape
;; computed into a fresh pseudo (operand 4) that is then stored to operand 0.
;; NOTE(review): the preparation statement copies operand 0 into the new
;; pseudo before the parallel overwrites it, and the final store to operand 0
;; sits inside the same parallel as the shift -- confirm this split shape is
;; intended.
17234 (define_insn_and_split "*x86_shrd_ndd_2"
17235 [(set (match_operand:SI 0 "nonimmediate_operand")
17236 (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
17237 (match_operand:QI 3 "nonmemory_operand"))
17238 (ashift:SI (match_operand:SI 2 "register_operand")
17239 (minus:QI (const_int 32) (match_dup 3)))))
17240 (clobber (reg:CC FLAGS_REG))]
17241 "TARGET_APX_NDD
17242 && ix86_pre_reload_split ()"
17243 "#"
17244 "&& 1"
17245 [(parallel [(set (match_dup 4)
17246 (ior:SI (lshiftrt:SI (match_dup 1)
17247 (and:QI (match_dup 3) (const_int 31)))
17248 (subreg:SI
17249 (ashift:DI
17250 (zero_extend:DI (match_dup 2))
17251 (minus:QI (const_int 32)
17252 (and:QI (match_dup 3)
17253 (const_int 31)))) 0)))
17254 (clobber (reg:CC FLAGS_REG))
17255 (set (match_dup 0) (match_dup 4))])]
17256 {
17257 operands[4] = gen_reg_rtx (SImode);
17258 emit_move_insn (operands[4], operands[0]);
17259 })
17260
17261 ;; Base name for insn mnemonic.
;; Per-mode mnemonic string: SI selects "{cltd|cdq}" and DI selects
;; "{cqto|cqo}"; each entry carries both spellings of the {a|b} dialect
;; alternation.
17262 (define_mode_attr cvt_mnemonic
17263 [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
17264
;; Arithmetic right shift by GET_MODE_BITSIZE (<MODE>mode) - 1, which leaves
;; every result bit equal to the sign bit of operand 1.  Alternative 0 (input
;; in "a", output in "d") is printed as <cvt_mnemonic> with no immediate and
;; no modrm byte.  Alternative 1 is the in-place sar, and alternative 2 the
;; apx_ndd three-operand sar.  Enabled only when TARGET_USE_CLTD or
;; optimizing for size.
17265 (define_insn "ashr<mode>3_cvt<nf_name>"
17266 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
17267 (ashiftrt:SWI48
17268 (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
17269 (match_operand:QI 2 "const_int_operand")))]
17270 "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
17271 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
17272 && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
17273 && <nf_condition>"
17274 "@
17275 <cvt_mnemonic>
17276 <nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}
17277 <nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17278 [(set_attr "isa" "*,*,apx_ndd")
17279 (set_attr "type" "imovx,ishift,ishift")
17280 (set_attr "prefix_0f" "0,*,*")
17281 (set_attr "length_immediate" "0,*,*")
17282 (set_attr "modrm" "0,1,1")
17283 (set_attr "has_nf" "1")
17284 (set_attr "mode" "<MODE>")])
17285
;; Zero-extended variant of the sign-broadcast shift: an SImode arithmetic
;; right shift by 31 whose result is zero-extended to DImode.  Requires
;; TARGET_64BIT and clobbers FLAGS_REG.  Alternative 0 is printed as
;; {cltd|cdq}; alternatives 1 and 2 are sar{l} on the %k0 register name,
;; with alternative 2 being the apx_ndd three-operand form.
17286 (define_insn "*ashrsi3_cvt_zext"
17287 [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
17288 (zero_extend:DI
17289 (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm")
17290 (match_operand:QI 2 "const_int_operand"))))
17291 (clobber (reg:CC FLAGS_REG))]
17292 "TARGET_64BIT && INTVAL (operands[2]) == 31
17293 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
17294 && ix86_binary_operator_ok (ASHIFTRT, SImode, operands, TARGET_APX_NDD)"
17295 "@
17296 {cltd|cdq}
17297 sar{l}\t{%2, %k0|%k0, %2}
17298 sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
17299 [(set_attr "isa" "*,*,apx_ndd")
17300 (set_attr "type" "imovx,ishift,ishift")
17301 (set_attr "prefix_0f" "0,*,*")
17302 (set_attr "length_immediate" "0,*,*")
17303 (set_attr "modrm" "0,1,1")
17304 (set_attr "mode" "SI")])
17305
;; Expander for a conditional fix-up sequence after a shift.  It emits, in
;; order:
;;  1. gen_testqi_ccz_1 on operand 2 with the constant
;;     GET_MODE_BITSIZE (<MODE>mode);
;;  2. a jump to a new label taken when the CCZmode flags compare EQ to zero;
;;  3. on the fall-through path, a move of operand 1 into operand 0 followed
;;     by ashr<mode>3_cvt, which shifts operand 1 right arithmetically by
;;     GET_MODE_BITSIZE (<MODE>mode) - 1 in place;
;;  4. the label, whose use count is set to 1.
;; NOTE(review): presumably operands 0/1 are the low/high words of a
;; double-word value and operand 2 is the shift count -- confirm against the
;; callers.
17306 (define_expand "@x86_shift<mode>_adj_3"
17307 [(use (match_operand:SWI48 0 "register_operand"))
17308 (use (match_operand:SWI48 1 "register_operand"))
17309 (use (match_operand:QI 2 "register_operand"))]
17310 ""
17311 {
17312 rtx_code_label *label = gen_label_rtx ();
17313 rtx tmp;
17314
17315 emit_insn (gen_testqi_ccz_1 (operands[2],
17316 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
17317
17318 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
17319 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17320 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17321 gen_rtx_LABEL_REF (VOIDmode, label),
17322 pc_rtx);
17323 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
17324 JUMP_LABEL (tmp) = label;
17325
17326 emit_move_insn (operands[0], operands[1]);
17327 emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
17328 GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
17329 emit_label (label);
17330 LABEL_NUSES (label) = 1;
17331
17332 DONE;
17333 })
17334
;; BMI2 right shifts (any_shiftrt) of a SWI48 value by a register count of
;; the same mode as the data.  Emits the three-operand <shift>x instruction.
;; The pattern carries no FLAGS_REG clobber.
17335 (define_insn "*bmi2_<insn><mode>3_1"
17336 [(set (match_operand:SWI48 0 "register_operand" "=r")
17337 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17338 (match_operand:SWI48 2 "register_operand" "r")))]
17339 "TARGET_BMI2"
17340 "<shift>x\t{%2, %1, %0|%0, %1, %2}"
17341 [(set_attr "type" "ishiftx")
17342 (set_attr "mode" "<MODE>")])
17343
;; General arithmetic right shift for SWI48.
;;   alt 0 (isa *):       operand 0 tied to operand 1, count in "c" or <S>.
;;   alt 1 (isa bmi2):    type ishiftx, count in a general register; the
;;                        output code returns "#", so the insn must be split.
;;   alt 2 (isa apx_ndd): three-operand form, count in "c" or <S>.
;; A count of const1_rtx uses the one-operand "sar %0" form when
;; TARGET_SHIFT1 is set or when optimizing for size, but only if neither NDD
;; nor NF is in use.
;; NOTE(review): length_immediate below tests neither the alternative nor NF,
;; whereas *lshr<mode>3_1<nf_name> restricts the zero-length case to
;; alternative 0 -- confirm the difference is intended.
17344 (define_insn "*ashr<mode>3_1<nf_name>"
17345 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
17346 (ashiftrt:SWI48
17347 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
17348 (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))]
17349 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
17350 && <nf_condition>"
17351 {
17352 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17353 switch (get_attr_type (insn))
17354 {
17355 case TYPE_ISHIFTX:
17356 return "#";
17357
17358 default:
17359 if (operands[2] == const1_rtx
17360 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17361 && !use_ndd && !<nf_applied>)
17362 return "sar{<imodesuffix>}\t%0";
17363 else
17364 return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17365 : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}";
17366 }
17367 }
17368 [(set_attr "isa" "*,bmi2,apx_ndd")
17369 (set_attr "type" "ishift,ishiftx,ishift")
17370 (set (attr "length_immediate")
17371 (if_then_else
17372 (and (match_operand 2 "const1_operand")
17373 (ior (match_test "TARGET_SHIFT1")
17374 (match_test "optimize_function_for_size_p (cfun)")))
17375 (const_string "0")
17376 (const_string "*")))
17377 (set_attr "has_nf" "1")
17378 (set_attr "mode" "<MODE>")])
17379
17380 ;; Specialization of *lshr<mode>3_1 below that extracts the SImode
17381 ;; highpart of a DImode value, but allowing it to be clobbered.
;; 64-bit only.  Always output as "#" and split after reload:
;;   - if operand 0 is an SSE register, the split emits gen_sse_shufps_v4si
;;     on the V4SImode view of that register (selectors 1, 1, 5, 5) and is
;;     DONE;
;;   - otherwise operand 0 is rewritten as the DImode register with the same
;;     register number and the split template (DImode lshiftrt by 32 with a
;;     FLAGS_REG clobber) is used.
;; Alternative 3 (untied "rm" source) is enabled for isa apx_ndd only.
17382 (define_insn_and_split "*highpartdisi2"
17383 [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
17384 (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm")
17385 (const_int 32)))
17386 (clobber (reg:CC FLAGS_REG))]
17387 "TARGET_64BIT"
17388 "#"
17389 "&& reload_completed"
17390 [(parallel
17391 [(set (match_dup 0) (lshiftrt:DI (match_dup 1) (const_int 32)))
17392 (clobber (reg:CC FLAGS_REG))])]
17393 {
17394 if (SSE_REG_P (operands[0]))
17395 {
17396 rtx tmp = gen_rtx_REG (V4SImode, REGNO (operands[0]));
17397 emit_insn (gen_sse_shufps_v4si (tmp, tmp, tmp,
17398 const1_rtx, const1_rtx,
17399 GEN_INT (5), GEN_INT (5)));
17400 DONE;
17401 }
17402 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
17403 }
17404 [(set_attr "isa" "*,*,*,apx_ndd")])
17405
;; General logical right shift for SWI48.
;;   alt 0 (isa *):        operand 0 tied to operand 1, count in "c" or <S>.
;;   alt 1 (isa bmi2):     type ishiftx, register count; output is "#".
;;   alt 2 (isa avx512bw): type msklog, "k" registers with a <KS> count;
;;                         output is "#".
;;   alt 3 (isa apx_ndd):  three-operand form, count in "c" or <S>.
;; A count of const1_rtx uses the one-operand "shr %0" form when
;; TARGET_SHIFT1 is set or when optimizing for size, but only if neither NDD
;; nor NF is in use.  length_immediate is 0 only for alternative 0 in that
;; situation.
17406 (define_insn "*lshr<mode>3_1<nf_name>"
17407 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
17408 (lshiftrt:SWI48
17409 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
17410 (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))]
17411 "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
17412 && <nf_condition>"
17413 {
17414 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17415 switch (get_attr_type (insn))
17416 {
17417 case TYPE_ISHIFTX:
17418 case TYPE_MSKLOG:
17419 return "#";
17420
17421 default:
17422 if (operands[2] == const1_rtx
17423 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17424 && !use_ndd && !<nf_applied>)
17425 return "shr{<imodesuffix>}\t%0";
17426 else
17427 return use_ndd ? "<nf_prefix>shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17428 : "<nf_prefix>shr{<imodesuffix>}\t{%2, %0|%0, %2}";
17429 }
17430 }
17431 [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
17432 (set_attr "type" "ishift,ishiftx,msklog,ishift")
17433 (set (attr "length_immediate")
17434 (if_then_else
17435 (and (and (match_operand 2 "const1_operand")
17436 (eq_attr "alternative" "0"))
17437 (ior (match_test "TARGET_SHIFT1")
17438 (match_test "optimize_function_for_size_p (cfun)")))
17439 (const_string "0")
17440 (const_string "*")))
17441 (set_attr "has_nf" "1")
17442 (set_attr "mode" "<MODE>")])
17443
17444 ;; Convert shift to the shiftx pattern to avoid flags dependency.
17445 ;; The NF/NDD forms do not support a shift count in "r", only c<S>,
17446 ;; and the NF pattern has no flags clobber.
;; Post-reload split under TARGET_BMI2 for the form WITHOUT a FLAGS_REG
;; clobber: a SWI48 right shift whose count is a QImode register is
;; re-emitted with the count viewed in <MODE>mode (gen_lowpart), which is
;; the operand shape accepted by *bmi2_<insn><mode>3_1.
17447 (define_split
17448 [(set (match_operand:SWI48 0 "register_operand")
17449 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
17450 (match_operand:QI 2 "register_operand")))]
17451 "TARGET_BMI2 && reload_completed"
17452 [(set (match_dup 0)
17453 (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
17454 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
17455
;; Same split as the clobber-free one, but matching the form WITH a
;; FLAGS_REG clobber.  The replacement drops the clobber and widens the
;; QImode count register to <MODE>mode via gen_lowpart.
17456 (define_split
17457 [(set (match_operand:SWI48 0 "register_operand")
17458 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
17459 (match_operand:QI 2 "register_operand")))
17460 (clobber (reg:CC FLAGS_REG))]
17461 "TARGET_BMI2 && reload_completed"
17462 [(set (match_dup 0)
17463 (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
17464 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
17465
;; 64-bit BMI2 variant: SImode right shift by an SImode register count, with
;; the result zero-extended into a DImode register.  Operand 0 is printed
;; through %k0.  No FLAGS_REG clobber.
17466 (define_insn "*bmi2_<insn>si3_1_zext"
17467 [(set (match_operand:DI 0 "register_operand" "=r")
17468 (zero_extend:DI
17469 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
17470 (match_operand:SI 2 "register_operand" "r"))))]
17471 "TARGET_64BIT && TARGET_BMI2"
17472 "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
17473 [(set_attr "type" "ishiftx")
17474 (set_attr "mode" "SI")])
17475
;; APX NDD only: QImode right shift whose result is zero-extended into a
;; SWI248x register.  Single alternative using the three-operand byte shift;
;; the destination is printed through %b0.  The count is "c" or an "I"
;; immediate.
17476 (define_insn "*<insn>qi3_1_zext<mode><nf_name>"
17477 [(set (match_operand:SWI248x 0 "register_operand" "=r")
17478 (zero_extend:SWI248x
17479 (any_shiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "rm")
17480 (match_operand:QI 2 "nonmemory_operand" "cI"))))]
17481 "TARGET_APX_NDD && <nf_condition>"
17482 "<nf_prefix><shift>{b}\t{%2, %1, %b0|%b0, %1, %2}"
17483 [(set_attr "type" "ishift")
17484 (set_attr "has_nf" "1")
17485 (set_attr "mode" "QI")])
17486
;; APX NDD only: HImode right shift whose result is zero-extended into a
;; SWI48x register.  Single alternative using the three-operand word shift;
;; the destination is printed through %w0.  The count is "c" or an "I"
;; immediate.
17487 (define_insn "*<insn>hi3_1_zext<mode><nf_name>"
17488 [(set (match_operand:SWI48x 0 "register_operand" "=r")
17489 (zero_extend:SWI48x
17490 (any_shiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "rm")
17491 (match_operand:QI 2 "nonmemory_operand" "cI"))))]
17492 "TARGET_APX_NDD && <nf_condition>"
17493 "<nf_prefix><shift>{w}\t{%2, %1, %w0|%w0, %1, %2}"
17494 [(set_attr "type" "ishift")
17495 (set_attr "has_nf" "1")
17496 (set_attr "mode" "HI")])
17497
;; 64-bit only: SImode right shift zero-extended into DImode, with a
;; FLAGS_REG clobber.
;;   alt 0 (isa *):        operand 0 tied to operand 1, count "c" or "I".
;;   alt 1 (isa bmi2):     type ishiftx, register count; output is "#".
;;   alt 2 (isa apx_ndd):  three-operand form.
;;   alt 3 (isa avx512bw): type msklog on "k" registers; output is "#".
;;                         The "enabled" attribute restricts it to
;;                         <CODE> == LSHIFTRT with TARGET_AVX512BW.
;; A count of const1_rtx uses the one-operand form when TARGET_SHIFT1 is set
;; or when optimizing for size, unless NDD is in use.
17498 (define_insn "*<insn>si3_1_zext"
17499 [(set (match_operand:DI 0 "register_operand" "=r,r,r,?k")
17500 (zero_extend:DI
17501 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm,k")
17502 (match_operand:QI 2 "nonmemory_operand" "cI,r,cI,I"))))
17503 (clobber (reg:CC FLAGS_REG))]
17504 "TARGET_64BIT
17505 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
17506 {
17507 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17508 switch (get_attr_type (insn))
17509 {
17510 case TYPE_ISHIFTX:
17511 return "#";
17512
17513 case TYPE_MSKLOG:
17514 return "#";
17515 default:
17516 if (operands[2] == const1_rtx
17517 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17518 && !use_ndd)
17519 return "<shift>{l}\t%k0";
17520 else
17521 return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
17522 : "<shift>{l}\t{%2, %k0|%k0, %2}";
17523 }
17524 }
17525 [(set_attr "isa" "*,bmi2,apx_ndd,avx512bw")
17526 (set_attr "type" "ishift,ishiftx,ishift,msklog")
17527 (set (attr "length_immediate")
17528 (if_then_else
17529 (and (match_operand 2 "const1_operand")
17530 (ior (match_test "TARGET_SHIFT1")
17531 (match_test "optimize_function_for_size_p (cfun)")))
17532 (const_string "0")
17533 (const_string "*")))
17534 (set_attr "mode" "SI")
17535 (set (attr "enabled")
17536 (if_then_else
17537 (eq_attr "alternative" "3")
17538 (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW")
17539 (const_string "*")))])
17540
17541 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Post-reload, 64-bit BMI2 split of the zero-extending SImode shift with a
;; QImode register count: the FLAGS_REG clobber is dropped and the count is
;; viewed in SImode (gen_lowpart), which is the operand shape accepted by
;; *bmi2_<insn>si3_1_zext.
17542 (define_split
17543 [(set (match_operand:DI 0 "register_operand")
17544 (zero_extend:DI
17545 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
17546 (match_operand:QI 2 "register_operand"))))
17547 (clobber (reg:CC FLAGS_REG))]
17548 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
17549 [(set (match_dup 0)
17550 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
17551 "operands[2] = gen_lowpart (SImode, operands[2]);")
17552
;; Arithmetic right shift for the narrow modes (SWI12).
;;   alt 0 (isa *):       operand 0 tied to operand 1.
;;   alt 1 (isa apx_ndd): three-operand form.
;; There is no shiftx alternative here.  A count of const1_rtx uses the
;; one-operand "sar %0" form when TARGET_SHIFT1 is set or when optimizing
;; for size, but only if neither NDD nor NF is in use.
17553 (define_insn "*ashr<mode>3_1<nf_name>"
17554 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
17555 (ashiftrt:SWI12
17556 (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
17557 (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))]
17558 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)
17559 && <nf_condition>"
17560 {
17561 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17562 if (operands[2] == const1_rtx
17563 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17564 && !use_ndd && !<nf_applied>)
17565 return "sar{<imodesuffix>}\t%0";
17566 else
17567 return use_ndd ? "<nf_prefix>sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17568 : "<nf_prefix>sar{<imodesuffix>}\t{%2, %0|%0, %2}";
17569 }
17570 [(set_attr "isa" "*, apx_ndd")
17571 (set_attr "type" "ishift")
17572 (set (attr "length_immediate")
17573 (if_then_else
17574 (and (match_operand 2 "const1_operand")
17575 (ior (match_test "TARGET_SHIFT1")
17576 (match_test "optimize_function_for_size_p (cfun)")))
17577 (const_string "0")
17578 (const_string "*")))
17579 (set_attr "has_nf" "1")
17580 (set_attr "mode" "<MODE>")])
17581
;; QImode logical right shift.
;;   alt 0 (isa *):        operand 0 tied to operand 1, count "c" or "I".
;;   alt 1 (isa avx512dq): type msklog on "k" registers with a "Wb" count;
;;                         output is "#".
;;   alt 2 (isa apx_ndd):  three-operand form.
;; Any attribute type other than ishift or msklog hits gcc_unreachable.
;; length_immediate is 0 only for alternative 0 with a count of 1 under
;; TARGET_SHIFT1 or size optimization.
17582 (define_insn "*lshrqi3_1<nf_name>"
17583 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r")
17584 (lshiftrt:QI
17585 (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
17586 (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))]
17587 "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)
17588 && <nf_condition>"
17589 {
17590 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17591 switch (get_attr_type (insn))
17592 {
17593 case TYPE_ISHIFT:
17594 if (operands[2] == const1_rtx
17595 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17596 && !use_ndd && !<nf_applied>)
17597 return "shr{b}\t%0";
17598 else
17599 return use_ndd ? "<nf_prefix>shr{b}\t{%2, %1, %0|%0, %1, %2}"
17600 : "<nf_prefix>shr{b}\t{%2, %0|%0, %2}";
17601 case TYPE_MSKLOG:
17602 return "#";
17603 default:
17604 gcc_unreachable ();
17605 }
17606 }
17607 [(set_attr "isa" "*,avx512dq,apx_ndd")
17608 (set_attr "type" "ishift,msklog,ishift")
17609 (set (attr "length_immediate")
17610 (if_then_else
17611 (and (and (match_operand 2 "const1_operand")
17612 (eq_attr "alternative" "0"))
17613 (ior (match_test "TARGET_SHIFT1")
17614 (match_test "optimize_function_for_size_p (cfun)")))
17615 (const_string "0")
17616 (const_string "*")))
17617 (set_attr "has_nf" "1")
17618 (set_attr "mode" "QI")])
17619
;; HImode logical right shift.  Same structure as *lshrqi3_1<nf_name>:
;;   alt 0 (isa *):       operand 0 tied to operand 1, count "c" or "I".
;;   alt 1 (isa avx512f): type msklog on "k" registers with a "Ww" count;
;;                        output is "#".
;;   alt 2 (isa apx_ndd): three-operand form.
;; Any attribute type other than ishift or msklog hits gcc_unreachable.
17620 (define_insn "*lshrhi3_1<nf_name>"
17621 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
17622 (lshiftrt:HI
17623 (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
17624 (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))]
17625 "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)
17626 && <nf_condition>"
17627 {
17628 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17629 switch (get_attr_type (insn))
17630 {
17631 case TYPE_ISHIFT:
17632 if (operands[2] == const1_rtx
17633 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17634 && !use_ndd && !<nf_applied>)
17635 return "shr{w}\t%0";
17636 else
17637 return use_ndd ? "<nf_prefix>shr{w}\t{%2, %1, %0|%0, %1, %2}"
17638 : "<nf_prefix>shr{w}\t{%2, %0|%0, %2}";
17639 case TYPE_MSKLOG:
17640 return "#";
17641 default:
17642 gcc_unreachable ();
17643 }
17644 }
17645 [(set_attr "isa" "*, avx512f, apx_ndd")
17646 (set_attr "type" "ishift,msklog,ishift")
17647 (set (attr "length_immediate")
17648 (if_then_else
17649 (and (and (match_operand 2 "const1_operand")
17650 (eq_attr "alternative" "0"))
17651 (ior (match_test "TARGET_SHIFT1")
17652 (match_test "optimize_function_for_size_p (cfun)")))
17653 (const_string "0")
17654 (const_string "*")))
17655 (set_attr "has_nf" "1")
17656 (set_attr "mode" "HI")])
17657
17658 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Right shift into the strict_low_part of a SWI12 register, with a
;; FLAGS_REG clobber.  Not available when TARGET_PARTIAL_REG_STALL is set,
;; unless optimizing for size.
;;   alt 0: operand 1 tied to operand 0; emitted directly.
;;   alt 1: operand 1 in a different register (operand 0 is earlyclobber);
;;          output is "#" and the post-reload split first copies operand 1
;;          into the low part of operand 0, then shifts operand 0 in place.
;; The split fires only when operands 0 and 1 are not rtx_equal_p.
17659 (define_insn_and_split "*<insn><mode>3_1_slp"
17660 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
17661 (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
17662 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
17663 (clobber (reg:CC FLAGS_REG))]
17664 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
17665 {
17666 if (which_alternative)
17667 return "#";
17668
17669 if (operands[2] == const1_rtx
17670 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
17671 return "<shift>{<imodesuffix>}\t%0";
17672 else
17673 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
17674 }
17675 "&& reload_completed
17676 && !(rtx_equal_p (operands[0], operands[1]))"
17677 [(set (strict_low_part (match_dup 0)) (match_dup 1))
17678 (parallel
17679 [(set (strict_low_part (match_dup 0))
17680 (any_shiftrt:SWI12 (match_dup 0) (match_dup 2)))
17681 (clobber (reg:CC FLAGS_REG))])]
17682 ""
17683 [(set_attr "type" "ishift")
17684 (set (attr "length_immediate")
17685 (if_then_else
17686 (and (match_operand 2 "const1_operand")
17687 (ior (match_test "TARGET_SHIFT1")
17688 (match_test "optimize_function_for_size_p (cfun)")))
17689 (const_string "0")
17690 (const_string "*")))
17691 (set_attr "mode" "<MODE>")])
17692
17693 ;; This pattern can't accept a variable shift count, since shifts by
17694 ;; zero don't affect the flags. We assume that shifts by constant
17695 ;; zero are optimized away.
;; Right shift by an immediate count that also sets FLAGS_REG from a
;; comparison of the shifted value with zero, and stores the shifted value.
;; Matches only when ix86_match_ccmode accepts CCGOCmode, and when
;; optimizing for size, or !TARGET_PARTIAL_FLAG_REG_STALL, or the count is 1
;; with TARGET_SHIFT1.
;;   alt 0 (isa *):       operand 0 tied to operand 1.
;;   alt 1 (isa apx_ndd): three-operand form.
17696 (define_insn "*<insn><mode>3_cmp"
17697 [(set (reg FLAGS_REG)
17698 (compare
17699 (any_shiftrt:SWI
17700 (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
17701 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
17702 (const_int 0)))
17703 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
17704 (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
17705 "(optimize_function_for_size_p (cfun)
17706 || !TARGET_PARTIAL_FLAG_REG_STALL
17707 || (operands[2] == const1_rtx
17708 && TARGET_SHIFT1))
17709 && ix86_match_ccmode (insn, CCGOCmode)
17710 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
17711 {
17712 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17713 if (operands[2] == const1_rtx
17714 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17715 && !use_ndd)
17716 return "<shift>{<imodesuffix>}\t%0";
17717 else
17718 return use_ndd ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17719 : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
17720 }
17721 [(set_attr "isa" "*,apx_ndd")
17722 (set_attr "type" "ishift")
17723 (set (attr "length_immediate")
17724 (if_then_else
17725 (and (match_operand 2 "const1_operand")
17726 (ior (match_test "TARGET_SHIFT1")
17727 (match_test "optimize_function_for_size_p (cfun)")))
17728 (const_string "0")
17729 (const_string "*")))
17730 (set_attr "mode" "<MODE>")])
17731
;; 64-bit only: SImode right shift by a constant in 1..31 that sets
;; FLAGS_REG from a comparison of the shifted value with zero and stores the
;; zero-extended result in a DImode register.  Matching conditions and
;; alternatives mirror *<insn><mode>3_cmp; operand 0 is printed through %k0.
17732 (define_insn "*<insn>si3_cmp_zext"
17733 [(set (reg FLAGS_REG)
17734 (compare
17735 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
17736 (match_operand:QI 2 "const_1_to_31_operand"))
17737 (const_int 0)))
17738 (set (match_operand:DI 0 "register_operand" "=r,r")
17739 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
17740 "TARGET_64BIT
17741 && (optimize_function_for_size_p (cfun)
17742 || !TARGET_PARTIAL_FLAG_REG_STALL
17743 || (operands[2] == const1_rtx
17744 && TARGET_SHIFT1))
17745 && ix86_match_ccmode (insn, CCGOCmode)
17746 && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
17747 {
17748 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17749 if (operands[2] == const1_rtx
17750 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17751 && !use_ndd)
17752 return "<shift>{l}\t%k0";
17753 else
17754 return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
17755 : "<shift>{l}\t{%2, %k0|%k0, %2}";
17756 }
17757 [(set_attr "isa" "*,apx_ndd")
17758 (set_attr "type" "ishift")
17759 (set (attr "length_immediate")
17760 (if_then_else
17761 (and (match_operand 2 "const1_operand")
17762 (ior (match_test "TARGET_SHIFT1")
17763 (match_test "optimize_function_for_size_p (cfun)")))
17764 (const_string "0")
17765 (const_string "*")))
17766 (set_attr "mode" "SI")])
17767
;; Flags-only variant of *<insn><mode>3_cmp: FLAGS_REG is set from the
;; comparison of the shifted value with zero, while the shifted value itself
;; goes to a clobbered scratch register (operand 0).  Unlike the _cmp
;; pattern, the insn condition has no ix86_binary_operator_ok test.
;;   alt 0 (isa *):       scratch tied to operand 1.
;;   alt 1 (isa apx_ndd): three-operand form.
17768 (define_insn "*<insn><mode>3_cconly"
17769 [(set (reg FLAGS_REG)
17770 (compare
17771 (any_shiftrt:SWI
17772 (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
17773 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
17774 (const_int 0)))
17775 (clobber (match_scratch:SWI 0 "=<r>,r"))]
17776 "(optimize_function_for_size_p (cfun)
17777 || !TARGET_PARTIAL_FLAG_REG_STALL
17778 || (operands[2] == const1_rtx
17779 && TARGET_SHIFT1))
17780 && ix86_match_ccmode (insn, CCGOCmode)"
17781 {
17782 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
17783 if (operands[2] == const1_rtx
17784 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
17785 && !use_ndd)
17786 return "<shift>{<imodesuffix>}\t%0";
17787 else
17788 return use_ndd
17789 ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
17790 : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
17791 }
17792 [(set_attr "isa" "*,apx_ndd")
17793 (set_attr "type" "ishift")
17794 (set (attr "length_immediate")
17795 (if_then_else
17796 (and (match_operand 2 "const1_operand")
17797 (ior (match_test "TARGET_SHIFT1")
17798 (match_test "optimize_function_for_size_p (cfun)")))
17799 (const_string "0")
17800 (const_string "*")))
17801 (set_attr "mode" "<MODE>")])
17802
17803 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Right shift of the byte held in bits 8..15 of a "Q" register (8-bit
;; zero_extract at position 8), written back to bits 8..15 of operand 0.
;; Clobbers FLAGS_REG.  The shift is emitted on %h0.
;;   alt 0: operand 1 tied to operand 0; emitted directly.
;;   alt 1: source in a different register (operand 0 is earlyclobber);
;;          output is "#" and the post-reload split first copies bits 8..15
;;          of operand 1 into bits 8..15 of operand 0, then shifts in place.
;; The insn condition is empty, so the split condition is written without
;; a leading "&&".
17804 (define_insn_and_split "*<insn>qi_ext<mode>_1"
17805 [(set (zero_extract:SWI248
17806 (match_operand 0 "int248_register_operand" "+Q,&Q")
17807 (const_int 8)
17808 (const_int 8))
17809 (subreg:SWI248
17810 (any_shiftrt:QI
17811 (subreg:QI
17812 (match_operator:SWI248 3 "extract_operator"
17813 [(match_operand 1 "int248_register_operand" "0,!Q")
17814 (const_int 8)
17815 (const_int 8)]) 0)
17816 (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
17817 (clobber (reg:CC FLAGS_REG))]
17818 ""
17819 {
17820 if (which_alternative)
17821 return "#";
17822
17823 if (operands[2] == const1_rtx
17824 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
17825 return "<shift>{b}\t%h0";
17826 else
17827 return "<shift>{b}\t{%2, %h0|%h0, %2}";
17828 }
17829 "reload_completed
17830 && !(rtx_equal_p (operands[0], operands[1]))"
17831 [(set (zero_extract:SWI248
17832 (match_dup 0) (const_int 8) (const_int 8))
17833 (zero_extract:SWI248
17834 (match_dup 1) (const_int 8) (const_int 8)))
17835 (parallel
17836 [(set (zero_extract:SWI248
17837 (match_dup 0) (const_int 8) (const_int 8))
17838 (subreg:SWI248
17839 (any_shiftrt:QI
17840 (subreg:QI
17841 (match_op_dup 3
17842 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
17843 (match_dup 2)) 0))
17844 (clobber (reg:CC FLAGS_REG))])]
17845 ""
17846 [(set_attr "type" "ishift")
17847 (set (attr "length_immediate")
17848 (if_then_else
17849 (and (match_operand 2 "const1_operand")
17850 (ior (match_test "TARGET_SHIFT1")
17851 (match_test "optimize_function_for_size_p (cfun)")))
17852 (const_string "0")
17853 (const_string "*")))
17854 (set_attr "mode" "QI")])
17855
;; Double-word (x << N) >> N with an arithmetic right shift, where both
;; counts are the same constant and N is below the bit size of one word
;; (<MODE_SIZE> * BITS_PER_UNIT).  Operand 1 is tied to operand 0.
;; After reload the insn is split into a word-mode left shift followed by a
;; word-mode arithmetic right shift, both by N and both applied to
;; operand 4 only.  split_double_mode fills operand 0 and operand 4 from the
;; double-word operand 0 (presumably the low and high halves respectively,
;; given the pattern name -- confirm against split_double_mode).
17856 (define_insn_and_split "*extend<dwi>2_doubleword_highpart"
17857 [(set (match_operand:<DWI> 0 "register_operand" "=r")
17858 (ashiftrt:<DWI>
17859 (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
17860 (match_operand:QI 2 "const_int_operand"))
17861 (match_operand:QI 3 "const_int_operand")))
17862 (clobber (reg:CC FLAGS_REG))]
17863 "INTVAL (operands[2]) == INTVAL (operands[3])
17864 && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
17865 "#"
17866 "&& reload_completed"
17867 [(parallel [(set (match_dup 4)
17868 (ashift:DWIH (match_dup 4) (match_dup 2)))
17869 (clobber (reg:CC FLAGS_REG))])
17870 (parallel [(set (match_dup 4)
17871 (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
17872 (clobber (reg:CC FLAGS_REG))])]
17873 "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
17874
;; V2DImode (x << N) >> N with an arithmetic right shift, where both counts
;; are the same constant below 32.  Only for 32-bit targets with TARGET_STV
;; and TARGET_AVX512VL.  Split after reload into a V2DI left shift into
;; operand 0 followed by a V2DI arithmetic right shift of operand 0 in place.
17875 (define_insn_and_split "*extendv2di2_highpart_stv"
17876 [(set (match_operand:V2DI 0 "register_operand" "=v")
17877 (ashiftrt:V2DI
17878 (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
17879 (match_operand:QI 2 "const_int_operand"))
17880 (match_operand:QI 3 "const_int_operand")))]
17881 "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
17882 && INTVAL (operands[2]) == INTVAL (operands[3])
17883 && UINTVAL (operands[2]) < 32"
17884 "#"
17885 "&& reload_completed"
17886 [(set (match_dup 0)
17887 (ashift:V2DI (match_dup 1) (match_dup 2)))
17888 (set (match_dup 0)
17889 (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
17890
17891 ;; Without AVX512VL, split this instruction before reload.
;; Same V2DI (x << N) >> N shape as *extendv2di2_highpart_stv, for
;; !TARGET_AVX512VL.  Matches only while ix86_pre_reload_split () holds and
;; is split unconditionally ("&& 1").
;;   - With TARGET_XOP: operand 4 becomes a fresh V2DI pseudo and the split
;;     template (V2DI left shift into operand 4, then V2DI arithmetic right
;;     shift into operand 0) is used.
;;   - Without TARGET_XOP: operand 1 is viewed as V4SI (tmp1), shifted left
;;     and then arithmetically right by N as V4SI (tmp2, tmp3), and a
;;     constant permutation with indices {0, 5, 2, 7} over (tmp1, tmp3)
;;     builds tmp4, i.e. elements 0 and 2 come from tmp1 and elements 1 and
;;     3 from tmp3.  tmp4 is moved to operand 0 as V2DI and the split is
;;     DONE.  The permutation expansion is asserted to succeed.
17892 (define_insn_and_split "*extendv2di2_highpart_stv_noavx512vl"
17893 [(set (match_operand:V2DI 0 "register_operand" "=v")
17894 (ashiftrt:V2DI
17895 (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
17896 (match_operand:QI 2 "const_int_operand"))
17897 (match_operand:QI 3 "const_int_operand")))]
17898 "!TARGET_AVX512VL
17899 && INTVAL (operands[2]) == INTVAL (operands[3])
17900 && UINTVAL (operands[2]) < 32
17901 && ix86_pre_reload_split ()"
17902 "#"
17903 "&& 1"
17904 [(set (match_dup 4)
17905 (ashift:V2DI (match_dup 1) (match_dup 2)))
17906 (set (match_dup 0)
17907 (ashiftrt:V2DI (match_dup 4) (match_dup 2)))]
17908 {
17909 if (!TARGET_XOP)
17910 {
17911 rtx op0 = operands[0];
17912 rtx op2 = operands[2];
17913 rtx tmp1 = gen_reg_rtx (V4SImode);
17914 rtx tmp2 = gen_reg_rtx (V4SImode);
17915 rtx tmp3 = gen_reg_rtx (V4SImode);
17916 rtx tmp4 = gen_reg_rtx (V4SImode);
17917 emit_move_insn (tmp1, lowpart_subreg (V4SImode, operands[1], V2DImode));
17918 emit_insn (gen_ashlv4si3 (tmp2, tmp1, op2));
17919 emit_insn (gen_ashrv4si3 (tmp3, tmp2, op2));
17920 vec_perm_builder sel (4, 4, 1);
17921 sel.quick_grow (4);
17922 sel[0] = 0;
17923 sel[1] = 5;
17924 sel[2] = 2;
17925 sel[3] = 7;
17926 vec_perm_indices indices(sel, 2, 4);
17927 bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode, tmp4,
17928 tmp1, tmp3, indices);
17929 gcc_assert (ok);
17930 emit_move_insn (op0, lowpart_subreg (V2DImode, tmp4, V4SImode));
17931 DONE;
17932 }
17933 else
17934 operands[4] = gen_reg_rtx (V2DImode);
17935 })
17936 \f
17937 ;; Rotate instructions
17938
;; TImode rotate expander (64-bit only).  Three cases on the count:
;;   - constant in 1..63: ix86_<insn>ti3_doubleword;
;;   - constant 64:       <insn>64ti2_doubleword, after forcing operand 1
;;                        into a register;
;;   - anything else:     the count is forced into a QImode register, both
;;                        DImode halves of the source are copied to fresh
;;                        pseudos, and two double-precision shifts (shld for
;;                        ROTATE, shrd otherwise) combine each half with the
;;                        opposite source half.  The results are moved to
;;                        the destination halves and x86_shiftdi_adj_1 is
;;                        emitted on (dst_lo, dst_hi, amount, tmp_lo).
;;                        NOTE(review): x86_shiftdi_adj_1 presumably fixes up
;;                        counts of 64 or more -- confirm at its definition.
17939 (define_expand "<insn>ti3"
17940 [(set (match_operand:TI 0 "register_operand")
17941 (any_rotate:TI (match_operand:TI 1 "register_operand")
17942 (match_operand:QI 2 "nonmemory_operand")))]
17943 "TARGET_64BIT"
17944 {
17945 if (const_1_to_63_operand (operands[2], VOIDmode))
17946 emit_insn (gen_ix86_<insn>ti3_doubleword
17947 (operands[0], operands[1], operands[2]));
17948 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
17949 {
17950 operands[1] = force_reg (TImode, operands[1]);
17951 emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
17952 }
17953 else
17954 {
17955 rtx amount = force_reg (QImode, operands[2]);
17956 rtx src_lo = gen_lowpart (DImode, operands[1]);
17957 rtx src_hi = gen_highpart (DImode, operands[1]);
17958 rtx tmp_lo = gen_reg_rtx (DImode);
17959 rtx tmp_hi = gen_reg_rtx (DImode);
17960 emit_move_insn (tmp_lo, src_lo);
17961 emit_move_insn (tmp_hi, src_hi);
17962 rtx (*shiftd) (rtx, rtx, rtx)
17963 = (<CODE> == ROTATE) ? gen_x86_64_shld : gen_x86_64_shrd;
17964 emit_insn (shiftd (tmp_lo, src_hi, amount));
17965 emit_insn (shiftd (tmp_hi, src_lo, amount));
17966 rtx dst_lo = gen_lowpart (DImode, operands[0]);
17967 rtx dst_hi = gen_highpart (DImode, operands[0]);
17968 emit_move_insn (dst_lo, tmp_lo);
17969 emit_move_insn (dst_hi, tmp_hi);
17970 emit_insn (gen_x86_shiftdi_adj_1 (dst_lo, dst_hi, amount, tmp_lo));
17971 }
17972 DONE;
17973 })
17974
;; DImode rotate expander.
;;   - TARGET_64BIT: handled by ix86_expand_binary_operator.
;;   - 32-bit, constant count in 1..31: ix86_<insn>di3_doubleword.
;;   - 32-bit, constant count 32: <insn>32di2_doubleword, after forcing
;;     operand 1 into a register.
;;   - any other 32-bit case: FAIL.
17975 (define_expand "<insn>di3"
17976 [(set (match_operand:DI 0 "shiftdi_operand")
17977 (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
17978 (match_operand:QI 2 "nonmemory_operand")))]
17979 ""
17980 {
17981 if (TARGET_64BIT)
17982 ix86_expand_binary_operator (<CODE>, DImode, operands, TARGET_APX_NDD);
17983 else if (const_1_to_31_operand (operands[2], VOIDmode))
17984 emit_insn (gen_ix86_<insn>di3_doubleword
17985 (operands[0], operands[1], operands[2]));
17986 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
17987 {
17988 operands[1] = force_reg (DImode, operands[1]);
17989 emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
17990 }
17991 else
17992 FAIL;
17993
17994 DONE;
17995 })
17996
;; Rotate expander for the SWIM124 modes: always delegates to
;; ix86_expand_binary_operator, passing TARGET_APX_NDD as the last argument.
17997 (define_expand "<insn><mode>3"
17998 [(set (match_operand:SWIM124 0 "nonimmediate_operand")
17999 (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
18000 (match_operand:QI 2 "nonmemory_operand")))]
18001 ""
18002 {
18003 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
18004 DONE;
18005 })
18006
18007 ;; Avoid useless masking of count operand.
;; Rotate whose count is the QImode lowpart subreg of (and reg const).
;; Matches only when the constant has all of the low bits (bitsize-1) set,
;; so the AND cannot change the bits selected by that mask.  Pre-reload only
;; (ix86_pre_reload_split), always split: the AND is dropped and the count
;; becomes the QImode lowpart of operand 2, which is first forced into a
;; register.
18008 (define_insn_and_split "*<insn><mode>3_mask"
18009 [(set (match_operand:SWI 0 "nonimmediate_operand")
18010 (any_rotate:SWI
18011 (match_operand:SWI 1 "nonimmediate_operand")
18012 (subreg:QI
18013 (and
18014 (match_operand 2 "int248_register_operand" "c")
18015 (match_operand 3 "const_int_operand")) 0)))
18016 (clobber (reg:CC FLAGS_REG))]
18017 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
18018 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18019 == GET_MODE_BITSIZE (<MODE>mode)-1
18020 && ix86_pre_reload_split ()"
18021 "#"
18022 "&& 1"
18023 [(parallel
18024 [(set (match_dup 0)
18025 (any_rotate:SWI (match_dup 1)
18026 (match_dup 2)))
18027 (clobber (reg:CC FLAGS_REG))])]
18028 {
18029 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
18030 operands[2] = gen_lowpart (QImode, operands[2]);
18031 })
18032
;; Rotate of a CONSTANT (operand 1) by a masked count taken from the QImode
;; subreg of (and reg const), where the mask has all of the low bits
;; (bitsize-1) set.  The constant is first loaded into a fresh pseudo
;; (operand 4) and the rotate then uses (subreg:QI operand2 0) as the count,
;; without the AND.  The replacement has no FLAGS_REG clobber.
18033 (define_split
18034 [(set (match_operand:SWI 0 "register_operand")
18035 (any_rotate:SWI
18036 (match_operand:SWI 1 "const_int_operand")
18037 (subreg:QI
18038 (and
18039 (match_operand 2 "int248_register_operand")
18040 (match_operand 3 "const_int_operand")) 0)))]
18041 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
18042 == GET_MODE_BITSIZE (<MODE>mode) - 1"
18043 [(set (match_dup 4) (match_dup 1))
18044 (set (match_dup 0)
18045 (any_rotate:SWI (match_dup 4)
18046 (subreg:QI (match_dup 2) 0)))]
18047 "operands[4] = gen_reg_rtx (<MODE>mode);")
18048
;; Variant of *<insn><mode>3_mask where the AND is already performed in
;; QImode on a QImode register.  Same mask condition (all low bitsize-1 bits
;; set) and the same pre-reload-only, always-split behaviour; the split
;; simply uses operand 2 as the count and needs no preparation code.
18049 (define_insn_and_split "*<insn><mode>3_mask_1"
18050 [(set (match_operand:SWI 0 "nonimmediate_operand")
18051 (any_rotate:SWI
18052 (match_operand:SWI 1 "nonimmediate_operand")
18053 (and:QI
18054 (match_operand:QI 2 "register_operand" "c")
18055 (match_operand:QI 3 "const_int_operand"))))
18056 (clobber (reg:CC FLAGS_REG))]
18057 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
18058 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18059 == GET_MODE_BITSIZE (<MODE>mode)-1
18060 && ix86_pre_reload_split ()"
18061 "#"
18062 "&& 1"
18063 [(parallel
18064 [(set (match_dup 0)
18065 (any_rotate:SWI (match_dup 1)
18066 (match_dup 2)))
18067 (clobber (reg:CC FLAGS_REG))])])
18068
;; Rotate of a CONSTANT (operand 1) by (and:QI reg const), where the mask
;; has all of the low bits (bitsize-1) set.  The constant is loaded into a
;; fresh pseudo (operand 4) and rotated by operand 2 directly, without the
;; AND.  The replacement has no FLAGS_REG clobber.
18069 (define_split
18070 [(set (match_operand:SWI 0 "register_operand")
18071 (any_rotate:SWI
18072 (match_operand:SWI 1 "const_int_operand")
18073 (and:QI
18074 (match_operand:QI 2 "register_operand")
18075 (match_operand:QI 3 "const_int_operand"))))]
18076 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
18077 == GET_MODE_BITSIZE (<MODE>mode) - 1"
18078 [(set (match_dup 4) (match_dup 1))
18079 (set (match_dup 0)
18080 (any_rotate:SWI (match_dup 4) (match_dup 2)))]
18081 "operands[4] = gen_reg_rtx (<MODE>mode);")
18082
18083 ;; Implement rotation using two double-precision
18084 ;; shift instructions and a scratch register.
18085
;; Double-word rotate left by an immediate count, in place (operand 1 tied
;; to operand 0), with an earlyclobber word-mode scratch (operand 3) and a
;; FLAGS_REG clobber.  Always output as "#"; split after reload into:
;;   1. scratch := operand 4 (one half of operand 0);
;;   2. operand 4 := (operand 4 << n) | bits shifted in from operand 5;
;;   3. operand 5 := (operand 5 << n) | bits shifted in from the scratch,
;;      i.e. from the original value of operand 4.
;; Here n = count & operand 6, with operand 6 = word bitsize - 1 and
;; operand 7 = word bitsize.  Operands 4 and 5 are the two halves of operand
;; 0 produced by split_double_mode (presumably low and high -- confirm).
18086 (define_insn_and_split "ix86_rotl<dwi>3_doubleword"
18087 [(set (match_operand:<DWI> 0 "register_operand" "=r")
18088 (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
18089 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
18090 (clobber (reg:CC FLAGS_REG))
18091 (clobber (match_scratch:DWIH 3 "=&r"))]
18092 ""
18093 "#"
18094 "reload_completed"
18095 [(set (match_dup 3) (match_dup 4))
18096 (parallel
18097 [(set (match_dup 4)
18098 (ior:DWIH (ashift:DWIH (match_dup 4)
18099 (and:QI (match_dup 2) (match_dup 6)))
18100 (subreg:DWIH
18101 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
18102 (minus:QI (match_dup 7)
18103 (and:QI (match_dup 2)
18104 (match_dup 6)))) 0)))
18105 (clobber (reg:CC FLAGS_REG))])
18106 (parallel
18107 [(set (match_dup 5)
18108 (ior:DWIH (ashift:DWIH (match_dup 5)
18109 (and:QI (match_dup 2) (match_dup 6)))
18110 (subreg:DWIH
18111 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
18112 (minus:QI (match_dup 7)
18113 (and:QI (match_dup 2)
18114 (match_dup 6)))) 0)))
18115 (clobber (reg:CC FLAGS_REG))])]
18116 {
18117 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
18118 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
18119
18120 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
18121 })
18122
;; Double-word rotate right by an immediate count; mirror image of
;; ix86_rotl<dwi>3_doubleword.  In place (operand 1 tied to operand 0), with
;; an earlyclobber word-mode scratch (operand 3) and a FLAGS_REG clobber.
;; Always output as "#"; split after reload into:
;;   1. scratch := operand 4 (one half of operand 0);
;;   2. operand 4 := (operand 4 >> n) | bits shifted in from operand 5;
;;   3. operand 5 := (operand 5 >> n) | bits shifted in from the scratch,
;;      i.e. from the original value of operand 4.
;; Here n = count & operand 6, with operand 6 = word bitsize - 1 and
;; operand 7 = word bitsize.
18123 (define_insn_and_split "ix86_rotr<dwi>3_doubleword"
18124 [(set (match_operand:<DWI> 0 "register_operand" "=r")
18125 (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
18126 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
18127 (clobber (reg:CC FLAGS_REG))
18128 (clobber (match_scratch:DWIH 3 "=&r"))]
18129 ""
18130 "#"
18131 "reload_completed"
18132 [(set (match_dup 3) (match_dup 4))
18133 (parallel
18134 [(set (match_dup 4)
18135 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
18136 (and:QI (match_dup 2) (match_dup 6)))
18137 (subreg:DWIH
18138 (ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
18139 (minus:QI (match_dup 7)
18140 (and:QI (match_dup 2)
18141 (match_dup 6)))) 0)))
18142 (clobber (reg:CC FLAGS_REG))])
18143 (parallel
18144 [(set (match_dup 5)
18145 (ior:DWIH (lshiftrt:DWIH (match_dup 5)
18146 (and:QI (match_dup 2) (match_dup 6)))
18147 (subreg:DWIH
18148 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
18149 (minus:QI (match_dup 7)
18150 (and:QI (match_dup 2)
18151 (match_dup 6)))) 0)))
18152 (clobber (reg:CC FLAGS_REG))])]
18153 {
18154 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
18155 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
18156
18157 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
18158 })
18159
;; 32-bit targets: DImode rotate (either direction) by exactly 32, which
;; exchanges the two SImode halves.  No FLAGS_REG clobber.  Split after
;; reload: split_double_mode rewrites operands 0 and 1 as one half of each
;; and stores the other halves in operands 2 and 3.
;;   - If the rewritten operands 0 and 1 are the same rtx (alternative 0,
;;     source tied to destination), a single gen_swapsi of the destination's
;;     two halves is emitted.
;;   - Otherwise the template performs two cross moves: operand 0 := operand 3
;;     and operand 2 := operand 1.
18160 (define_insn_and_split "<insn>32di2_doubleword"
18161 [(set (match_operand:DI 0 "register_operand" "=r,r")
18162 (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r")
18163 (const_int 32)))]
18164 "!TARGET_64BIT"
18165 "#"
18166 "&& reload_completed"
18167 [(set (match_dup 0) (match_dup 3))
18168 (set (match_dup 2) (match_dup 1))]
18169 {
18170 split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);
18171 if (rtx_equal_p (operands[0], operands[1]))
18172 {
18173 emit_insn (gen_swapsi (operands[0], operands[2]));
18174 DONE;
18175 }
18176 })
18177
;; 64-bit targets: TImode rotate (either direction) by exactly 64, which
;; exchanges the two DImode halves.  Same scheme as <insn>32di2_doubleword:
;; after reload the operands are split into halves; a tied source and
;; destination becomes a single gen_swapdi, otherwise two cross moves
;; (operand 0 := operand 3, operand 2 := operand 1) are emitted.
18178 (define_insn_and_split "<insn>64ti2_doubleword"
18179 [(set (match_operand:TI 0 "register_operand" "=r,r")
18180 (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r")
18181 (const_int 64)))]
18182 "TARGET_64BIT"
18183 "#"
18184 "&& reload_completed"
18185 [(set (match_dup 0) (match_dup 3))
18186 (set (match_dup 2) (match_dup 1))]
18187 {
18188 split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
18189 if (rtx_equal_p (operands[0], operands[1]))
18190 {
18191 emit_insn (gen_swapdi (operands[0], operands[2]));
18192 DONE;
18193 }
18194 })
18195
;; Per-mode predicate name for the immediate rotate count of rorx:
;; counts 0..31 for SImode and 0..63 for DImode.  Used as the predicate
;; of operand 2 in *bmi2_rorx<mode>3_1.
18196 (define_mode_attr rorx_immediate_operand
18197 [(SI "const_0_to_31_operand")
18198 (DI "const_0_to_63_operand")])
18199
;; BMI2 rorx: rotate right of an SImode/DImode register-or-memory source
;; by an immediate count into a separate destination register.  The
;; pattern has no FLAGS_REG clobber.  Only enabled with TARGET_BMI2 and
;; when the function is not optimized for size.
18200 (define_insn "*bmi2_rorx<mode>3_1"
18201 [(set (match_operand:SWI48 0 "register_operand" "=r")
18202 (rotatert:SWI48
18203 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
18204 (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
18205 "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
18206 "rorx\t{%2, %1, %0|%0, %1, %2}"
18207 [(set_attr "type" "rotatex")
18208 (set_attr "mode" "<MODE>")])
18209
;; SImode/DImode rotate (left or right, via any_rotate) with three
;; alternatives, as listed in the "isa" and "type" attributes:
;;   0: two-operand form, destination tied to source 1 ("0");
;;   1: BMI2 rotatex form, immediate count only;
;;   2: APX NDD three-operand form.
;; <nf_name>, <nf_condition>, <nf_applied> and <nf_prefix> are
;; substitution attributes defined elsewhere in the file -- presumably the
;; APX "no flags" variants; confirm against their definitions.
;; The rotatex alternative prints an instruction only when
;; TARGET_APX_NDD && <nf_applied>; otherwise it returns "#" and must be
;; split.  The other alternatives use the one-operand short form when the
;; count is const1_rtx, TARGET_SHIFT1 or size optimization is in effect,
;; and neither NDD nor NF applies; length_immediate is "0" under the
;; matching attribute test.
18210 (define_insn "*<insn><mode>3_1<nf_name>"
18211 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
18212 (any_rotate:SWI48
18213 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
18214 (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))]
18215 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
18216 && <nf_condition>"
18217 {
18218 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
18219 switch (get_attr_type (insn))
18220 {
18221 case TYPE_ROTATEX:
18222 if (TARGET_APX_NDD && <nf_applied>)
18223 return "%{nf%} <rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
18224 else
18225 return "#";
18226
18227 default:
18228 if (operands[2] == const1_rtx
18229 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
18230 && !use_ndd && !<nf_applied>)
18231 return "<rotate>{<imodesuffix>}\t%0";
18232 else
18233 return use_ndd ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
18234 : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
18235 }
18236 }
18237 [(set_attr "isa" "*,bmi2,apx_ndd")
18238 (set_attr "type" "rotate,rotatex,rotate")
;; Only alternative 0 is marked preferred when optimizing for size.
18239 (set (attr "preferred_for_size")
18240 (cond [(eq_attr "alternative" "0")
18241 (symbol_ref "true")]
18242 (symbol_ref "false")))
18243 (set (attr "length_immediate")
18244 (if_then_else
18245 (and (eq_attr "type" "rotate")
18246 (and (match_operand 2 "const1_operand")
18247 (ior (match_test "TARGET_SHIFT1")
18248 (match_test "optimize_function_for_size_p (cfun)"))))
18249 (const_string "0")
18250 (const_string "*")))
18251 (set_attr "has_nf" "1")
18252 (set_attr "mode" "<MODE>")])
18253
18254 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Post-reload split: a rotate LEFT by a constant with a FLAGS_REG clobber
;; becomes a rotate RIGHT by the complementary count, without the clobber.
;; Requires TARGET_BMI2 and not optimizing for size.
18255 (define_split
18256 [(set (match_operand:SWI48 0 "register_operand")
18257 (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
18258 (match_operand:QI 2 "const_int_operand")))
18259 (clobber (reg:CC FLAGS_REG))]
18260 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
18261 [(set (match_dup 0)
18262 (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
;; rotate-left by N equals rotate-right by (bitsize - N); the final
;; "% bitsize" maps a left count of 0 to a right count of 0.
18263 {
18264 int bitsize = GET_MODE_BITSIZE (<MODE>mode);
18265
18266 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
18267 })
18268
;; Same rotate-left to rotate-right conversion as the split with the flags
;; clobber, but for a source pattern that has NO FLAGS_REG clobber.  This
;; one is additionally disabled when TARGET_APX_NDD is set.
18269 (define_split
18270 [(set (match_operand:SWI48 0 "register_operand")
18271 (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
18272 (match_operand:QI 2 "const_int_operand")))]
18273 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)
18274 && !TARGET_APX_NDD"
18275 [(set (match_dup 0)
18276 (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
;; Complementary count: (bitsize - N) % bitsize.
18277 {
18278 int bitsize = GET_MODE_BITSIZE (<MODE>mode);
18279
18280 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
18281 })
18282
;; Post-reload split: a rotate RIGHT by a constant that carries a
;; FLAGS_REG clobber is re-emitted unchanged except that the clobber is
;; dropped.  Requires TARGET_BMI2 and not optimizing for size; the count
;; is kept as is.
18283 (define_split
18284 [(set (match_operand:SWI48 0 "register_operand")
18285 (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
18286 (match_operand:QI 2 "const_int_operand")))
18287 (clobber (reg:CC FLAGS_REG))]
18288 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
18289 [(set (match_dup 0)
18290 (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
18291
;; BMI2 rorx on an SImode source with the result zero-extended to DImode.
;; 64-bit only, and not when optimizing for size.  The destination is
;; printed with the %k modifier (presumably the SImode name of the
;; register).  No FLAGS_REG clobber in the pattern.
18292 (define_insn "*bmi2_rorxsi3_1_zext"
18293 [(set (match_operand:DI 0 "register_operand" "=r")
18294 (zero_extend:DI
18295 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
18296 (match_operand:QI 2 "const_0_to_31_operand"))))]
18297 "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
18298 "rorx\t{%2, %1, %k0|%k0, %1, %2}"
18299 [(set_attr "type" "rotatex")
18300 (set_attr "mode" "SI")])
18301
;; QImode rotate (either direction) whose result is zero-extended into a
;; wider register (SWI248x modes).  Only available with TARGET_APX_NDD:
;; the three-operand form reads operand 1 (register or memory) and writes
;; the %b view of operand 0.  <nf_name>/<nf_condition>/<nf_prefix> are
;; substitution attributes defined elsewhere.
18302 (define_insn "*<insn>qi3_1_zext<mode><nf_name>"
18303 [(set (match_operand:SWI248x 0 "register_operand" "=r")
18304 (zero_extend:SWI248x
18305 (any_rotate:QI (match_operand:QI 1 "nonimmediate_operand" "rm")
18306 (match_operand:QI 2 "nonmemory_operand" "cI"))))]
18307 "TARGET_APX_NDD && <nf_condition>"
18308 "<nf_prefix><rotate>{b}\t{%2, %1, %b0|%b0, %1, %2}"
18309 [(set_attr "type" "rotate")
18310 (set_attr "has_nf" "1")
18311 (set_attr "mode" "QI")])
18312
;; HImode rotate (either direction) whose result is zero-extended into a
;; wider register (SWI48x modes).  Only available with TARGET_APX_NDD:
;; the three-operand form reads operand 1 (register or memory) and writes
;; the %w view of operand 0.  <nf_name>/<nf_condition>/<nf_prefix> are
;; substitution attributes defined elsewhere.
18313 (define_insn "*<insn>hi3_1_zext<mode><nf_name>"
18314 [(set (match_operand:SWI48x 0 "register_operand" "=r")
18315 (zero_extend:SWI48x
18316 (any_rotate:HI (match_operand:HI 1 "nonimmediate_operand" "rm")
18317 (match_operand:QI 2 "nonmemory_operand" "cI"))))]
18318 "TARGET_APX_NDD && <nf_condition>"
18319 "<nf_prefix><rotate>{w}\t{%2, %1, %w0|%w0, %1, %2}"
18320 [(set_attr "type" "rotate")
18321 (set_attr "has_nf" "1")
18322 (set_attr "mode" "HI")])
18323
;; SImode rotate (either direction) with the result zero-extended to
;; DImode; 64-bit only.  Always clobbers FLAGS_REG in the pattern.
;; Alternatives (see "isa"/"type"): 0 two-operand form tied to source 1,
;; 1 BMI2 rotatex with an immediate count, 2 APX NDD three-operand form.
;; The rotatex alternative always returns "#" and has to be split.
;; NOTE(review): the SImode/DImode pattern *<insn><mode>3_1<nf_name>
;; passes TARGET_APX_NDD as a fourth argument to ix86_binary_operator_ok;
;; this pattern does not, although it also has an apx_ndd alternative.
;; Confirm whether the difference is intentional.
18324 (define_insn "*<insn>si3_1_zext"
18325 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
18326 (zero_extend:DI
18327 (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
18328 (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
18329 (clobber (reg:CC FLAGS_REG))]
18330 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
18331 {
18332 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
18333 switch (get_attr_type (insn))
18334 {
18335 case TYPE_ROTATEX:
18336 return "#";
18337
18338 default:
18339 if (operands[2] == const1_rtx
18340 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
18341 && !use_ndd)
18342 return "<rotate>{l}\t%k0";
18343 else
18344 return use_ndd ? "<rotate>{l}\t{%2, %1, %k0|%k0, %1, %2}"
18345 : "<rotate>{l}\t{%2, %k0|%k0, %2}";
18346 }
18347 }
18348 [(set_attr "isa" "*,bmi2,apx_ndd")
18349 (set_attr "type" "rotate,rotatex,rotate")
;; Only alternative 0 is marked preferred when optimizing for size.
18350 (set (attr "preferred_for_size")
18351 (cond [(eq_attr "alternative" "0")
18352 (symbol_ref "true")]
18353 (symbol_ref "false")))
;; No immediate byte is counted when the count is 1 and TARGET_SHIFT1 or
;; size optimization is in effect (for type "rotate" only).
18354 (set (attr "length_immediate")
18355 (if_then_else
18356 (and (eq_attr "type" "rotate")
18357 (and (match_operand 2 "const1_operand")
18358 (ior (match_test "TARGET_SHIFT1")
18359 (match_test "optimize_function_for_size_p (cfun)"))))
18360 (const_string "0")
18361 (const_string "*")))
18362 (set_attr "mode" "SI")])
18363
18364 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Post-reload split for the zero-extended SImode case: a rotate LEFT by
;; a constant with a FLAGS_REG clobber becomes a rotate RIGHT by the
;; complementary count without the clobber.  64-bit, TARGET_BMI2, and not
;; optimizing for size.
18365 (define_split
18366 [(set (match_operand:DI 0 "register_operand")
18367 (zero_extend:DI
18368 (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
18369 (match_operand:QI 2 "const_int_operand"))))
18370 (clobber (reg:CC FLAGS_REG))]
18371 "TARGET_64BIT && TARGET_BMI2 && reload_completed
18372 && !optimize_function_for_size_p (cfun)"
18373 [(set (match_dup 0)
18374 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
;; The rotate is 32 bits wide, so the complementary count is computed
;; against the SImode bit size: (32 - N) % 32.
18375 {
18376 int bitsize = GET_MODE_BITSIZE (SImode);
18377
18378 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
18379 })
18380
;; Post-reload split for the zero-extended SImode case: a rotate RIGHT by
;; a constant with a FLAGS_REG clobber is re-emitted with the same count
;; but without the clobber.  64-bit, TARGET_BMI2, and not optimizing for
;; size.
18381 (define_split
18382 [(set (match_operand:DI 0 "register_operand")
18383 (zero_extend:DI
18384 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
18385 (match_operand:QI 2 "const_int_operand"))))
18386 (clobber (reg:CC FLAGS_REG))]
18387 "TARGET_64BIT && TARGET_BMI2 && reload_completed
18388 && !optimize_function_for_size_p (cfun)"
18389 [(set (match_dup 0)
18390 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
18391
;; QImode/HImode rotate (either direction).  Two alternatives, per "isa":
;; 0 two-operand form with the destination tied to source 1, and 1 the
;; APX NDD three-operand form.  There is no rotatex alternative here.
;; The one-operand short form is used when the count is const1_rtx,
;; TARGET_SHIFT1 or size optimization is in effect, and neither NDD nor
;; <nf_applied> holds.  <nf_name>, <nf_condition>, <nf_applied> and
;; <nf_prefix> are substitution attributes defined elsewhere.
18392 (define_insn "*<insn><mode>3_1<nf_name>"
18393 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
18394 (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
18395 (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))]
18396 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)
18397 && <nf_condition>"
18398 {
18399 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
18400 if (operands[2] == const1_rtx
18401 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
18402 && !use_ndd && !<nf_applied>)
18403 return "<rotate>{<imodesuffix>}\t%0";
18404 else
18405 return use_ndd
18406 ? "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
18407 : "<nf_prefix><rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
18408 }
18409 [(set_attr "isa" "*,apx_ndd")
18410 (set_attr "type" "rotate")
;; No immediate byte is counted when the count is 1 and TARGET_SHIFT1 or
;; size optimization is in effect.
18411 (set (attr "length_immediate")
18412 (if_then_else
18413 (and (match_operand 2 "const1_operand")
18414 (ior (match_test "TARGET_SHIFT1")
18415 (match_test "optimize_function_for_size_p (cfun)")))
18416 (const_string "0")
18417 (const_string "*")))
18418 (set_attr "has_nf" "1")
18419 (set_attr "mode" "<MODE>")])
18420
18421 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode/HImode rotate that writes only the low part of a wider register
;; (strict_low_part destination).  Enabled unless TARGET_PARTIAL_REG_STALL
;; is set, or always when optimizing for size.
;; Alternative 0 rotates in place (source tied to the destination).
;; Alternative 1 has a distinct, early-clobbered destination; it prints
;; "#" and is split after reload into a strict_low_part copy of operand 1
;; into operand 0 followed by the in-place rotate.
18422 (define_insn_and_split "*<insn><mode>3_1_slp"
18423 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
18424 (any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
18425 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
18426 (clobber (reg:CC FLAGS_REG))]
18427 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
18428 {
18429 if (which_alternative)
18430 return "#";
18431
18432 if (operands[2] == const1_rtx
18433 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
18434 return "<rotate>{<imodesuffix>}\t%0";
18435 else
18436 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
18437 }
;; Split only when destination and source really differ; the tied
;; alternative is emitted directly by the output code above.
18438 "&& reload_completed
18439 && !(rtx_equal_p (operands[0], operands[1]))"
18440 [(set (strict_low_part (match_dup 0)) (match_dup 1))
18441 (parallel
18442 [(set (strict_low_part (match_dup 0))
18443 (any_rotate:SWI12 (match_dup 0) (match_dup 2)))
18444 (clobber (reg:CC FLAGS_REG))])]
18445 ""
18446 [(set_attr "type" "rotate")
18447 (set (attr "length_immediate")
18448 (if_then_else
18449 (and (match_operand 2 "const1_operand")
18450 (ior (match_test "TARGET_SHIFT1")
18451 (match_test "optimize_function_for_size_p (cfun)")))
18452 (const_string "0")
18453 (const_string "*")))
18454 (set_attr "mode" "<MODE>")])
18455
;; Post-reload split: an in-place HImode rotate by 8 (either direction)
;; of a QIreg_operand is rewritten as bswap:HI and the FLAGS_REG clobber
;; is dropped.  Rotating a 16-bit value by 8 in either direction exchanges
;; its two bytes.  Applied when TARGET_USE_XCHGB is set or the function is
;; optimized for size.
18456 (define_split
18457 [(set (match_operand:HI 0 "QIreg_operand")
18458 (any_rotate:HI (match_dup 0) (const_int 8)))
18459 (clobber (reg:CC FLAGS_REG))]
18460 "reload_completed
18461 && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
18462 [(set (match_dup 0) (bswap:HI (match_dup 0)))])
18463
18464 ;; Rotations through carry flag
;; SImode rotate right through carry by one bit.  The RTL describes the
;; result as (operand 1 >> 1) plus the carry flag, read from FLAGS_REG in
;; CCCmode via ltu, shifted into bit 31.  FLAGS_REG is clobbered.
;; Alternative 0 is the in-place form; alternative 1 is the APX NDD form
;; with a separate register-or-memory source.
;; NOTE(review): "memory" is forced to "none" here although alternative 1
;; accepts a memory operand 1, and rcrdi2 has no such attribute --
;; confirm this is intended.
18465 (define_insn "rcrsi2"
18466 [(set (match_operand:SI 0 "register_operand" "=r,r")
18467 (plus:SI
18468 (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
18469 (const_int 1))
18470 (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
18471 (const_int 31))))
18472 (clobber (reg:CC FLAGS_REG))]
18473 ""
18474 "@
18475 rcr{l}\t%0
18476 rcr{l}\t{%1, %0|%0, %1}"
18477 [(set_attr "isa" "*,apx_ndd")
18478 (set_attr "type" "ishift1")
18479 (set_attr "memory" "none")
18480 (set_attr "length_immediate" "0")
18481 (set_attr "mode" "SI")])
18482
;; DImode rotate right through carry by one bit; 64-bit only.  The result
;; is (operand 1 >> 1) plus the carry flag, read from FLAGS_REG in CCCmode
;; via ltu, shifted into bit 63.  FLAGS_REG is clobbered.
;; Alternative 0 is the in-place form; alternative 1 is the APX NDD form
;; with a separate register-or-memory source.
18483 (define_insn "rcrdi2"
18484 [(set (match_operand:DI 0 "register_operand" "=r,r")
18485 (plus:DI
18486 (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,rm")
18487 (const_int 1))
18488 (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
18489 (const_int 63))))
18490 (clobber (reg:CC FLAGS_REG))]
18491 "TARGET_64BIT"
18492 "@
18493 rcr{q}\t%0
18494 rcr{q}\t{%1, %0|%0, %1}"
18495 [(set_attr "isa" "*,apx_ndd")
18496 (set_attr "type" "ishift1")
18497 (set_attr "length_immediate" "0")
18498 (set_attr "mode" "DI")])
18499
18500 ;; Versions of sar and shr that set the carry flag.
;; Right shift by one (any_shiftrt iterator) of an SImode/DImode value
;; that also sets FLAGS_REG in CCCmode.  The flags result is modelled as
;; an UNSPEC_CC_NE of (operand 1 & 1) against 0, i.e. the bit shifted out.
;; Alternative 0 is in place; alternative 1 is the APX NDD form.
;; Without NDD, the one-operand short form is printed when TARGET_SHIFT1
;; or size optimization is in effect; otherwise an explicit count of 1 is
;; printed.
18501 (define_insn "<insn><mode>3_carry"
18502 [(set (reg:CCC FLAGS_REG)
18503 (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
18504 (const_int 1))
18505 (const_int 0)] UNSPEC_CC_NE))
18506 (set (match_operand:SWI48 0 "register_operand" "=r,r")
18507 (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
18508 ""
18509 {
18510 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
18511 if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
18512 && !use_ndd)
18513 return "<shift>{<imodesuffix>}\t%0";
18514 return use_ndd ? "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}"
18515 : "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
18516 }
18517 [(set_attr "isa" "*, apx_ndd")
18518 (set_attr "type" "ishift1")
18519 (set (attr "length_immediate")
18520 (if_then_else
18521 (ior (match_test "TARGET_SHIFT1")
18522 (match_test "optimize_function_for_size_p (cfun)"))
18523 (const_string "0")
18524 (const_string "*")))
18525 (set_attr "mode" "<MODE>")])
18526 \f
18527 ;; Bit set / bit test instructions
18528
18529 ;; %%% bts, btr, btc
18530
18531 ;; These instructions are *slow* when applied to memory.
18532
;; Mnemonic for each RTL code handled by the bit set/complement patterns:
;; ior -> "bts", xor -> "btc".
18533 (define_code_attr btsc [(ior "bts") (xor "btc")])
18534
;; bts/btc with a register bit index: operand 0 = operand 1 ior/xor
;; (1 << operand 2), computed in place (operand 1 is tied to operand 0).
;; Gated on TARGET_USE_BT; clobbers FLAGS_REG.
18535 (define_insn "*<btsc><mode>"
18536 [(set (match_operand:SWI48 0 "register_operand" "=r")
18537 (any_or:SWI48
18538 (ashift:SWI48 (const_int 1)
18539 (match_operand:QI 2 "register_operand" "r"))
18540 (match_operand:SWI48 1 "register_operand" "0")))
18541 (clobber (reg:CC FLAGS_REG))]
18542 "TARGET_USE_BT"
18543 "<btsc>{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
18544 [(set_attr "type" "alu1")
18545 (set_attr "prefix_0f" "1")
18546 (set_attr "znver1_decode" "double")
18547 (set_attr "mode" "<MODE>")])
18548
18549 ;; Avoid useless masking of count operand.
;; bts/btc where the bit index is the QImode subreg of (reg & const).
;; Matches only when the constant has all of the low log2(bitsize) bits
;; set, so the AND does not change the bits of the index that select a
;; bit within the operand.  Has no constraints and is split immediately
;; ("&& 1") into the unmasked bts/btc pattern.
;; NOTE(review): ix86_pre_reload_split is defined elsewhere; presumably it
;; restricts matching to before register allocation.
18550 (define_insn_and_split "*<btsc><mode>_mask"
18551 [(set (match_operand:SWI48 0 "register_operand")
18552 (any_or:SWI48
18553 (ashift:SWI48
18554 (const_int 1)
18555 (subreg:QI
18556 (and
18557 (match_operand 1 "int248_register_operand")
18558 (match_operand 2 "const_int_operand")) 0))
18559 (match_operand:SWI48 3 "register_operand")))
18560 (clobber (reg:CC FLAGS_REG))]
18561 "TARGET_USE_BT
18562 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18563 == GET_MODE_BITSIZE (<MODE>mode)-1
18564 && ix86_pre_reload_split ()"
18565 "#"
18566 "&& 1"
18567 [(parallel
18568 [(set (match_dup 0)
18569 (any_or:SWI48
18570 (ashift:SWI48 (const_int 1)
18571 (match_dup 1))
18572 (match_dup 3)))
18573 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: operand 1 is forced into a register in its own mode
;; and then replaced by its QImode low part, which is what the unmasked
;; pattern expects as the bit index.
18574 {
18575 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
18576 operands[1] = gen_lowpart (QImode, operands[1]);
18577 })
18578
;; Variant of the masked bts/btc pattern in which the AND is already
;; performed in QImode on a QImode register, so no subreg is involved and
;; no preparation code is needed.  Matches when the constant has all of
;; the low log2(bitsize) bits set and is split immediately into the
;; unmasked bts/btc pattern.
18579 (define_insn_and_split "*<btsc><mode>_mask_1"
18580 [(set (match_operand:SWI48 0 "register_operand")
18581 (any_or:SWI48
18582 (ashift:SWI48
18583 (const_int 1)
18584 (and:QI
18585 (match_operand:QI 1 "register_operand")
18586 (match_operand:QI 2 "const_int_operand")))
18587 (match_operand:SWI48 3 "register_operand")))
18588 (clobber (reg:CC FLAGS_REG))]
18589 "TARGET_USE_BT
18590 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18591 == GET_MODE_BITSIZE (<MODE>mode)-1
18592 && ix86_pre_reload_split ()"
18593 "#"
18594 "&& 1"
18595 [(parallel
18596 [(set (match_dup 0)
18597 (any_or:SWI48
18598 (ashift:SWI48 (const_int 1)
18599 (match_dup 1))
18600 (match_dup 3)))
18601 (clobber (reg:CC FLAGS_REG))])])
18602
;; btr with a register bit index.  The mask is written as -2 (all ones
;; except bit 0) rotated left by operand 2, which moves the single zero
;; bit to position operand 2; ANDing it with operand 1 clears that bit.
;; In place (operand 1 tied to operand 0), gated on TARGET_USE_BT,
;; clobbers FLAGS_REG.
18603 (define_insn "*btr<mode>"
18604 [(set (match_operand:SWI48 0 "register_operand" "=r")
18605 (and:SWI48
18606 (rotate:SWI48 (const_int -2)
18607 (match_operand:QI 2 "register_operand" "r"))
18608 (match_operand:SWI48 1 "register_operand" "0")))
18609 (clobber (reg:CC FLAGS_REG))]
18610 "TARGET_USE_BT"
18611 "btr{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
18612 [(set_attr "type" "alu1")
18613 (set_attr "prefix_0f" "1")
18614 (set_attr "znver1_decode" "double")
18615 (set_attr "mode" "<MODE>")])
18616
18617 ;; Avoid useless masking of count operand.
;; btr where the bit index is the QImode subreg of (reg & const).
;; Matches only when the constant has all of the low log2(bitsize) bits
;; set; split immediately ("&& 1") into the unmasked btr pattern.
;; NOTE(review): ix86_pre_reload_split is defined elsewhere; presumably it
;; restricts matching to before register allocation.
18618 (define_insn_and_split "*btr<mode>_mask"
18619 [(set (match_operand:SWI48 0 "register_operand")
18620 (and:SWI48
18621 (rotate:SWI48
18622 (const_int -2)
18623 (subreg:QI
18624 (and
18625 (match_operand 1 "int248_register_operand")
18626 (match_operand 2 "const_int_operand")) 0))
18627 (match_operand:SWI48 3 "register_operand")))
18628 (clobber (reg:CC FLAGS_REG))]
18629 "TARGET_USE_BT
18630 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18631 == GET_MODE_BITSIZE (<MODE>mode)-1
18632 && ix86_pre_reload_split ()"
18633 "#"
18634 "&& 1"
18635 [(parallel
18636 [(set (match_dup 0)
18637 (and:SWI48
18638 (rotate:SWI48 (const_int -2)
18639 (match_dup 1))
18640 (match_dup 3)))
18641 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: force operand 1 into a register in its own mode,
;; then use its QImode low part as the bit index.
18642 {
18643 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
18644 operands[1] = gen_lowpart (QImode, operands[1]);
18645 })
18646
;; Variant of the masked btr pattern in which the AND is already done in
;; QImode on a QImode register; no subreg and no preparation code.
;; Matches when the constant has all of the low log2(bitsize) bits set
;; and is split immediately into the unmasked btr pattern.
18647 (define_insn_and_split "*btr<mode>_mask_1"
18648 [(set (match_operand:SWI48 0 "register_operand")
18649 (and:SWI48
18650 (rotate:SWI48
18651 (const_int -2)
18652 (and:QI
18653 (match_operand:QI 1 "register_operand")
18654 (match_operand:QI 2 "const_int_operand")))
18655 (match_operand:SWI48 3 "register_operand")))
18656 (clobber (reg:CC FLAGS_REG))]
18657 "TARGET_USE_BT
18658 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18659 == GET_MODE_BITSIZE (<MODE>mode)-1
18660 && ix86_pre_reload_split ()"
18661 "#"
18662 "&& 1"
18663 [(parallel
18664 [(set (match_dup 0)
18665 (and:SWI48
18666 (rotate:SWI48 (const_int -2)
18667 (match_dup 1))
18668 (match_dup 3)))
18669 (clobber (reg:CC FLAGS_REG))])])
18670
;; Bit clear on a QImode/HImode value where the mask was built in SImode
;; (rotate:SI of -2) and then narrowed with a subreg.  Split immediately
;; into the SImode and-rotate form so the SImode btr pattern can match.
18671 (define_insn_and_split "*btr<mode>_1"
18672 [(set (match_operand:SWI12 0 "register_operand")
18673 (and:SWI12
18674 (subreg:SWI12
18675 (rotate:SI (const_int -2)
18676 (match_operand:QI 2 "register_operand")) 0)
18677 (match_operand:SWI12 1 "nonimmediate_operand")))
18678 (clobber (reg:CC FLAGS_REG))]
18679 "TARGET_USE_BT && ix86_pre_reload_split ()"
18680 "#"
18681 "&& 1"
18682 [(parallel
18683 [(set (match_dup 0)
18684 (and:SI (rotate:SI (const_int -2) (match_dup 2))
18685 (match_dup 1)))
18686 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: both the destination and source 1 are re-expressed
;; as SImode lowpart subregs.  Source 1 may be memory, so it is forced
;; into a register first.
18687 {
18688 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
18689 operands[1] = force_reg (<MODE>mode, operands[1]);
18690 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
18691 })
18692
;; Clearing a single bit of a QImode/HImode location, expressed as a
;; store of 0 into a one-bit zero_extract at a register bit position.
;; The split condition only fires for a MEM destination: the value is
;; loaded into a fresh pseudo, the bit is cleared with the SImode
;; and-rotate form, and the result is stored back.
18693 (define_insn_and_split "*btr<mode>_2"
18694 [(set (zero_extract:HI
18695 (match_operand:SWI12 0 "nonimmediate_operand")
18696 (const_int 1)
18697 (match_operand:QI 1 "register_operand"))
18698 (const_int 0))
18699 (clobber (reg:CC FLAGS_REG))]
18700 "TARGET_USE_BT && ix86_pre_reload_split ()"
18701 "#"
18702 "&& MEM_P (operands[0])"
;; Load, clear the bit in SImode, store back.
18703 [(set (match_dup 2) (match_dup 0))
18704 (parallel
18705 [(set (match_dup 3)
18706 (and:SI (rotate:SI (const_int -2) (match_dup 1))
18707 (match_dup 4)))
18708 (clobber (reg:CC FLAGS_REG))])
18709 (set (match_dup 0) (match_dup 5))]
;; Preparation code: operands[2] holds the loaded value and operands[5]
;; the result, both new <MODE>mode pseudos; operands[4] and operands[3]
;; are their respective SImode lowpart subregs used by the and-rotate.
18710 {
18711 operands[2] = gen_reg_rtx (<MODE>mode);
18712 operands[5] = gen_reg_rtx (<MODE>mode);
18713 operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
18714 operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
18715 })
18716
;; Register counterpart of the one-bit zero_extract clear: for a
;; QImode/HImode register destination, rewrite the store of 0 as the
;; SImode and-rotate form operating in place on the SImode lowpart of the
;; register.
18717 (define_split
18718 [(set (zero_extract:HI
18719 (match_operand:SWI12 0 "register_operand")
18720 (const_int 1)
18721 (match_operand:QI 1 "register_operand"))
18722 (const_int 0))
18723 (clobber (reg:CC FLAGS_REG))]
18724 "TARGET_USE_BT && ix86_pre_reload_split ()"
18725 [(parallel
18726 [(set (match_dup 0)
18727 (and:SI (rotate:SI (const_int -2) (match_dup 1))
18728 (match_dup 2)))
18729 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: operands[2] (the AND source) is derived from the
;; original operands[0] before operands[0] itself is overwritten with the
;; same SImode lowpart subreg, so the statement order matters.
18730 {
18731 operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
18732 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
18733 })
18734
18735 ;; These instructions are never faster than the corresponding
18736 ;; and/ior/xor operations when using immediate operand, so with
18737 ;; 32-bit there's no point. But in 64-bit, we can't hold the
18738 ;; relevant immediates within the instruction itself, so operating
18739 ;; on bits in the high 32-bits of a register becomes easier.
18740 ;;
18741 ;; These are slow on Nocona, but fast on Athlon64. We do require the use
18742 ;; of btrq and btcq for corner cases of post-reload expansion of absdf and
18743 ;; negdf respectively, so they can never be disabled entirely.
18744
;; btsq with an immediate bit index 0..63: set one bit of a DImode
;; register or memory operand in place ("+rm").  64-bit only; available
;; when TARGET_USE_BT is set, and unconditionally once reload has
;; completed.  Clobbers FLAGS_REG.
18745 (define_insn "*btsq_imm"
18746 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
18747 (const_int 1)
18748 (match_operand:QI 1 "const_0_to_63_operand"))
18749 (const_int 1))
18750 (clobber (reg:CC FLAGS_REG))]
18751 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
18752 "bts{q}\t{%1, %0|%0, %1}"
18753 [(set_attr "type" "alu1")
18754 (set_attr "prefix_0f" "1")
18755 (set_attr "znver1_decode" "double")
18756 (set_attr "mode" "DI")])
18757
;; btrq with an immediate bit index 0..63: clear one bit of a DImode
;; register or memory operand in place ("+rm").  64-bit only; available
;; when TARGET_USE_BT is set, and unconditionally once reload has
;; completed.  Clobbers FLAGS_REG.
18758 (define_insn "*btrq_imm"
18759 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
18760 (const_int 1)
18761 (match_operand:QI 1 "const_0_to_63_operand"))
18762 (const_int 0))
18763 (clobber (reg:CC FLAGS_REG))]
18764 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
18765 "btr{q}\t{%1, %0|%0, %1}"
18766 [(set_attr "type" "alu1")
18767 (set_attr "prefix_0f" "1")
18768 (set_attr "znver1_decode" "double")
18769 (set_attr "mode" "DI")])
18770
;; btcq with an immediate bit index 0..63: complement one bit of a DImode
;; register or memory operand in place.  The new bit value is written as
;; the NOT of the same one-bit zero_extract.  64-bit only; available when
;; TARGET_USE_BT is set, and unconditionally once reload has completed.
;; Clobbers FLAGS_REG.
18771 (define_insn "*btcq_imm"
18772 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
18773 (const_int 1)
18774 (match_operand:QI 1 "const_0_to_63_operand"))
18775 (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
18776 (clobber (reg:CC FLAGS_REG))]
18777 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
18778 "btc{q}\t{%1, %0|%0, %1}"
18779 [(set_attr "type" "alu1")
18780 (set_attr "prefix_0f" "1")
18781 (set_attr "znver1_decode" "double")
18782 (set_attr "mode" "DI")])
18783
18784 ;; Allow Nocona to avoid these instructions if a register is available.
18785
;; Peephole for the bit-SET case when !TARGET_USE_BT and a scratch DImode
;; register is available: replace the one-bit zero_extract store of 1 by
;; an IOR with the constant (1 << bit).
18786 (define_peephole2
18787 [(match_scratch:DI 2 "r")
18788 (parallel [(set (zero_extract:DI
18789 (match_operand:DI 0 "nonimmediate_operand")
18790 (const_int 1)
18791 (match_operand:QI 1 "const_0_to_63_operand"))
18792 (const_int 1))
18793 (clobber (reg:CC FLAGS_REG))])]
18794 "TARGET_64BIT && !TARGET_USE_BT"
18795 [(parallel [(set (match_dup 0)
18796 (ior:DI (match_dup 0) (match_dup 3)))
18797 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: operands[3] is the single-bit mask.  If it is not
;; accepted by x86_64_immediate_operand it is first moved into the
;; scratch register, which then serves as the IOR operand.
18798 {
18799 int i = INTVAL (operands[1]);
18800
18801 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
18802
18803 if (!x86_64_immediate_operand (operands[3], DImode))
18804 {
18805 emit_move_insn (operands[2], operands[3]);
18806 operands[3] = operands[2];
18807 }
18808 })
18809
;; Peephole for the bit-CLEAR case when !TARGET_USE_BT and a scratch
;; DImode register is available: replace the one-bit zero_extract store
;; of 0 by an AND with the constant ~(1 << bit).
18810 (define_peephole2
18811 [(match_scratch:DI 2 "r")
18812 (parallel [(set (zero_extract:DI
18813 (match_operand:DI 0 "nonimmediate_operand")
18814 (const_int 1)
18815 (match_operand:QI 1 "const_0_to_63_operand"))
18816 (const_int 0))
18817 (clobber (reg:CC FLAGS_REG))])]
18818 "TARGET_64BIT && !TARGET_USE_BT"
18819 [(parallel [(set (match_dup 0)
18820 (and:DI (match_dup 0) (match_dup 3)))
18821 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: operands[3] is the inverted single-bit mask.  If it
;; is not accepted by x86_64_immediate_operand it is first moved into the
;; scratch register, which then serves as the AND operand.
18822 {
18823 int i = INTVAL (operands[1]);
18824
18825 operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
18826
18827 if (!x86_64_immediate_operand (operands[3], DImode))
18828 {
18829 emit_move_insn (operands[2], operands[3]);
18830 operands[3] = operands[2];
18831 }
18832 })
18833
;; Peephole for the bit-COMPLEMENT case when !TARGET_USE_BT and a scratch
;; DImode register is available: replace the one-bit zero_extract
;; complement by an XOR with the constant (1 << bit).
18834 (define_peephole2
18835 [(match_scratch:DI 2 "r")
18836 (parallel [(set (zero_extract:DI
18837 (match_operand:DI 0 "nonimmediate_operand")
18838 (const_int 1)
18839 (match_operand:QI 1 "const_0_to_63_operand"))
18840 (not:DI (zero_extract:DI
18841 (match_dup 0) (const_int 1) (match_dup 1))))
18842 (clobber (reg:CC FLAGS_REG))])]
18843 "TARGET_64BIT && !TARGET_USE_BT"
18844 [(parallel [(set (match_dup 0)
18845 (xor:DI (match_dup 0) (match_dup 3)))
18846 (clobber (reg:CC FLAGS_REG))])]
;; Preparation code: operands[3] is the single-bit mask.  If it is not
;; accepted by x86_64_immediate_operand it is first moved into the
;; scratch register, which then serves as the XOR operand.
18847 {
18848 int i = INTVAL (operands[1]);
18849
18850 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
18851
18852 if (!x86_64_immediate_operand (operands[3], DImode))
18853 {
18854 emit_move_insn (operands[2], operands[3]);
18855 operands[3] = operands[2];
18856 }
18857 })
18858
18859 ;; %%% bt
18860
;; bt: test a single bit of an SImode/DImode operand and set FLAGS_REG in
;; CCCmode.  Alternative 0 takes a register operand with a register or
;; immediate index; alternative 1 takes a memory operand with an
;; immediate index only.
;; The "mode" attribute, which selects the printed form, is SI whenever
;; the index is a CONST_INT below 32 (even for a DImode operand) and
;; <MODE> otherwise; the output code asserts it is SI or DI.
18861 (define_insn "*bt<mode>"
18862 [(set (reg:CCC FLAGS_REG)
18863 (compare:CCC
18864 (zero_extract:SWI48
18865 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
18866 (const_int 1)
18867 (match_operand:QI 1 "nonmemory_operand" "q<S>,<S>"))
18868 (const_int 0)))]
18869 ""
18870 {
18871 switch (get_attr_mode (insn))
18872 {
18873 case MODE_SI:
18874 return "bt{l}\t{%k1, %k0|%k0, %k1}";
18875
18876 case MODE_DI:
18877 return "bt{q}\t{%q1, %0|%0, %q1}";
18878
18879 default:
18880 gcc_unreachable ();
18881 }
18882 }
18883 [(set_attr "type" "alu1")
18884 (set_attr "prefix_0f" "1")
18885 (set (attr "mode")
18886 (if_then_else
18887 (and (match_test "CONST_INT_P (operands[1])")
18888 (match_test "INTVAL (operands[1]) < 32"))
18889 (const_string "SI")
18890 (const_string "<MODE>")))])
18891
;; bt where the bit index is the QImode subreg of (reg & const), with the
;; AND done in any of the SWI248 modes.  Matches only when the constant
;; has all of the low log2(bitsize) bits set for the tested operand's
;; mode.  Split immediately ("&& 1") into the plain bt pattern; the
;; one-line preparation statement replaces operand 1 by its QImode low
;; part.
18892 (define_insn_and_split "*bt<SWI48:mode>_mask"
18893 [(set (reg:CCC FLAGS_REG)
18894 (compare:CCC
18895 (zero_extract:SWI48
18896 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
18897 (const_int 1)
18898 (subreg:QI
18899 (and:SWI248
18900 (match_operand:SWI248 1 "register_operand")
18901 (match_operand 2 "const_int_operand")) 0))
18902 (const_int 0)))]
18903 "TARGET_USE_BT
18904 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
18905 == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
18906 && ix86_pre_reload_split ()"
18907 "#"
18908 "&& 1"
18909 [(set (reg:CCC FLAGS_REG)
18910 (compare:CCC
18911 (zero_extract:SWI48 (match_dup 0) (const_int 1) (match_dup 1))
18912 (const_int 0)))]
18913 "operands[1] = gen_lowpart (QImode, operands[1]);")
18914
;; Conditional jump on a single-bit test (bt_comparison_operator applied
;; to a one-bit zero_extract compared with 0).  Split immediately into a
;; bt that sets FLAGS_REG in CCCmode followed by a jump on that flag.
;; Enabled with TARGET_USE_BT or when optimizing for size.  For a
;; constant index, the index must be below the mode's bit size and at
;; least 8 when optimizing for size or 32 otherwise; for a non-constant
;; index, operand 1 must not be a memory operand.
18915 (define_insn_and_split "*jcc_bt<mode>"
18916 [(set (pc)
18917 (if_then_else (match_operator 0 "bt_comparison_operator"
18918 [(zero_extract:SWI48
18919 (match_operand:SWI48 1 "nonimmediate_operand")
18920 (const_int 1)
18921 (match_operand:QI 2 "nonmemory_operand"))
18922 (const_int 0)])
18923 (label_ref (match_operand 3))
18924 (pc)))
18925 (clobber (reg:CC FLAGS_REG))]
18926 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
18927 && (CONST_INT_P (operands[2])
18928 ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
18929 && INTVAL (operands[2])
18930 >= (optimize_function_for_size_p (cfun) ? 8 : 32))
18931 : !memory_operand (operands[1], <MODE>mode))
18932 && ix86_pre_reload_split ()"
18933 "#"
18934 "&& 1"
18935 [(set (reg:CCC FLAGS_REG)
18936 (compare:CCC
18937 (zero_extract:SWI48
18938 (match_dup 1)
18939 (const_int 1)
18940 (match_dup 2))
18941 (const_int 0)))
18942 (set (pc)
18943 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
18944 (label_ref (match_dup 3))
18945 (pc)))]
;; Preparation code: the comparison rtx is shallow-copied before its code
;; is changed, so the matched insn's own rtx is left unmodified.  The
;; code is reversed for the CCCmode flag test -- presumably because the
;; carry-based encoding of EQ/NE is the opposite of the bit test; confirm
;; against the condition-code printing in the backend.
18946 {
18947 operands[0] = shallow_copy_rtx (operands[0]);
18948 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
18949 })
18950
18951 ;; Avoid useless masking of bit offset operand.
;; Conditional jump on a single-bit test of a register where the bit
;; index is (QImode reg & const) with the AND done in QImode.  Matches
;; when the constant has all of the low log2(bitsize) bits set; split
;; immediately into bt (with the unmasked index) plus a jump on the
;; CCCmode flag.
18952 (define_insn_and_split "*jcc_bt<mode>_mask"
18953 [(set (pc)
18954 (if_then_else (match_operator 0 "bt_comparison_operator"
18955 [(zero_extract:SWI48
18956 (match_operand:SWI48 1 "register_operand")
18957 (const_int 1)
18958 (and:QI
18959 (match_operand:QI 2 "register_operand")
18960 (match_operand 3 "const_int_operand")))
18961 (const_int 0)])
18962 (label_ref (match_operand 4))
18963 (pc)))
18964 (clobber (reg:CC FLAGS_REG))]
18965 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
18966 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
18967 == GET_MODE_BITSIZE (<MODE>mode)-1
18968 && ix86_pre_reload_split ()"
18969 "#"
18970 "&& 1"
18971 [(set (reg:CCC FLAGS_REG)
18972 (compare:CCC
18973 (zero_extract:SWI48
18974 (match_dup 1)
18975 (const_int 1)
18976 (match_dup 2))
18977 (const_int 0)))
18978 (set (pc)
18979 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
18980 (label_ref (match_dup 4))
18981 (pc)))]
;; Preparation code: reverse the comparison code on a shallow copy of the
;; operator, leaving the matched rtx untouched.
18982 {
18983 operands[0] = shallow_copy_rtx (operands[0]);
18984 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
18985 })
18986
18987 ;; Avoid useless masking of bit offset operand.
;; Conditional jump on a single-bit test of a register where the bit
;; index is the QImode subreg of (reg & const), the AND being done in any
;; of the SWI248 modes.  Matches when the constant has all of the low
;; log2(bitsize) bits set for the tested operand's mode; split
;; immediately into bt plus a jump on the CCCmode flag.
18988 (define_insn_and_split "*jcc_bt<SWI48:mode>_mask_1"
18989 [(set (pc)
18990 (if_then_else (match_operator 0 "bt_comparison_operator"
18991 [(zero_extract:SWI48
18992 (match_operand:SWI48 1 "register_operand")
18993 (const_int 1)
18994 (subreg:QI
18995 (and:SWI248
18996 (match_operand:SWI248 2 "register_operand")
18997 (match_operand 3 "const_int_operand")) 0))
18998 (const_int 0)])
18999 (label_ref (match_operand 4))
19000 (pc)))
19001 (clobber (reg:CC FLAGS_REG))]
19002 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
19003 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
19004 == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
19005 && ix86_pre_reload_split ()"
19006 "#"
19007 "&& 1"
19008 [(set (reg:CCC FLAGS_REG)
19009 (compare:CCC
19010 (zero_extract:SWI48
19011 (match_dup 1)
19012 (const_int 1)
19013 (match_dup 2))
19014 (const_int 0)))
19015 (set (pc)
19016 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
19017 (label_ref (match_dup 4))
19018 (pc)))]
;; Preparation code: reverse the comparison code on a shallow copy of the
;; operator, and narrow the index register to its QImode low part.
19019 {
19020 operands[0] = shallow_copy_rtx (operands[0]);
19021 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
19022 operands[2] = gen_lowpart (QImode, operands[2]);
19023 })
19024
19025 ;; Help combine recognize bt followed by cmov
;; Split a conditional move whose condition is a single-bit test of a
;; register into bt (setting FLAGS_REG in CCCmode) followed by an
;; if_then_else on that flag.  Requires TARGET_USE_BT and TARGET_CMOVE,
;; and at most one of the two value operands may be memory.
19026 (define_split
19027 [(set (match_operand:SWI248 0 "register_operand")
19028 (if_then_else:SWI248
19029 (match_operator 5 "bt_comparison_operator"
19030 [(zero_extract:SWI48
19031 (match_operand:SWI48 1 "register_operand")
19032 (const_int 1)
19033 (match_operand:QI 2 "register_operand"))
19034 (const_int 0)])
19035 (match_operand:SWI248 3 "nonimmediate_operand")
19036 (match_operand:SWI248 4 "nonimmediate_operand")))]
19037 "TARGET_USE_BT && TARGET_CMOVE
19038 && !(MEM_P (operands[3]) && MEM_P (operands[4]))
19039 && ix86_pre_reload_split ()"
19040 [(set (reg:CCC FLAGS_REG)
19041 (compare:CCC
19042 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
19043 (const_int 0)))
19044 (set (match_dup 0)
19045 (if_then_else:SWI248 (eq (reg:CCC FLAGS_REG) (const_int 0))
19046 (match_dup 3)
19047 (match_dup 4)))]
;; Preparation code: the replacement always tests (eq flags 0), so when
;; the original comparison code was EQ the two value operands are swapped
;; instead of changing the comparison.
19048 {
19049 if (GET_CODE (operands[5]) == EQ)
19050 std::swap (operands[3], operands[4]);
19051 })
19052
19053 ;; Help combine recognize bt followed by setc
;; One-bit zero_extract of a register at a register bit position, stored
;; through an SImode/DImode subreg of a QImode destination.  Split
;; immediately into bt (setting FLAGS_REG in CCCmode) followed by a QImode
;; store of (eq flags 0) into the destination.
19054 (define_insn_and_split "*bt<mode>_setcqi"
19055 [(set (subreg:SWI48 (match_operand:QI 0 "register_operand") 0)
19056 (zero_extract:SWI48
19057 (match_operand:SWI48 1 "register_operand")
19058 (const_int 1)
19059 (match_operand:QI 2 "register_operand")))
19060 (clobber (reg:CC FLAGS_REG))]
19061 "TARGET_USE_BT && ix86_pre_reload_split ()"
19062 "#"
19063 "&& 1"
19064 [(set (reg:CCC FLAGS_REG)
19065 (compare:CCC
19066 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
19067 (const_int 0)))
19068 (set (match_dup 0)
19069 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))])
19070
19071 ;; Help combine recognize bt followed by setnc
;; QImode result equal to the INVERTED bit: ((~(x >> n)) & 1), with the
;; shift done in SImode/DImode and narrowed by a QImode subreg.  Split
;; immediately into bt (setting FLAGS_REG in CCCmode) followed by a QImode
;; store of (ne flags 0).
19072 (define_insn_and_split "*bt<mode>_setncqi"
19073 [(set (match_operand:QI 0 "register_operand")
19074 (and:QI
19075 (not:QI
19076 (subreg:QI
19077 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
19078 (match_operand:QI 2 "register_operand")) 0))
19079 (const_int 1)))
19080 (clobber (reg:CC FLAGS_REG))]
19081 "TARGET_USE_BT && ix86_pre_reload_split ()"
19082 "#"
19083 "&& 1"
19084 [(set (reg:CCC FLAGS_REG)
19085 (compare:CCC
19086 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
19087 (const_int 0)))
19088 (set (match_dup 0)
19089 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
19090
;; Full-width (SImode/DImode) result equal to the INVERTED bit:
;; ((~(x >> n)) & 1).  Split immediately into bt, a QImode store of
;; (ne flags 0) into a fresh QImode pseudo (operands[3], created by the
;; one-line preparation statement), and a zero-extension of that pseudo
;; into the destination.
19091 (define_insn_and_split "*bt<mode>_setnc<mode>"
19092 [(set (match_operand:SWI48 0 "register_operand")
19093 (and:SWI48
19094 (not:SWI48
19095 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
19096 (match_operand:QI 2 "register_operand")))
19097 (const_int 1)))
19098 (clobber (reg:CC FLAGS_REG))]
19099 "TARGET_USE_BT && ix86_pre_reload_split ()"
19100 "#"
19101 "&& 1"
19102 [(set (reg:CCC FLAGS_REG)
19103 (compare:CCC
19104 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
19105 (const_int 0)))
19106 (set (match_dup 3)
19107 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
19108 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
19109 "operands[3] = gen_reg_rtx (QImode);")
19110
19111 ;; Help combine recognize bt followed by setnc (PR target/110588)
;; Alternative RTL shape for the inverted bit test: a QImode
;; (eq (zero_extract op1 1 op2) 0).  Pre-reload only
;; (TARGET_USE_BT && ix86_pre_reload_split ()).  Always split into the
;; CCCmode compare of the zero_extract plus a store of (ne flags 0).
19112 (define_insn_and_split "*bt<mode>_setncqi_2"
19113 [(set (match_operand:QI 0 "register_operand")
19114 (eq:QI
19115 (zero_extract:SWI48
19116 (match_operand:SWI48 1 "register_operand")
19117 (const_int 1)
19118 (match_operand:QI 2 "register_operand"))
19119 (const_int 0)))
19120 (clobber (reg:CC FLAGS_REG))]
19121 "TARGET_USE_BT && ix86_pre_reload_split ()"
19122 "#"
19123 "&& 1"
19124 [(set (reg:CCC FLAGS_REG)
19125 (compare:CCC
19126 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
19127 (const_int 0)))
19128 (set (match_dup 0)
19129 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
19130
19131 ;; Help combine recognize bt followed by setc
;; Variant where the bit position is the low QImode part of
;; (and op2 op3).  The insn condition requires the constant op3 to have
;; every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set, so the split can
;; drop the AND and use op2 directly.  Pre-reload only.
;; In the preparation statements operands[2] is narrowed to QImode with
;; gen_lowpart and operands[3] is reused for a fresh QImode pseudo that
;; receives (eq flags 0) before being zero-extended into operand 0.
19132 (define_insn_and_split "*bt<mode>_setc<mode>_mask"
19133 [(set (match_operand:SWI48 0 "register_operand")
19134 (zero_extract:SWI48
19135 (match_operand:SWI48 1 "register_operand")
19136 (const_int 1)
19137 (subreg:QI
19138 (and:SWI48
19139 (match_operand:SWI48 2 "register_operand")
19140 (match_operand 3 "const_int_operand")) 0)))
19141 (clobber (reg:CC FLAGS_REG))]
19142 "TARGET_USE_BT
19143 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
19144 == GET_MODE_BITSIZE (<MODE>mode)-1
19145 && ix86_pre_reload_split ()"
19146 "#"
19147 "&& 1"
19148 [(set (reg:CCC FLAGS_REG)
19149 (compare:CCC
19150 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
19151 (const_int 0)))
19152 (set (match_dup 3)
19153 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))
19154 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
19155 {
19156 operands[2] = gen_lowpart (QImode, operands[2]);
19157 operands[3] = gen_reg_rtx (QImode);
19158 })
19159 \f
19160 ;; Store-flag instructions.
19161
;; Split a QImode store of an add_comparison_operator comparing
;; (not op2) with op3.  The replacement sets the flags in CCCmode from
;; comparing (op2 + op3) against op2, then applies the same operator
;; (match_op_dup 1) to the flags register.  The split condition is
;; empty, so it applies whenever the pattern matches.
19162 (define_split
19163 [(set (match_operand:QI 0 "nonimmediate_operand")
19164 (match_operator:QI 1 "add_comparison_operator"
19165 [(not:SWI (match_operand:SWI 2 "register_operand"))
19166 (match_operand:SWI 3 "nonimmediate_operand")]))]
19167 ""
19168 [(set (reg:CCC FLAGS_REG)
19169 (compare:CCC
19170 (plus:SWI (match_dup 2) (match_dup 3))
19171 (match_dup 2)))
19172 (set (match_dup 0)
19173 (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])
19174
;; Split a QImode store of a shr_comparison_operator comparing a DImode
;; register with a constant C where C+1 is a power of two whose log2 lies
;; in [32, 63] (TARGET_64BIT only).  The comparison is replaced by a
;; CCZmode test of (op2 >> log2 (C+1)) against zero.
;; Preparation: the operator rtx is copied (shallow_copy_rtx) before its
;; code is changed, GTU becomes NE and LEU becomes EQ (any other code is
;; gcc_unreachable), and operands[4] receives the shift count.
19175 (define_split
19176 [(set (match_operand:QI 0 "nonimmediate_operand")
19177 (match_operator:QI 1 "shr_comparison_operator"
19178 [(match_operand:DI 2 "register_operand")
19179 (match_operand 3 "const_int_operand")]))]
19180 "TARGET_64BIT
19181 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
19182 [(set (reg:CCZ FLAGS_REG)
19183 (compare:CCZ
19184 (lshiftrt:DI (match_dup 2) (match_dup 4))
19185 (const_int 0)))
19186 (set (match_dup 0)
19187 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
19188 {
19189 enum rtx_code new_code;
19190
19191 operands[1] = shallow_copy_rtx (operands[1]);
19192 switch (GET_CODE (operands[1]))
19193 {
19194 case GTU: new_code = NE; break;
19195 case LEU: new_code = EQ; break;
19196 default: gcc_unreachable ();
19197 }
19198 PUT_CODE (operands[1], new_code);
19199
19200 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
19201 })
19202
19203 ;; For all sCOND expanders, also expand the compare or test insn that
19204 ;; generates cc0. Generate an equality comparison if `seq' or `sne'.
19205
;; Store the result of a flags comparison into a whole SWI248 register
;; with a single "setzu" instruction; enabled only for TARGET_APX_ZU.
;; %C1 prints the condition suffix; %b0 presumably prints operand 0
;; under its byte-register name -- confirm against the operand printer.
19206 (define_insn "*setcc_<mode>_zu"
19207 [(set (match_operand:SWI248 0 "register_operand" "=r")
19208 (match_operator:SWI248 1 "ix86_comparison_operator"
19209 [(reg FLAGS_REG) (const_int 0)]))]
19210 "TARGET_APX_ZU"
19211 "setzu%C1\t%b0"
19212 [(set_attr "type" "setcc")])
19213
;; DImode store-flag, used on 64-bit targets without APX ZU and without
;; partial-register stalls.  After reload it is split into a QImode
;; setcc into the low part of operand 0 followed by a zero_extend to DI.
;; The comparison rtx is copied (shallow_copy_rtx) before its mode is
;; changed to QImode, so the matched rtx itself is not modified.
19214 (define_insn_and_split "*setcc_di_1"
19215 [(set (match_operand:DI 0 "register_operand" "=q")
19216 (match_operator:DI 1 "ix86_comparison_operator"
19217 [(reg FLAGS_REG) (const_int 0)]))]
19218 "!TARGET_APX_ZU && TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
19219 "#"
19220 "&& reload_completed"
19221 [(set (match_dup 2) (match_dup 1))
19222 (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
19223 {
19224 operands[1] = shallow_copy_rtx (operands[1]);
19225 PUT_MODE (operands[1], QImode);
19226 operands[2] = gen_lowpart (QImode, operands[0]);
19227 })
19228
;; SWI24 store-flag variant carrying a flags clobber.  It is selected
;; when TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized
;; for speed (and partial-register stalls are not a concern).
;; After reload it becomes a QImode setcc into the low part of operand 0
;; followed by a zero_extend that itself clobbers the flags.
19229 (define_insn_and_split "*setcc_<mode>_1_and"
19230 [(set (match_operand:SWI24 0 "register_operand" "=q")
19231 (match_operator:SWI24 1 "ix86_comparison_operator"
19232 [(reg FLAGS_REG) (const_int 0)]))
19233 (clobber (reg:CC FLAGS_REG))]
19234 "!TARGET_PARTIAL_REG_STALL
19235 && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
19236 "#"
19237 "&& reload_completed"
19238 [(set (match_dup 2) (match_dup 1))
19239 (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
19240 (clobber (reg:CC FLAGS_REG))])]
19241 {
19242 operands[1] = shallow_copy_rtx (operands[1]);
19243 PUT_MODE (operands[1], QImode);
19244 operands[2] = gen_lowpart (QImode, operands[0]);
19245 })
19246
;; SWI24 store-flag variant without a flags clobber.  It is selected when
;; APX ZU is unavailable and either TARGET_ZERO_EXTEND_WITH_AND is off or
;; the function is optimized for size.  After reload it becomes a QImode
;; setcc into the low part of operand 0 followed by a plain zero_extend.
19247 (define_insn_and_split "*setcc_<mode>_1_movzbl"
19248 [(set (match_operand:SWI24 0 "register_operand" "=q")
19249 (match_operator:SWI24 1 "ix86_comparison_operator"
19250 [(reg FLAGS_REG) (const_int 0)]))]
19251 "!TARGET_APX_ZU && !TARGET_PARTIAL_REG_STALL
19252 && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
19253 "#"
19254 "&& reload_completed"
19255 [(set (match_dup 2) (match_dup 1))
19256 (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
19257 {
19258 operands[1] = shallow_copy_rtx (operands[1]);
19259 PUT_MODE (operands[1], QImode);
19260 operands[2] = gen_lowpart (QImode, operands[0]);
19261 })
19262
;; Basic QImode store-flag: writes the result of a flags comparison to a
;; byte register or memory ("=qm") using "set" plus the condition suffix
;; printed by %C1.  Always enabled (empty condition).
19263 (define_insn "*setcc_qi"
19264 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
19265 (match_operator:QI 1 "ix86_comparison_operator"
19266 [(reg FLAGS_REG) (const_int 0)]))]
19267 ""
19268 "set%C1\t%0"
19269 [(set_attr "type" "setcc")
19270 (set_attr "mode" "QI")])
19271
;; Same instruction as the plain QImode store-flag, but the destination
;; is a strict_low_part of a byte register ("+q": read-write), so only
;; the low byte of the containing register is written.
19272 (define_insn "*setcc_qi_slp"
19273 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
19274 (match_operator:QI 1 "ix86_comparison_operator"
19275 [(reg FLAGS_REG) (const_int 0)]))]
19276 ""
19277 "set%C1\t%0"
19278 [(set_attr "type" "setcc")
19279 (set_attr "mode" "QI")])
19280
19281 ;; In general it is not safe to assume too much about CCmode registers,
19282 ;; so simplify-rtx stops when it sees a second one. Under certain
19283 ;; conditions this is safe on x86, so help combine not create
19284 ;;
19285 ;; seta %al
19286 ;; testb %al, %al
19287 ;; sete %al
19288
;; (ne (comparison flags 0) 0) stored to a QImode destination is just the
;; comparison itself: replace it with a direct store of the comparison.
;; The operator is copied before PUT_MODE forces it to QImode.
19289 (define_split
19290 [(set (match_operand:QI 0 "nonimmediate_operand")
19291 (ne:QI (match_operator 1 "ix86_comparison_operator"
19292 [(reg FLAGS_REG) (const_int 0)])
19293 (const_int 0)))]
19294 ""
19295 [(set (match_dup 0) (match_dup 1))]
19296 {
19297 operands[1] = shallow_copy_rtx (operands[1]);
19298 PUT_MODE (operands[1], QImode);
19299 })
19300
;; strict_low_part form of the (ne (comparison flags 0) 0) split: the
;; input destination is a strict_low_part of a QImode register.  Note
;; that the replacement stores to (match_dup 0) directly; the
;; strict_low_part wrapper is not repeated in the output template.
19301 (define_split
19302 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
19303 (ne:QI (match_operator 1 "ix86_comparison_operator"
19304 [(reg FLAGS_REG) (const_int 0)])
19305 (const_int 0)))]
19306 ""
19307 [(set (match_dup 0) (match_dup 1))]
19308 {
19309 operands[1] = shallow_copy_rtx (operands[1]);
19310 PUT_MODE (operands[1], QImode);
19311 })
19312
;; (eq (comparison flags 0) 0) stored to a QImode destination is the
;; reversed comparison.  The operator is copied, set to QImode, and its
;; code replaced by ix86_reverse_condition (code, mode of the flags
;; operand).  The split FAILs when the reversed rtx is no longer accepted
;; by ix86_comparison_operator.
19313 (define_split
19314 [(set (match_operand:QI 0 "nonimmediate_operand")
19315 (eq:QI (match_operator 1 "ix86_comparison_operator"
19316 [(reg FLAGS_REG) (const_int 0)])
19317 (const_int 0)))]
19318 ""
19319 [(set (match_dup 0) (match_dup 1))]
19320 {
19321 operands[1] = shallow_copy_rtx (operands[1]);
19322 PUT_MODE (operands[1], QImode);
19323 PUT_CODE (operands[1],
19324 ix86_reverse_condition (GET_CODE (operands[1]),
19325 GET_MODE (XEXP (operands[1], 0))));
19326
19327 /* Make sure that (a) the CCmode we have for the flags is strong
19328 enough for the reversed compare or (b) we have a valid FP compare. */
19329 if (! ix86_comparison_operator (operands[1], VOIDmode))
19330 FAIL;
19331 })
19332
;; strict_low_part form of the (eq (comparison flags 0) 0) split: the
;; comparison is copied, forced to QImode and reversed with
;; ix86_reverse_condition; the split FAILs if the reversed comparison is
;; not a valid ix86_comparison_operator.  The replacement stores to
;; (match_dup 0) without repeating the strict_low_part wrapper.
19333 (define_split
19334 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
19335 (eq:QI (match_operator 1 "ix86_comparison_operator"
19336 [(reg FLAGS_REG) (const_int 0)])
19337 (const_int 0)))]
19338 ""
19339 [(set (match_dup 0) (match_dup 1))]
19340 {
19341 operands[1] = shallow_copy_rtx (operands[1]);
19342 PUT_MODE (operands[1], QImode);
19343 PUT_CODE (operands[1],
19344 ix86_reverse_condition (GET_CODE (operands[1]),
19345 GET_MODE (XEXP (operands[1], 0))));
19346
19347 /* Make sure that (a) the CCmode we have for the flags is strong
19348 enough for the reversed compare or (b) we have a valid FP compare. */
19349 if (! ix86_comparison_operator (operands[1], VOIDmode))
19350 FAIL;
19351 })
19352
19353 ;; Eliminate redundant compare between set{z,nz} and j{z,nz}:
19354 ;; setz %al; test %al,%al; jz <...> -> setz %al; jnz <...> and
19355 ;; setnz %al; test %al,%al; jz <...> -> setnz %al; jz <...>.
;; Three-insn window: a CCZ-based setcc into operand 0, a CCZ compare of
;; operand 0 with zero, and a conditional jump on that compare.  The
;; compare is dropped and the jump tests the original flags instead.
;; Requires the flags to be dead after the jump
;; (peep2_regno_dead_p (3, FLAGS_REG)).
;; When the setcc used EQ, the jump condition is copied and reversed with
;; reverse_condition; otherwise it is reused unchanged.
19356 (define_peephole2
19357 [(set (match_operand:QI 0 "nonimmediate_operand")
19358 (match_operator:QI 1 "bt_comparison_operator"
19359 [(reg:CCZ FLAGS_REG) (const_int 0)]))
19360 (set (reg:CCZ FLAGS_REG)
19361 (compare:CCZ (match_dup 0) (const_int 0)))
19362 (set (pc)
19363 (if_then_else (match_operator 2 "bt_comparison_operator"
19364 [(reg:CCZ FLAGS_REG) (const_int 0)])
19365 (match_operand 3)
19366 (pc)))]
19367 "peep2_regno_dead_p (3, FLAGS_REG)"
19368 [(set (match_dup 0)
19369 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))
19370 (set (pc)
19371 (if_then_else (match_dup 2)
19372 (match_dup 3)
19373 (pc)))]
19374 {
19375 if (GET_CODE (operands[1]) == EQ)
19376 {
19377 operands[2] = shallow_copy_rtx (operands[2]);
19378 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
19379 }
19380 })
19381
19382 ;; The SSE store flag instructions save 0 or 0xffffffff to the result.
19383 ;; Subsequent logical operations are used to imitate conditional moves.
19384 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
19385 ;; it directly.
19386
;; Scalar SSE compare producing a MODEF result from an
;; sse_comparison_operator applied to operands 1 and 2.
;; Alternative 0 (isa "noavx") is the two-operand "cmp" form with
;; operand 1 tied to the destination (constraint "0"); alternative 1
;; (isa "avx") is the three-operand "vcmp" form.  %D3 prints the
;; comparison predicate taken from operator 3.
19387 (define_insn "setcc_<mode>_sse"
19388 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
19389 (match_operator:MODEF 3 "sse_comparison_operator"
19390 [(match_operand:MODEF 1 "register_operand" "0,x")
19391 (match_operand:MODEF 2 "nonimmediate_operand" "xm,xjm")]))]
19392 "SSE_FLOAT_MODE_P (<MODE>mode)"
19393 "@
19394 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
19395 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19396 [(set_attr "isa" "noavx,avx")
19397 (set_attr "addr" "*,gpr16")
19398 (set_attr "type" "ssecmp")
19399 (set_attr "length_immediate" "1")
19400 (set_attr "prefix" "orig,vex")
19401 (set_attr "mode" "<MODE>")])
19402
;; HFmode scalar compare for TARGET_AVX512FP16: emits "vcmpsh" with the
;; result written to a mask register ("=k").  Operand 3 is an immediate
;; in the range 0..31 (const_0_to_31_operand) passed through to the
;; instruction; the operation is modelled as UNSPEC_PCMP.
19403 (define_insn "setcc_hf_mask"
19404 [(set (match_operand:QI 0 "register_operand" "=k")
19405 (unspec:QI
19406 [(match_operand:HF 1 "register_operand" "v")
19407 (match_operand:HF 2 "nonimmediate_operand" "vm")
19408 (match_operand:SI 3 "const_0_to_31_operand")]
19409 UNSPEC_PCMP))]
19410 "TARGET_AVX512FP16"
19411 "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19412 [(set_attr "type" "ssecmp")
19413 (set_attr "prefix" "evex")
19414 (set_attr "mode" "HF")])
19415
19416 \f
19417 ;; Basic conditional jump instructions.
19418
;; Conditional-jump counterpart of the store-flag add_comparison split:
;; a branch on an add_comparison_operator comparing (not op2) with op3
;; becomes a CCCmode compare of (op2 + op3) against op2, followed by a
;; branch using the same operator on the flags register.
19419 (define_split
19420 [(set (pc)
19421 (if_then_else
19422 (match_operator 1 "add_comparison_operator"
19423 [(not:SWI (match_operand:SWI 2 "register_operand"))
19424 (match_operand:SWI 3 "nonimmediate_operand")])
19425 (label_ref (match_operand 0))
19426 (pc)))]
19427 ""
19428 [(set (reg:CCC FLAGS_REG)
19429 (compare:CCC
19430 (plus:SWI (match_dup 2) (match_dup 3))
19431 (match_dup 2)))
19432 (set (pc)
19433 (if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)])
19434 (label_ref (match_operand 0))
19435 (pc)))])
19436
;; Conditional-jump counterpart of the store-flag shr_comparison split.
;; Applies on TARGET_64BIT when the constant C in op3 satisfies
;; 32 <= log2 (C+1) <= 63.  The branch then tests
;; (op2 >> log2 (C+1)) against zero in CCZmode.
;; Preparation: copy the operator, map GTU to NE and LEU to EQ (any other
;; code is gcc_unreachable), and put the shift count in operands[4].
19437 (define_split
19438 [(set (pc)
19439 (if_then_else
19440 (match_operator 1 "shr_comparison_operator"
19441 [(match_operand:DI 2 "register_operand")
19442 (match_operand 3 "const_int_operand")])
19443 (label_ref (match_operand 0))
19444 (pc)))]
19445 "TARGET_64BIT
19446 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
19447 [(set (reg:CCZ FLAGS_REG)
19448 (compare:CCZ
19449 (lshiftrt:DI (match_dup 2) (match_dup 4))
19450 (const_int 0)))
19451 (set (pc)
19452 (if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)])
19453 (label_ref (match_operand 0))
19454 (pc)))]
19455 {
19456 enum rtx_code new_code;
19457
19458 operands[1] = shallow_copy_rtx (operands[1]);
19459 switch (GET_CODE (operands[1]))
19460 {
19461 case GTU: new_code = NE; break;
19462 case LEU: new_code = EQ; break;
19463 default: gcc_unreachable ();
19464 }
19465 PUT_CODE (operands[1], new_code);
19466
19467 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
19468 })
19469
19470 ;; We ignore the overflow flag for signed branch instructions.
19471
;; Conditional jump to label operand 0 on a flags comparison (operator 1).
;; The mnemonic is "j" plus the condition suffix from %C1; %l0 prints the
;; label.  The "length" attribute is 2 when the distance from this insn
;; to the label is in [-126, 128) and 6 otherwise.
19472 (define_insn "*jcc"
19473 [(set (pc)
19474 (if_then_else (match_operator 1 "ix86_comparison_operator"
19475 [(reg FLAGS_REG) (const_int 0)])
19476 (label_ref (match_operand 0))
19477 (pc)))]
19478 ""
19479 "%!%+j%C1\t%l0"
19480 [(set_attr "type" "ibr")
19481 (set_attr "modrm" "0")
19482 (set (attr "length")
19483 (if_then_else
19484 (and (ge (minus (match_dup 0) (pc))
19485 (const_int -126))
19486 (lt (minus (match_dup 0) (pc))
19487 (const_int 128)))
19488 (const_int 2)
19489 (const_int 6)))])
19490
19491 ;; In general it is not safe to assume too much about CCmode registers,
19492 ;; so simplify-rtx stops when it sees a second one. Under certain
19493 ;; conditions this is safe on x86, so help combine not create
19494 ;;
19495 ;; seta %al
19496 ;; testb %al, %al
19497 ;; je Lfoo
19498
;; A branch on (ne (comparison flags 0) 0) is a branch on the comparison
;; itself.  The operator is copied and its mode reset to VOIDmode before
;; being used directly as the if_then_else condition.
19499 (define_split
19500 [(set (pc)
19501 (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
19502 [(reg FLAGS_REG) (const_int 0)])
19503 (const_int 0))
19504 (label_ref (match_operand 1))
19505 (pc)))]
19506 ""
19507 [(set (pc)
19508 (if_then_else (match_dup 0)
19509 (label_ref (match_dup 1))
19510 (pc)))]
19511 {
19512 operands[0] = shallow_copy_rtx (operands[0]);
19513 PUT_MODE (operands[0], VOIDmode);
19514 })
19515
;; A branch on (eq (comparison flags 0) 0) is a branch on the reversed
;; comparison.  The operator is copied, set to VOIDmode and reversed with
;; ix86_reverse_condition using the mode of the flags operand; the split
;; FAILs if the result is not accepted by ix86_comparison_operator.
19516 (define_split
19517 [(set (pc)
19518 (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
19519 [(reg FLAGS_REG) (const_int 0)])
19520 (const_int 0))
19521 (label_ref (match_operand 1))
19522 (pc)))]
19523 ""
19524 [(set (pc)
19525 (if_then_else (match_dup 0)
19526 (label_ref (match_dup 1))
19527 (pc)))]
19528 {
19529 operands[0] = shallow_copy_rtx (operands[0]);
19530 PUT_MODE (operands[0], VOIDmode);
19531 PUT_CODE (operands[0],
19532 ix86_reverse_condition (GET_CODE (operands[0]),
19533 GET_MODE (XEXP (operands[0], 0))));
19534
19535 /* Make sure that (a) the CCmode we have for the flags is strong
19536 enough for the reversed compare or (b) we have a valid FP compare. */
19537 if (! ix86_comparison_operator (operands[0], VOIDmode))
19538 FAIL;
19539 })
19540 \f
19541 ;; Unconditional and other jump instructions
19542
;; Unconditional direct jump to label operand 0, emitted as "jmp".
;; The "length" attribute is 2 when the distance to the label is in
;; [-126, 128) and 5 otherwise.
19543 (define_insn "jump"
19544 [(set (pc)
19545 (label_ref (match_operand 0)))]
19546 ""
19547 "%!jmp\t%l0"
19548 [(set_attr "type" "ibr")
19549 (set_attr "modrm" "0")
19550 (set (attr "length")
19551 (if_then_else
19552 (and (ge (minus (match_dup 0) (pc))
19553 (const_int -126))
19554 (lt (minus (match_dup 0) (pc))
19555 (const_int 128)))
19556 (const_int 2)
19557 (const_int 5)))])
19558
;; Expander for an indirect jump through operand 0.  For TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER the address is first converted to
;; word_mode with convert_memory_address.  Expanding this pattern also
;; sets cfun->machine->has_local_indirect_jump.
19559 (define_expand "indirect_jump"
19560 [(set (pc) (match_operand 0 "indirect_branch_operand"))]
19561 ""
19562 {
19563 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
19564 operands[0] = convert_memory_address (word_mode, operands[0]);
19565 cfun->machine->has_local_indirect_jump = true;
19566 })
19567
;; Insn for an indirect jump through a word-sized operand; the assembly
;; text is produced by ix86_output_indirect_jmp.  The "type" attribute is
;; "multi" when cfun->machine->indirect_branch_type is anything other
;; than indirect_branch_keep, and "ibr" otherwise.
19568 (define_insn "*indirect_jump"
19569 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
19570 ""
19571 "* return ix86_output_indirect_jmp (operands[0]);"
19572 [(set (attr "type")
19573 (if_then_else (match_test "(cfun->machine->indirect_branch_type
19574 != indirect_branch_keep)")
19575 (const_string "multi")
19576 (const_string "ibr")))
19577 (set_attr "length_immediate" "0")])
19578
;; Expander for a jump through a dispatch table: operand 0 is the value
;; loaded from the table, operand 1 is the table's label.
;; With flag_pic the loaded value is relative and is turned into an
;; absolute address by one of three computations:
;;   - 64-bit or VxWorks RTP:              operand 0 + label address
;;   - Mach-O or HAVE_AS_GOTOFF_IN_DATA:   operand 0 + PIC register
;;   - otherwise:                          PIC register - operand 0
;; As for indirect_jump, the address is converted to word_mode for
;; TARGET_X32 / TARGET_INDIRECT_BRANCH_REGISTER, and
;; cfun->machine->has_local_indirect_jump is set.
19579 (define_expand "tablejump"
19580 [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
19581 (use (label_ref (match_operand 1)))])]
19582 ""
19583 {
19584 /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
19585 relative. Convert the relative address to an absolute address. */
19586 if (flag_pic)
19587 {
19588 rtx op0, op1;
19589 enum rtx_code code;
19590
19591 /* We can't use @GOTOFF for text labels on VxWorks;
19592 see gotoff_operand. */
19593 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19594 {
19595 code = PLUS;
19596 op0 = operands[0];
19597 op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
19598 }
19599 else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
19600 {
19601 code = PLUS;
19602 op0 = operands[0];
19603 op1 = pic_offset_table_rtx;
19604 }
19605 else
19606 {
19607 code = MINUS;
19608 op0 = pic_offset_table_rtx;
19609 op1 = operands[0];
19610 }
19611
19612 operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
19613 OPTAB_DIRECT);
19614 }
19615
19616 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
19617 operands[0] = convert_memory_address (word_mode, operands[0]);
19618 cfun->machine->has_local_indirect_jump = true;
19619 })
19620
;; Insn for a dispatch-table jump: an indirect jump through operand 0
;; that also records a use of the table label (operand 1).  Output and
;; the "type" attribute are computed exactly as in "*indirect_jump".
19621 (define_insn "*tablejump_1"
19622 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
19623 (use (label_ref (match_operand 1)))]
19624 ""
19625 "* return ix86_output_indirect_jmp (operands[0]);"
19626 [(set (attr "type")
19627 (if_then_else (match_test "(cfun->machine->indirect_branch_type
19628 != indirect_branch_keep)")
19629 (const_string "multi")
19630 (const_string "ibr")))
19631 (set_attr "length_immediate" "0")])
19632 \f
19633 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
19634
;; Window: flags-setting insn (a plain set); setcc into QImode reg 1;
;; zero_extend of reg 1 into operand 3.
;; Conditions: reg 1 is dead afterwards or is the same register as
;; operand 3; operand 3 is not mentioned in the flags source (operand 0);
;; and the flags are dead on entry to the window.
;; Replacement: ix86_expand_clear zeroes operand 3 (presumably emitted
;; ahead of the replacement sequence -- confirm), then the flags setter
;; is re-emitted and the setcc writes the strict_low_part of operand 3.
19635 (define_peephole2
19636 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
19637 (set (match_operand:QI 1 "register_operand")
19638 (match_operator:QI 2 "ix86_comparison_operator"
19639 [(reg FLAGS_REG) (const_int 0)]))
19640 (set (match_operand 3 "any_QIreg_operand")
19641 (zero_extend (match_dup 1)))]
19642 "(peep2_reg_dead_p (3, operands[1])
19643 || operands_match_p (operands[1], operands[3]))
19644 && ! reg_overlap_mentioned_p (operands[3], operands[0])
19645 && peep2_regno_dead_p (0, FLAGS_REG)"
19646 [(set (match_dup 4) (match_dup 0))
19647 (set (strict_low_part (match_dup 5))
19648 (match_dup 2))]
19649 {
19650 operands[5] = gen_lowpart (QImode, operands[3]);
19651 ix86_expand_clear (operands[3]);
19652 })
19653
;; As the preceding setcc + zero_extend peephole, but the flags-setting
;; insn is a two-element parallel whose second element is operand 4.
;; Operand 3 must additionally be neither mentioned in nor set by
;; operand 4, since it is cleared before that insn is re-emitted.
19654 (define_peephole2
19655 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
19656 (match_operand 4)])
19657 (set (match_operand:QI 1 "register_operand")
19658 (match_operator:QI 2 "ix86_comparison_operator"
19659 [(reg FLAGS_REG) (const_int 0)]))
19660 (set (match_operand 3 "any_QIreg_operand")
19661 (zero_extend (match_dup 1)))]
19662 "(peep2_reg_dead_p (3, operands[1])
19663 || operands_match_p (operands[1], operands[3]))
19664 && ! reg_overlap_mentioned_p (operands[3], operands[0])
19665 && ! reg_overlap_mentioned_p (operands[3], operands[4])
19666 && ! reg_set_p (operands[3], operands[4])
19667 && peep2_regno_dead_p (0, FLAGS_REG)"
19668 [(parallel [(set (match_dup 5) (match_dup 0))
19669 (match_dup 4)])
19670 (set (strict_low_part (match_dup 6))
19671 (match_dup 2))]
19672 {
19673 operands[6] = gen_lowpart (QImode, operands[3]);
19674 ix86_expand_clear (operands[3]);
19675 })
19676
;; Four-insn window: a plain flags setter; a parallel that sets the
;; flags from operand 1 alongside operand 5; setcc into QImode reg 2;
;; zero_extend of reg 2 into operand 4.
;; The second flags setter must itself read the flags
;; (refers_to_regno_p (FLAGS_REG, operands[1], ...)).  Operand 4 must not
;; be mentioned in operands 0, 1 or 5, nor set by operand 5, and the
;; flags must be dead on entry.  Operand 4 is cleared and the setcc is
;; redirected to its strict_low_part.
19677 (define_peephole2
19678 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
19679 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
19680 (match_operand 5)])
19681 (set (match_operand:QI 2 "register_operand")
19682 (match_operator:QI 3 "ix86_comparison_operator"
19683 [(reg FLAGS_REG) (const_int 0)]))
19684 (set (match_operand 4 "any_QIreg_operand")
19685 (zero_extend (match_dup 2)))]
19686 "(peep2_reg_dead_p (4, operands[2])
19687 || operands_match_p (operands[2], operands[4]))
19688 && ! reg_overlap_mentioned_p (operands[4], operands[0])
19689 && ! reg_overlap_mentioned_p (operands[4], operands[1])
19690 && ! reg_overlap_mentioned_p (operands[4], operands[5])
19691 && ! reg_set_p (operands[4], operands[5])
19692 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
19693 && peep2_regno_dead_p (0, FLAGS_REG)"
19694 [(set (match_dup 6) (match_dup 0))
19695 (parallel [(set (match_dup 7) (match_dup 1))
19696 (match_dup 5)])
19697 (set (strict_low_part (match_dup 8))
19698 (match_dup 3))]
19699 {
19700 operands[8] = gen_lowpart (QImode, operands[4]);
19701 ix86_expand_clear (operands[4]);
19702 })
19703
19704 ;; Similar, but match zero extend with andsi3.
19705
;; Window: plain flags setter; setcc into QImode reg 1; SImode
;; "and with 255" of operand 3 (with flags clobber) acting as the zero
;; extension.  Here the setcc destination and operand 3 must be the same
;; hard register (REGNO equality), operand 3 must not appear in the flags
;; source, and the flags must be dead on entry.  Operand 3 is cleared and
;; the setcc writes its strict_low_part.
19706 (define_peephole2
19707 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
19708 (set (match_operand:QI 1 "register_operand")
19709 (match_operator:QI 2 "ix86_comparison_operator"
19710 [(reg FLAGS_REG) (const_int 0)]))
19711 (parallel [(set (match_operand:SI 3 "any_QIreg_operand")
19712 (and:SI (match_dup 3) (const_int 255)))
19713 (clobber (reg:CC FLAGS_REG))])]
19714 "REGNO (operands[1]) == REGNO (operands[3])
19715 && ! reg_overlap_mentioned_p (operands[3], operands[0])
19716 && peep2_regno_dead_p (0, FLAGS_REG)"
19717 [(set (match_dup 4) (match_dup 0))
19718 (set (strict_low_part (match_dup 5))
19719 (match_dup 2))]
19720 {
19721 operands[5] = gen_lowpart (QImode, operands[3]);
19722 ix86_expand_clear (operands[3]);
19723 })
19724
;; Window: two-element parallel flags setter (second element is
;; operand 4); setcc into QImode reg 1; zero_extend of reg 1 into
;; operand 3 in a parallel with a flags clobber.
;; Conditions mirror the non-clobbering two-element-parallel peephole:
;; reg 1 dead or identical to operand 3; operand 3 unused by operands 0
;; and 4 and not set by operand 4; flags dead on entry.
19725 (define_peephole2
19726 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
19727 (match_operand 4)])
19728 (set (match_operand:QI 1 "register_operand")
19729 (match_operator:QI 2 "ix86_comparison_operator"
19730 [(reg FLAGS_REG) (const_int 0)]))
19731 (parallel [(set (match_operand 3 "any_QIreg_operand")
19732 (zero_extend (match_dup 1)))
19733 (clobber (reg:CC FLAGS_REG))])]
19734 "(peep2_reg_dead_p (3, operands[1])
19735 || operands_match_p (operands[1], operands[3]))
19736 && ! reg_overlap_mentioned_p (operands[3], operands[0])
19737 && ! reg_overlap_mentioned_p (operands[3], operands[4])
19738 && ! reg_set_p (operands[3], operands[4])
19739 && peep2_regno_dead_p (0, FLAGS_REG)"
19740 [(parallel [(set (match_dup 5) (match_dup 0))
19741 (match_dup 4)])
19742 (set (strict_low_part (match_dup 6))
19743 (match_dup 2))]
19744 {
19745 operands[6] = gen_lowpart (QImode, operands[3]);
19746 ix86_expand_clear (operands[3]);
19747 })
19748
;; Four-insn window with a flags-clobbering zero_extend: plain flags
;; setter; parallel flags setter (source operand 1, extra element
;; operand 5) that must itself read the flags; setcc into QImode reg 2;
;; zero_extend of reg 2 into operand 4 with a flags clobber.
;; Operand 4 must not be mentioned in operands 0, 1 or 5, nor set by
;; operand 5; flags must be dead on entry.  Operand 4 is cleared and the
;; setcc is redirected to its strict_low_part.
19749 (define_peephole2
19750 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
19751 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
19752 (match_operand 5)])
19753 (set (match_operand:QI 2 "register_operand")
19754 (match_operator:QI 3 "ix86_comparison_operator"
19755 [(reg FLAGS_REG) (const_int 0)]))
19756 (parallel [(set (match_operand 4 "any_QIreg_operand")
19757 (zero_extend (match_dup 2)))
19758 (clobber (reg:CC FLAGS_REG))])]
19759 "(peep2_reg_dead_p (4, operands[2])
19760 || operands_match_p (operands[2], operands[4]))
19761 && ! reg_overlap_mentioned_p (operands[4], operands[0])
19762 && ! reg_overlap_mentioned_p (operands[4], operands[1])
19763 && ! reg_overlap_mentioned_p (operands[4], operands[5])
19764 && ! reg_set_p (operands[4], operands[5])
19765 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
19766 && peep2_regno_dead_p (0, FLAGS_REG)"
19767 [(set (match_dup 6) (match_dup 0))
19768 (parallel [(set (match_dup 7) (match_dup 1))
19769 (match_dup 5)])
19770 (set (strict_low_part (match_dup 8))
19771 (match_dup 3))]
19772 {
19773 operands[8] = gen_lowpart (QImode, operands[4]);
19774 ix86_expand_clear (operands[4]);
19775 })
19776 \f
19777 ;; Call instructions.
19778
19779 ;; The predicates normally associated with named expanders are not properly
19780 ;; checked for calls. This is a bug in the generic code, but it isn't that
19781 ;; easy to fix. Ignore it for now and be prepared to fix things up.
19782
19783 ;; P6 processors will jump to the address after the decrement when %esp
19784 ;; is used as a call operand, so they will execute the return address as code.
19785 ;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
19786
19787 ;; Register constraint for call instruction.
;; Expands to "l" for SImode and "r" for DImode; referenced as <c> in the
;; operand constraints of the "*call" and "*call_value" patterns.
19788 (define_mode_attr c [(SI "l") (DI "r")])
19789
19790 ;; Call subroutine returning no value.
19791
;; Expander for a call that returns no value.  All the work is delegated
;; to ix86_expand_call, which receives NULL for the return value,
;; operands 0-2, NULL for the pop amount and false for the final
;; (sibcall) argument.  DONE suppresses the RTL template above.
19792 (define_expand "call"
19793 [(call (match_operand:QI 0)
19794 (match_operand 1))
19795 (use (match_operand 2))]
19796 ""
19797 {
19798 ix86_expand_call (NULL, operands[0], operands[1],
19799 operands[2], NULL, false);
19800 DONE;
19801 })
19802
;; Expander for a sibling call that returns no value.  Identical to the
;; "call" expander except that the final argument passed to
;; ix86_expand_call is true.
19803 (define_expand "sibcall"
19804 [(call (match_operand:QI 0)
19805 (match_operand 1))
19806 (use (match_operand 2))]
19807 ""
19808 {
19809 ix86_expand_call (NULL, operands[0], operands[1],
19810 operands[2], NULL, true);
19811 DONE;
19812 })
19813
;; Ordinary (non-sibling) call through a word-sized call_insn_operand.
;; The register alternative of the constraint comes from the <c> mode
;; attribute.  Assembly is produced by ix86_output_call_insn.
19814 (define_insn "*call"
19815 [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
19816 (match_operand 1))]
19817 "!SIBLING_CALL_P (insn)"
19818 "* return ix86_output_call_insn (insn, operands[0]);"
19819 [(set_attr "type" "call")])
19820
19821 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 call whose target is a zero-extended SImode GOT slot.
;; For output, the slot's address (XEXP (operands[0], 0)) is rewrapped as
;; a DImode constant memory reference and passed to
;; ix86_output_call_insn.
19822 (define_insn "*call_got_x32"
19823 [(call (mem:QI (zero_extend:DI
19824 (match_operand:SI 0 "GOT_memory_operand" "Bg")))
19825 (match_operand 1))]
19826 "TARGET_X32"
19827 {
19828 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
19829 return ix86_output_call_insn (insn, fnaddr);
19830 }
19831 [(set_attr "type" "call")])
19832
19833 ;; Since sibcall never returns, we can only use call-clobbered register
19834 ;; as GOT base.
;; 32-bit sibling call through a GOT slot addressed as
;; (base register + GOT32 symbol).  Not used for Mach-O, 64-bit, or
;; when indirect branches must go through a register.
;; For output the address is rebuilt as an SImode constant memory
;; reference and handed to ix86_output_call_insn.
19835 (define_insn "*sibcall_GOT_32"
19836 [(call (mem:QI
19837 (mem:SI (plus:SI
19838 (match_operand:SI 0 "register_no_elim_operand" "U")
19839 (match_operand:SI 1 "GOT32_symbol_operand"))))
19840 (match_operand 2))]
19841 "!TARGET_MACHO
19842 && !TARGET_64BIT
19843 && !TARGET_INDIRECT_BRANCH_REGISTER
19844 && SIBLING_CALL_P (insn)"
19845 {
19846 rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
19847 fnaddr = gen_const_mem (SImode, fnaddr);
19848 return ix86_output_call_insn (insn, fnaddr);
19849 }
19850 [(set_attr "type" "call")])
19851
;; Sibling call through a word-sized sibcall_insn_operand; matched only
;; when SIBLING_CALL_P (insn).  Assembly comes from
;; ix86_output_call_insn.
19852 (define_insn "*sibcall"
19853 [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
19854 (match_operand 1))]
19855 "SIBLING_CALL_P (insn)"
19856 "* return ix86_output_call_insn (insn, operands[0]);"
19857 [(set_attr "type" "call")])
19858
;; Call whose target address is read directly from memory.  The pattern
;; carries an UNSPEC_PEEPSIB marker, which in this chunk is produced only
;; by the load + sibcall peephole2 patterns.  Disabled for TARGET_X32 and
;; TARGET_INDIRECT_BRANCH_REGISTER.
19859 (define_insn "*sibcall_memory"
19860 [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
19861 (match_operand 1))
19862 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
19863 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
19864 "* return ix86_output_call_insn (insn, operands[0]);"
19865 [(set_attr "type" "call")])
19866
;; Fold "load address from memory into a register; sibling-call through
;; that register" into a single sibling call through memory, tagged with
;; UNSPEC_PEEPSIB.  Applies only when the second insn is a sibling call
;; and the register is not mentioned in its CALL_INSN_FUNCTION_USAGE;
;; disabled for TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.
19867 (define_peephole2
19868 [(set (match_operand:W 0 "register_operand")
19869 (match_operand:W 1 "memory_operand"))
19870 (call (mem:QI (match_dup 0))
19871 (match_operand 3))]
19872 "!TARGET_X32
19873 && !TARGET_INDIRECT_BRANCH_REGISTER
19874 && SIBLING_CALL_P (peep2_next_insn (1))
19875 && !reg_mentioned_p (operands[0],
19876 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
19877 [(parallel [(call (mem:QI (match_dup 1))
19878 (match_dup 3))
19879 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
19880
;; Same folding as the two-insn load + sibcall peephole, but with an
;; UNSPECV_BLOCKAGE insn between the load and the call.  The blockage is
;; kept and emitted ahead of the combined memory sibling call; the call
;; is therefore peep2_next_insn (2) in the conditions.
19881 (define_peephole2
19882 [(set (match_operand:W 0 "register_operand")
19883 (match_operand:W 1 "memory_operand"))
19884 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
19885 (call (mem:QI (match_dup 0))
19886 (match_operand 3))]
19887 "!TARGET_X32
19888 && !TARGET_INDIRECT_BRANCH_REGISTER
19889 && SIBLING_CALL_P (peep2_next_insn (2))
19890 && !reg_mentioned_p (operands[0],
19891 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
19892 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
19893 (parallel [(call (mem:QI (match_dup 1))
19894 (match_dup 3))
19895 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
19896
;; 32-bit-only expander for a call combined with a stack-pointer
;; adjustment (operand 3 is added to SP_REG).  Delegates to
;; ix86_expand_call with operand 3 as the pop amount and false as the
;; final argument.
;; NOTE(review): operands[2] is passed to ix86_expand_call although the
;; RTL template here declares only operands 0, 1 and 3 -- confirm how
;; operand 2 is supplied by the caller of this expander.
19897 (define_expand "call_pop"
19898 [(parallel [(call (match_operand:QI 0)
19899 (match_operand:SI 1))
19900 (set (reg:SI SP_REG)
19901 (plus:SI (reg:SI SP_REG)
19902 (match_operand:SI 3)))])]
19903 "!TARGET_64BIT"
19904 {
19905 ix86_expand_call (NULL, operands[0], operands[1],
19906 operands[2], operands[3], false);
19907 DONE;
19908 })
19909
;; 32-bit non-sibling call paired with an immediate adjustment of the
;; stack pointer (operand 2).  Only the call target is passed to
;; ix86_output_call_insn for output.
19910 (define_insn "*call_pop"
19911 [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
19912 (match_operand 1))
19913 (set (reg:SI SP_REG)
19914 (plus:SI (reg:SI SP_REG)
19915 (match_operand:SI 2 "immediate_operand" "i")))]
19916 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
19917 "* return ix86_output_call_insn (insn, operands[0]);"
19918 [(set_attr "type" "call")])
19919
;; 32-bit sibling call paired with an immediate adjustment of the stack
;; pointer (operand 2); the sibling-call counterpart of "*call_pop".
19920 (define_insn "*sibcall_pop"
19921 [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
19922 (match_operand 1))
19923 (set (reg:SI SP_REG)
19924 (plus:SI (reg:SI SP_REG)
19925 (match_operand:SI 2 "immediate_operand" "i")))]
19926 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
19927 "* return ix86_output_call_insn (insn, operands[0]);"
19928 [(set_attr "type" "call")])
19929
;; 32-bit call through a memory operand paired with a stack-pointer
;; adjustment and an UNSPEC_PEEPSIB marker; this is the shape produced by
;; the load + popping-sibcall peephole2 patterns.
19930 (define_insn "*sibcall_pop_memory"
19931 [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
19932 (match_operand 1))
19933 (set (reg:SI SP_REG)
19934 (plus:SI (reg:SI SP_REG)
19935 (match_operand:SI 2 "immediate_operand" "i")))
19936 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
19937 "!TARGET_64BIT"
19938 "* return ix86_output_call_insn (insn, operands[0]);"
19939 [(set_attr "type" "call")])
19940
;; 32-bit: fold "load address into a register; popping sibling call
;; through that register" into one popping sibling call through memory,
;; tagged with UNSPEC_PEEPSIB.  The register must not be mentioned in the
;; call's CALL_INSN_FUNCTION_USAGE.
19941 (define_peephole2
19942 [(set (match_operand:SI 0 "register_operand")
19943 (match_operand:SI 1 "memory_operand"))
19944 (parallel [(call (mem:QI (match_dup 0))
19945 (match_operand 3))
19946 (set (reg:SI SP_REG)
19947 (plus:SI (reg:SI SP_REG)
19948 (match_operand:SI 4 "immediate_operand")))])]
19949 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
19950 && !reg_mentioned_p (operands[0],
19951 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
19952 [(parallel [(call (mem:QI (match_dup 1))
19953 (match_dup 3))
19954 (set (reg:SI SP_REG)
19955 (plus:SI (reg:SI SP_REG)
19956 (match_dup 4)))
19957 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
19958
;; 32-bit: as the two-insn load + popping-sibcall peephole, but with an
;; UNSPECV_BLOCKAGE insn between the load and the call.  The blockage is
;; preserved ahead of the combined call; the call is
;; peep2_next_insn (2) in the conditions.
19959 (define_peephole2
19960 [(set (match_operand:SI 0 "register_operand")
19961 (match_operand:SI 1 "memory_operand"))
19962 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
19963 (parallel [(call (mem:QI (match_dup 0))
19964 (match_operand 3))
19965 (set (reg:SI SP_REG)
19966 (plus:SI (reg:SI SP_REG)
19967 (match_operand:SI 4 "immediate_operand")))])]
19968 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
19969 && !reg_mentioned_p (operands[0],
19970 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
19971 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
19972 (parallel [(call (mem:QI (match_dup 1))
19973 (match_dup 3))
19974 (set (reg:SI SP_REG)
19975 (plus:SI (reg:SI SP_REG)
19976 (match_dup 4)))
19977 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
19978
19979 ;; Combining simple memory jump instruction
19980
;; Fold "load a word from memory into a register; jump through that
;; register" into a jump directly through the memory operand.  Requires
;; the register to be dead after the jump; disabled for TARGET_X32 and
;; TARGET_INDIRECT_BRANCH_REGISTER.
19981 (define_peephole2
19982 [(set (match_operand:W 0 "register_operand")
19983 (match_operand:W 1 "memory_operand"))
19984 (set (pc) (match_dup 0))]
19985 "!TARGET_X32
19986 && !TARGET_INDIRECT_BRANCH_REGISTER
19987 && peep2_reg_dead_p (2, operands[0])"
19988 [(set (pc) (match_dup 1))])
19989
19990 ;; Call subroutine, returning value in operand 0
19991
;; Expander for a call whose result is stored in operand 0.  Delegates to
;; ix86_expand_call with operand 0 as the return value, operands 1-3,
;; NULL for the pop amount and false for the final (sibcall) argument.
19992 (define_expand "call_value"
19993 [(set (match_operand 0)
19994 (call (match_operand:QI 1)
19995 (match_operand 2)))
19996 (use (match_operand 3))]
19997 ""
19998 {
19999 ix86_expand_call (operands[0], operands[1], operands[2],
20000 operands[3], NULL, false);
20001 DONE;
20002 })
20003
;; Expander for a sibling call whose result is stored in operand 0.
;; Identical to "call_value" except that the final argument passed to
;; ix86_expand_call is true.
20004 (define_expand "sibcall_value"
20005 [(set (match_operand 0)
20006 (call (match_operand:QI 1)
20007 (match_operand 2)))
20008 (use (match_operand 3))]
20009 ""
20010 {
20011 ix86_expand_call (operands[0], operands[1], operands[2],
20012 operands[3], NULL, true);
20013 DONE;
20014 })
20015
;; Value-returning form of "*call": an ordinary (non-sibling) call
;; through a word-sized call_insn_operand whose result is set into
;; operand 0.  Type attribute is "callv".
20016 (define_insn "*call_value"
20017 [(set (match_operand 0)
20018 (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
20019 (match_operand 2)))]
20020 "!SIBLING_CALL_P (insn)"
20021 "* return ix86_output_call_insn (insn, operands[1]);"
20022 [(set_attr "type" "callv")])
20023
20024 ;; This covers both call and sibcall since only GOT slot is allowed.
;; Enabled for TARGET_X32.  Operand 1 is an SImode GOT_memory_operand that
;; the pattern zero-extends to DImode.  The output code rebuilds the
;; function address as a DImode constant MEM over the same address
;; (XEXP (operands[1], 0)) and passes it to ix86_output_call_insn.
20025 (define_insn "*call_value_got_x32"
20026 [(set (match_operand 0)
20027 (call (mem:QI
20028 (zero_extend:DI
20029 (match_operand:SI 1 "GOT_memory_operand" "Bg")))
20030 (match_operand 2)))]
20031 "TARGET_X32"
20032 {
20033 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
20034 return ix86_output_call_insn (insn, fnaddr);
20035 }
20036 [(set_attr "type" "callv")])
20037
20038 ;; Since sibcall never returns, we can only use call-clobbered register
20039 ;; as GOT base.
;; Value-returning 32-bit sibling call through a GOT slot addressed as
;; base register (operand 1, constraint "U") plus GOT32 symbol
;; (operand 2).  Enabled only when !TARGET_MACHO, !TARGET_64BIT,
;; !TARGET_INDIRECT_BRANCH_REGISTER and SIBLING_CALL_P (insn).  The output
;; code rebuilds the SImode constant MEM (operand 1 + operand 2) and passes
;; it to ix86_output_call_insn.
20040 (define_insn "*sibcall_value_GOT_32"
20041 [(set (match_operand 0)
20042 (call (mem:QI
20043 (mem:SI (plus:SI
20044 (match_operand:SI 1 "register_no_elim_operand" "U")
20045 (match_operand:SI 2 "GOT32_symbol_operand"))))
20046 (match_operand 3)))]
20047 "!TARGET_MACHO
20048 && !TARGET_64BIT
20049 && !TARGET_INDIRECT_BRANCH_REGISTER
20050 && SIBLING_CALL_P (insn)"
20051 {
20052 rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
20053 fnaddr = gen_const_mem (SImode, fnaddr);
20054 return ix86_output_call_insn (insn, fnaddr);
20055 }
20056 [(set_attr "type" "callv")])
20057
;; Value-returning sibling call (SIBLING_CALL_P).  Operand 1 is the W-mode
;; call address and must satisfy sibcall_insn_operand.  The assembly text
;; is produced by ix86_output_call_insn.
20058 (define_insn "*sibcall_value"
20059 [(set (match_operand 0)
20060 (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
20061 (match_operand 2)))]
20062 "SIBLING_CALL_P (insn)"
20063 "* return ix86_output_call_insn (insn, operands[1]);"
20064 [(set_attr "type" "callv")])
20065
;; Value-returning call whose address is a W-mode memory operand.  The
;; UNSPEC_PEEPSIB element makes this match the form created by the
;; peephole2 patterns that emit (unspec [(const_int 0)] UNSPEC_PEEPSIB).
;; Disabled for TARGET_X32 and for TARGET_INDIRECT_BRANCH_REGISTER.
20066 (define_insn "*sibcall_value_memory"
20067 [(set (match_operand 0)
20068 (call (mem:QI (match_operand:W 1 "memory_operand" "m"))
20069 (match_operand 2)))
20070 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
20071 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
20072 "* return ix86_output_call_insn (insn, operands[1]);"
20073 [(set_attr "type" "callv")])
20074
;; Fold a W-mode load of the call target (memory operand 1 into register
;; operand 0) into the immediately following value-returning sibling call
;; through that register.  Requires !TARGET_X32,
;; !TARGET_INDIRECT_BRANCH_REGISTER, a sibling call, and that operand 0 is
;; not mentioned in the call's CALL_INSN_FUNCTION_USAGE.  The replacement
;; calls through the memory operand and adds an UNSPEC_PEEPSIB marker.
20075 (define_peephole2
20076 [(set (match_operand:W 0 "register_operand")
20077 (match_operand:W 1 "memory_operand"))
20078 (set (match_operand 2)
20079 (call (mem:QI (match_dup 0))
20080 (match_operand 3)))]
20081 "!TARGET_X32
20082 && !TARGET_INDIRECT_BRANCH_REGISTER
20083 && SIBLING_CALL_P (peep2_next_insn (1))
20084 && !reg_mentioned_p (operands[0],
20085 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
20086 [(parallel [(set (match_dup 2)
20087 (call (mem:QI (match_dup 1))
20088 (match_dup 3)))
20089 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
20090
;; Same folding as the two-insn value sibcall peephole, but for the case
;; where a blockage insn sits between the load and the call.  The call is
;; therefore peep2_next_insn (2), and the blockage is re-emitted ahead of
;; the replacement call.
20091 (define_peephole2
20092 [(set (match_operand:W 0 "register_operand")
20093 (match_operand:W 1 "memory_operand"))
20094 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
20095 (set (match_operand 2)
20096 (call (mem:QI (match_dup 0))
20097 (match_operand 3)))]
20098 "!TARGET_X32
20099 && !TARGET_INDIRECT_BRANCH_REGISTER
20100 && SIBLING_CALL_P (peep2_next_insn (2))
20101 && !reg_mentioned_p (operands[0],
20102 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
20103 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
20104 (parallel [(set (match_dup 2)
20105 (call (mem:QI (match_dup 1))
20106 (match_dup 3)))
20107 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
20108
;; Expander for a value-returning call that also adjusts the stack pointer
;; by operand 4.  Only available when !TARGET_64BIT.  Operand 3 does not
;; appear in the RTL template but is forwarded to ix86_expand_call along
;; with operands 0, 1, 2 and 4.  The template itself is never emitted
;; because the preparation code ends with DONE.
20109 (define_expand "call_value_pop"
20110 [(parallel [(set (match_operand 0)
20111 (call (match_operand:QI 1)
20112 (match_operand:SI 2)))
20113 (set (reg:SI SP_REG)
20114 (plus:SI (reg:SI SP_REG)
20115 (match_operand:SI 4)))])]
20116 "!TARGET_64BIT"
20117 {
20118 ix86_expand_call (operands[0], operands[1], operands[2],
20119 operands[3], operands[4], false);
20120 DONE;
20121 })
20122
;; 32-bit, non-sibling value-returning call combined with a stack pointer
;; adjustment by immediate operand 3.  Operand 1 is the SImode call
;; address (call_insn_operand).  Output comes from ix86_output_call_insn.
20123 (define_insn "*call_value_pop"
20124 [(set (match_operand 0)
20125 (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
20126 (match_operand 2)))
20127 (set (reg:SI SP_REG)
20128 (plus:SI (reg:SI SP_REG)
20129 (match_operand:SI 3 "immediate_operand" "i")))]
20130 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
20131 "* return ix86_output_call_insn (insn, operands[1]);"
20132 [(set_attr "type" "callv")])
20133
;; 32-bit value-returning sibling call combined with a stack pointer
;; adjustment by immediate operand 3.  Operand 1 is the SImode call
;; address (sibcall_insn_operand).  Output comes from
;; ix86_output_call_insn.
20134 (define_insn "*sibcall_value_pop"
20135 [(set (match_operand 0)
20136 (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
20137 (match_operand 2)))
20138 (set (reg:SI SP_REG)
20139 (plus:SI (reg:SI SP_REG)
20140 (match_operand:SI 3 "immediate_operand" "i")))]
20141 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
20142 "* return ix86_output_call_insn (insn, operands[1]);"
20143 [(set_attr "type" "callv")])
20144
;; 32-bit value-returning call through an SImode memory operand, combined
;; with a stack pointer adjustment by immediate operand 3.  The
;; UNSPEC_PEEPSIB element makes this match the form created by the
;; stack-adjusting peephole2 patterns that emit UNSPEC_PEEPSIB.
20145 (define_insn "*sibcall_value_pop_memory"
20146 [(set (match_operand 0)
20147 (call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
20148 (match_operand 2)))
20149 (set (reg:SI SP_REG)
20150 (plus:SI (reg:SI SP_REG)
20151 (match_operand:SI 3 "immediate_operand" "i")))
20152 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
20153 "!TARGET_64BIT"
20154 "* return ix86_output_call_insn (insn, operands[1]);"
20155 [(set_attr "type" "callv")])
20156
;; Fold an SImode load of the call target (memory operand 1 into register
;; operand 0) into the immediately following 32-bit value-returning
;; sibling call that also adjusts the stack pointer by operand 4.
;; Requires !TARGET_64BIT, a sibling call, and that operand 0 is not
;; mentioned in the call's CALL_INSN_FUNCTION_USAGE.  The replacement
;; calls through the memory operand and adds an UNSPEC_PEEPSIB marker.
20157 (define_peephole2
20158 [(set (match_operand:SI 0 "register_operand")
20159 (match_operand:SI 1 "memory_operand"))
20160 (parallel [(set (match_operand 2)
20161 (call (mem:QI (match_dup 0))
20162 (match_operand 3)))
20163 (set (reg:SI SP_REG)
20164 (plus:SI (reg:SI SP_REG)
20165 (match_operand:SI 4 "immediate_operand")))])]
20166 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
20167 && !reg_mentioned_p (operands[0],
20168 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
20169 [(parallel [(set (match_dup 2)
20170 (call (mem:QI (match_dup 1))
20171 (match_dup 3)))
20172 (set (reg:SI SP_REG)
20173 (plus:SI (reg:SI SP_REG)
20174 (match_dup 4)))
20175 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
20176
;; Same folding as the two-insn stack-adjusting value sibcall peephole,
;; but for the case where a blockage insn sits between the load and the
;; call.  The call is therefore peep2_next_insn (2), and the blockage is
;; re-emitted ahead of the replacement call.
20177 (define_peephole2
20178 [(set (match_operand:SI 0 "register_operand")
20179 (match_operand:SI 1 "memory_operand"))
20180 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
20181 (parallel [(set (match_operand 2)
20182 (call (mem:QI (match_dup 0))
20183 (match_operand 3)))
20184 (set (reg:SI SP_REG)
20185 (plus:SI (reg:SI SP_REG)
20186 (match_operand:SI 4 "immediate_operand")))])]
20187 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
20188 && !reg_mentioned_p (operands[0],
20189 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
20190 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
20191 (parallel [(set (match_dup 2)
20192 (call (mem:QI (match_dup 1))
20193 (match_dup 3)))
20194 (set (reg:SI SP_REG)
20195 (plus:SI (reg:SI SP_REG)
20196 (match_dup 4)))
20197 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
20198
20199 ;; Call subroutine returning any type.
20200
;; Expander for a call whose return type is unknown.  Operand 0 is the
;; function; operand 2 is a vector of SETs that the code iterates over.
;; Operand 1 is matched but not referenced by the preparation code.
;; Sequence emitted: the call itself (via ix86_expand_call), one move per
;; SET in operand 2 (SET_SRC into SET_DEST), then a blockage insn.  The
;; RTL template is never emitted because the code ends with DONE.
20201 (define_expand "untyped_call"
20202 [(parallel [(call (match_operand 0)
20203 (const_int 0))
20204 (match_operand 1)
20205 (match_operand 2)])]
20206 ""
20207 {
20208 int i;
20209
20210 /* In order to give reg-stack an easier job in validating two
20211 coprocessor registers as containing a possible return value,
20212 simply pretend the untyped call returns a complex long double
20213 value.
20214
20215 We can't use SSE_REGPARM_MAX here since callee is unprototyped
20216 and should have the default ABI. */
20217
20218 ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
20219 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
20220 operands[0], const0_rtx,
20221 GEN_INT ((TARGET_64BIT
20222 ? (ix86_abi == SYSV_ABI
20223 ? X86_64_SSE_REGPARM_MAX
20224 : X86_64_MS_SSE_REGPARM_MAX)
20225 : X86_32_SSE_REGPARM_MAX)
20226 - 1),
20227 NULL, false);
20228
20229 for (i = 0; i < XVECLEN (operands[2], 0); i++)
20230 {
20231 rtx set = XVECEXP (operands[2], 0, i);
20232 emit_move_insn (SET_DEST (set), SET_SRC (set));
20233 }
20234
20235 /* The optimizer does not know that the call sets the function value
20236 registers we stored in the result block. We avoid problems by
20237 claiming that all hard registers are used and clobbered at this
20238 point. */
20239 emit_insn (gen_blockage ());
20240
20241 DONE;
20242 })
20243 \f
20244 ;; Prologue and epilogue instructions
20245
20246 ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
20247 ;; all of memory. This blocks insns from being moved across this point.
20248
;; Consists solely of the UNSPECV_BLOCKAGE unspec_volatile.  It has an
;; empty output template and a declared length of 0, so it produces no
;; assembly.
20249 (define_insn "blockage"
20250 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
20251 ""
20252 ""
20253 [(set_attr "length" "0")])
20254
20255 ;; Do not schedule instructions accessing memory across this point.
20256
;; Expander for a memory-only barrier.  The preparation code builds
;; operand 0 as a BLKmode MEM whose address is a Pmode SCRATCH and marks
;; it volatile.  The template then sets that MEM from an
;; UNSPEC_MEMORY_BLOCKAGE of itself.
20257 (define_expand "memory_blockage"
20258 [(set (match_dup 0)
20259 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
20260 ""
20261 {
20262 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
20263 MEM_VOLATILE_P (operands[0]) = 1;
20264 })
20265
;; Matches the RTL shape produced by the memory_blockage expander.  It has
;; an empty output template and a declared length of 0, so it produces no
;; assembly.
20266 (define_insn "*memory_blockage"
20267 [(set (match_operand:BLK 0)
20268 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
20269 ""
20270 ""
20271 [(set_attr "length" "0")])
20272
20273 ;; As USE insns aren't meaningful after reload, this is used instead
20274 ;; to prevent deleting instructions setting registers for PIC code
;; The insn wraps operand 0 in an UNSPECV_PROLOGUE_USE unspec_volatile.
;; It has an empty output template and a declared length of 0.
20275 (define_insn "prologue_use"
20276 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
20277 ""
20278 ""
20279 [(set_attr "length" "0")])
20280
20281 ;; Insn emitted into the body of a function to return from a function.
20282 ;; This is only done if the function's epilogue is known to be simple.
20283 ;; See comments for ix86_can_use_return_insn_p in i386.cc.
20284
;; Expander for "return", enabled only when ix86_can_use_return_insn_p ()
;; holds.  If crtl->args.pops_args is nonzero it emits
;; simple_return_pop_internal with that count and stops (DONE).
;; Otherwise the code falls through and the (simple_return) template is
;; emitted.
20285 (define_expand "return"
20286 [(simple_return)]
20287 "ix86_can_use_return_insn_p ()"
20288 {
20289 if (crtl->args.pops_args)
20290 {
20291 rtx popc = GEN_INT (crtl->args.pops_args);
20292 emit_jump_insn (gen_simple_return_pop_internal (popc));
20293 DONE;
20294 }
20295 })
20296
20297 ;; We need to disable this for TARGET_SEH, as otherwise
20298 ;; shrink-wrapped prologue gets enabled too. This might exceed
20299 ;; the maximum size of prologue in unwind information.
20300 ;; Also disallow shrink-wrapping if using stack slot to pass the
20301 ;; static chain pointer - the first instruction has to be pushl %esi
20302 ;; and it can't be moved around, as we use alternate entry points
20303 ;; in that case.
20304 ;; Also disallow for ms_hook_prologue functions which have frame
20305 ;; pointer set up in function label which is correctly handled in
20306 ;; ix86_expand_{prologue|epilogue}() only.
20307
;; Expander for "simple_return".  Disabled for TARGET_SEH, when
;; ix86_static_chain_on_stack is set, and for functions for which
;; ix86_function_ms_hook_prologue (cfun->decl) holds.  If
;; crtl->args.pops_args is nonzero it emits simple_return_pop_internal
;; with that count and stops (DONE).  Otherwise the (simple_return)
;; template is emitted.
20308 (define_expand "simple_return"
20309 [(simple_return)]
20310 "!TARGET_SEH && !ix86_static_chain_on_stack && !ix86_function_ms_hook_prologue (cfun->decl)"
20311 {
20312 if (crtl->args.pops_args)
20313 {
20314 rtx popc = GEN_INT (crtl->args.pops_args);
20315 emit_jump_insn (gen_simple_return_pop_internal (popc));
20316 DONE;
20317 }
20318 })
20319
;; Plain return insn, valid only once reload_completed.  The assembly
;; text comes from ix86_output_function_return (false).  Declared length
;; is 1, with no immediate and no modrm byte.
20320 (define_insn "simple_return_internal"
20321 [(simple_return)]
20322 "reload_completed"
20323 "* return ix86_output_function_return (false);"
20324 [(set_attr "length" "1")
20325 (set_attr "atom_unit" "jeu")
20326 (set_attr "length_immediate" "0")
20327 (set_attr "modrm" "0")])
20328
;; Return insn tagged with UNSPEC_INTERRUPT_RETURN, valid only once
;; reload_completed.  Emits "iret" when !TARGET_64BIT.  For TARGET_64BIT
;; it emits "uiret" if TARGET_UINTR and "iretq" otherwise.
20329 (define_insn "interrupt_return"
20330 [(simple_return)
20331 (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
20332 "reload_completed"
20333 {
20334 return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
20335 })
20336
20337 ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
20338 ;; instruction Athlon and K8 have.
20339
;; Return insn tagged with UNSPEC_REP, valid only once reload_completed.
;; The assembly text comes from ix86_output_function_return (true).
;; Declared length is 2 with prefix_rep set to 1.
20340 (define_insn "simple_return_internal_long"
20341 [(simple_return)
20342 (unspec [(const_int 0)] UNSPEC_REP)]
20343 "reload_completed"
20344 "* return ix86_output_function_return (true);"
20345 [(set_attr "length" "2")
20346 (set_attr "atom_unit" "jeu")
20347 (set_attr "length_immediate" "0")
20348 (set_attr "prefix_rep" "1")
20349 (set_attr "modrm" "0")])
20350
;; Return with an immediate operand ("ret %0"), valid only once
;; reload_completed.  Operand 0 is an SImode const_int.  The insn is
;; additionally split, by ix86_split_simple_return_pop_internal, when
;; cfun->machine->function_return_type != indirect_branch_keep.
;; Declared length is 3 with a 2-byte immediate.
20351 (define_insn_and_split "simple_return_pop_internal"
20352 [(simple_return)
20353 (use (match_operand:SI 0 "const_int_operand"))]
20354 "reload_completed"
20355 "ret\t%0"
20356 "&& cfun->machine->function_return_type != indirect_branch_keep"
20357 [(const_int 0)]
20358 "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
20359 [(set_attr "length" "3")
20360 (set_attr "atom_unit" "jeu")
20361 (set_attr "length_immediate" "2")
20362 (set_attr "modrm" "0")])
20363
;; Expander with no condition and no preparation code.  It generates a
;; parallel of (simple_return) and a (use) of register operand 0.
20364 (define_expand "simple_return_indirect_internal"
20365 [(parallel
20366 [(simple_return)
20367 (use (match_operand 0 "register_operand"))])])
20368
;; Return through W-mode register operand 0, valid only once
;; reload_completed.  The assembly text comes from
;; ix86_output_indirect_function_return.  The "type" attribute is "multi"
;; when cfun->machine->indirect_branch_type != indirect_branch_keep and
;; "ibr" otherwise.
20369 (define_insn "*simple_return_indirect_internal<mode>"
20370 [(simple_return)
20371 (use (match_operand:W 0 "register_operand" "r"))]
20372 "reload_completed"
20373 "* return ix86_output_indirect_function_return (operands[0]);"
20374 [(set (attr "type")
20375 (if_then_else (match_test "(cfun->machine->indirect_branch_type
20376 != indirect_branch_keep)")
20377 (const_string "multi")
20378 (const_string "ibr")))
20379 (set_attr "length_immediate" "0")])
20380
;; Emits a single "nop".  The pattern is (const_int 0), there is no
;; enabling condition, and the declared length is 1.
20381 (define_insn "nop"
20382 [(const_int 0)]
20383 ""
20384 "nop"
20385 [(set_attr "length" "1")
20386 (set_attr "length_immediate" "0")
20387 (set_attr "modrm" "0")])
20388
20389 ;; Emit a run of nops.  Operand 0 is a constant count in the range 1..8
;; (asserted).  The "length" attribute equals that count, and the lines
;; are written straight to asm_out_file.
20390 (define_insn "nops"
20391 [(unspec_volatile [(match_operand 0 "const_int_operand")]
20392 UNSPECV_NOPS)]
20393 "reload_completed"
20394 {
20395 const int count = INTVAL (operands[0]);
20396
20397 gcc_assert (IN_RANGE (count, 1, 8));
20398
20399 for (int emitted = 0; emitted < count; emitted++)
20400 fputs ("\tnop\n", asm_out_file);
20401
20402 return "";
20403 }
20404 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
20405 (set_attr "length_immediate" "0")
20406 (set_attr "modrm" "0")])
20407
20408 ;; Pad to 1 << op0 byte boundary, max skip in op1. Used to avoid
20409 ;; branch prediction penalty for the third jump in a 16-byte
20410 ;; block on K8.
20411 ;; Also it's used to align tight loops which can fit into 1 cacheline.
20412 ;; It can help code prefetch and reduce DSB misses.
20413
;; Alignment insn.  When ASM_OUTPUT_MAX_SKIP_ALIGN is defined it is
;; invoked on asm_out_file with INTVAL of operand 0 and INTVAL of
;; operand 1.  Otherwise nothing is written.  The returned template is
;; always empty and the declared length is 16.
20414 (define_insn "max_skip_align"
20415 [(unspec_volatile [(match_operand 0) (match_operand 1)] UNSPECV_ALIGN)]
20416 ""
20417 {
20418 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
20419 ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, (int)INTVAL (operands[0]), (int)INTVAL (operands[1]));
20420 #else
20421 /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
20422 The align insn is used to avoid 3 jump instructions in the row to improve
20423 branch prediction and the benefits hardly outweigh the cost of extra 8
20424 nops on the average inserted by full alignment pseudo operation. */
20425 #endif
20426 return "";
20427 }
20428 [(set_attr "length" "16")])
20429
;; Expander for the function prologue.  All work is delegated to
;; ix86_expand_prologue (); the (const_int 0) template is never emitted.
20430 (define_expand "prologue"
20431 [(const_int 0)]
20432 ""
20433 "ix86_expand_prologue (); DONE;")
20434
;; 32-bit only (!TARGET_64BIT).  Sets SImode register operand 0 from an
;; UNSPEC_SET_GOT unspec and clobbers the flags register.  Before the
;; template is emitted, the preparation code sets
;; ix86_pc_thunk_call_expanded when flag_pic && !TARGET_VXWORKS_RTP.
20435 (define_expand "set_got"
20436 [(parallel
20437 [(set (match_operand:SI 0 "register_operand")
20438 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
20439 (clobber (reg:CC FLAGS_REG))])]
20440 "!TARGET_64BIT"
20441 {
20442 if (flag_pic && !TARGET_VXWORKS_RTP)
20443 ix86_pc_thunk_call_expanded = true;
20444 })
20445
;; Insn matching the set_got expansion.  The assembly text comes from
;; output_set_got (operands[0], NULL_RTX).  Declared as type "multi" with
;; length 12.
20446 (define_insn "*set_got"
20447 [(set (match_operand:SI 0 "register_operand" "=r")
20448 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
20449 (clobber (reg:CC FLAGS_REG))]
20450 "!TARGET_64BIT"
20451 "* return output_set_got (operands[0], NULL_RTX);"
20452 [(set_attr "type" "multi")
20453 (set_attr "length" "12")])
20454
;; Variant of set_got whose UNSPEC_SET_GOT takes a label_ref of
;; operand 1.  32-bit only.  The preparation code sets
;; ix86_pc_thunk_call_expanded when flag_pic && !TARGET_VXWORKS_RTP.
20455 (define_expand "set_got_labelled"
20456 [(parallel
20457 [(set (match_operand:SI 0 "register_operand")
20458 (unspec:SI [(label_ref (match_operand 1))]
20459 UNSPEC_SET_GOT))
20460 (clobber (reg:CC FLAGS_REG))])]
20461 "!TARGET_64BIT"
20462 {
20463 if (flag_pic && !TARGET_VXWORKS_RTP)
20464 ix86_pc_thunk_call_expanded = true;
20465 })
20466
;; Insn matching the set_got_labelled expansion.  The assembly text comes
;; from output_set_got (operands[0], operands[1]).  Declared as type
;; "multi" with length 12.
20467 (define_insn "*set_got_labelled"
20468 [(set (match_operand:SI 0 "register_operand" "=r")
20469 (unspec:SI [(label_ref (match_operand 1))]
20470 UNSPEC_SET_GOT))
20471 (clobber (reg:CC FLAGS_REG))]
20472 "!TARGET_64BIT"
20473 "* return output_set_got (operands[0], operands[1]);"
20474 [(set_attr "type" "multi")
20475 (set_attr "length" "12")])
20476
;; 64-bit only.  Loads the RIP-relative address of _GLOBAL_OFFSET_TABLE_
;; into DImode register operand 0 with a single lea.
20477 (define_insn "set_got_rex64"
20478 [(set (match_operand:DI 0 "register_operand" "=r")
20479 (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
20480 "TARGET_64BIT"
20481 "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
20482 [(set_attr "type" "lea")
20483 (set_attr "length_address" "4")
20484 (set_attr "mode" "DI")])
20485
;; 64-bit only.  Loads the RIP-relative address of the label in operand 1
;; into DImode register operand 0 with a single lea.
20486 (define_insn "set_rip_rex64"
20487 [(set (match_operand:DI 0 "register_operand" "=r")
20488 (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
20489 "TARGET_64BIT"
20490 "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
20491 [(set_attr "type" "lea")
20492 (set_attr "length_address" "4")
20493 (set_attr "mode" "DI")])
20494
;; TARGET_LP64 only.  Loads the constant _GLOBAL_OFFSET_TABLE_ minus the
;; label in operand 1 into DImode register operand 0 with movabs.
20495 (define_insn "set_got_offset_rex64"
20496 [(set (match_operand:DI 0 "register_operand" "=r")
20497 (unspec:DI
20498 [(label_ref (match_operand 1))]
20499 UNSPEC_SET_GOT_OFFSET))]
20500 "TARGET_LP64"
20501 "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
20502 [(set_attr "type" "imov")
20503 (set_attr "length_immediate" "0")
20504 (set_attr "length_address" "8")
20505 (set_attr "mode" "DI")])
20506
;; Expander for the function epilogue.  Delegates to
;; ix86_expand_epilogue with argument 1; the (const_int 0) template is
;; never emitted.
20507 (define_expand "epilogue"
20508 [(const_int 0)]
20509 ""
20510 "ix86_expand_epilogue (1); DONE;")
20511
;; Expander for the epilogue placed before a sibling call.  Delegates to
;; ix86_expand_epilogue with argument 0; the (const_int 0) template is
;; never emitted.
20512 (define_expand "sibcall_epilogue"
20513 [(const_int 0)]
20514 ""
20515 "ix86_expand_epilogue (0); DONE;")
20516
;; Expander for the exception-handling return.  Operand 0 is a register
;; holding the handler address.  The code stores that address into the
;; word just below (arg pointer + EH_RETURN_STACKADJ_RTX), then emits
;; eh_return_internal followed by a barrier.
20517 (define_expand "eh_return"
20518 [(use (match_operand 0 "register_operand"))]
20519 ""
20520 {
20521 rtx handler = operands[0];
20522
20523 /* The handler address is written into the frame being returned to,
20524 one word below the stack address that will be restored, i.e. at
20525 arg pointer + EH_RETURN_STACKADJ_RTX - UNITS_PER_WORD. */
20526 rtx slot_addr = gen_rtx_PLUS (Pmode, arg_pointer_rtx, EH_RETURN_STACKADJ_RTX);
20527 slot_addr = plus_constant (Pmode, slot_addr, -UNITS_PER_WORD);
20528 /* The return-address slot is always accessed in word_mode. */
20529 rtx slot = gen_rtx_MEM (word_mode, slot_addr);
20530 if (GET_MODE (handler) != word_mode)
20531 handler = convert_to_mode (word_mode, handler, 1);
20532 emit_move_insn (slot, handler);
20533
20534 emit_jump_insn (gen_eh_return_internal ());
20535 emit_barrier ();
20536 DONE;
20537 })
20538
;; Placeholder insn for the EH return.  Its output template is "#", so it
;; must be split.  The split is enabled once epilogue_completed and
;; replaces the insn with whatever ix86_expand_epilogue (2) emits.
20539 (define_insn_and_split "eh_return_internal"
20540 [(eh_return)]
20541 ""
20542 "#"
20543 "epilogue_completed"
20544 [(const_int 0)]
20545 "ix86_expand_epilogue (2); DONE;")
20546
;; Expander producing the RTL of a "leave": SP = BP + operand 0,
;; BP = mem[BP], plus a clobber of BLKmode scratch memory.  The
;; preparation code sets operand 0 to <MODE_SIZE>.
20547 (define_expand "@leave_<mode>"
20548 [(parallel
20549 [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
20550 (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
20551 (clobber (mem:BLK (scratch)))])]
20552 ""
20553 "operands[0] = GEN_INT (<MODE_SIZE>);")
20554
;; 32-bit "leave": SP = BP + 4, BP = mem[BP], with a BLKmode scratch
;; memory clobber.  Only matches when !TARGET_64BIT.
20555 (define_insn "*leave"
20556 [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
20557 (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
20558 (clobber (mem:BLK (scratch)))]
20559 "!TARGET_64BIT"
20560 "leave"
20561 [(set_attr "type" "leave")])
20562
;; 64-bit "leave": SP = BP + 8, BP = mem[BP], with a BLKmode scratch
;; memory clobber.  Only matches when TARGET_64BIT.
20563 (define_insn "*leave_rex64"
20564 [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
20565 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
20566 (clobber (mem:BLK (scratch)))]
20567 "TARGET_64BIT"
20568 "leave"
20569 [(set_attr "type" "leave")])
20570 \f
20571 ;; Handle -fsplit-stack.
20572
;; Expander for the split-stack prologue.  Delegates entirely to
;; ix86_expand_split_stack_prologue (); the (const_int 0) template is
;; never emitted.
20573 (define_expand "split_stack_prologue"
20574 [(const_int 0)]
20575 ""
20576 {
20577 ix86_expand_split_stack_prologue ();
20578 DONE;
20579 })
20580
20581 ;; A return expressed as an unspec_volatile, so the middle-end does not
20582 ;; see a return insn; this supports the call/return predictor.
;; Operand 0 is a constant.  Zero selects a bare "ret" (length 1); any
;; other value is printed as the immediate of "ret" (length 3).
20583 (define_insn "split_stack_return"
20584 [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
20585 UNSPECV_SPLIT_STACK_RETURN)]
20586 ""
20587 {
20588 /* Only a nonzero operand is printed as an immediate; zero selects
20589 the bare form of the instruction. */
20590 const bool has_immediate = operands[0] != const0_rtx;
20591 return has_immediate ? "ret\t%0" : "ret";
20592 }
20593 [(set_attr "atom_unit" "jeu")
20594 (set_attr "modrm" "0")
20595 (set (attr "length")
20596 (if_then_else (match_operand:SI 0 "const0_operand")
20597 (const_int 1)
20598 (const_int 3)))
20599 (set (attr "length_immediate")
20600 (if_then_else (match_operand:SI 0 "const0_operand")
20601 (const_int 0)
20602 (const_int 2)))])
20603
20604 ;; If there are operand 0 bytes available on the stack, jump to
20605 ;; operand 1.
20606
;; Branch to the label in operand 1 when (SP - operand 0) is >= the
;; split-stack guard, using an unsigned comparison.  The RTL template
;; below only documents that shape: the preparation code emits the
;; subtraction and the GEU branch itself and ends with DONE, so the
;; template is never emitted.  It uses geu so that it agrees with the
;; branch that is actually generated.
20607 (define_expand "split_stack_space_check"
20608 [(set (pc) (if_then_else
20609 (geu (minus (reg SP_REG)
20610 (match_operand 0 "register_operand"))
20611 (match_dup 2))
20612 (label_ref (match_operand 1))
20613 (pc)))]
20614 ""
20615 {
20616 rtx reg = gen_reg_rtx (Pmode);
20617
20618 emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));
20619
20620 operands[2] = ix86_split_stack_guard ();
20621 ix86_expand_branch (GEU, reg, operands[2], operands[1]);
20622
20623 DONE;
20624 })
20625 \f
20626 ;; Bit manipulation instructions.
20627
;; Expander for ffs on SWI48 modes.  For SImode without TARGET_CMOVE it
;; delegates to ffssi2_no_cmove.  Otherwise the template is emitted:
;;   1. operand 2 (a fresh pseudo) = -1;
;;   2. flags = compare (operand 1, 0) in parallel with
;;      operand 0 = ctz (operand 1);
;;   3. operand 0 = operand 2 if the flags compare equal to 0, else
;;      operand 0 unchanged;
;;   4. operand 0 += 1.
;; The flags mode is CCCmode with TARGET_BMI and CCZmode otherwise.
20628 (define_expand "ffs<mode>2"
20629 [(set (match_dup 2) (const_int -1))
20630 (parallel [(set (match_dup 3) (match_dup 4))
20631 (set (match_operand:SWI48 0 "register_operand")
20632 (ctz:SWI48
20633 (match_operand:SWI48 1 "nonimmediate_operand")))])
20634 (set (match_dup 0) (if_then_else:SWI48
20635 (eq (match_dup 3) (const_int 0))
20636 (match_dup 2)
20637 (match_dup 0)))
20638 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
20639 (clobber (reg:CC FLAGS_REG))])]
20640 ""
20641 {
20642 machine_mode flags_mode;
20643
20644 if (<MODE>mode == SImode && !TARGET_CMOVE)
20645 {
20646 emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
20647 DONE;
20648 }
20649
20650 flags_mode = TARGET_BMI ? CCCmode : CCZmode;
20651
20652 operands[2] = gen_reg_rtx (<MODE>mode);
20653 operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
20654 operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
20655 })
20656
;; SImode ffs for targets without cmov (!TARGET_CMOVE).  Output template
;; is "#", so the insn is always split, once reload_completed.  The
;; preparation code first calls ix86_expand_clear on scratch operand 2.
;; The split sequence is then:
;;   1. flags = compare (operand 1, 0) in parallel with
;;      operand 0 = ctz (operand 1);
;;   2. low byte of the scratch = (flags == 0);
;;   3. scratch = -scratch;
;;   4. operand 0 |= scratch;
;;   5. operand 0 += 1.
;; The flags mode is CCCmode with TARGET_BMI and CCZmode otherwise.
20657 (define_insn_and_split "ffssi2_no_cmove"
20658 [(set (match_operand:SI 0 "register_operand" "=r")
20659 (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
20660 (clobber (match_scratch:SI 2 "=&q"))
20661 (clobber (reg:CC FLAGS_REG))]
20662 "!TARGET_CMOVE"
20663 "#"
20664 "&& reload_completed"
20665 [(parallel [(set (match_dup 4) (match_dup 5))
20666 (set (match_dup 0) (ctz:SI (match_dup 1)))])
20667 (set (strict_low_part (match_dup 3))
20668 (eq:QI (match_dup 4) (const_int 0)))
20669 (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
20670 (clobber (reg:CC FLAGS_REG))])
20671 (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
20672 (clobber (reg:CC FLAGS_REG))])
20673 (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
20674 (clobber (reg:CC FLAGS_REG))])]
20675 {
20676 machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
20677
20678 operands[3] = gen_lowpart (QImode, operands[2]);
20679 operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
20680 operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
20681
20682 ix86_expand_clear (operands[2]);
20683 })
20684
;; tzcnt that also sets the flags in CCCmode (TARGET_BMI only).  When
;; TARGET_AVOID_FALSE_DEP_FOR_BMI is set, the epilogue is complete, the
;; function is optimized for speed and the destination is not mentioned
;; in the source, the insn is split: the preparation code calls
;; ix86_expand_clear on the destination and the insn is rewritten into
;; the form carrying UNSPEC_INSN_FALSE_DEP.
;; The stray ';' that followed the output template has been dropped; it
;; only started an empty md comment.
20685 (define_insn_and_split "*tzcnt<mode>_1"
20686 [(set (reg:CCC FLAGS_REG)
20687 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20688 (const_int 0)))
20689 (set (match_operand:SWI48 0 "register_operand" "=r")
20690 (ctz:SWI48 (match_dup 1)))]
20691 "TARGET_BMI"
20692 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
20693 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20694 && optimize_function_for_speed_p (cfun)
20695 && !reg_mentioned_p (operands[0], operands[1])"
20696 [(parallel
20697 [(set (reg:CCC FLAGS_REG)
20698 (compare:CCC (match_dup 1) (const_int 0)))
20699 (set (match_dup 0)
20700 (ctz:SWI48 (match_dup 1)))
20701 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
20702 "ix86_expand_clear (operands[0]);"
20703 [(set_attr "type" "alu1")
20704 (set_attr "prefix_0f" "1")
20705 (set_attr "prefix_rep" "1")
20706 (set_attr "btver2_decode" "double")
20707 (set_attr "mode" "<MODE>")])
20708
20709 ; False dependency happens when destination is only updated by tzcnt,
20710 ; lzcnt or popcnt. There is no false dependency when destination is
20711 ; also used in source.
;; Flag-setting tzcnt in the form produced by the *tzcnt<mode>_1 split.
;; Operand 2 is tied to the destination (constraint "0") inside
;; UNSPEC_INSN_FALSE_DEP, so the destination counts as an input.
;; The stray ';' that followed the output template has been dropped; it
;; only started an empty md comment.
20712 (define_insn "*tzcnt<mode>_1_falsedep"
20713 [(set (reg:CCC FLAGS_REG)
20714 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20715 (const_int 0)))
20716 (set (match_operand:SWI48 0 "register_operand" "=r")
20717 (ctz:SWI48 (match_dup 1)))
20718 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
20719 UNSPEC_INSN_FALSE_DEP)]
20720 "TARGET_BMI"
20721 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
20722 [(set_attr "type" "alu1")
20723 (set_attr "prefix_0f" "1")
20724 (set_attr "prefix_rep" "1")
20725 (set_attr "btver2_decode" "double")
20726 (set_attr "mode" "<MODE>")])
20727
;; bsf that also sets the flags in CCZmode from comparing operand 1 with
;; zero.  No enabling condition.
20728 (define_insn "*bsf<mode>_1"
20729 [(set (reg:CCZ FLAGS_REG)
20730 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20731 (const_int 0)))
20732 (set (match_operand:SWI48 0 "register_operand" "=r")
20733 (ctz:SWI48 (match_dup 1)))]
20734 ""
20735 "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
20736 [(set_attr "type" "alu1")
20737 (set_attr "prefix_0f" "1")
20738 (set_attr "btver2_decode" "double")
20739 (set_attr "znver1_decode" "vector")
20740 (set_attr "mode" "<MODE>")])
20741
;; Count trailing zeros on SWI48 modes, clobbering the flags.
;; Instruction selection in the output code:
;;   - TARGET_BMI: tzcnt;
;;   - otherwise, when optimizing the function for size: bsf;
;;   - otherwise, for TARGET_CPU_P (GENERIC): "rep bsf";
;;   - otherwise: bsf.
;; The "prefix_rep" attribute follows the same cases.
;; The insn is split into the UNSPEC_INSN_FALSE_DEP form, after
;; ix86_expand_clear on the destination, when (TARGET_BMI or GENERIC),
;; TARGET_AVOID_FALSE_DEP_FOR_BMI, epilogue_completed, the function is
;; optimized for speed, and the destination is not mentioned in the
;; source.
20742 (define_insn_and_split "ctz<mode>2"
20743 [(set (match_operand:SWI48 0 "register_operand" "=r")
20744 (ctz:SWI48
20745 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20746 (clobber (reg:CC FLAGS_REG))]
20747 ""
20748 {
20749 if (TARGET_BMI)
20750 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
20751 else if (optimize_function_for_size_p (cfun))
20752 ;
20753 else if (TARGET_CPU_P (GENERIC))
20754 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
20755 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
20756
20757 return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
20758 }
20759 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
20760 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20761 && optimize_function_for_speed_p (cfun)
20762 && !reg_mentioned_p (operands[0], operands[1])"
20763 [(parallel
20764 [(set (match_dup 0)
20765 (ctz:SWI48 (match_dup 1)))
20766 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20767 (clobber (reg:CC FLAGS_REG))])]
20768 "ix86_expand_clear (operands[0]);"
20769 [(set_attr "type" "alu1")
20770 (set_attr "prefix_0f" "1")
20771 (set (attr "prefix_rep")
20772 (if_then_else
20773 (ior (match_test "TARGET_BMI")
20774 (and (not (match_test "optimize_function_for_size_p (cfun)"))
20775 (match_test "TARGET_CPU_P (GENERIC)")))
20776 (const_string "1")
20777 (const_string "0")))
20778 (set_attr "mode" "<MODE>")])
20779
20780 ; False dependency happens when destination is only updated by tzcnt,
20781 ; lzcnt or popcnt. There is no false dependency when destination is
20782 ; also used in source.
;; ctz in the form produced by the ctz<mode>2 split.  Operand 2 is tied
;; to the destination (constraint "0") inside UNSPEC_INSN_FALSE_DEP.
;; Emits tzcnt with TARGET_BMI and "rep bsf" for TARGET_CPU_P (GENERIC).
;; Any other configuration hits gcc_unreachable ().
20783 (define_insn "*ctz<mode>2_falsedep"
20784 [(set (match_operand:SWI48 0 "register_operand" "=r")
20785 (ctz:SWI48
20786 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20787 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
20788 UNSPEC_INSN_FALSE_DEP)
20789 (clobber (reg:CC FLAGS_REG))]
20790 ""
20791 {
20792 if (TARGET_BMI)
20793 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
20794 else if (TARGET_CPU_P (GENERIC))
20795 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
20796 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
20797 else
20798 gcc_unreachable ();
20799 }
20800 [(set_attr "type" "alu1")
20801 (set_attr "prefix_0f" "1")
20802 (set_attr "prefix_rep" "1")
20803 (set_attr "mode" "<MODE>")])
20804
;; Matches (and:DI (subreg:DI (ctz:SI x) 0) 63) and implements it with a
;; 32-bit tzcnt writing the low half of the DImode destination (%k0).
;; Requires TARGET_BMI && TARGET_64BIT.  It is split into the
;; UNSPEC_INSN_FALSE_DEP form, after ix86_expand_clear on the
;; destination, under the same false-dependency conditions as the other
;; tzcnt patterns.
20805 (define_insn_and_split "*ctzsi2_zext"
20806 [(set (match_operand:DI 0 "register_operand" "=r")
20807 (and:DI
20808 (subreg:DI
20809 (ctz:SI
20810 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
20811 (const_int 63)))
20812 (clobber (reg:CC FLAGS_REG))]
20813 "TARGET_BMI && TARGET_64BIT"
20814 "tzcnt{l}\t{%1, %k0|%k0, %1}"
20815 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20816 && optimize_function_for_speed_p (cfun)
20817 && !reg_mentioned_p (operands[0], operands[1])"
20818 [(parallel
20819 [(set (match_dup 0)
20820 (and:DI (subreg:DI (ctz:SI (match_dup 1)) 0) (const_int 63)))
20821 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20822 (clobber (reg:CC FLAGS_REG))])]
20823 "ix86_expand_clear (operands[0]);"
20824 [(set_attr "type" "alu1")
20825 (set_attr "prefix_0f" "1")
20826 (set_attr "prefix_rep" "1")
20827 (set_attr "mode" "SI")])
20828
20829 ; False dependency happens when destination is only updated by tzcnt,
20830 ; lzcnt or popcnt. There is no false dependency when destination is
20831 ; also used in source.
;; The form produced by the *ctzsi2_zext split.  Operand 2 is tied to
;; the DImode destination (constraint "0") inside UNSPEC_INSN_FALSE_DEP.
;; Emits the same 32-bit tzcnt.
20832 (define_insn "*ctzsi2_zext_falsedep"
20833 [(set (match_operand:DI 0 "register_operand" "=r")
20834 (and:DI
20835 (subreg:DI
20836 (ctz:SI
20837 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
20838 (const_int 63)))
20839 (unspec [(match_operand:DI 2 "register_operand" "0")]
20840 UNSPEC_INSN_FALSE_DEP)
20841 (clobber (reg:CC FLAGS_REG))]
20842 "TARGET_BMI && TARGET_64BIT"
20843 "tzcnt{l}\t{%1, %k0|%k0, %1}"
20844 [(set_attr "type" "alu1")
20845 (set_attr "prefix_0f" "1")
20846 (set_attr "prefix_rep" "1")
20847 (set_attr "mode" "SI")])
20848
;; Sign- or zero-extension (any_extend) to DImode of an SImode ctz,
;; 64-bit only, implemented with a 32-bit instruction writing %k0.
;; Instruction selection: tzcnt with TARGET_BMI; "rep bsf" for
;; TARGET_CPU_P (GENERIC) when not optimizing the function for size;
;; bsf otherwise.  The "prefix_rep" attribute follows the same cases.
;; The insn is split into the UNSPEC_INSN_FALSE_DEP form, after
;; ix86_expand_clear on the destination, when (TARGET_BMI or GENERIC),
;; TARGET_AVOID_FALSE_DEP_FOR_BMI, epilogue_completed, the function is
;; optimized for speed, and the destination is not mentioned in the
;; source.
20849 (define_insn_and_split "*ctzsidi2_<s>ext"
20850 [(set (match_operand:DI 0 "register_operand" "=r")
20851 (any_extend:DI
20852 (ctz:SI
20853 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20854 (clobber (reg:CC FLAGS_REG))]
20855 "TARGET_64BIT"
20856 {
20857 if (TARGET_BMI)
20858 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
20859 else if (TARGET_CPU_P (GENERIC)
20860 && !optimize_function_for_size_p (cfun))
20861 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
20862 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
20863 return "bsf{l}\t{%1, %k0|%k0, %1}";
20864 }
20865 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
20866 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20867 && optimize_function_for_speed_p (cfun)
20868 && !reg_mentioned_p (operands[0], operands[1])"
20869 [(parallel
20870 [(set (match_dup 0)
20871 (any_extend:DI (ctz:SI (match_dup 1))))
20872 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20873 (clobber (reg:CC FLAGS_REG))])]
20874 "ix86_expand_clear (operands[0]);"
20875 [(set_attr "type" "alu1")
20876 (set_attr "prefix_0f" "1")
20877 (set (attr "prefix_rep")
20878 (if_then_else
20879 (ior (match_test "TARGET_BMI")
20880 (and (not (match_test "optimize_function_for_size_p (cfun)"))
20881 (match_test "TARGET_CPU_P (GENERIC)")))
20882 (const_string "1")
20883 (const_string "0")))
20884 (set_attr "mode" "SI")])
20885
;; The form produced by the *ctzsidi2_<s>ext split.  Operand 2 is tied
;; to the DImode destination (constraint "0") inside
;; UNSPEC_INSN_FALSE_DEP.  Emits tzcnt with TARGET_BMI and "rep bsf" for
;; TARGET_CPU_P (GENERIC).  Any other configuration hits
;; gcc_unreachable ().
20886 (define_insn "*ctzsidi2_<s>ext_falsedep"
20887 [(set (match_operand:DI 0 "register_operand" "=r")
20888 (any_extend:DI
20889 (ctz:SI
20890 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20891 (unspec [(match_operand:DI 2 "register_operand" "0")]
20892 UNSPEC_INSN_FALSE_DEP)
20893 (clobber (reg:CC FLAGS_REG))]
20894 "TARGET_64BIT"
20895 {
20896 if (TARGET_BMI)
20897 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
20898 else if (TARGET_CPU_P (GENERIC))
20899 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
20900 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
20901 else
20902 gcc_unreachable ();
20903 }
20904 [(set_attr "type" "alu1")
20905 (set_attr "prefix_0f" "1")
20906 (set_attr "prefix_rep" "1")
20907 (set_attr "mode" "SI")])
20908
;; 64-bit bsr that also sets the flags in CCZmode from comparing
;; operand 1 with zero.  The result is expressed as 63 - clz (operand 1).
20909 (define_insn "bsr_rex64"
20910 [(set (reg:CCZ FLAGS_REG)
20911 (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
20912 (const_int 0)))
20913 (set (match_operand:DI 0 "register_operand" "=r")
20914 (minus:DI (const_int 63)
20915 (clz:DI (match_dup 1))))]
20916 "TARGET_64BIT"
20917 "bsr{q}\t{%1, %0|%0, %1}"
20918 [(set_attr "type" "alu1")
20919 (set_attr "prefix_0f" "1")
20920 (set_attr "znver1_decode" "vector")
20921 (set_attr "mode" "DI")])
20922
;; 64-bit bsr computing 63 - clz (operand 1), with the flags clobbered
;; rather than set to a defined value.  Only enabled when !TARGET_LZCNT.
20923 (define_insn "bsr_rex64_1"
20924 [(set (match_operand:DI 0 "register_operand" "=r")
20925 (minus:DI (const_int 63)
20926 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
20927 (clobber (reg:CC FLAGS_REG))]
20928 "!TARGET_LZCNT && TARGET_64BIT"
20929 "bsr{q}\t{%1, %0|%0, %1}"
20930 [(set_attr "type" "alu1")
20931 (set_attr "prefix_0f" "1")
20932 (set_attr "znver1_decode" "vector")
20933 (set_attr "mode" "DI")])
20934
;; Matches the zero-extension to DImode of an SImode (63 - clz:DI x),
;; where the clz result is taken through an SImode subreg.  Implemented
;; with the same 64-bit bsr.  Only enabled when !TARGET_LZCNT &&
;; TARGET_64BIT.
20935 (define_insn "bsr_rex64_1_zext"
20936 [(set (match_operand:DI 0 "register_operand" "=r")
20937 (zero_extend:DI
20938 (minus:SI (const_int 63)
20939 (subreg:SI
20940 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
20941 0))))
20942 (clobber (reg:CC FLAGS_REG))]
20943 "!TARGET_LZCNT && TARGET_64BIT"
20944 "bsr{q}\t{%1, %0|%0, %1}"
20945 [(set_attr "type" "alu1")
20946 (set_attr "prefix_0f" "1")
20947 (set_attr "znver1_decode" "vector")
20948 (set_attr "mode" "DI")])
20949
;; 32-bit bsr that exposes its flag result: FLAGS_REG (CCZ) is the comparison
;; of operand 1 with zero and operand 0 receives 31 - clz (operand 1).
;; The insn condition is empty, so it is always available.
20950 (define_insn "bsr"
20951 [(set (reg:CCZ FLAGS_REG)
20952 (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
20953 (const_int 0)))
20954 (set (match_operand:SI 0 "register_operand" "=r")
20955 (minus:SI (const_int 31)
20956 (clz:SI (match_dup 1))))]
20957 ""
20958 "bsr{l}\t{%1, %0|%0, %1}"
20959 [(set_attr "type" "alu1")
20960 (set_attr "prefix_0f" "1")
20961 (set_attr "znver1_decode" "vector")
20962 (set_attr "mode" "SI")])
20963
;; 32-bit bsr computing 31 - clz (operand 1); the flags are only clobbered.
;; Enabled when the target does not have LZCNT.
20964 (define_insn "bsr_1"
20965 [(set (match_operand:SI 0 "register_operand" "=r")
20966 (minus:SI (const_int 31)
20967 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20968 (clobber (reg:CC FLAGS_REG))]
20969 "!TARGET_LZCNT"
20970 "bsr{l}\t{%1, %0|%0, %1}"
20971 [(set_attr "type" "alu1")
20972 (set_attr "prefix_0f" "1")
20973 (set_attr "znver1_decode" "vector")
20974 (set_attr "mode" "SI")])
20975
;; 32-bit bsr (31 - clz (operand 1)) whose result is zero-extended into a
;; DImode register; the output template prints the destination with %k0.
;; Flags are clobbered.  Enabled for 64-bit targets that do not have LZCNT.
20976 (define_insn "bsr_zext_1"
20977 [(set (match_operand:DI 0 "register_operand" "=r")
20978 (zero_extend:DI
20979 (minus:SI
20980 (const_int 31)
20981 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
20982 (clobber (reg:CC FLAGS_REG))]
20983 "!TARGET_LZCNT && TARGET_64BIT"
20984 "bsr{l}\t{%1, %k0|%k0, %1}"
20985 [(set_attr "type" "alu1")
20986 (set_attr "prefix_0f" "1")
20987 (set_attr "znver1_decode" "vector")
20988 (set_attr "mode" "SI")])
20989
20990 ; As the result of bsr is undefined for a zero input and lies in the
20991 ; range 0 to 63 for any other input, we can optimize away sign-extends.
;; Pre-reload only pattern (ix86_pre_reload_split) matching
;;   (sign_extend:DI (63 - lowpart:SI (clz:DI op1))) ^ 63.
;; It is always split ("&& 1") into two insns:
;;   1. the flag-setting 63 - clz:DI form, written to a fresh DImode pseudo
;;      (operand 2);
;;   2. a zero-extended SImode xor of that pseudo's low part (operand 3)
;;      with 63, written to operand 0.
20992 (define_insn_and_split "*bsr_rex64_2"
20993 [(set (match_operand:DI 0 "register_operand")
20994 (xor:DI
20995 (sign_extend:DI
20996 (minus:SI
20997 (const_int 63)
20998 (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
20999 0)))
21000 (const_int 63)))
21001 (clobber (reg:CC FLAGS_REG))]
21002 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
21003 "#"
21004 "&& 1"
21005 [(parallel [(set (reg:CCZ FLAGS_REG)
21006 (compare:CCZ (match_dup 1) (const_int 0)))
21007 (set (match_dup 2)
21008 (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
21009 (parallel [(set (match_dup 0)
21010 (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
21011 (clobber (reg:CC FLAGS_REG))])]
21012 {
21013 operands[2] = gen_reg_rtx (DImode);
21014 operands[3] = lowpart_subreg (SImode, operands[2], DImode);
21015 })
21016
;; Pre-reload only pattern matching
;;   sign_extend:DI ((31 - clz:SI (op1)) ^ 31).
;; It is always split ("&& 1") into the flag-setting 31 - clz:SI form, written
;; to a fresh SImode pseudo (operand 2), followed by a zero-extended xor of
;; that pseudo with 31.
21017 (define_insn_and_split "*bsr_2"
21018 [(set (match_operand:DI 0 "register_operand")
21019 (sign_extend:DI
21020 (xor:SI
21021 (minus:SI
21022 (const_int 31)
21023 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
21024 (const_int 31))))
21025 (clobber (reg:CC FLAGS_REG))]
21026 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
21027 "#"
21028 "&& 1"
21029 [(parallel [(set (reg:CCZ FLAGS_REG)
21030 (compare:CCZ (match_dup 1) (const_int 0)))
21031 (set (match_dup 2)
21032 (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
21033 (parallel [(set (match_dup 0)
21034 (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
21035 (clobber (reg:CC FLAGS_REG))])]
21036 "operands[2] = gen_reg_rtx (SImode);")
21037
21038 ; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
21039 ; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
21040 ; in [0, 63] or [0, 31] range.
;; SImode result:  op2 - ((63 - lowpart:SI (clz:DI op1)) ^ 63).
;; Rewritten as 63 - clz:DI (op1) into a fresh DImode pseudo (operand 3),
;; followed by adding the constant op2 - 63 (operand 4, reduced to SImode)
;; to the SImode low part of that pseudo (operand 5).
;; When op2 is exactly 63 the addend would be zero, so the C code instead
;; emits bsr_rex64_1_zext into the pseudo and moves its low part to operand 0.
21041 (define_split
21042 [(set (match_operand:SI 0 "register_operand")
21043 (minus:SI
21044 (match_operand:SI 2 "const_int_operand")
21045 (xor:SI
21046 (minus:SI (const_int 63)
21047 (subreg:SI
21048 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
21049 0))
21050 (const_int 63))))]
21051 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
21052 [(set (match_dup 3)
21053 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
21054 (set (match_dup 0)
21055 (plus:SI (match_dup 5) (match_dup 4)))]
21056 {
21057 operands[3] = gen_reg_rtx (DImode);
21058 operands[5] = lowpart_subreg (SImode, operands[3], DImode);
21059 if (INTVAL (operands[2]) == 63)
21060 {
21061 emit_insn (gen_bsr_rex64_1_zext (operands[3], operands[1]));
21062 emit_move_insn (operands[0], operands[5]);
21063 DONE;
21064 }
21065 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
21066 })
21067
;; SImode result:  op2 - ((31 - clz:SI (op1)) ^ 31).
;; Rewritten as 31 - clz:SI (op1) into a fresh SImode pseudo (operand 3),
;; followed by adding the constant op2 - 31 (operand 4).
;; When op2 is exactly 31 a single bsr_1 into operand 0 is emitted instead.
21068 (define_split
21069 [(set (match_operand:SI 0 "register_operand")
21070 (minus:SI
21071 (match_operand:SI 2 "const_int_operand")
21072 (xor:SI
21073 (minus:SI (const_int 31)
21074 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
21075 (const_int 31))))]
21076 "!TARGET_LZCNT && ix86_pre_reload_split ()"
21077 [(set (match_dup 3)
21078 (minus:SI (const_int 31) (clz:SI (match_dup 1))))
21079 (set (match_dup 0)
21080 (plus:SI (match_dup 3) (match_dup 4)))]
21081 {
21082 if (INTVAL (operands[2]) == 31)
21083 {
21084 emit_insn (gen_bsr_1 (operands[0], operands[1]));
21085 DONE;
21086 }
21087 operands[3] = gen_reg_rtx (SImode);
21088 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
21089 })
21090
;; DImode result:
;;   op2 - ((sign_extend:DI (63 - lowpart:SI (clz:DI op1))) ^ 63).
;; The split condition additionally requires that op2 - 63 is unchanged by
;; trunc_int_for_mode (..., SImode), i.e. that the addend survives truncation
;; to SImode.
;; Rewritten as 63 - clz:DI (op1) into a fresh DImode pseudo (operand 3),
;; followed by adding op2 - 63 (operand 4).
;; When op2 is exactly 63 a single bsr_rex64_1 into operand 0 is emitted.
21091 (define_split
21092 [(set (match_operand:DI 0 "register_operand")
21093 (minus:DI
21094 (match_operand:DI 2 "const_int_operand")
21095 (xor:DI
21096 (sign_extend:DI
21097 (minus:SI (const_int 63)
21098 (subreg:SI
21099 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
21100 0)))
21101 (const_int 63))))]
21102 "!TARGET_LZCNT
21103 && TARGET_64BIT
21104 && ix86_pre_reload_split ()
21105 && ((unsigned HOST_WIDE_INT)
21106 trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
21107 == UINTVAL (operands[2]) - 63)"
21108 [(set (match_dup 3)
21109 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
21110 (set (match_dup 0)
21111 (plus:DI (match_dup 3) (match_dup 4)))]
21112 {
21113 if (INTVAL (operands[2]) == 63)
21114 {
21115 emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
21116 DONE;
21117 }
21118 operands[3] = gen_reg_rtx (DImode);
21119 operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
21120 })
21121
;; DImode result:  op2 - sign_extend:DI ((31 - clz:SI (op1)) ^ 31).
;; The split condition additionally requires that op2 - 31 is unchanged by
;; trunc_int_for_mode (..., SImode).
;; Rewritten as the zero-extended 31 - clz:SI (op1) into a fresh DImode pseudo
;; (operand 3), followed by adding op2 - 31 (operand 4).
;; When op2 is exactly 31 a single bsr_zext_1 into operand 0 is emitted.
21122 (define_split
21123 [(set (match_operand:DI 0 "register_operand")
21124 (minus:DI
21125 (match_operand:DI 2 "const_int_operand")
21126 (sign_extend:DI
21127 (xor:SI
21128 (minus:SI (const_int 31)
21129 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
21130 (const_int 31)))))]
21131 "!TARGET_LZCNT
21132 && TARGET_64BIT
21133 && ix86_pre_reload_split ()
21134 && ((unsigned HOST_WIDE_INT)
21135 trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
21136 == UINTVAL (operands[2]) - 31)"
21137 [(set (match_dup 3)
21138 (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
21139 (set (match_dup 0)
21140 (plus:DI (match_dup 3) (match_dup 4)))]
21141 {
21142 if (INTVAL (operands[2]) == 31)
21143 {
21144 emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
21145 DONE;
21146 }
21147 operands[3] = gen_reg_rtx (DImode);
21148 operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
21149 })
21150
;; Expander for clz over the modes of the SWI48 iterator.
;; With TARGET_LZCNT it emits clz<mode>2_lzcnt directly and is done.
;; Otherwise operand 2 is the constant (bit size of the mode - 1) and
;; operand 3 a fresh pseudo.  The template then emits two insns:
;;   1. operand 3 = operand 2 - clz (operand 1), also setting CCZ from the
;;      comparison of operand 1 with zero;
;;   2. operand 0 = operand 3 ^ operand 2, with the flags clobbered.
21151 (define_expand "clz<mode>2"
21152 [(parallel
21153 [(set (reg:CCZ FLAGS_REG)
21154 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21155 (const_int 0)))
21156 (set (match_dup 3) (minus:SWI48
21157 (match_dup 2)
21158 (clz:SWI48 (match_dup 1))))])
21159 (parallel
21160 [(set (match_operand:SWI48 0 "register_operand")
21161 (xor:SWI48 (match_dup 3) (match_dup 2)))
21162 (clobber (reg:CC FLAGS_REG))])]
21163 ""
21164 {
21165 if (TARGET_LZCNT)
21166 {
21167 emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
21168 DONE;
21169 }
21170 operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
21171 operands[3] = gen_reg_rtx (<MODE>mode);
21172 })
21173
;; lzcnt emitted with the "{nf}" prefix; the pattern has no flags clobber.
;; Requires TARGET_APX_NF and TARGET_LZCNT.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21174 (define_insn_and_split "clz<mode>2_lzcnt_nf"
21175 [(set (match_operand:SWI48 0 "register_operand" "=r")
21176 (clz:SWI48
21177 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
21178 "TARGET_APX_NF && TARGET_LZCNT"
21179 "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
21180 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21181 && optimize_function_for_speed_p (cfun)
21182 && !reg_mentioned_p (operands[0], operands[1])"
21183 [(parallel
21184 [(set (match_dup 0)
21185 (clz:SWI48 (match_dup 1)))
21186 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
21187 "ix86_expand_clear (operands[0]);"
21188 [(set_attr "prefix_rep" "1")
21189 (set_attr "type" "bitmanip")
21190 (set_attr "mode" "<MODE>")])
21191
;; lzcnt with the flags clobbered; requires TARGET_LZCNT.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21192 (define_insn_and_split "clz<mode>2_lzcnt"
21193 [(set (match_operand:SWI48 0 "register_operand" "=r")
21194 (clz:SWI48
21195 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21196 (clobber (reg:CC FLAGS_REG))]
21197 "TARGET_LZCNT"
21198 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
21199 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21200 && optimize_function_for_speed_p (cfun)
21201 && !reg_mentioned_p (operands[0], operands[1])"
21202 [(parallel
21203 [(set (match_dup 0)
21204 (clz:SWI48 (match_dup 1)))
21205 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21206 (clobber (reg:CC FLAGS_REG))])]
21207 "ix86_expand_clear (operands[0]);"
21208 [(set_attr "prefix_rep" "1")
21209 (set_attr "type" "bitmanip")
21210 (set_attr "has_nf" "1")
21211 (set_attr "mode" "<MODE>")])
21212
21213 ; False dependency happens when destination is only updated by tzcnt,
21214 ; lzcnt or popcnt. There is no false dependency when destination is
21215 ; also used in source.
;; "{nf}" lzcnt carrying an UNSPEC_INSN_FALSE_DEP use of operand 2, which the
;; "0" constraint ties to the destination.  No flags clobber.
;; Requires TARGET_APX_NF and TARGET_LZCNT.
21216 (define_insn "*clz<mode>2_lzcnt_falsedep_nf"
21217 [(set (match_operand:SWI48 0 "register_operand" "=r")
21218 (clz:SWI48
21219 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21220 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
21221 UNSPEC_INSN_FALSE_DEP)]
21222 "TARGET_APX_NF && TARGET_LZCNT"
21223 "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
21224 [(set_attr "prefix_rep" "1")
21225 (set_attr "type" "bitmanip")
21226 (set_attr "mode" "<MODE>")])
21227
;; lzcnt carrying an UNSPEC_INSN_FALSE_DEP use of operand 2, which the "0"
;; constraint ties to the destination.  Flags are clobbered.
;; Requires TARGET_LZCNT.
21228 (define_insn "*clz<mode>2_lzcnt_falsedep"
21229 [(set (match_operand:SWI48 0 "register_operand" "=r")
21230 (clz:SWI48
21231 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21232 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
21233 UNSPEC_INSN_FALSE_DEP)
21234 (clobber (reg:CC FLAGS_REG))]
21235 "TARGET_LZCNT"
21236 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
21237 [(set_attr "prefix_rep" "1")
21238 (set_attr "type" "bitmanip")
21239 (set_attr "has_nf" "1")
21240 (set_attr "mode" "<MODE>")])
21241
;; 32-bit lzcnt whose DImode result is described as
;;   (subreg:DI (clz:SI op1) 0) & 63.
;; Requires TARGET_LZCNT and TARGET_64BIT; flags are clobbered.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21242 (define_insn_and_split "*clzsi2_lzcnt_zext"
21243 [(set (match_operand:DI 0 "register_operand" "=r")
21244 (and:DI
21245 (subreg:DI
21246 (clz:SI
21247 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
21248 (const_int 63)))
21249 (clobber (reg:CC FLAGS_REG))]
21250 "TARGET_LZCNT && TARGET_64BIT"
21251 "lzcnt{l}\t{%1, %k0|%k0, %1}"
21252 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21253 && optimize_function_for_speed_p (cfun)
21254 && !reg_mentioned_p (operands[0], operands[1])"
21255 [(parallel
21256 [(set (match_dup 0)
21257 (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
21258 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21259 (clobber (reg:CC FLAGS_REG))])]
21260 "ix86_expand_clear (operands[0]);"
21261 [(set_attr "prefix_rep" "1")
21262 (set_attr "type" "bitmanip")
21263 (set_attr "mode" "SI")])
21264
21265 ; False dependency happens when destination is only updated by tzcnt,
21266 ; lzcnt or popcnt. There is no false dependency when destination is
21267 ; also used in source.
;; 32-bit lzcnt with DImode result (subreg:DI (clz:SI op1) 0) & 63, carrying
;; an UNSPEC_INSN_FALSE_DEP use of operand 2, which the "0" constraint ties
;; to the destination.  Flags are clobbered.
;; Operand 1 is plain SImode: it sits directly under clz:SI and the pattern
;; name has no <mode>, so a mode iterator here would only instantiate an
;; extra pattern with a DImode operand inside clz:SI.  The condition includes
;; TARGET_64BIT to agree with *clzsi2_lzcnt_zext, the same pattern without
;; the false-dependency unspec.
21268 (define_insn "*clzsi2_lzcnt_zext_falsedep"
21269 [(set (match_operand:DI 0 "register_operand" "=r")
21270 (and:DI
21271 (subreg:DI
21272 (clz:SI
21273 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
21274 (const_int 63)))
21275 (unspec [(match_operand:DI 2 "register_operand" "0")]
21276 UNSPEC_INSN_FALSE_DEP)
21277 (clobber (reg:CC FLAGS_REG))]
21278 "TARGET_LZCNT && TARGET_64BIT"
21279 "lzcnt{l}\t{%1, %k0|%k0, %1}"
21280 [(set_attr "prefix_rep" "1")
21281 (set_attr "type" "bitmanip")
21282 (set_attr "mode" "SI")])
21283
;; 32-bit lzcnt whose result is zero-extended into a DImode register.
;; Requires TARGET_LZCNT and TARGET_64BIT; flags are clobbered.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21284 (define_insn_and_split "*clzsi2_lzcnt_zext_2"
21285 [(set (match_operand:DI 0 "register_operand" "=r")
21286 (zero_extend:DI
21287 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
21288 (clobber (reg:CC FLAGS_REG))]
21289 "TARGET_LZCNT && TARGET_64BIT"
21290 "lzcnt{l}\t{%1, %k0|%k0, %1}"
21291 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21292 && optimize_function_for_speed_p (cfun)
21293 && !reg_mentioned_p (operands[0], operands[1])"
21294 [(parallel
21295 [(set (match_dup 0)
21296 (zero_extend:DI (clz:SI (match_dup 1))))
21297 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21298 (clobber (reg:CC FLAGS_REG))])]
21299 "ix86_expand_clear (operands[0]);"
21300 [(set_attr "prefix_rep" "1")
21301 (set_attr "type" "bitmanip")
21302 (set_attr "mode" "SI")])
21303
21304 ; False dependency happens when destination is only updated by tzcnt,
21305 ; lzcnt or popcnt. There is no false dependency when destination is
21306 ; also used in source.
;; 32-bit lzcnt zero-extended into a DImode register, carrying an
;; UNSPEC_INSN_FALSE_DEP use of operand 2, which the "0" constraint ties to
;; the destination.  Flags are clobbered.
;; Operand 1 is plain SImode: it sits directly under clz:SI and the pattern
;; name has no <mode>, so a mode iterator here would only instantiate an
;; extra pattern with a DImode operand inside clz:SI.  The condition includes
;; TARGET_64BIT to agree with *clzsi2_lzcnt_zext_2, the same pattern without
;; the false-dependency unspec.
21307 (define_insn "*clzsi2_lzcnt_zext_2_falsedep"
21308 [(set (match_operand:DI 0 "register_operand" "=r")
21309 (zero_extend:DI
21310 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
21311 (unspec [(match_operand:DI 2 "register_operand" "0")]
21312 UNSPEC_INSN_FALSE_DEP)
21313 (clobber (reg:CC FLAGS_REG))]
21314 "TARGET_LZCNT && TARGET_64BIT"
21315 "lzcnt{l}\t{%1, %k0|%k0, %1}"
21316 [(set_attr "prefix_rep" "1")
21317 (set_attr "type" "bitmanip")
21318 (set_attr "mode" "SI")])
21319
;; Int iterator over the two count-zero unspecs; each entry carries its own
;; enabling condition (TARGET_BMI for tzcnt, TARGET_LZCNT for lzcnt).
21320 (define_int_iterator LT_ZCNT
21321 [(UNSPEC_TZCNT "TARGET_BMI")
21322 (UNSPEC_LZCNT "TARGET_LZCNT")])
21323
;; Mnemonic for each LT_ZCNT unspec; used in pattern names and in the
;; output templates.
21324 (define_int_attr lt_zcnt
21325 [(UNSPEC_TZCNT "tzcnt")
21326 (UNSPEC_LZCNT "lzcnt")])
21327
;; Value of the "type" insn attribute for each LT_ZCNT unspec.
21328 (define_int_attr lt_zcnt_type
21329 [(UNSPEC_TZCNT "alu1")
21330 (UNSPEC_LZCNT "bitmanip")])
21331
21332 ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version
21333 ;; provides operand size as output when source operand is zero.
21334
;; "{nf}" form of the intrinsic lzcnt/tzcnt unspec; no flags clobber.
;; Requires TARGET_APX_NF in addition to the per-unspec condition attached
;; by the LT_ZCNT iterator.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21335 (define_insn_and_split "<lt_zcnt>_<mode>_nf"
21336 [(set (match_operand:SWI48 0 "register_operand" "=r")
21337 (unspec:SWI48
21338 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
21339 "TARGET_APX_NF"
21340 "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
21341 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21342 && optimize_function_for_speed_p (cfun)
21343 && !reg_mentioned_p (operands[0], operands[1])"
21344 [(parallel
21345 [(set (match_dup 0)
21346 (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
21347 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
21348 "ix86_expand_clear (operands[0]);"
21349 [(set_attr "type" "<lt_zcnt_type>")
21350 (set_attr "prefix_0f" "1")
21351 (set_attr "prefix_rep" "1")
21352 (set_attr "mode" "<MODE>")])
21353
;; Intrinsic lzcnt/tzcnt unspec with the flags clobbered.  The insn condition
;; is empty; enabling comes from the per-unspec conditions attached by the
;; LT_ZCNT iterator.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21354 (define_insn_and_split "<lt_zcnt>_<mode>"
21355 [(set (match_operand:SWI48 0 "register_operand" "=r")
21356 (unspec:SWI48
21357 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
21358 (clobber (reg:CC FLAGS_REG))]
21359 ""
21360 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
21361 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21362 && optimize_function_for_speed_p (cfun)
21363 && !reg_mentioned_p (operands[0], operands[1])"
21364 [(parallel
21365 [(set (match_dup 0)
21366 (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
21367 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21368 (clobber (reg:CC FLAGS_REG))])]
21369 "ix86_expand_clear (operands[0]);"
21370 [(set_attr "type" "<lt_zcnt_type>")
21371 (set_attr "prefix_0f" "1")
21372 (set_attr "prefix_rep" "1")
21373 (set_attr "has_nf" "1")
21374 (set_attr "mode" "<MODE>")])
21375
21376 ; False dependency happens when destination is only updated by tzcnt,
21377 ; lzcnt or popcnt. There is no false dependency when destination is
21378 ; also used in source.
;; "{nf}" intrinsic lzcnt/tzcnt carrying an UNSPEC_INSN_FALSE_DEP use of
;; operand 2, which the "0" constraint ties to the destination.  No flags
;; clobber.  Requires TARGET_APX_NF plus the LT_ZCNT per-unspec condition.
21379 (define_insn "*<lt_zcnt>_<mode>_falsedep_nf"
21380 [(set (match_operand:SWI48 0 "register_operand" "=r")
21381 (unspec:SWI48
21382 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
21383 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
21384 UNSPEC_INSN_FALSE_DEP)]
21385 "TARGET_APX_NF"
21386 "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
21387 [(set_attr "type" "<lt_zcnt_type>")
21388 (set_attr "prefix_0f" "1")
21389 (set_attr "prefix_rep" "1")
21390 (set_attr "mode" "<MODE>")])
21391
;; Intrinsic lzcnt/tzcnt carrying an UNSPEC_INSN_FALSE_DEP use of operand 2,
;; which the "0" constraint ties to the destination.  Flags are clobbered.
;; The insn condition is empty; enabling comes from the LT_ZCNT iterator.
21392 (define_insn "*<lt_zcnt>_<mode>_falsedep"
21393 [(set (match_operand:SWI48 0 "register_operand" "=r")
21394 (unspec:SWI48
21395 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
21396 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
21397 UNSPEC_INSN_FALSE_DEP)
21398 (clobber (reg:CC FLAGS_REG))]
21399 ""
21400 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
21401 [(set_attr "type" "<lt_zcnt_type>")
21402 (set_attr "prefix_0f" "1")
21403 (set_attr "prefix_rep" "1")
21404 (set_attr "has_nf" "1")
21405 (set_attr "mode" "<MODE>")])
21406
;; HImode lzcnt/tzcnt intrinsic unspec, emitted with the {w} suffix.
;; <nf_name>, <nf_condition> and <nf_prefix> are attributes defined outside
;; this part of the file.
;; NOTE(review): presumably they belong to the APX no-flags substitution that
;; derives the flag-clobbering variant from this template -- confirm against
;; their definitions.
21407 (define_insn "<lt_zcnt>_hi<nf_name>"
21408 [(set (match_operand:HI 0 "register_operand" "=r")
21409 (unspec:HI
21410 [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
21411 "<nf_condition>"
21412 "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}"
21413 [(set_attr "type" "<lt_zcnt_type>")
21414 (set_attr "prefix_0f" "1")
21415 (set_attr "prefix_rep" "1")
21416 (set_attr "has_nf" "1")
21417 (set_attr "mode" "HI")])
21418
21419 ;; BMI instructions.
21420
;; BEXTR modelled as UNSPEC_BEXTR of operand 1 (register or memory
;; alternative) and register operand 2; flags are clobbered.
;; btver2_decode is given per alternative ("direct" for the register source,
;; "double" for the memory source).  Requires TARGET_BMI.
21421 (define_insn "bmi_bextr_<mode>"
21422 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
21423 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
21424 (match_operand:SWI48 2 "register_operand" "r,r")]
21425 UNSPEC_BEXTR))
21426 (clobber (reg:CC FLAGS_REG))]
21427 "TARGET_BMI"
21428 "bextr\t{%2, %1, %0|%0, %1, %2}"
21429 [(set_attr "type" "bitmanip")
21430 (set_attr "btver2_decode" "direct, double")
21431 (set_attr "mode" "<MODE>")])
21432
;; BEXTR used only for its flag result: FLAGS_REG (CCZ) is the comparison of
;; the UNSPEC_BEXTR value with zero, and the register result goes to a
;; scratch (operand 0).  Requires TARGET_BMI.
21433 (define_insn "*bmi_bextr_<mode>_ccz"
21434 [(set (reg:CCZ FLAGS_REG)
21435 (compare:CCZ
21436 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
21437 (match_operand:SWI48 2 "register_operand" "r,r")]
21438 UNSPEC_BEXTR)
21439 (const_int 0)))
21440 (clobber (match_scratch:SWI48 0 "=r,r"))]
21441 "TARGET_BMI"
21442 "bextr\t{%2, %1, %0|%0, %1, %2}"
21443 [(set_attr "type" "bitmanip")
21444 (set_attr "btver2_decode" "direct, double")
21445 (set_attr "mode" "<MODE>")])
21446
;; BLSI: operand 0 = (-operand 1) & operand 1.  Flags are clobbered.
;; Requires TARGET_BMI.
21447 (define_insn "*bmi_blsi_<mode>"
21448 [(set (match_operand:SWI48 0 "register_operand" "=r")
21449 (and:SWI48
21450 (neg:SWI48
21451 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
21452 (match_dup 1)))
21453 (clobber (reg:CC FLAGS_REG))]
21454 "TARGET_BMI"
21455 "blsi\t{%1, %0|%0, %1}"
21456 [(set_attr "type" "bitmanip")
21457 (set_attr "btver2_decode" "double")
21458 (set_attr "mode" "<MODE>")])
21459
;; BLSI that both sets the flags from comparing (-op1) & op1 with zero and
;; stores that value in operand 0.  The flags mode is left open in the
;; template and checked with ix86_match_ccmode (insn, CCNOmode).
;; Requires TARGET_BMI.
21460 (define_insn "*bmi_blsi_<mode>_cmp"
21461 [(set (reg FLAGS_REG)
21462 (compare
21463 (and:SWI48
21464 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
21465 (match_dup 1))
21466 (const_int 0)))
21467 (set (match_operand:SWI48 0 "register_operand" "=r")
21468 (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
21469 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
21470 "blsi\t{%1, %0|%0, %1}"
21471 [(set_attr "type" "bitmanip")
21472 (set_attr "btver2_decode" "double")
21473 (set_attr "mode" "<MODE>")])
21474
;; BLSI used only for its flag result: the flags are set from comparing
;; (-op1) & op1 with zero and the register result goes to a scratch.
;; The flags mode is checked with ix86_match_ccmode (insn, CCNOmode).
;; Requires TARGET_BMI.
21475 (define_insn "*bmi_blsi_<mode>_ccno"
21476 [(set (reg FLAGS_REG)
21477 (compare
21478 (and:SWI48
21479 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
21480 (match_dup 1))
21481 (const_int 0)))
21482 (clobber (match_scratch:SWI48 0 "=r"))]
21483 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
21484 "blsi\t{%1, %0|%0, %1}"
21485 [(set_attr "type" "bitmanip")
21486 (set_attr "btver2_decode" "double")
21487 (set_attr "mode" "<MODE>")])
21488
;; BLSMSK: operand 0 = (operand 1 - 1) ^ operand 1.  Flags are clobbered.
;; Requires TARGET_BMI.
21489 (define_insn "*bmi_blsmsk_<mode>"
21490 [(set (match_operand:SWI48 0 "register_operand" "=r")
21491 (xor:SWI48
21492 (plus:SWI48
21493 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21494 (const_int -1))
21495 (match_dup 1)))
21496 (clobber (reg:CC FLAGS_REG))]
21497 "TARGET_BMI"
21498 "blsmsk\t{%1, %0|%0, %1}"
21499 [(set_attr "type" "bitmanip")
21500 (set_attr "btver2_decode" "double")
21501 (set_attr "mode" "<MODE>")])
21502
;; BLSR: operand 0 = (operand 1 - 1) & operand 1.  Flags are clobbered.
;; Requires TARGET_BMI.
21503 (define_insn "*bmi_blsr_<mode>"
21504 [(set (match_operand:SWI48 0 "register_operand" "=r")
21505 (and:SWI48
21506 (plus:SWI48
21507 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21508 (const_int -1))
21509 (match_dup 1)))
21510 (clobber (reg:CC FLAGS_REG))]
21511 "TARGET_BMI"
21512 "blsr\t{%1, %0|%0, %1}"
21513 [(set_attr "type" "bitmanip")
21514 (set_attr "btver2_decode" "double")
21515 (set_attr "mode" "<MODE>")])
21516
;; BLSR that both sets FLAGS_REG (CCZ) from comparing (op1 - 1) & op1 with
;; zero and stores that value in operand 0.  Requires TARGET_BMI.
21517 (define_insn "*bmi_blsr_<mode>_cmp"
21518 [(set (reg:CCZ FLAGS_REG)
21519 (compare:CCZ
21520 (and:SWI48
21521 (plus:SWI48
21522 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21523 (const_int -1))
21524 (match_dup 1))
21525 (const_int 0)))
21526 (set (match_operand:SWI48 0 "register_operand" "=r")
21527 (and:SWI48
21528 (plus:SWI48
21529 (match_dup 1)
21530 (const_int -1))
21531 (match_dup 1)))]
21532 "TARGET_BMI"
21533 "blsr\t{%1, %0|%0, %1}"
21534 [(set_attr "type" "bitmanip")
21535 (set_attr "btver2_decode" "double")
21536 (set_attr "mode" "<MODE>")])
21537
;; BLSR used only for its flag result: FLAGS_REG (CCZ) is set from comparing
;; (op1 - 1) & op1 with zero and the register result goes to a scratch.
;; Requires TARGET_BMI.
21538 (define_insn "*bmi_blsr_<mode>_ccz"
21539 [(set (reg:CCZ FLAGS_REG)
21540 (compare:CCZ
21541 (and:SWI48
21542 (plus:SWI48
21543 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21544 (const_int -1))
21545 (match_dup 1))
21546 (const_int 0)))
21547 (clobber (match_scratch:SWI48 0 "=r"))]
21548 "TARGET_BMI"
21549 "blsr\t{%1, %0|%0, %1}"
21550 [(set_attr "type" "bitmanip")
21551 (set_attr "btver2_decode" "double")
21552 (set_attr "mode" "<MODE>")])
21553
21554 ;; BMI2 instructions.
;; Expander for BZHI.  The C code narrows operand 2 to QImode with
;; gen_lowpart and sets operand 3 to the bit width of the mode
;; (<MODE_SIZE> * BITS_PER_UNIT).  The emitted RTL is:
;;   op0 = (op2 != 0) ? zero_extract (op1, umin (op2, op3), 0) : 0
;; with the flags clobbered.  Requires TARGET_BMI2.
21555 (define_expand "bmi2_bzhi_<mode>3"
21556 [(parallel
21557 [(set (match_operand:SWI48 0 "register_operand")
21558 (if_then_else:SWI48
21559 (ne:QI (match_operand:QI 2 "register_operand")
21560 (const_int 0))
21561 (zero_extract:SWI48
21562 (match_operand:SWI48 1 "nonimmediate_operand")
21563 (umin:QI (match_dup 2) (match_dup 3))
21564 (const_int 0))
21565 (const_int 0)))
21566 (clobber (reg:CC FLAGS_REG))])]
21567 "TARGET_BMI2"
21568 {
21569 operands[2] = gen_lowpart (QImode, operands[2]);
21570 operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
21571 })
21572
;; BZHI insn:
;;   op0 = (op2 != 0) ? zero_extract (op1, umin (op2, op3), 0) : 0.
;; The condition requires operand 3 to equal the bit width of the mode.
;; Operand 2 is a QImode register printed at operand width via %<k>2.
;; Flags are clobbered.  Requires TARGET_BMI2.
;; NOTE(review): operand 2 uses the "q" constraint here while the other bzhi
;; patterns in this file use "r" -- confirm whether that is intentional.
21573 (define_insn "*bmi2_bzhi_<mode>3"
21574 [(set (match_operand:SWI48 0 "register_operand" "=r")
21575 (if_then_else:SWI48
21576 (ne:QI (match_operand:QI 2 "register_operand" "q")
21577 (const_int 0))
21578 (zero_extract:SWI48
21579 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21580 (umin:QI (match_dup 2)
21581 (match_operand:QI 3 "const_int_operand"))
21582 (const_int 0))
21583 (const_int 0)))
21584 (clobber (reg:CC FLAGS_REG))]
21585 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
21586 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
21587 [(set_attr "type" "bitmanip")
21588 (set_attr "prefix" "vex")
21589 (set_attr "mode" "<MODE>")])
21590
;; BZHI used only for its flag result: FLAGS_REG (CCZ) is set from comparing
;; the bzhi value with zero and the register result goes to a scratch.
;; The condition requires operand 3 to equal the bit width of the mode.
;; Requires TARGET_BMI2.
21591 (define_insn "*bmi2_bzhi_<mode>3_1_ccz"
21592 [(set (reg:CCZ FLAGS_REG)
21593 (compare:CCZ
21594 (if_then_else:SWI48
21595 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
21596 (zero_extract:SWI48
21597 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21598 (umin:QI (match_dup 2)
21599 (match_operand:QI 3 "const_int_operand"))
21600 (const_int 0))
21601 (const_int 0))
21602 (const_int 0)))
21603 (clobber (match_scratch:SWI48 0 "=r"))]
21604 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
21605 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
21606 [(set_attr "type" "bitmanip")
21607 (set_attr "prefix" "vex")
21608 (set_attr "mode" "<MODE>")])
21609
;; BZHI matched from the mask idiom  op0 = ((1 << op2) - 1) & op1.
;; Flags are clobbered.  Requires TARGET_BMI2.
21610 (define_insn "*bmi2_bzhi_<mode>3_2"
21611 [(set (match_operand:SWI48 0 "register_operand" "=r")
21612 (and:SWI48
21613 (plus:SWI48
21614 (ashift:SWI48 (const_int 1)
21615 (match_operand:QI 2 "register_operand" "r"))
21616 (const_int -1))
21617 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21618 (clobber (reg:CC FLAGS_REG))]
21619 "TARGET_BMI2"
21620 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
21621 [(set_attr "type" "bitmanip")
21622 (set_attr "prefix" "vex")
21623 (set_attr "mode" "<MODE>")])
21624
;; BZHI matched from the mask idiom  op0 = ~(-1 << op2) & op1.
;; Flags are clobbered.  Requires TARGET_BMI2.
21625 (define_insn "*bmi2_bzhi_<mode>3_3"
21626 [(set (match_operand:SWI48 0 "register_operand" "=r")
21627 (and:SWI48
21628 (not:SWI48
21629 (ashift:SWI48 (const_int -1)
21630 (match_operand:QI 2 "register_operand" "r")))
21631 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21632 (clobber (reg:CC FLAGS_REG))]
21633 "TARGET_BMI2"
21634 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
21635 [(set_attr "type" "bitmanip")
21636 (set_attr "prefix" "vex")
21637 (set_attr "mode" "<MODE>")])
21638
;; BZHI for  op0:DI = zero_extend (((1 << op2) - 1) & op1:SI).
;; All three operands are printed with %q, i.e. as 64-bit operands, and the
;; "mode" attribute is DI.  Flags are clobbered.
;; Requires TARGET_64BIT and TARGET_BMI2.
21639 (define_insn "*bmi2_bzhi_zero_extendsidi_4"
21640 [(set (match_operand:DI 0 "register_operand" "=r")
21641 (zero_extend:DI
21642 (and:SI
21643 (plus:SI
21644 (ashift:SI (const_int 1)
21645 (match_operand:QI 2 "register_operand" "r"))
21646 (const_int -1))
21647 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
21648 (clobber (reg:CC FLAGS_REG))]
21649 "TARGET_64BIT && TARGET_BMI2"
21650 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
21651 [(set_attr "type" "bitmanip")
21652 (set_attr "prefix" "vex")
21653 (set_attr "mode" "DI")])
21654
;; BZHI for  op0:DI = zero_extend:DI ((1 << op2) - 1 in SImode) & op1:DI.
;; All three operands are printed with %q.  Flags are clobbered.
;; Requires TARGET_64BIT and TARGET_BMI2.
21655 (define_insn "*bmi2_bzhi_zero_extendsidi_5"
21656 [(set (match_operand:DI 0 "register_operand" "=r")
21657 (and:DI
21658 (zero_extend:DI
21659 (plus:SI
21660 (ashift:SI (const_int 1)
21661 (match_operand:QI 2 "register_operand" "r"))
21662 (const_int -1)))
21663 (match_operand:DI 1 "nonimmediate_operand" "rm")))
21664 (clobber (reg:CC FLAGS_REG))]
21665 "TARGET_64BIT && TARGET_BMI2"
21666 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
21667 [(set_attr "type" "bitmanip")
21668 (set_attr "prefix" "vex")
21669 (set_attr "mode" "DI")])
21670
;; PDEP modelled as UNSPEC_PDEP of register operand 1 and register-or-memory
;; operand 2.  The pattern has no flags clobber.  Requires TARGET_BMI2.
21671 (define_insn "bmi2_pdep_<mode>3"
21672 [(set (match_operand:SWI48 0 "register_operand" "=r")
21673 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
21674 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
21675 UNSPEC_PDEP))]
21676 "TARGET_BMI2"
21677 "pdep\t{%2, %1, %0|%0, %1, %2}"
21678 [(set_attr "type" "bitmanip")
21679 (set_attr "prefix" "vex")
21680 (set_attr "mode" "<MODE>")])
21681
;; PEXT modelled as UNSPEC_PEXT of register operand 1 and register-or-memory
;; operand 2.  The pattern has no flags clobber.  Requires TARGET_BMI2.
21682 (define_insn "bmi2_pext_<mode>3"
21683 [(set (match_operand:SWI48 0 "register_operand" "=r")
21684 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
21685 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
21686 UNSPEC_PEXT))]
21687 "TARGET_BMI2"
21688 "pext\t{%2, %1, %0|%0, %1, %2}"
21689 [(set_attr "type" "bitmanip")
21690 (set_attr "prefix" "vex")
21691 (set_attr "mode" "<MODE>")])
21692
21693 ;; TBM instructions.
;; TBM BEXTR with an immediate control word: a zero_extract of operand 1 with
;; size operand 2 and position operand 3 (both constants in 0..255).
;; The output code packs the two into one immediate, (size << 8) | position,
;; overwriting operands[2] before printing.  Flags are clobbered.
;; Requires TARGET_TBM.
21694 (define_insn "@tbm_bextri_<mode>"
21695 [(set (match_operand:SWI48 0 "register_operand" "=r")
21696 (zero_extract:SWI48
21697 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21698 (match_operand:QI 2 "const_0_to_255_operand")
21699 (match_operand:QI 3 "const_0_to_255_operand")))
21700 (clobber (reg:CC FLAGS_REG))]
21701 "TARGET_TBM"
21702 {
21703 operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
21704 return "bextr\t{%2, %1, %0|%0, %1, %2}";
21705 }
21706 [(set_attr "type" "bitmanip")
21707 (set_attr "mode" "<MODE>")])
21708
;; BLCFILL: operand 0 = (operand 1 + 1) & operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21709 (define_insn "*tbm_blcfill_<mode>"
21710 [(set (match_operand:SWI48 0 "register_operand" "=r")
21711 (and:SWI48
21712 (plus:SWI48
21713 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21714 (const_int 1))
21715 (match_dup 1)))
21716 (clobber (reg:CC FLAGS_REG))]
21717 "TARGET_TBM"
21718 "blcfill\t{%1, %0|%0, %1}"
21719 [(set_attr "type" "bitmanip")
21720 (set_attr "mode" "<MODE>")])
21721
;; BLCI: operand 0 = ~(operand 1 + 1) | operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21722 (define_insn "*tbm_blci_<mode>"
21723 [(set (match_operand:SWI48 0 "register_operand" "=r")
21724 (ior:SWI48
21725 (not:SWI48
21726 (plus:SWI48
21727 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21728 (const_int 1)))
21729 (match_dup 1)))
21730 (clobber (reg:CC FLAGS_REG))]
21731 "TARGET_TBM"
21732 "blci\t{%1, %0|%0, %1}"
21733 [(set_attr "type" "bitmanip")
21734 (set_attr "mode" "<MODE>")])
21735
;; BLCIC: operand 0 = (operand 1 + 1) & ~operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21736 (define_insn "*tbm_blcic_<mode>"
21737 [(set (match_operand:SWI48 0 "register_operand" "=r")
21738 (and:SWI48
21739 (plus:SWI48
21740 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21741 (const_int 1))
21742 (not:SWI48
21743 (match_dup 1))))
21744 (clobber (reg:CC FLAGS_REG))]
21745 "TARGET_TBM"
21746 "blcic\t{%1, %0|%0, %1}"
21747 [(set_attr "type" "bitmanip")
21748 (set_attr "mode" "<MODE>")])
21749
;; BLCMSK: operand 0 = (operand 1 + 1) ^ operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21750 (define_insn "*tbm_blcmsk_<mode>"
21751 [(set (match_operand:SWI48 0 "register_operand" "=r")
21752 (xor:SWI48
21753 (plus:SWI48
21754 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21755 (const_int 1))
21756 (match_dup 1)))
21757 (clobber (reg:CC FLAGS_REG))]
21758 "TARGET_TBM"
21759 "blcmsk\t{%1, %0|%0, %1}"
21760 [(set_attr "type" "bitmanip")
21761 (set_attr "mode" "<MODE>")])
21762
;; BLCS: operand 0 = (operand 1 + 1) | operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21763 (define_insn "*tbm_blcs_<mode>"
21764 [(set (match_operand:SWI48 0 "register_operand" "=r")
21765 (ior:SWI48
21766 (plus:SWI48
21767 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21768 (const_int 1))
21769 (match_dup 1)))
21770 (clobber (reg:CC FLAGS_REG))]
21771 "TARGET_TBM"
21772 "blcs\t{%1, %0|%0, %1}"
21773 [(set_attr "type" "bitmanip")
21774 (set_attr "mode" "<MODE>")])
21775
;; BLSFILL: operand 0 = (operand 1 - 1) | operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21776 (define_insn "*tbm_blsfill_<mode>"
21777 [(set (match_operand:SWI48 0 "register_operand" "=r")
21778 (ior:SWI48
21779 (plus:SWI48
21780 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21781 (const_int -1))
21782 (match_dup 1)))
21783 (clobber (reg:CC FLAGS_REG))]
21784 "TARGET_TBM"
21785 "blsfill\t{%1, %0|%0, %1}"
21786 [(set_attr "type" "bitmanip")
21787 (set_attr "mode" "<MODE>")])
21788
;; BLSIC: operand 0 = (operand 1 - 1) | ~operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21789 (define_insn "*tbm_blsic_<mode>"
21790 [(set (match_operand:SWI48 0 "register_operand" "=r")
21791 (ior:SWI48
21792 (plus:SWI48
21793 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21794 (const_int -1))
21795 (not:SWI48
21796 (match_dup 1))))
21797 (clobber (reg:CC FLAGS_REG))]
21798 "TARGET_TBM"
21799 "blsic\t{%1, %0|%0, %1}"
21800 [(set_attr "type" "bitmanip")
21801 (set_attr "mode" "<MODE>")])
21802
;; T1MSKC: operand 0 = (operand 1 + 1) | ~operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21803 (define_insn "*tbm_t1mskc_<mode>"
21804 [(set (match_operand:SWI48 0 "register_operand" "=r")
21805 (ior:SWI48
21806 (plus:SWI48
21807 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21808 (const_int 1))
21809 (not:SWI48
21810 (match_dup 1))))
21811 (clobber (reg:CC FLAGS_REG))]
21812 "TARGET_TBM"
21813 "t1mskc\t{%1, %0|%0, %1}"
21814 [(set_attr "type" "bitmanip")
21815 (set_attr "mode" "<MODE>")])
21816
;; TZMSK: operand 0 = (operand 1 - 1) & ~operand 1.  Flags are clobbered.
;; Requires TARGET_TBM.
21817 (define_insn "*tbm_tzmsk_<mode>"
21818 [(set (match_operand:SWI48 0 "register_operand" "=r")
21819 (and:SWI48
21820 (plus:SWI48
21821 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
21822 (const_int -1))
21823 (not:SWI48
21824 (match_dup 1))))
21825 (clobber (reg:CC FLAGS_REG))]
21826 "TARGET_TBM"
21827 "tzmsk\t{%1, %0|%0, %1}"
21828 [(set_attr "type" "bitmanip")
21829 (set_attr "mode" "<MODE>")])
21830
;; "{nf}" popcnt; the pattern has no flags clobber.
;; Requires TARGET_APX_NF and TARGET_POPCNT.
;; Under TARGET_MACHO the mnemonic is printed without a size suffix.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21831 (define_insn_and_split "popcount<mode>2_nf"
21832 [(set (match_operand:SWI48 0 "register_operand" "=r")
21833 (popcount:SWI48
21834 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
21835 "TARGET_APX_NF && TARGET_POPCNT"
21836 {
21837 #if TARGET_MACHO
21838 return "%{nf%} popcnt\t{%1, %0|%0, %1}";
21839 #else
21840 return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
21841 #endif
21842 }
21843 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21844 && optimize_function_for_speed_p (cfun)
21845 && !reg_mentioned_p (operands[0], operands[1])"
21846 [(parallel
21847 [(set (match_dup 0)
21848 (popcount:SWI48 (match_dup 1)))
21849 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
21850 "ix86_expand_clear (operands[0]);"
21851 [(set_attr "prefix_rep" "1")
21852 (set_attr "type" "bitmanip")
21853 (set_attr "mode" "<MODE>")])
21854
;; popcnt with the flags clobbered; requires TARGET_POPCNT.
;; Under TARGET_MACHO the mnemonic is printed without a size suffix.
;; Split condition: false-dependency avoidance is enabled, the epilogue has
;; been completed, the function is optimized for speed and operand 0 is not
;; mentioned in operand 1.  The split then calls ix86_expand_clear on the
;; destination and re-emits the insn with an UNSPEC_INSN_FALSE_DEP use of it.
21855 (define_insn_and_split "popcount<mode>2"
21856 [(set (match_operand:SWI48 0 "register_operand" "=r")
21857 (popcount:SWI48
21858 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21859 (clobber (reg:CC FLAGS_REG))]
21860 "TARGET_POPCNT"
21861 {
21862 #if TARGET_MACHO
21863 return "popcnt\t{%1, %0|%0, %1}";
21864 #else
21865 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
21866 #endif
21867 }
21868 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21869 && optimize_function_for_speed_p (cfun)
21870 && !reg_mentioned_p (operands[0], operands[1])"
21871 [(parallel
21872 [(set (match_dup 0)
21873 (popcount:SWI48 (match_dup 1)))
21874 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21875 (clobber (reg:CC FLAGS_REG))])]
21876 "ix86_expand_clear (operands[0]);"
21877 [(set_attr "prefix_rep" "1")
21878 (set_attr "type" "bitmanip")
21879 (set_attr "has_nf" "1")
21880 (set_attr "mode" "<MODE>")])
21881
21882 ; False dependency happens when destination is only updated by tzcnt,
21883 ; lzcnt or popcnt. There is no false dependency when destination is
21884 ; also used in source.
;; {nf}-prefixed popcount carrying an UNSPEC_INSN_FALSE_DEP on operand 2,
;; which is constrained ("0") to be the same register as operand 0.
;; This has the shape of the parallel generated by the split of
;; popcount<mode>2_nf.  Requires TARGET_APX_NF and TARGET_POPCNT; there is
;; no FLAGS_REG clobber in the pattern.
21885 (define_insn "*popcount<mode>2_falsedep_nf"
21886 [(set (match_operand:SWI48 0 "register_operand" "=r")
21887 (popcount:SWI48
21888 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21889 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
21890 UNSPEC_INSN_FALSE_DEP)]
21891 "TARGET_APX_NF && TARGET_POPCNT"
21892 {
21893 #if TARGET_MACHO
21894 return "%{nf%} popcnt\t{%1, %0|%0, %1}";
21895 #else
21896 return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
21897 #endif
21898 }
21899 [(set_attr "prefix_rep" "1")
21900 (set_attr "type" "bitmanip")
21901 (set_attr "mode" "<MODE>")])
21902
;; popcount carrying an UNSPEC_INSN_FALSE_DEP on operand 2, which is
;; constrained ("0") to be the same register as operand 0.  This has the
;; shape of the parallel generated by the split of popcount<mode>2.
;; Requires TARGET_POPCNT; the pattern declares FLAGS_REG clobbered.
21903 (define_insn "*popcount<mode>2_falsedep"
21904 [(set (match_operand:SWI48 0 "register_operand" "=r")
21905 (popcount:SWI48
21906 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
21907 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
21908 UNSPEC_INSN_FALSE_DEP)
21909 (clobber (reg:CC FLAGS_REG))]
21910 "TARGET_POPCNT"
21911 {
21912 #if TARGET_MACHO
21913 return "popcnt\t{%1, %0|%0, %1}";
21914 #else
21915 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
21916 #endif
21917 }
21918 [(set_attr "prefix_rep" "1")
21919 (set_attr "type" "bitmanip")
21920 (set_attr "has_nf" "1")
21921 (set_attr "mode" "<MODE>")])
21922
;; SImode popcount whose result is taken as a DImode subreg and masked
;; with 63, stored in DImode operand 0.  Output is a single popcnt{l}
;; that prints operand 0 through the %k modifier; the insn mode is SI.
;; Requires TARGET_POPCNT and TARGET_64BIT; FLAGS_REG is clobbered.
;; Split condition and action mirror the other popcount splitters here:
;; ix86_expand_clear on operand 0, then the same set with an added
;; UNSPEC_INSN_FALSE_DEP on operand 0.
21923 (define_insn_and_split "*popcountsi2_zext"
21924 [(set (match_operand:DI 0 "register_operand" "=r")
21925 (and:DI
21926 (subreg:DI
21927 (popcount:SI
21928 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
21929 (const_int 63)))
21930 (clobber (reg:CC FLAGS_REG))]
21931 "TARGET_POPCNT && TARGET_64BIT"
21932 {
21933 #if TARGET_MACHO
21934 return "popcnt\t{%1, %k0|%k0, %1}";
21935 #else
21936 return "popcnt{l}\t{%1, %k0|%k0, %1}";
21937 #endif
21938 }
21939 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21940 && optimize_function_for_speed_p (cfun)
21941 && !reg_mentioned_p (operands[0], operands[1])"
21942 [(parallel
21943 [(set (match_dup 0)
21944 (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
21945 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21946 (clobber (reg:CC FLAGS_REG))])]
21947 "ix86_expand_clear (operands[0]);"
21948 [(set_attr "prefix_rep" "1")
21949 (set_attr "type" "bitmanip")
21950 (set_attr "mode" "SI")])
21951
21952 ; False dependency happens when destination is only updated by tzcnt,
21953 ; lzcnt or popcnt. There is no false dependency when destination is
21954 ; also used in source.
;; Same set as *popcountsi2_zext plus an UNSPEC_INSN_FALSE_DEP on DImode
;; operand 2, which is constrained ("0") to be the register of operand 0.
;; Requires TARGET_POPCNT and TARGET_64BIT; FLAGS_REG is clobbered.
21955 (define_insn "*popcountsi2_zext_falsedep"
21956 [(set (match_operand:DI 0 "register_operand" "=r")
21957 (and:DI
21958 (subreg:DI
21959 (popcount:SI
21960 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
21961 (const_int 63)))
21962 (unspec [(match_operand:DI 2 "register_operand" "0")]
21963 UNSPEC_INSN_FALSE_DEP)
21964 (clobber (reg:CC FLAGS_REG))]
21965 "TARGET_POPCNT && TARGET_64BIT"
21966 {
21967 #if TARGET_MACHO
21968 return "popcnt\t{%1, %k0|%k0, %1}";
21969 #else
21970 return "popcnt{l}\t{%1, %k0|%k0, %1}";
21971 #endif
21972 }
21973 [(set_attr "prefix_rep" "1")
21974 (set_attr "type" "bitmanip")
21975 (set_attr "mode" "SI")])
21976
;; zero_extend:DI of an SImode popcount, stored in DImode operand 0.
;; Output is a single popcnt{l} printing operand 0 through the %k
;; modifier; the insn mode is SI.  Requires TARGET_POPCNT and
;; TARGET_64BIT; FLAGS_REG is clobbered.
;; The split (same condition as the other popcount splitters here) calls
;; ix86_expand_clear on operand 0 and re-emits the set with an added
;; UNSPEC_INSN_FALSE_DEP on operand 0.
21977 (define_insn_and_split "*popcountsi2_zext_2"
21978 [(set (match_operand:DI 0 "register_operand" "=r")
21979 (zero_extend:DI
21980 (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
21981 (clobber (reg:CC FLAGS_REG))]
21982 "TARGET_POPCNT && TARGET_64BIT"
21983 {
21984 #if TARGET_MACHO
21985 return "popcnt\t{%1, %k0|%k0, %1}";
21986 #else
21987 return "popcnt{l}\t{%1, %k0|%k0, %1}";
21988 #endif
21989 }
21990 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
21991 && optimize_function_for_speed_p (cfun)
21992 && !reg_mentioned_p (operands[0], operands[1])"
21993 [(parallel
21994 [(set (match_dup 0)
21995 (zero_extend:DI (popcount:SI (match_dup 1))))
21996 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
21997 (clobber (reg:CC FLAGS_REG))])]
21998 "ix86_expand_clear (operands[0]);"
21999 [(set_attr "prefix_rep" "1")
22000 (set_attr "type" "bitmanip")
22001 (set_attr "mode" "SI")])
22002
22003 ; False dependency happens when destination is only updated by tzcnt,
22004 ; lzcnt or popcnt. There is no false dependency when destination is
22005 ; also used in source.
;; Same set as *popcountsi2_zext_2 plus an UNSPEC_INSN_FALSE_DEP on DImode
;; operand 2, which is constrained ("0") to be the register of operand 0.
;; Requires TARGET_POPCNT and TARGET_64BIT; FLAGS_REG is clobbered.
22006 (define_insn "*popcountsi2_zext_2_falsedep"
22007 [(set (match_operand:DI 0 "register_operand" "=r")
22008 (zero_extend:DI
22009 (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
22010 (unspec [(match_operand:DI 2 "register_operand" "0")]
22011 UNSPEC_INSN_FALSE_DEP)
22012 (clobber (reg:CC FLAGS_REG))]
22013 "TARGET_POPCNT && TARGET_64BIT"
22014 {
22015 #if TARGET_MACHO
22016 return "popcnt\t{%1, %k0|%k0, %1}";
22017 #else
22018 return "popcnt{l}\t{%1, %k0|%k0, %1}";
22019 #endif
22020 }
22021 [(set_attr "prefix_rep" "1")
22022 (set_attr "type" "bitmanip")
22023 (set_attr "mode" "SI")])
22024
;; SImode popcount of a zero-extended HImode operand.  Matches only while
;; ix86_pre_reload_split () holds and is never output directly ("#").
;; It is always split ("&& 1") into popcounthi2 on a fresh HImode pseudo
;; followed by zero_extendhisi2 of that pseudo into operand 0.
22025 (define_insn_and_split "*popcounthi2_1"
22026 [(set (match_operand:SI 0 "register_operand")
22027 (popcount:SI
22028 (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
22029 (clobber (reg:CC FLAGS_REG))]
22030 "TARGET_POPCNT
22031 && ix86_pre_reload_split ()"
22032 "#"
22033 "&& 1"
22034 [(const_int 0)]
22035 {
22036 rtx tmp = gen_reg_rtx (HImode);
22037
22038 emit_insn (gen_popcounthi2 (tmp, operands[1]));
22039 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
22040 DONE;
22041 })
22042
;; zero_extend:SI of an HImode popcount.  Matches only while
;; ix86_pre_reload_split () holds and is never output directly ("#").
;; It is always split ("&& 1") into popcounthi2 on a fresh HImode pseudo
;; followed by zero_extendhisi2 of that pseudo into operand 0.
22043 (define_insn_and_split "*popcounthi2_2"
22044 [(set (match_operand:SI 0 "register_operand")
22045 (zero_extend:SI
22046 (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
22047 (clobber (reg:CC FLAGS_REG))]
22048 "TARGET_POPCNT
22049 && ix86_pre_reload_split ()"
22050 "#"
22051 "&& 1"
22052 [(const_int 0)]
22053 {
22054 rtx tmp = gen_reg_rtx (HImode);
22055
22056 emit_insn (gen_popcounthi2 (tmp, operands[1]));
22057 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
22058 DONE;
22059 })
22060
;; HImode popcount of operand 1 (register or memory) into register
;; operand 0, printed as popcnt{w} (no suffix under TARGET_MACHO).
;; <nf_name>, <nf_condition> and <nf_prefix> are substitution attributes
;; defined outside this chunk.
;; NOTE(review): presumably they expand this one definition into both an
;; {nf} variant and a flags-clobbering variant -- confirm against the
;; corresponding define_subst.
22061 (define_insn "popcounthi2<nf_name>"
22062 [(set (match_operand:HI 0 "register_operand" "=r")
22063 (popcount:HI
22064 (match_operand:HI 1 "nonimmediate_operand" "rm")))]
22065 "TARGET_POPCNT && <nf_condition>"
22066 {
22067 #if TARGET_MACHO
22068 return "<nf_prefix>popcnt\t{%1, %0|%0, %1}";
22069 #else
22070 return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}";
22071 #endif
22072 }
22073 [(set_attr "prefix_rep" "1")
22074 (set_attr "type" "bitmanip")
22075 (set_attr "has_nf" "1")
22076 (set_attr "mode" "HI")])
22077
;; Expander for DImode byte swap; available only with TARGET_64BIT.
;; Without TARGET_MOVBE the source operand is forced into a register;
;; with TARGET_MOVBE it is left as given (register or memory).
22078 (define_expand "bswapdi2"
22079 [(set (match_operand:DI 0 "register_operand")
22080 (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
22081 "TARGET_64BIT"
22082 {
22083 if (!TARGET_MOVBE)
22084 operands[1] = force_reg (DImode, operands[1]);
22085 })
22086
;; Expander for SImode byte swap (unconditionally available).
;; Without TARGET_MOVBE the source operand is forced into a register.
;; If TARGET_BSWAP is also missing, the swap is synthesized on a fresh
;; SImode pseudo: bswaphisi2_lowpart, rotate left by 16, then
;; bswaphisi2_lowpart into operand 0, and expansion ends with DONE.
;; Otherwise the default (set ... (bswap ...)) pattern is emitted.
22087 (define_expand "bswapsi2"
22088 [(set (match_operand:SI 0 "register_operand")
22089 (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
22090 ""
22091 {
22092 if (!TARGET_MOVBE)
22093 {
22094 operands[1] = force_reg (SImode, operands[1]);
22095
22096 if (!TARGET_BSWAP)
22097 {
22098 rtx x = gen_reg_rtx (SImode);
22099
22100 emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
22101 emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
22102 emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
22103 DONE;
22104 }
22105 }
22106 })
22107
;; Byte swap when TARGET_MOVBE is enabled.  Three alternatives:
;;   0: register in place (operand 1 tied to operand 0) -> bswap
;;   1: memory source, register destination             -> movbe
;;   2: register source, memory destination             -> movbe
;; The insn condition rejects the memory-to-memory combination.
22108 (define_insn "*bswap<mode>2_movbe"
22109 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
22110 (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
22111 "TARGET_MOVBE
22112 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22113 "@
22114 bswap\t%0
22115 movbe{<imodesuffix>}\t{%1, %0|%0, %1}
22116 movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
22117 [(set_attr "type" "bitmanip,imov,imov")
22118 (set_attr "modrm" "0,1,1")
22119 (set_attr "prefix_0f" "*,1,1")
22120 (set_attr "prefix_extra" "*,1,1")
22121 (set_attr "mode" "<MODE>")])
22122
;; In-place register byte swap: operand 1 is tied to operand 0 and the
;; single-operand bswap instruction is emitted.  Requires TARGET_BSWAP.
22123 (define_insn "*bswap<mode>2"
22124 [(set (match_operand:SWI48 0 "register_operand" "=r")
22125 (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
22126 "TARGET_BSWAP"
22127 "bswap\t%0"
22128 [(set_attr "type" "bitmanip")
22129 (set_attr "modrm" "0")
22130 (set_attr "mode" "<MODE>")])
22131
;; Expander for HImode byte swap (unconditionally available).
;; Without TARGET_MOVBE the source operand is forced into a register;
;; with TARGET_MOVBE it is left as given (register or memory).
22132 (define_expand "bswaphi2"
22133 [(set (match_operand:HI 0 "register_operand")
22134 (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
22135 ""
22136 {
22137 if (!TARGET_MOVBE)
22138 operands[1] = force_reg (HImode, operands[1]);
22139 })
22140
;; HImode byte swap when TARGET_MOVBE is enabled.  Three alternatives:
;;   0: "Q" register in place -> xchg{b} between %h0 and %b0 (mode QI)
;;   1: memory source, register destination -> movbe{w}
;;   2: register source, memory destination -> movbe{w}
;; The insn condition rejects the memory-to-memory combination.
;; The per-CPU decode/pairing attributes apply to alternative 0 only.
22141 (define_insn "*bswaphi2_movbe"
22142 [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
22143 (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
22144 "TARGET_MOVBE
22145 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22146 "@
22147 xchg{b}\t{%h0, %b0|%b0, %h0}
22148 movbe{w}\t{%1, %0|%0, %1}
22149 movbe{w}\t{%1, %0|%0, %1}"
22150 [(set_attr "type" "imov")
22151 (set_attr "modrm" "*,1,1")
22152 (set_attr "prefix_0f" "*,1,1")
22153 (set_attr "prefix_extra" "*,1,1")
22154 (set_attr "pent_pair" "np,*,*")
22155 (set_attr "athlon_decode" "vector,*,*")
22156 (set_attr "amdfam10_decode" "double,*,*")
22157 (set_attr "bdver1_decode" "double,*,*")
22158 (set_attr "mode" "QI,HI,HI")])
22159
;; HImode byte swap used when TARGET_MOVBE is off: operand 1 is tied to
;; "Q"-class operand 0 and the swap is an xchg{b} between %h0 and %b0.
;; No FLAGS_REG clobber appears in the pattern.
22160 (define_insn "*bswaphi2"
22161 [(set (match_operand:HI 0 "register_operand" "=Q")
22162 (bswap:HI (match_operand:HI 1 "register_operand" "0")))]
22163 "!TARGET_MOVBE"
22164 "xchg{b}\t{%h0, %b0|%b0, %h0}"
22165 [(set_attr "type" "imov")
22166 (set_attr "pent_pair" "np")
22167 (set_attr "athlon_decode" "vector")
22168 (set_attr "amdfam10_decode" "double")
22169 (set_attr "bdver1_decode" "double")
22170 (set_attr "mode" "QI")])
22171
;; Rewrite an in-place HImode bswap of a general register as a rotate by
;; 8 with a FLAGS_REG clobber.  Not applied when TARGET_USE_XCHGB or
;; TARGET_PARTIAL_REG_STALL is set or the function is optimized for size,
;; and only when peep2_regno_dead_p (0, FLAGS_REG) holds.
22172 (define_peephole2
22173 [(set (match_operand:HI 0 "general_reg_operand")
22174 (bswap:HI (match_dup 0)))]
22175 "!(TARGET_USE_XCHGB ||
22176 TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
22177 && peep2_regno_dead_p (0, FLAGS_REG)"
22178 [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
22179 (clobber (reg:CC FLAGS_REG))])])
22180
;; Swap the two low bytes of an SImode register while keeping its upper
;; 16 bits: operand 0 = (operand 1 & -65536) | (bswap (operand 1) >> 16).
;; Operand 1 is tied to "Q"-class operand 0; output is an xchg{b}
;; between %h0 and %b0.  Unconditionally available; no flags clobber.
22181 (define_insn "bswaphisi2_lowpart"
22182 [(set (match_operand:SI 0 "register_operand" "=Q")
22183 (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
22184 (const_int -65536))
22185 (lshiftrt:SI (bswap:SI (match_dup 1))
22186 (const_int 16))))]
22187 ""
22188 "xchg{b}\t{%h0, %b0|%b0, %h0}"
22189 [(set_attr "type" "imov")
22190 (set_attr "pent_pair" "np")
22191 (set_attr "athlon_decode" "vector")
22192 (set_attr "amdfam10_decode" "double")
22193 (set_attr "bdver1_decode" "double")
22194 (set_attr "mode" "QI")])
22195
;; Rewrite the in-place low-halfword byte swap of an SImode general
;; register as a strict_low_part HImode rotate by 8 with a FLAGS_REG
;; clobber.  The preparation statement replaces operand 0 by its HImode
;; low part.  Not applied when TARGET_USE_XCHGB or
;; TARGET_PARTIAL_REG_STALL is set or the function is optimized for size,
;; and only when peep2_regno_dead_p (0, FLAGS_REG) holds.
22196 (define_peephole2
22197 [(set (match_operand:SI 0 "general_reg_operand")
22198 (ior:SI (and:SI (match_dup 0)
22199 (const_int -65536))
22200 (lshiftrt:SI (bswap:SI (match_dup 0))
22201 (const_int 16))))]
22202 "!(TARGET_USE_XCHGB ||
22203 TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
22204 && peep2_regno_dead_p (0, FLAGS_REG)"
22205 [(parallel [(set (strict_low_part (match_dup 0))
22206 (rotate:HI (match_dup 0) (const_int 8)))
22207 (clobber (reg:CC FLAGS_REG))])]
22208 "operands[0] = gen_lowpart (HImode, operands[0]);")
22209
22210 ;; Variant of above peephole2 to improve register allocation.
;; Matches three insns: copy operand 1 into operand 0, swap the low
;; halfword bytes of operand 0 in place, copy operand 0 into operand 2.
;; Besides the conditions of the single-insn peephole, operand 0 must
;; satisfy peep2_reg_dead_p (3, ...).
;; Replacement: if operands 1 and 2 differ, move operand 1 into operand 2;
;; then rotate the HImode low part of operand 2 (operand 3) by 8 via
;; strict_low_part, with a FLAGS_REG clobber.  Operand 0 is no longer used.
22211 (define_peephole2
22212 [(set (match_operand:SI 0 "general_reg_operand")
22213 (match_operand:SI 1 "register_operand"))
22214 (set (match_dup 0)
22215 (ior:SI (and:SI (match_dup 0)
22216 (const_int -65536))
22217 (lshiftrt:SI (bswap:SI (match_dup 0))
22218 (const_int 16))))
22219 (set (match_operand:SI 2 "general_reg_operand") (match_dup 0))]
22220 "!(TARGET_USE_XCHGB ||
22221 TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
22222 && peep2_regno_dead_p (0, FLAGS_REG)
22223 && peep2_reg_dead_p(3, operands[0])"
22224 [(parallel
22225 [(set (strict_low_part (match_dup 3))
22226 (rotate:HI (match_dup 3) (const_int 8)))
22227 (clobber (reg:CC FLAGS_REG))])]
22228 {
22229 if (!rtx_equal_p (operands[1], operands[2]))
22230 emit_move_insn (operands[2], operands[1]);
22231 operands[3] = gen_lowpart (HImode, operands[2]);
22232 })
22233
;; Expander for DImode parity, used only when TARGET_POPCNT is off.
;; Steps emitted:
;;   1. Obtain the high 32 bits of operand 1: on TARGET_64BIT by a logical
;;      right shift by 32 and taking the SImode low part, otherwise with
;;      gen_highpart.
;;   2. XOR the high and low SImode halves.
;;   3. Shift that result right by 16 and XOR its two HImode halves.
;;   4. Feed the HImode value to parityhi2_cmp, which sets FLAGS_REG.
;;   5. ix86_expand_setcc with code ORDERED on FLAGS_REG into a QImode
;;      scratch.  NOTE(review): ORDERED presumably selects the parity-flag
;;      condition that yields 1 for odd parity -- confirm in the condition
;;      code handling.
;;   6. Zero-extend the scratch into operand 0 (directly on TARGET_64BIT,
;;      otherwise through an SImode temporary).
22234 (define_expand "paritydi2"
22235 [(set (match_operand:DI 0 "register_operand")
22236 (parity:DI (match_operand:DI 1 "register_operand")))]
22237 "! TARGET_POPCNT"
22238 {
22239 rtx scratch = gen_reg_rtx (QImode);
22240 rtx hipart1 = gen_reg_rtx (SImode);
22241 rtx lopart1 = gen_reg_rtx (SImode);
22242 rtx xor1 = gen_reg_rtx (SImode);
22243 rtx shift2 = gen_reg_rtx (SImode);
22244 rtx hipart2 = gen_reg_rtx (HImode);
22245 rtx lopart2 = gen_reg_rtx (HImode);
22246 rtx xor2 = gen_reg_rtx (HImode);
22247
22248 if (TARGET_64BIT)
22249 {
22250 rtx shift1 = gen_reg_rtx (DImode);
22251 emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
22252 emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
22253 }
22254 else
22255 emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));
22256
22257 emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
22258 emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));
22259
22260 emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
22261 emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
22262 emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
22263 emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));
22264
22265 emit_insn (gen_parityhi2_cmp (xor2));
22266
22267 ix86_expand_setcc (scratch, ORDERED,
22268 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
22269
22270 if (TARGET_64BIT)
22271 emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
22272 else
22273 {
22274 rtx tmp = gen_reg_rtx (SImode);
22275
22276 emit_insn (gen_zero_extendqisi2 (tmp, scratch));
22277 emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
22278 }
22279 DONE;
22280 })
22281
;; Expander for SImode parity, used only when TARGET_POPCNT is off.
;; Operand 1 is shifted right by 16; the HImode low parts of the shifted
;; and original values are XORed; parityhi2_cmp sets FLAGS_REG from the
;; result; ix86_expand_setcc (code ORDERED) materializes a QImode value
;; that is zero-extended into operand 0.
22282 (define_expand "paritysi2"
22283 [(set (match_operand:SI 0 "register_operand")
22284 (parity:SI (match_operand:SI 1 "register_operand")))]
22285 "! TARGET_POPCNT"
22286 {
22287 rtx scratch = gen_reg_rtx (QImode);
22288 rtx shift = gen_reg_rtx (SImode);
22289 rtx hipart = gen_reg_rtx (HImode);
22290 rtx lopart = gen_reg_rtx (HImode);
22291 rtx tmp = gen_reg_rtx (HImode);
22292
22293 emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
22294 emit_move_insn (hipart, gen_lowpart (HImode, shift));
22295 emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
22296 emit_insn (gen_xorhi3 (tmp, hipart, lopart));
22297
22298 emit_insn (gen_parityhi2_cmp (tmp));
22299
22300 ix86_expand_setcc (scratch, ORDERED,
22301 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
22302
22303 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
22304 DONE;
22305 })
22306
;; Expander for HImode parity, used only when TARGET_POPCNT is off.
;; Operand 1 is first copied into a fresh HImode pseudo, because
;; parityhi2_cmp clobbers its operand.  FLAGS_REG is then turned into a
;; QImode value by ix86_expand_setcc (code ORDERED) and zero-extended
;; into operand 0.
22307 (define_expand "parityhi2"
22308 [(set (match_operand:HI 0 "register_operand")
22309 (parity:HI (match_operand:HI 1 "register_operand")))]
22310 "! TARGET_POPCNT"
22311 {
22312 rtx scratch = gen_reg_rtx (QImode);
22313 rtx tmp = gen_reg_rtx (HImode);
22314
22315 emit_move_insn (tmp, operands[1]);
22316 emit_insn (gen_parityhi2_cmp (tmp));
22317
22318 ix86_expand_setcc (scratch, ORDERED,
22319 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
22320
22321 emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
22322 DONE;
22323 })
22324
;; Expander for QImode parity, used only when TARGET_POPCNT is off.
;; parityqi2_cmp sets FLAGS_REG from operand 1, and ix86_expand_setcc
;; (code ORDERED) writes the result straight into operand 0.
22325 (define_expand "parityqi2"
22326 [(set (match_operand:QI 0 "register_operand")
22327 (parity:QI (match_operand:QI 1 "register_operand")))]
22328 "! TARGET_POPCNT"
22329 {
22330 emit_insn (gen_parityqi2_cmp (operands[1]));
22331
22332 ix86_expand_setcc (operands[0], ORDERED,
22333 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
22334 DONE;
22335 })
22336
;; Set FLAGS_REG to UNSPEC_PARITY of HImode operand 0.  The instruction
;; XORs %h0 into %b0, so operand 0 is read-write ("+Q") and is also
;; declared clobbered.  Unconditionally available; length is fixed at 2.
22337 (define_insn "parityhi2_cmp"
22338 [(set (reg:CC FLAGS_REG)
22339 (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
22340 UNSPEC_PARITY))
22341 (clobber (match_dup 0))]
22342 ""
22343 "xor{b}\t{%h0, %b0|%b0, %h0}"
22344 [(set_attr "length" "2")
22345 (set_attr "mode" "QI")])
22346
;; Set FLAGS_REG to UNSPEC_PARITY of QImode operand 0 by testing the
;; register against itself.  Operand 0 is input-only ("q") and is not
;; modified.  Unconditionally available.
22347 (define_insn "parityqi2_cmp"
22348 [(set (reg:CC FLAGS_REG)
22349 (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
22350 UNSPEC_PARITY))]
22351 ""
22352 "test{b}\t%0, %0"
22353 [(set_attr "mode" "QI")])
22354
22355 ;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
;; Matches a QImode-to-HImode zero extension into operand 0 followed by
;; the parityhi2_cmp shape on operand 0, and replaces both insns with the
;; QImode parity compare on the original QImode operand 1.
;; No extra condition is attached.
22356 (define_peephole2
22357 [(set (match_operand:HI 0 "register_operand")
22358 (zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
22359 (parallel [(set (reg:CC FLAGS_REG)
22360 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
22361 (clobber (match_dup 0))])]
22362 ""
22363 [(set (reg:CC FLAGS_REG)
22364 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])
22365
22366 ;; Eliminate QImode popcount&1 using parity flag
;; Matches four insns: zero-extend QImode operand 1 into SImode operand 0;
;; popcount of operand 0 into operand 2; test bit 0 of QImode operand 3;
;; conditional jump on that test (operator 4, label 5).
;; Conditions: operands 2 and 3 are the same hard register number,
;; operands 0 and 2 satisfy peep2_reg_dead_p (3, ...), and FLAGS_REG
;; satisfies peep2_regno_dead_p (4, ...).
;; Replacement: a QImode UNSPEC_PARITY compare on operand 1 followed by
;; the jump, whose comparison code is rewritten on a shallow copy:
;; EQ becomes UNORDERED, anything else becomes ORDERED.
22367 (define_peephole2
22368 [(set (match_operand:SI 0 "register_operand")
22369 (zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
22370 (parallel [(set (match_operand:SI 2 "register_operand")
22371 (popcount:SI (match_dup 0)))
22372 (clobber (reg:CC FLAGS_REG))])
22373 (set (reg:CCZ FLAGS_REG)
22374 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
22375 (const_int 1))
22376 (const_int 0)))
22377 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
22378 [(reg:CCZ FLAGS_REG)
22379 (const_int 0)])
22380 (label_ref (match_operand 5))
22381 (pc)))]
22382 "REGNO (operands[2]) == REGNO (operands[3])
22383 && peep2_reg_dead_p (3, operands[0])
22384 && peep2_reg_dead_p (3, operands[2])
22385 && peep2_regno_dead_p (4, FLAGS_REG)"
22386 [(set (reg:CC FLAGS_REG)
22387 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
22388 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
22389 (const_int 0)])
22390 (label_ref (match_dup 5))
22391 (pc)))]
22392 {
22393 operands[4] = shallow_copy_rtx (operands[4]);
22394 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
22395 })
22396
22397 ;; Eliminate HImode popcount&1 using parity flag
;; Needs a free HImode "Q" scratch register (operand 0).
;; Matches four insns: HImode popcount of operand 2 into operand 1;
;; zero-extend operand 1 into operand 3; test bit 0 of QImode operand 4;
;; conditional jump on that test (operator 5, label 6).
;; Conditions: operands 3 and 4 are the same hard register number,
;; operands 1 and 3 satisfy peep2_reg_dead_p (3, ...), and FLAGS_REG
;; satisfies peep2_regno_dead_p (4, ...).
;; Replacement: copy operand 2 into the scratch, run the parityhi2_cmp
;; shape on the scratch (which it clobbers), then the jump with its
;; comparison code rewritten on a shallow copy: EQ becomes UNORDERED,
;; anything else becomes ORDERED.
22398 (define_peephole2
22399 [(match_scratch:HI 0 "Q")
22400 (parallel [(set (match_operand:HI 1 "register_operand")
22401 (popcount:HI
22402 (match_operand:HI 2 "nonimmediate_operand")))
22403 (clobber (reg:CC FLAGS_REG))])
22404 (set (match_operand 3 "register_operand")
22405 (zero_extend (match_dup 1)))
22406 (set (reg:CCZ FLAGS_REG)
22407 (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
22408 (const_int 1))
22409 (const_int 0)))
22410 (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
22411 [(reg:CCZ FLAGS_REG)
22412 (const_int 0)])
22413 (label_ref (match_operand 6))
22414 (pc)))]
22415 "REGNO (operands[3]) == REGNO (operands[4])
22416 && peep2_reg_dead_p (3, operands[1])
22417 && peep2_reg_dead_p (3, operands[3])
22418 && peep2_regno_dead_p (4, FLAGS_REG)"
22419 [(set (match_dup 0) (match_dup 2))
22420 (parallel [(set (reg:CC FLAGS_REG)
22421 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
22422 (clobber (match_dup 0))])
22423 (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
22424 (const_int 0)])
22425 (label_ref (match_dup 6))
22426 (pc)))]
22427 {
22428 operands[5] = shallow_copy_rtx (operands[5]);
22429 PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
22430 })
22431
22432 ;; Eliminate HImode popcount&1 using parity flag (variant 2)
;; Same idea as the four-insn HImode form, but without the intermediate
;; zero extension: HImode popcount of operand 2 into operand 1; test bit 0
;; of QImode operand 3; conditional jump (operator 4, label 5).
;; Needs a free HImode "Q" scratch register (operand 0).
;; Conditions: operands 1 and 3 are the same hard register number, both
;; satisfy peep2_reg_dead_p (2, ...), and FLAGS_REG satisfies
;; peep2_regno_dead_p (3, ...).
;; Replacement: copy operand 2 into the scratch, run the parityhi2_cmp
;; shape on the scratch, then the jump with EQ rewritten to UNORDERED and
;; any other code rewritten to ORDERED (on a shallow copy of the operator).
22433 (define_peephole2
22434 [(match_scratch:HI 0 "Q")
22435 (parallel [(set (match_operand:HI 1 "register_operand")
22436 (popcount:HI
22437 (match_operand:HI 2 "nonimmediate_operand")))
22438 (clobber (reg:CC FLAGS_REG))])
22439 (set (reg:CCZ FLAGS_REG)
22440 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
22441 (const_int 1))
22442 (const_int 0)))
22443 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
22444 [(reg:CCZ FLAGS_REG)
22445 (const_int 0)])
22446 (label_ref (match_operand 5))
22447 (pc)))]
22448 "REGNO (operands[1]) == REGNO (operands[3])
22449 && peep2_reg_dead_p (2, operands[1])
22450 && peep2_reg_dead_p (2, operands[3])
22451 && peep2_regno_dead_p (3, FLAGS_REG)"
22452 [(set (match_dup 0) (match_dup 2))
22453 (parallel [(set (reg:CC FLAGS_REG)
22454 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
22455 (clobber (match_dup 0))])
22456 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
22457 (const_int 0)])
22458 (label_ref (match_dup 5))
22459 (pc)))]
22460 {
22461 operands[4] = shallow_copy_rtx (operands[4]);
22462 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
22463 })
22464
22465 \f
22466 ;; Thread-local storage patterns for ELF.
22467 ;;
22468 ;; Note that these code sequences must appear exactly as shown
22469 ;; in order to allow linker relaxation.
22470
;; 32-bit global-dynamic TLS access (UNSPEC_TLS_GD); requires !TARGET_64BIT
;; and TARGET_GNU_TLS.  Result is in "a"; operand 1 uses constraint "Yb";
;; operand 2 is the TLS symbol; operand 3 is the call target.  Two SImode
;; scratches ("d" and "c") and FLAGS_REG are clobbered.
;; Output is a two-instruction sequence:
;;   - lea of sym@tlsgd into operand 0, using the (,%1,1) indexed form
;;     when TARGET_SUN_TLS, flag_plt, or no @GOT call support in the
;;     assembler, and the plain (%1) base form otherwise;
;;   - a call: on TARGET_SUN_TLS either sym@tlsgdplt or target@plt
;;     depending on HAVE_AS_IX86_TLSGDPLT; else a direct call (%P3) when
;;     flag_plt or no @GOT support; else an indirect call through
;;     target@GOT(%1).
;; The length attribute is fixed at 12.
22471 (define_insn "*tls_global_dynamic_32_gnu"
22472 [(set (match_operand:SI 0 "register_operand" "=a")
22473 (unspec:SI
22474 [(match_operand:SI 1 "register_operand" "Yb")
22475 (match_operand 2 "tls_symbolic_operand")
22476 (match_operand 3 "constant_call_address_operand" "Bz")
22477 (reg:SI SP_REG)]
22478 UNSPEC_TLS_GD))
22479 (clobber (match_scratch:SI 4 "=d"))
22480 (clobber (match_scratch:SI 5 "=c"))
22481 (clobber (reg:CC FLAGS_REG))]
22482 "!TARGET_64BIT && TARGET_GNU_TLS"
22483 {
22484 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
22485 output_asm_insn
22486 ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
22487 else
22488 output_asm_insn
22489 ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
22490 if (TARGET_SUN_TLS)
22491 #ifdef HAVE_AS_IX86_TLSGDPLT
22492 return "call\t%a2@tlsgdplt";
22493 #else
22494 return "call\t%p3@plt";
22495 #endif
22496 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
22497 return "call\t%P3";
22498 return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
22499 }
22500 [(set_attr "type" "multi")
22501 (set_attr "length" "12")])
22502
;; Expander producing the 32-bit UNSPEC_TLS_GD parallel with two SImode
;; scratch clobbers and a FLAGS_REG clobber.  Note the operand numbering:
;; operand 1 is the TLS symbol and operand 2 the register, the reverse of
;; their positions inside the unspec vector.
;; As a side effect it sets ix86_tls_descriptor_calls_expanded_in_cfun.
22503 (define_expand "tls_global_dynamic_32"
22504 [(parallel
22505 [(set (match_operand:SI 0 "register_operand")
22506 (unspec:SI [(match_operand:SI 2 "register_operand")
22507 (match_operand 1 "tls_symbolic_operand")
22508 (match_operand 3 "constant_call_address_operand")
22509 (reg:SI SP_REG)]
22510 UNSPEC_TLS_GD))
22511 (clobber (scratch:SI))
22512 (clobber (scratch:SI))
22513 (clobber (reg:CC FLAGS_REG))])]
22514 ""
22515 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
22516
;; 64-bit global-dynamic TLS access, modelled as a call whose value lands
;; in "a", paired with an UNSPEC_TLS_GD on the TLS symbol (operand 1).
;; Operand 2 is the call target.  Requires TARGET_64BIT.
;; Output sequence:
;;   - unless TARGET_X32, a data16 prefix written ahead of the lea (see
;;     the in-body comment for why data16 is used instead of .byte);
;;   - lea of sym@tlsgd(%rip) into %rdi;
;;   - padding prefixes: ASM_SHORT 0x6666 when TARGET_SUN_TLS, flag_plt,
;;     or no @GOT call support in the assembler, otherwise a single
;;     ASM_BYTE 0x66;
;;   - rex64;
;;   - the call: target@plt on TARGET_SUN_TLS, a direct call (%P2) when
;;     flag_plt or no @GOT support, otherwise an indirect call through
;;     target@GOTPCREL(%rip).
;; The length attribute is 15 on TARGET_X32 and 16 otherwise.
22517 (define_insn "*tls_global_dynamic_64_<mode>"
22518 [(set (match_operand:P 0 "register_operand" "=a")
22519 (call:P
22520 (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
22521 (match_operand 3)))
22522 (unspec:P [(match_operand 1 "tls_symbolic_operand")
22523 (reg:P SP_REG)]
22524 UNSPEC_TLS_GD)]
22525 "TARGET_64BIT"
22526 {
22527 if (!TARGET_X32)
22528 /* The .loc directive has effect for 'the immediately following assembly
22529 instruction'. So for a sequence:
22530 .loc f l
22531 .byte x
22532 insn1
22533 the 'immediately following assembly instruction' is insn1.
22534 We want to emit an insn prefix here, but if we use .byte (as shown in
22535 'ELF Handling For Thread-Local Storage'), a preceding .loc will point
22536 inside the insn sequence, rather than to the start. After relaxation
22537 of the sequence by the linker, the .loc might point inside an insn.
22538 Use data16 prefix instead, which doesn't have this problem. */
22539 fputs ("\tdata16", asm_out_file);
22540 output_asm_insn
22541 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
22542 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
22543 fputs (ASM_SHORT "0x6666\n", asm_out_file);
22544 else
22545 fputs (ASM_BYTE "0x66\n", asm_out_file);
22546 fputs ("\trex64\n", asm_out_file);
22547 if (TARGET_SUN_TLS)
22548 return "call\t%p2@plt";
22549 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
22550 return "call\t%P2";
22551 return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
22552 }
22553 [(set_attr "type" "multi")
22554 (set (attr "length")
22555 (symbol_ref "TARGET_X32 ? 15 : 16"))])
22556
;; Large-PIC form of the 64-bit global-dynamic TLS call.  The call address
;; is register operand 2 ("b") plus immediate operand 3, and the insn
;; condition requires operand 3 to be a CONST wrapping an UNSPEC_PLTOFF.
;; Only for TARGET_64BIT with ix86_cmodel == CM_LARGE_PIC and
;; !TARGET_PECOFF.
;; Output: lea of sym@tlsgd(%rip) into %rdi, movabs of operand 3 into
;; %rax, add of operand 2 to %rax, then an indirect call through %rax.
;; The length attribute is fixed at 22.
22557 (define_insn "*tls_global_dynamic_64_largepic"
22558 [(set (match_operand:DI 0 "register_operand" "=a")
22559 (call:DI
22560 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
22561 (match_operand:DI 3 "immediate_operand" "i")))
22562 (match_operand 4)))
22563 (unspec:DI [(match_operand 1 "tls_symbolic_operand")
22564 (reg:DI SP_REG)]
22565 UNSPEC_TLS_GD)]
22566 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
22567 && GET_CODE (operands[3]) == CONST
22568 && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
22569 && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
22570 {
22571 output_asm_insn
22572 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
22573 output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
22574 output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
22575 return "call\t{*%%rax|rax}";
22576 }
22577 [(set_attr "type" "multi")
22578 (set_attr "length" "22")])
22579
;; Expander producing the 64-bit global-dynamic parallel: a call through
;; operand 2 (no predicate) with a constant 0 second call argument,
;; paired with UNSPEC_TLS_GD on the TLS symbol (operand 1).
;; Requires TARGET_64BIT.  As a side effect it sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
22580 (define_expand "@tls_global_dynamic_64_<mode>"
22581 [(parallel
22582 [(set (match_operand:P 0 "register_operand")
22583 (call:P
22584 (mem:QI (match_operand 2))
22585 (const_int 0)))
22586 (unspec:P [(match_operand 1 "tls_symbolic_operand")
22587 (reg:P SP_REG)]
22588 UNSPEC_TLS_GD)])]
22589 "TARGET_64BIT"
22590 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
22591
;; 32-bit local-dynamic TLS base (UNSPEC_TLS_LD_BASE); requires
;; !TARGET_64BIT and TARGET_GNU_TLS.  Result is in "a"; operand 1 uses
;; constraint "Yb"; operand 2 is the call target.  Two SImode scratches
;; ("d" and "c") and FLAGS_REG are clobbered.
;; Output: lea of %&@tlsldm(%1) into operand 0, then a call -- on
;; TARGET_SUN_TLS either %&@tlsldmplt or target@plt depending on
;; HAVE_AS_IX86_TLSLDMPLT; else a direct call (%P2) when flag_plt or the
;; assembler lacks @GOT call support; else an indirect call through
;; target@GOT(%1).  The length attribute is fixed at 11.
22592 (define_insn "*tls_local_dynamic_base_32_gnu"
22593 [(set (match_operand:SI 0 "register_operand" "=a")
22594 (unspec:SI
22595 [(match_operand:SI 1 "register_operand" "Yb")
22596 (match_operand 2 "constant_call_address_operand" "Bz")
22597 (reg:SI SP_REG)]
22598 UNSPEC_TLS_LD_BASE))
22599 (clobber (match_scratch:SI 3 "=d"))
22600 (clobber (match_scratch:SI 4 "=c"))
22601 (clobber (reg:CC FLAGS_REG))]
22602 "!TARGET_64BIT && TARGET_GNU_TLS"
22603 {
22604 output_asm_insn
22605 ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
22606 if (TARGET_SUN_TLS)
22607 {
22608 if (HAVE_AS_IX86_TLSLDMPLT)
22609 return "call\t%&@tlsldmplt";
22610 else
22611 return "call\t%p2@plt";
22612 }
22613 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
22614 return "call\t%P2";
22615 return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
22616 }
22617 [(set_attr "type" "multi")
22618 (set_attr "length" "11")])
22619
;; Expander producing the 32-bit UNSPEC_TLS_LD_BASE parallel with two
;; SImode scratch clobbers and a FLAGS_REG clobber.
;; As a side effect it sets ix86_tls_descriptor_calls_expanded_in_cfun.
22620 (define_expand "tls_local_dynamic_base_32"
22621 [(parallel
22622 [(set (match_operand:SI 0 "register_operand")
22623 (unspec:SI
22624 [(match_operand:SI 1 "register_operand")
22625 (match_operand 2 "constant_call_address_operand")
22626 (reg:SI SP_REG)]
22627 UNSPEC_TLS_LD_BASE))
22628 (clobber (scratch:SI))
22629 (clobber (scratch:SI))
22630 (clobber (reg:CC FLAGS_REG))])]
22631 ""
22632 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
22633
;; 64-bit local-dynamic TLS base, modelled as a call whose value lands in
;; "a", paired with UNSPEC_TLS_LD_BASE.  Operand 1 is the call target.
;; Requires TARGET_64BIT.
;; Output: lea of %&@tlsld(%rip) into %rdi, then the call -- target@plt
;; on TARGET_SUN_TLS; a direct call (%P1) when flag_plt or the assembler
;; lacks @GOT call support; otherwise an indirect call through
;; target@GOTPCREL(%rip).  The length attribute is fixed at 12.
22634 (define_insn "*tls_local_dynamic_base_64_<mode>"
22635 [(set (match_operand:P 0 "register_operand" "=a")
22636 (call:P
22637 (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
22638 (match_operand 2)))
22639 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
22640 "TARGET_64BIT"
22641 {
22642 output_asm_insn
22643 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
22644 if (TARGET_SUN_TLS)
22645 return "call\t%p1@plt";
22646 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
22647 return "call\t%P1";
22648 return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
22649 }
22650 [(set_attr "type" "multi")
22651 (set_attr "length" "12")])
22652
;; Large-PIC form of the 64-bit local-dynamic TLS base call.  The call
;; address is register operand 1 ("b") plus immediate operand 2, and the
;; insn condition requires operand 2 to be a CONST wrapping an
;; UNSPEC_PLTOFF.  Only for TARGET_64BIT with ix86_cmodel == CM_LARGE_PIC
;; and !TARGET_PECOFF.
;; Output: lea of %&@tlsld(%rip) into %rdi, movabs of operand 2 into
;; %rax, add of operand 1 to %rax, then an indirect call through %rax.
;; The length attribute is fixed at 22.
22653 (define_insn "*tls_local_dynamic_base_64_largepic"
22654 [(set (match_operand:DI 0 "register_operand" "=a")
22655 (call:DI
22656 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
22657 (match_operand:DI 2 "immediate_operand" "i")))
22658 (match_operand 3)))
22659 (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
22660 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
22661 && GET_CODE (operands[2]) == CONST
22662 && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
22663 && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
22664 {
22665 output_asm_insn
22666 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
22667 output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
22668 output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
22669 return "call\t{*%%rax|rax}";
22670 }
22671 [(set_attr "type" "multi")
22672 (set_attr "length" "22")])
22673
;; Expander producing the 64-bit local-dynamic-base parallel: a call
;; through operand 1 (no predicate) with a constant 0 second call
;; argument, paired with UNSPEC_TLS_LD_BASE.  Requires TARGET_64BIT.
;; As a side effect it sets ix86_tls_descriptor_calls_expanded_in_cfun.
22674 (define_expand "@tls_local_dynamic_base_64_<mode>"
22675 [(parallel
22676 [(set (match_operand:P 0 "register_operand")
22677 (call:P
22678 (mem:QI (match_operand 1))
22679 (const_int 0)))
22680 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
22681 "TARGET_64BIT"
22682 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
22683
22684 ;; Local dynamic of a single variable is a loss. Show combine how
22685 ;; to convert that back to global dynamic.
22686
;; Matches a 32-bit UNSPEC_TLS_LD_BASE result added to the UNSPEC_DTPOFF
;; constant of a TLS symbol (operand 3).  It is never output directly
;; ("#") and has empty insn and split conditions.
;; The split rewrites it as a single UNSPEC_TLS_GD on the same register
;; (operand 1), symbol (operand 3) and call target (operand 2), reusing
;; the two scratch operands and the FLAGS_REG clobber.
22687 (define_insn_and_split "*tls_local_dynamic_32_once"
22688 [(set (match_operand:SI 0 "register_operand" "=a")
22689 (plus:SI
22690 (unspec:SI [(match_operand:SI 1 "register_operand" "b")
22691 (match_operand 2 "constant_call_address_operand" "Bz")
22692 (reg:SI SP_REG)]
22693 UNSPEC_TLS_LD_BASE)
22694 (const:SI (unspec:SI
22695 [(match_operand 3 "tls_symbolic_operand")]
22696 UNSPEC_DTPOFF))))
22697 (clobber (match_scratch:SI 4 "=d"))
22698 (clobber (match_scratch:SI 5 "=c"))
22699 (clobber (reg:CC FLAGS_REG))]
22700 ""
22701 "#"
22702 ""
22703 [(parallel
22704 [(set (match_dup 0)
22705 (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
22706 (reg:SI SP_REG)]
22707 UNSPEC_TLS_GD))
22708 (clobber (match_dup 4))
22709 (clobber (match_dup 5))
22710 (clobber (reg:CC FLAGS_REG))])])
22711
22712 ;; Load and add the thread base pointer from %<tp_seg>:0.
;; Expander that sets operand 0 to UNSPEC_TP of constant 0.
;; The expander condition is empty; the targetm.have_tls check is done in
;; the preparation code instead (see the in-body comment), which reports
;; an error naming __builtin_thread_pointer when TLS is unavailable.
;; Expansion still proceeds after the error is reported.
22713 (define_expand "get_thread_pointer<mode>"
22714 [(set (match_operand:PTR 0 "register_operand")
22715 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
22716 ""
22717 {
22718 /* targetm is not visible in the scope of the condition. */
22719 if (!targetm.have_tls)
22720 error ("%<__builtin_thread_pointer%> is not supported on this target");
22721 })
22722
;; Load of the thread pointer (UNSPEC_TP) into register operand 0.
;; Never output directly ("#"); empty insn and split conditions.
;; The split turns it into a plain move from operand 1, which is built as
;; a constant memory reference at address 0 placed in the
;; DEFAULT_TLS_SEG_REG address space.
22723 (define_insn_and_split "*load_tp_<mode>"
22724 [(set (match_operand:PTR 0 "register_operand" "=r")
22725 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
22726 ""
22727 "#"
22728 ""
22729 [(set (match_dup 0)
22730 (match_dup 1))]
22731 {
22732 addr_space_t as = DEFAULT_TLS_SEG_REG;
22733
22734 operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
22735 set_mem_addr_space (operands[1], as);
22736 })
22737
;; TARGET_X32 only: zero-extend the SImode thread pointer (UNSPEC_TP)
;; into DImode operand 0.  Never output directly ("#"); always split.
;; The split zero-extends operand 1, an SImode constant memory reference
;; at address 0 in the DEFAULT_TLS_SEG_REG address space.
22738 (define_insn_and_split "*load_tp_x32_zext"
22739 [(set (match_operand:DI 0 "register_operand" "=r")
22740 (zero_extend:DI
22741 (unspec:SI [(const_int 0)] UNSPEC_TP)))]
22742 "TARGET_X32"
22743 "#"
22744 "&& 1"
22745 [(set (match_dup 0)
22746 (zero_extend:DI (match_dup 1)))]
22747 {
22748 addr_space_t as = DEFAULT_TLS_SEG_REG;
22749
22750 operands[1] = gen_const_mem (SImode, const0_rtx);
22751 set_mem_addr_space (operands[1], as);
22752 })
22753
;; Add the thread pointer (UNSPEC_TP) to operand 1, which is tied to
;; operand 0; FLAGS_REG is clobbered.  Never output directly ("#");
;; empty insn and split conditions.
;; The split emits operand 1 plus operand 2, where operand 2 is a
;; constant memory reference at address 0 in the DEFAULT_TLS_SEG_REG
;; address space, keeping the FLAGS_REG clobber.
22754 (define_insn_and_split "*add_tp_<mode>"
22755 [(set (match_operand:PTR 0 "register_operand" "=r")
22756 (plus:PTR
22757 (unspec:PTR [(const_int 0)] UNSPEC_TP)
22758 (match_operand:PTR 1 "register_operand" "0")))
22759 (clobber (reg:CC FLAGS_REG))]
22760 ""
22761 "#"
22762 ""
22763 [(parallel
22764 [(set (match_dup 0)
22765 (plus:PTR (match_dup 1) (match_dup 2)))
22766 (clobber (reg:CC FLAGS_REG))])]
22767 {
22768 addr_space_t as = DEFAULT_TLS_SEG_REG;
22769
22770 operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
22771 set_mem_addr_space (operands[2], as);
22772 })
22773
;; TARGET_X32 only: zero-extend to DImode the SImode sum of the thread
;; pointer (UNSPEC_TP) and operand 1, which is tied to operand 0.
;; FLAGS_REG is clobbered.  Never output directly ("#"); always split.
;; The split replaces the unspec by operand 2, an SImode constant memory
;; reference at address 0 in the DEFAULT_TLS_SEG_REG address space.
22774 (define_insn_and_split "*add_tp_x32_zext"
22775 [(set (match_operand:DI 0 "register_operand" "=r")
22776 (zero_extend:DI
22777 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
22778 (match_operand:SI 1 "register_operand" "0"))))
22779 (clobber (reg:CC FLAGS_REG))]
22780 "TARGET_X32"
22781 "#"
22782 "&& 1"
22783 [(parallel
22784 [(set (match_dup 0)
22785 (zero_extend:DI
22786 (plus:SI (match_dup 1) (match_dup 2))))
22787 (clobber (reg:CC FLAGS_REG))])]
22788 {
22789 addr_space_t as = DEFAULT_TLS_SEG_REG;
22790
22791 operands[2] = gen_const_mem (SImode, const0_rtx);
22792 set_mem_addr_space (operands[2], as);
22793 })
22794
22795 ;; The Sun linker took the AMD64 TLS spec literally and can only handle
22796 ;; %rax as destination of the initial executable code sequence.
;; Two-instruction sequence emitted as one insn (type "multi"), available
;; when TARGET_64BIT && TARGET_SUN_TLS.  The destination is restricted to
;; the "a" register.  The C fragment first prints a mov of %fs:0 into
;; operand 0 via output_asm_insn, then returns the add of
;; <symbol>@gottpoff(%rip) as the final template.  Flags are clobbered.
22797 (define_insn "tls_initial_exec_64_sun"
22798 [(set (match_operand:DI 0 "register_operand" "=a")
22799 (unspec:DI
22800 [(match_operand 1 "tls_symbolic_operand")]
22801 UNSPEC_TLS_IE_SUN))
22802 (clobber (reg:CC FLAGS_REG))]
22803 "TARGET_64BIT && TARGET_SUN_TLS"
22804 {
22805 output_asm_insn
22806 ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
22807 return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
22808 }
22809 [(set_attr "type" "multi")])
22810
22811 ;; GNU2 TLS patterns can be split.
22812
;; Expander for the 32-bit GNU2 TLS descriptor sequence
;; (!TARGET_64BIT && TARGET_GNU2_TLS).  It emits two insns:
;;   1. operand 3 = operand 2 + (const (unspec [symbol] UNSPEC_TLSDESC))
;;   2. operand 0 = (unspec [symbol, operand 3, operand 2, SP]
;;                   UNSPEC_TLSDESC), clobbering the flags.
;; Operands: 0 = result register, 1 = TLS symbol, 2 = SImode register
;; used as the base of the first PLUS, 3 = temporary chosen in the C
;; fragment (a new Pmode pseudo when pseudos can still be created,
;; otherwise operand 0 itself).  The fragment also sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
22813 (define_expand "tls_dynamic_gnu2_32"
22814 [(set (match_dup 3)
22815 (plus:SI (match_operand:SI 2 "register_operand")
22816 (const:SI
22817 (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
22818 UNSPEC_TLSDESC))))
22819 (parallel
22820 [(set (match_operand:SI 0 "register_operand")
22821 (unspec:SI [(match_dup 1) (match_dup 3)
22822 (match_dup 2) (reg:SI SP_REG)]
22823 UNSPEC_TLSDESC))
22824 (clobber (reg:CC FLAGS_REG))])]
22825 "!TARGET_64BIT && TARGET_GNU2_TLS"
22826 {
22827 operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
22828 ix86_tls_descriptor_calls_expanded_in_cfun = true;
22829 })
22830
;; Matches the first insn produced by tls_dynamic_gnu2_32: a register
;; plus the TLSDESC constant for symbol operand 2.  Operand 1 must be in
;; the "b" register.  Printed as a single lea with an @TLSDESC operand;
;; the insn length is given explicitly as 6 with a 4-byte address part.
22831 (define_insn "*tls_dynamic_gnu2_lea_32"
22832 [(set (match_operand:SI 0 "register_operand" "=r")
22833 (plus:SI (match_operand:SI 1 "register_operand" "b")
22834 (const:SI
22835 (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
22836 UNSPEC_TLSDESC))))]
22837 "!TARGET_64BIT && TARGET_GNU2_TLS"
22838 "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
22839 [(set_attr "type" "lea")
22840 (set_attr "mode" "SI")
22841 (set_attr "length" "6")
22842 (set_attr "length_address" "4")])
22843
;; Matches the second insn produced by tls_dynamic_gnu2_32: an indirect
;; call through <symbol>@TLSCALL(operand 2).  The result is in the "a"
;; register, operand 2 is tied to it ("0"), operand 3 must be in the "b"
;; register, and the stack pointer appears as an input of the unspec.
;; Flags are clobbered; length attributes are given explicitly.
22844 (define_insn "*tls_dynamic_gnu2_call_32"
22845 [(set (match_operand:SI 0 "register_operand" "=a")
22846 (unspec:SI [(match_operand 1 "tls_symbolic_operand")
22847 (match_operand:SI 2 "register_operand" "0")
22848 ;; we have to make sure %ebx still points to the GOT
22849 (match_operand:SI 3 "register_operand" "b")
22850 (reg:SI SP_REG)]
22851 UNSPEC_TLSDESC))
22852 (clobber (reg:CC FLAGS_REG))]
22853 "!TARGET_64BIT && TARGET_GNU2_TLS"
22854 "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
22855 [(set_attr "type" "call")
22856 (set_attr "length" "2")
22857 (set_attr "length_address" "0")])
22858
;; Matches the sum of a TLSDESC unspec whose first element is a
;; tls_modbase_operand (operand 3) and the UNSPEC_DTPOFF constant of
;; symbol operand 1.  It is never output directly ("#") and is always
;; split ("&& 1"): the C fragment emits the tls_dynamic_gnu2_32 sequence
;; for symbol operand 1, using operand 2 (the "b" register) as its
;; register input, into temporary operand 5, and the replacement pattern
;; then copies operand 5 into operand 0.  Operand 5 is a new Pmode pseudo
;; when pseudos can still be created, otherwise operand 0 itself.
;; Operands 3 and 4 are matched but not used by the split.
22859 (define_insn_and_split "*tls_dynamic_gnu2_combine_32"
22860 [(set (match_operand:SI 0 "register_operand" "=&a")
22861 (plus:SI
22862 (unspec:SI [(match_operand 3 "tls_modbase_operand")
22863 (match_operand:SI 4)
22864 (match_operand:SI 2 "register_operand" "b")
22865 (reg:SI SP_REG)]
22866 UNSPEC_TLSDESC)
22867 (const:SI (unspec:SI
22868 [(match_operand 1 "tls_symbolic_operand")]
22869 UNSPEC_DTPOFF))))
22870 (clobber (reg:CC FLAGS_REG))]
22871 "!TARGET_64BIT && TARGET_GNU2_TLS"
22872 "#"
22873 "&& 1"
22874 [(set (match_dup 0) (match_dup 5))]
22875 {
22876 operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
22877 emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
22878 })
22879
;; Expander for the 64-bit GNU2 TLS descriptor sequence
;; (TARGET_64BIT && TARGET_GNU2_TLS), iterated over PTR.  It emits:
;;   1. operand 2 = (unspec [symbol] UNSPEC_TLSDESC)
;;   2. operand 0 = (unspec [symbol, operand 2, SP] UNSPEC_TLSDESC),
;;      clobbering the flags.
;; Operand 2 is a temporary chosen in the C fragment (a new ptr_mode
;; pseudo when pseudos can still be created, otherwise operand 0).  The
;; fragment also sets ix86_tls_descriptor_calls_expanded_in_cfun.
;; The "@" name prefix provides a generator that takes the mode as its
;; first argument; it is called that way by the combine_64 splitter in
;; this file.
22880 (define_expand "@tls_dynamic_gnu2_64_<mode>"
22881 [(set (match_dup 2)
22882 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
22883 UNSPEC_TLSDESC))
22884 (parallel
22885 [(set (match_operand:PTR 0 "register_operand")
22886 (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
22887 UNSPEC_TLSDESC))
22888 (clobber (reg:CC FLAGS_REG))])]
22889 "TARGET_64BIT && TARGET_GNU2_TLS"
22890 {
22891 operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
22892 ix86_tls_descriptor_calls_expanded_in_cfun = true;
22893 })
22894
;; Matches the first insn produced by the 64-bit GNU2 TLS expander: the
;; TLSDESC unspec of symbol operand 1 is materialised with a RIP-relative
;; lea using an @TLSDESC operand.  The insn length is given explicitly as
;; 7 with a 4-byte address part.
22895 (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
22896 [(set (match_operand:PTR 0 "register_operand" "=r")
22897 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
22898 UNSPEC_TLSDESC))]
22899 "TARGET_64BIT && TARGET_GNU2_TLS"
22900 "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
22901 [(set_attr "type" "lea")
22902 (set_attr "mode" "<MODE>")
22903 (set_attr "length" "7")
22904 (set_attr "length_address" "4")])
22905
;; Matches the second insn produced by the 64-bit GNU2 TLS expander: an
;; indirect call through <symbol>@TLSCALL(operand 2).  The result is in
;; the "a" register and operand 2 is tied to it ("0"); the stack pointer
;; appears as an input of the unspec.  Flags are clobbered; length
;; attributes are given explicitly.
22906 (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
22907 [(set (match_operand:PTR 0 "register_operand" "=a")
22908 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
22909 (match_operand:PTR 2 "register_operand" "0")
22910 (reg:PTR SP_REG)]
22911 UNSPEC_TLSDESC))
22912 (clobber (reg:CC FLAGS_REG))]
22913 "TARGET_64BIT && TARGET_GNU2_TLS"
22914 "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
22915 [(set_attr "type" "call")
22916 (set_attr "length" "2")
22917 (set_attr "length_address" "0")])
22918
;; 64-bit counterpart of the GNU2 TLS combine pattern: matches the sum of
;; a TLSDESC unspec whose first element is a tls_modbase_operand
;; (operand 2) and the UNSPEC_DTPOFF constant of symbol operand 1.  It is
;; never output directly ("#") and is always split ("&& 1"): the C
;; fragment emits the 64-bit GNU2 TLS sequence for symbol operand 1 into
;; temporary operand 4, and the replacement pattern copies operand 4 into
;; operand 0.  Operand 4 is a new ptr_mode pseudo when pseudos can still
;; be created, otherwise operand 0 itself.  Operands 2 and 3 are matched
;; but not used by the split.
22919 (define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
22920 [(set (match_operand:PTR 0 "register_operand" "=&a")
22921 (plus:PTR
22922 (unspec:PTR [(match_operand 2 "tls_modbase_operand")
22923 (match_operand:PTR 3)
22924 (reg:PTR SP_REG)]
22925 UNSPEC_TLSDESC)
22926 (const:PTR (unspec:PTR
22927 [(match_operand 1 "tls_symbolic_operand")]
22928 UNSPEC_DTPOFF))))
22929 (clobber (reg:CC FLAGS_REG))]
22930 "TARGET_64BIT && TARGET_GNU2_TLS"
22931 "#"
22932 "&& 1"
22933 [(set (match_dup 0) (match_dup 4))]
22934 {
22935 operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
22936 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
22937 })
22938
;; When TARGET_TLS_DIRECT_SEG_REFS holds, any insn pattern accepted by the
;; tls_address_pattern predicate is replaced by whatever
;; ix86_rewrite_tls_address returns for it.
;; NOTE(review): the exact rewrite lives in ix86_rewrite_tls_address,
;; which is not visible here -- confirm there.
22939 (define_split
22940 [(match_operand 0 "tls_address_pattern")]
22941 "TARGET_TLS_DIRECT_SEG_REFS"
22942 [(match_dup 0)]
22943 "operands[0] = ix86_rewrite_tls_address (operands[0]);")
22944
22945 \f
22946 ;; These patterns match the binary 387 instructions for addM3, subM3,
22947 ;; mulM3 and divM3. There are three patterns for each of DFmode and
22948 ;; SFmode. The first is the normal insn, the second the same insn but
22949 ;; with one operand a conversion, and the third the same insn but with
22950 ;; the other operand a conversion. The conversion may be SFmode or
22951 ;; SImode if the target mode is DFmode, but only SImode if the target
22952 ;; mode is SFmode.
22953
22954 ;; GCC is slightly smarter about handling normal two-address instructions,
22955 ;; so use special patterns for add and mul.
22956
;; XFmode binary x87 operation restricted to commutative operators
;; (COMMUTATIVE_ARITH_P on operand 3).  Both inputs are "f" registers;
;; operand 1 is tied to the output and marked commutative with operand 2
;; ("%0").  The assembly text comes from output_387_binary_op.  The
;; "type" attribute is "fmul" when operand 3 is a mult_operator, else
;; "fop".
22957 (define_insn "*fop_xf_comm_i387"
22958 [(set (match_operand:XF 0 "register_operand" "=f")
22959 (match_operator:XF 3 "binary_fp_operator"
22960 [(match_operand:XF 1 "register_operand" "%0")
22961 (match_operand:XF 2 "register_operand" "f")]))]
22962 "TARGET_80387
22963 && COMMUTATIVE_ARITH_P (operands[3])"
22964 "* return output_387_binary_op (insn, operands);"
22965 [(set (attr "type")
22966 (if_then_else (match_operand:XF 3 "mult_operator")
22967 (const_string "fmul")
22968 (const_string "fop")))
22969 (set_attr "mode" "XF")])
22970
;; MODEF binary operation restricted to commutative operators, with at
;; most one memory input.  Three alternatives:
;;   0: "f" registers   (isa "*",     prefix "orig")
;;   1: "x" registers   (isa "noavx", prefix "orig")
;;   2: "v" registers   (isa "avx",   prefix "vex")
;; "type": alternatives 1 and 2 get "ssemul"/"sseadd", alternative 0 gets
;; "fmul"/"fop", chosen by whether operand 3 is a mult_operator.
;; "enabled": when SSE math is used for this mode, alternative 0 is
;; enabled only if TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH, and the other
;; alternatives keep the default ("*"); otherwise only alternative 0 is
;; enabled.
22971 (define_insn "*fop_<mode>_comm"
22972 [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
22973 (match_operator:MODEF 3 "binary_fp_operator"
22974 [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v")
22975 (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))]
22976 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22977 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
22978 && COMMUTATIVE_ARITH_P (operands[3])
22979 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
22980 "* return output_387_binary_op (insn, operands);"
22981 [(set (attr "type")
22982 (if_then_else (eq_attr "alternative" "1,2")
22983 (if_then_else (match_operand:MODEF 3 "mult_operator")
22984 (const_string "ssemul")
22985 (const_string "sseadd"))
22986 (if_then_else (match_operand:MODEF 3 "mult_operator")
22987 (const_string "fmul")
22988 (const_string "fop"))))
22989 (set_attr "isa" "*,noavx,avx")
22990 (set_attr "prefix" "orig,orig,vex")
22991 (set_attr "mode" "<MODE>")
22992 (set (attr "enabled")
22993 (if_then_else
22994 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
22995 (if_then_else
22996 (eq_attr "alternative" "0")
22997 (symbol_ref "TARGET_MIX_SSE_I387
22998 && X87_ENABLE_ARITH (<MODE>mode)")
22999 (const_string "*"))
23000 (if_then_else
23001 (eq_attr "alternative" "0")
23002 (symbol_ref "true")
23003 (symbol_ref "false"))))])
23004
;; HFmode add/sub/mul/div (the plusminusmultdiv code iterator) under
;; TARGET_AVX512FP16, printed as the three-operand v<insn>sh instruction.
;; Operand 2 may be memory, but not both inputs at once.
23005 (define_insn "*<insn>hf"
23006 [(set (match_operand:HF 0 "register_operand" "=v")
23007 (plusminusmultdiv:HF
23008 (match_operand:HF 1 "nonimmediate_operand" "<comm>v")
23009 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
23010 "TARGET_AVX512FP16
23011 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
23012 "v<insn>sh\t{%2, %1, %0|%0, %1, %2}"
23013 [(set_attr "prefix" "evex")
23014 (set_attr "mode" "HF")])
23015
;; SFmode UNSPEC_RCP for TARGET_SSE && TARGET_SSE_MATH.  Four
;; alternatives by source operand: tied register ("0"), other register
;; ("x"), memory ("m", isa "noavx") and "ja" (isa "avx", addr "gpr16").
;; The first two use the %v-prefixed template; the memory forms spell out
;; rcpss / vrcpss.  "avx_partial_xmm_update" is "true" only for
;; alternatives 2 and 3.
;; "preferred_for_speed": always true with TARGET_AVX; otherwise
;; alternatives 1, 2 and 3 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and alternative 0 is always
;; preferred.
23016 (define_insn "*rcpsf2_sse"
23017 [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
23018 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
23019 UNSPEC_RCP))]
23020 "TARGET_SSE && TARGET_SSE_MATH"
23021 "@
23022 %vrcpss\t{%d1, %0|%0, %d1}
23023 %vrcpss\t{%d1, %0|%0, %d1}
23024 rcpss\t{%1, %d0|%d0, %1}
23025 vrcpss\t{%1, %d0|%d0, %1}"
23026 [(set_attr "isa" "*,*,noavx,avx")
23027 (set_attr "addr" "*,*,*,gpr16")
23028 (set_attr "type" "sse")
23029 (set_attr "atom_sse_attr" "rcp")
23030 (set_attr "btver2_sse_attr" "rcp")
23031 (set_attr "prefix" "maybe_vex")
23032 (set_attr "mode" "SF")
23033 (set_attr "avx_partial_xmm_update" "false,false,true,true")
23034 (set (attr "preferred_for_speed")
23035 (cond [(match_test "TARGET_AVX")
23036 (symbol_ref "true")
23037 (eq_attr "alternative" "1,2,3")
23038 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
23039 ]
23040 (symbol_ref "true")))])
23041
;; HFmode UNSPEC_RCP under TARGET_AVX512FP16, printed as vrcpsh.
;; Alternative 0 takes a register source, alternative 1 a memory source;
;; "avx_partial_xmm_update" is "true" only for the memory alternative.
23042 (define_insn "rcphf2"
23043 [(set (match_operand:HF 0 "register_operand" "=v,v")
23044 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
23045 UNSPEC_RCP))]
23046 "TARGET_AVX512FP16"
23047 "@
23048 vrcpsh\t{%d1, %0|%0, %d1}
23049 vrcpsh\t{%1, %d0|%d0, %1}"
23050 [(set_attr "type" "sse")
23051 (set_attr "prefix" "evex")
23052 (set_attr "mode" "HF")
23053 (set_attr "avx_partial_xmm_update" "false,true")])
23054
;; XFmode binary x87 operation restricted to non-commutative operators
;; (!COMMUTATIVE_ARITH_P on operand 3).  Two alternatives let the output
;; be tied to either the first or the second input.  The assembly text
;; comes from output_387_binary_op.  "type" is "fdiv" when operand 3 is a
;; div_operator, else "fop".
23055 (define_insn "*fop_xf_1_i387"
23056 [(set (match_operand:XF 0 "register_operand" "=f,f")
23057 (match_operator:XF 3 "binary_fp_operator"
23058 [(match_operand:XF 1 "register_operand" "0,f")
23059 (match_operand:XF 2 "register_operand" "f,0")]))]
23060 "TARGET_80387
23061 && !COMMUTATIVE_ARITH_P (operands[3])"
23062 "* return output_387_binary_op (insn, operands);"
23063 [(set (attr "type")
23064 (if_then_else (match_operand:XF 3 "div_operator")
23065 (const_string "fdiv")
23066 (const_string "fop")))
23067 (set_attr "mode" "XF")])
23068
;; MODEF binary operation restricted to non-commutative operators, with
;; at most one memory input.  Four alternatives:
;;   0, 1: "f" registers, output tied to operand 1 or to operand 2
;;   2:    "x" registers (isa "noavx", prefix "orig")
;;   3:    "v" registers (isa "avx",   prefix "vex")
;; "type": alternatives 2 and 3 get "ssediv"/"sseadd", alternatives 0 and
;; 1 get "fdiv"/"fop", chosen by whether operand 3 is a div_operator.
;; "enabled": when SSE math is used for this mode, alternatives 0 and 1
;; are enabled only if TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH, and the
;; other alternatives keep the default ("*"); otherwise only alternatives
;; 0 and 1 are enabled.
23069 (define_insn "*fop_<mode>_1"
23070 [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
23071 (match_operator:MODEF 3 "binary_fp_operator"
23072 [(match_operand:MODEF 1
23073 "x87nonimm_ssenomem_operand" "0,fm,0,v")
23074 (match_operand:MODEF 2
23075 "nonimmediate_operand" "fm,0,xm,vm")]))]
23076 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23077 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
23078 && !COMMUTATIVE_ARITH_P (operands[3])
23079 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
23080 "* return output_387_binary_op (insn, operands);"
23081 [(set (attr "type")
23082 (if_then_else (eq_attr "alternative" "2,3")
23083 (if_then_else (match_operand:MODEF 3 "div_operator")
23084 (const_string "ssediv")
23085 (const_string "sseadd"))
23086 (if_then_else (match_operand:MODEF 3 "div_operator")
23087 (const_string "fdiv")
23088 (const_string "fop"))))
23089 (set_attr "isa" "*,*,noavx,avx")
23090 (set_attr "prefix" "orig,orig,orig,vex")
23091 (set_attr "mode" "<MODE>")
23092 (set (attr "enabled")
23093 (if_then_else
23094 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
23095 (if_then_else
23096 (eq_attr "alternative" "0,1")
23097 (symbol_ref "TARGET_MIX_SSE_I387
23098 && X87_ENABLE_ARITH (<MODE>mode)")
23099 (const_string "*"))
23100 (if_then_else
23101 (eq_attr "alternative" "0,1")
23102 (symbol_ref "true")
23103 (symbol_ref "false"))))])
23104
;; x87 binary operation whose FIRST input is an integer (SWI24) memory
;; operand converted with (float ...), and whose second input is the
;; register tied to the output.  Only used when SSE math is not in effect
;; for the FP mode, and either TARGET_USE_<SWI24:MODE>MODE_FIOP holds or
;; the function is optimized for size.  "type" is "fmul", "fdiv" or "fop"
;; depending on operand 3; "fp_int_src" is "true" and "mode" is the
;; integer mode.
23105 (define_insn "*fop_<X87MODEF:mode>_2_i387"
23106 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
23107 (match_operator:X87MODEF 3 "binary_fp_operator"
23108 [(float:X87MODEF
23109 (match_operand:SWI24 1 "nonimmediate_operand" "m"))
23110 (match_operand:X87MODEF 2 "register_operand" "0")]))]
23111 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
23112 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
23113 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
23114 || optimize_function_for_size_p (cfun))"
23115 "* return output_387_binary_op (insn, operands);"
23116 [(set (attr "type")
23117 (cond [(match_operand:X87MODEF 3 "mult_operator")
23118 (const_string "fmul")
23119 (match_operand:X87MODEF 3 "div_operator")
23120 (const_string "fdiv")
23121 ]
23122 (const_string "fop")))
23123 (set_attr "fp_int_src" "true")
23124 (set_attr "mode" "<SWI24:MODE>")])
23125
;; Mirror image of the _2_i387 pattern: here the SECOND input is the
;; integer (SWI24) memory operand converted with (float ...), and the
;; first input is the register tied to the output.  Conditions and
;; attributes are the same as for the _2_i387 pattern.
23126 (define_insn "*fop_<X87MODEF:mode>_3_i387"
23127 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
23128 (match_operator:X87MODEF 3 "binary_fp_operator"
23129 [(match_operand:X87MODEF 1 "register_operand" "0")
23130 (float:X87MODEF
23131 (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
23132 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
23133 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
23134 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
23135 || optimize_function_for_size_p (cfun))"
23136 "* return output_387_binary_op (insn, operands);"
23137 [(set (attr "type")
23138 (cond [(match_operand:X87MODEF 3 "mult_operator")
23139 (const_string "fmul")
23140 (match_operand:X87MODEF 3 "div_operator")
23141 (const_string "fdiv")
23142 ]
23143 (const_string "fop")))
23144 (set_attr "fp_int_src" "true")
23145 (set_attr "mode" "<SWI24:MODE>")])
23146
;; XFmode x87 binary operation whose FIRST input is a MODEF value widened
;; with float_extend (register or memory in alternative 0, tied to the
;; output in alternative 1) and whose second input is an XF register.
;; "type" is "fmul", "fdiv" or "fop" depending on operand 3; "mode" is
;; the narrower MODEF mode.
23147 (define_insn "*fop_xf_4_i387"
23148 [(set (match_operand:XF 0 "register_operand" "=f,f")
23149 (match_operator:XF 3 "binary_fp_operator"
23150 [(float_extend:XF
23151 (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
23152 (match_operand:XF 2 "register_operand" "0,f")]))]
23153 "TARGET_80387"
23154 "* return output_387_binary_op (insn, operands);"
23155 [(set (attr "type")
23156 (cond [(match_operand:XF 3 "mult_operator")
23157 (const_string "fmul")
23158 (match_operand:XF 3 "div_operator")
23159 (const_string "fdiv")
23160 ]
23161 (const_string "fop")))
23162 (set_attr "mode" "<MODE>")])
23163
;; DFmode x87 binary operation whose FIRST input is an SFmode value
;; widened with float_extend and whose second input is a DF register.
;; Only available when x87 DFmode arithmetic is enabled and SSE math is
;; not in effect for DFmode.  "type" is "fmul", "fdiv" or "fop" depending
;; on operand 3; "mode" is "SF".
23164 (define_insn "*fop_df_4_i387"
23165 [(set (match_operand:DF 0 "register_operand" "=f,f")
23166 (match_operator:DF 3 "binary_fp_operator"
23167 [(float_extend:DF
23168 (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
23169 (match_operand:DF 2 "register_operand" "0,f")]))]
23170 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
23171 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
23172 "* return output_387_binary_op (insn, operands);"
23173 [(set (attr "type")
23174 (cond [(match_operand:DF 3 "mult_operator")
23175 (const_string "fmul")
23176 (match_operand:DF 3 "div_operator")
23177 (const_string "fdiv")
23178 ]
23179 (const_string "fop")))
23180 (set_attr "mode" "SF")])
23181
;; XFmode x87 binary operation whose SECOND input is a MODEF value
;; widened with float_extend and whose first input is an XF register
;; (tied to the output in alternative 0).  "type" is "fmul", "fdiv" or
;; "fop" depending on operand 3; "mode" is the narrower MODEF mode.
23182 (define_insn "*fop_xf_5_i387"
23183 [(set (match_operand:XF 0 "register_operand" "=f,f")
23184 (match_operator:XF 3 "binary_fp_operator"
23185 [(match_operand:XF 1 "register_operand" "0,f")
23186 (float_extend:XF
23187 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
23188 "TARGET_80387"
23189 "* return output_387_binary_op (insn, operands);"
23190 [(set (attr "type")
23191 (cond [(match_operand:XF 3 "mult_operator")
23192 (const_string "fmul")
23193 (match_operand:XF 3 "div_operator")
23194 (const_string "fdiv")
23195 ]
23196 (const_string "fop")))
23197 (set_attr "mode" "<MODE>")])
23198
;; DFmode x87 binary operation whose SECOND input is an SFmode value
;; widened with float_extend and whose first input is a DF register.
;; Only available when x87 DFmode arithmetic is enabled and SSE math is
;; not in effect for DFmode.  "type" is "fmul", "fdiv" or "fop" depending
;; on operand 3; "mode" is "SF".
23199 (define_insn "*fop_df_5_i387"
23200 [(set (match_operand:DF 0 "register_operand" "=f,f")
23201 (match_operator:DF 3 "binary_fp_operator"
23202 [(match_operand:DF 1 "register_operand" "0,f")
23203 (float_extend:DF
23204 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
23205 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
23206 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
23207 "* return output_387_binary_op (insn, operands);"
23208 [(set (attr "type")
23209 (cond [(match_operand:DF 3 "mult_operator")
23210 (const_string "fmul")
23211 (match_operand:DF 3 "div_operator")
23212 (const_string "fdiv")
23213 ]
23214 (const_string "fop")))
23215 (set_attr "mode" "SF")])
23216
;; XFmode x87 binary operation where BOTH inputs are MODEF values widened
;; with float_extend.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  The output is tied to operand 1 in alternative 0
;; and to operand 2 in alternative 1.  "type" is "fmul", "fdiv" or "fop"
;; depending on operand 3; "mode" is the narrower MODEF mode.
23217 (define_insn "*fop_xf_6_i387"
23218 [(set (match_operand:XF 0 "register_operand" "=f,f")
23219 (match_operator:XF 3 "binary_fp_operator"
23220 [(float_extend:XF
23221 (match_operand:MODEF 1 "register_operand" "0,f"))
23222 (float_extend:XF
23223 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
23224 "TARGET_80387"
23225 "* return output_387_binary_op (insn, operands);"
23226 [(set (attr "type")
23227 (cond [(match_operand:XF 3 "mult_operator")
23228 (const_string "fmul")
23229 (match_operand:XF 3 "div_operator")
23230 (const_string "fdiv")
23231 ]
23232 (const_string "fop")))
23233 (set_attr "mode" "<MODE>")])
23234
;; DFmode x87 binary operation where BOTH inputs are SFmode values
;; widened with float_extend.  Only available when x87 DFmode arithmetic
;; is enabled and SSE math is not in effect for DFmode.  "type" is
;; "fmul", "fdiv" or "fop" depending on operand 3; "mode" is "SF".
23235 (define_insn "*fop_df_6_i387"
23236 [(set (match_operand:DF 0 "register_operand" "=f,f")
23237 (match_operator:DF 3 "binary_fp_operator"
23238 [(float_extend:DF
23239 (match_operand:SF 1 "register_operand" "0,f"))
23240 (float_extend:DF
23241 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
23242 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
23243 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
23244 "* return output_387_binary_op (insn, operands);"
23245 [(set (attr "type")
23246 (cond [(match_operand:DF 3 "mult_operator")
23247 (const_string "fmul")
23248 (match_operand:DF 3 "div_operator")
23249 (const_string "fdiv")
23250 ]
23251 (const_string "fop")))
23252 (set_attr "mode" "SF")])
23253 \f
23254 ;; FPU special functions.
23255
23256 ;; This pattern implements a no-op XFmode truncation for
23257 ;; all fancy i386 XFmode math functions.
23258
;; XFmode -> MODEF "truncation" expressed as UNSPEC_TRUNC_NOOP.  The
;; source is an x87 register and the destination may be memory or an x87
;; register ("=mf"); the text is produced by output_387_reg_move and the
;; insn is typed as an "fmov".
23259 (define_insn "truncxf<mode>2_i387_noop_unspec"
23260 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
23261 (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
23262 UNSPEC_TRUNC_NOOP))]
23263 "TARGET_USE_FANCY_MATH_387"
23264 "* return output_387_reg_move (insn, operands);"
23265 [(set_attr "type" "fmov")
23266 (set_attr "mode" "<MODE>")])
23267
;; XFmode square root on the x87: the input is tied to the output
;; register and the instruction is fsqrt.  The decode attributes for
;; athlon, amdfam10 and bdver1 are all set to "direct".
23268 (define_insn "sqrtxf2"
23269 [(set (match_operand:XF 0 "register_operand" "=f")
23270 (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
23271 "TARGET_USE_FANCY_MATH_387"
23272 "fsqrt"
23273 [(set_attr "type" "fpspc")
23274 (set_attr "mode" "XF")
23275 (set_attr "athlon_decode" "direct")
23276 (set_attr "amdfam10_decode" "direct")
23277 (set_attr "bdver1_decode" "direct")])
23278
;; SFmode UNSPEC_RSQRT for TARGET_SSE && TARGET_SSE_MATH.  Same
;; alternative layout as *rcpsf2_sse: tied register ("0"), other register
;; ("x"), memory ("m", isa "noavx") and "ja" (isa "avx", addr "gpr16").
;; "avx_partial_xmm_update" is "true" only for alternatives 2 and 3.
;; "preferred_for_speed": always true with TARGET_AVX; otherwise
;; alternatives 1, 2 and 3 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
23279 (define_insn "*rsqrtsf2_sse"
23280 [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
23281 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
23282 UNSPEC_RSQRT))]
23283 "TARGET_SSE && TARGET_SSE_MATH"
23284 "@
23285 %vrsqrtss\t{%d1, %0|%0, %d1}
23286 %vrsqrtss\t{%d1, %0|%0, %d1}
23287 rsqrtss\t{%1, %d0|%d0, %1}
23288 vrsqrtss\t{%1, %d0|%d0, %1}"
23289 [(set_attr "isa" "*,*,noavx,avx")
23290 (set_attr "addr" "*,*,*,gpr16")
23291 (set_attr "type" "sse")
23292 (set_attr "atom_sse_attr" "rcp")
23293 (set_attr "btver2_sse_attr" "rcp")
23294 (set_attr "prefix" "maybe_vex")
23295 (set_attr "mode" "SF")
23296 (set_attr "avx_partial_xmm_update" "false,false,true,true")
23297 (set (attr "preferred_for_speed")
23298 (cond [(match_test "TARGET_AVX")
23299 (symbol_ref "true")
23300 (eq_attr "alternative" "1,2,3")
23301 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
23302 ]
23303 (symbol_ref "true")))])
23304
;; Named expander for SFmode UNSPEC_RSQRT.  The RTL template is never
;; emitted: the C fragment delegates to ix86_emit_swsqrtsf and finishes
;; with DONE.
;; NOTE(review): the final argument is 1 here and 0 in sqrt<mode>2;
;; presumably it selects the reciprocal form -- confirm in
;; ix86_emit_swsqrtsf.
23305 (define_expand "rsqrtsf2"
23306 [(set (match_operand:SF 0 "register_operand")
23307 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
23308 UNSPEC_RSQRT))]
23309 "TARGET_SSE && TARGET_SSE_MATH"
23310 {
23311 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
23312 DONE;
23313 })
23314
;; HFmode UNSPEC_RSQRT under TARGET_AVX512FP16, printed as vrsqrtsh.
;; Alternative 0 takes a register source, alternative 1 a memory source;
;; "avx_partial_xmm_update" is "true" only for the memory alternative.
23315 (define_insn "rsqrthf2"
23316 [(set (match_operand:HF 0 "register_operand" "=v,v")
23317 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
23318 UNSPEC_RSQRT))]
23319 "TARGET_AVX512FP16"
23320 "@
23321 vrsqrtsh\t{%d1, %0|%0, %d1}
23322 vrsqrtsh\t{%1, %d0|%d0, %1}"
23323 [(set_attr "type" "sse")
23324 (set_attr "prefix" "evex")
23325 (set_attr "avx_partial_xmm_update" "false,true")
23326 (set_attr "mode" "HF")])
23327
;; HFmode square root under TARGET_AVX512FP16, printed as vsqrtsh.
;; Alternative 0 takes a register source, alternative 1 a memory source;
;; "avx_partial_xmm_update" is "true" only for the memory alternative.
23328 (define_insn "sqrthf2"
23329 [(set (match_operand:HF 0 "register_operand" "=v,v")
23330 (sqrt:HF
23331 (match_operand:HF 1 "nonimmediate_operand" "v,m")))]
23332 "TARGET_AVX512FP16"
23333 "@
23334 vsqrtsh\t{%d1, %0|%0, %d1}
23335 vsqrtsh\t{%1, %d0|%d0, %1}"
23336 [(set_attr "type" "sse")
23337 (set_attr "prefix" "evex")
23338 (set_attr "avx_partial_xmm_update" "false,true")
23339 (set_attr "mode" "HF")])
23340
;; MODEF square root when SSE math is in effect for the mode.  Three
;; alternatives by source operand: tied register ("0"), other register
;; ("v") and memory ("m"); "avx_partial_xmm_update" is "true" only for
;; the memory alternative.
;; "preferred_for_speed": always true with TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
23341 (define_insn "*sqrt<mode>2_sse"
23342 [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
23343 (sqrt:MODEF
23344 (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
23345 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
23346 "@
23347 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
23348 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
23349 %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
23350 [(set_attr "type" "sse")
23351 (set_attr "atom_sse_attr" "sqrt")
23352 (set_attr "btver2_sse_attr" "sqrt")
23353 (set_attr "prefix" "maybe_vex")
23354 (set_attr "avx_partial_xmm_update" "false,false,true")
23355 (set_attr "mode" "<MODE>")
23356 (set (attr "preferred_for_speed")
23357 (cond [(match_test "TARGET_AVX")
23358 (symbol_ref "true")
23359 (eq_attr "alternative" "1,2")
23360 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
23361 ]
23362 (symbol_ref "true")))])
23363
;; Named expander for MODEF square root.  The C fragment picks one of
;; three strategies:
;;   1. SFmode with SSE math, TARGET_RECIP_SQRT, not optimizing for size,
;;      and finite-only / non-trapping / unsafe-math flags set: delegate
;;      to ix86_emit_swsqrtsf (last argument 0) and finish.
;;   2. SSE math not in effect for the mode: extend the input to XFmode,
;;      use sqrtxf2, and bring the result back with the no-op truncation
;;      unspec.
;;   3. Otherwise the fragment falls through without DONE, so the (sqrt
;;      ...) template of this expander is emitted as written.
23364 (define_expand "sqrt<mode>2"
23365 [(set (match_operand:MODEF 0 "register_operand")
23366 (sqrt:MODEF
23367 (match_operand:MODEF 1 "nonimmediate_operand")))]
23368 "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
23369 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
23370 {
23371 if (<MODE>mode == SFmode
23372 && TARGET_SSE && TARGET_SSE_MATH
23373 && TARGET_RECIP_SQRT
23374 && !optimize_function_for_size_p (cfun)
23375 && flag_finite_math_only && !flag_trapping_math
23376 && flag_unsafe_math_optimizations)
23377 {
23378 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
23379 DONE;
23380 }
23381
23382 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
23383 {
23384 rtx op0 = gen_reg_rtx (XFmode);
23385 rtx op1 = gen_reg_rtx (XFmode);
23386
23387 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23388 emit_insn (gen_sqrtxf2 (op0, op1));
23389 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23390 DONE;
23391 }
23392 })
23393
;; Named expander for MODEF hypot, only with fancy x87 math, x87 use
;; allowed for the mode, and both flag_finite_math_only and
;; flag_unsafe_math_optimizations set.  Both inputs are extended to
;; XFmode, each is squared with mulxf3, the squares are added, sqrtxf2 is
;; applied, and the result is truncated back into operand 0.
23394 (define_expand "hypot<mode>3"
23395 [(use (match_operand:MODEF 0 "register_operand"))
23396 (use (match_operand:MODEF 1 "general_operand"))
23397 (use (match_operand:MODEF 2 "general_operand"))]
23398 "TARGET_USE_FANCY_MATH_387
23399 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23400 || TARGET_MIX_SSE_I387)
23401 && flag_finite_math_only
23402 && flag_unsafe_math_optimizations"
23403 {
23404 rtx op0 = gen_reg_rtx (XFmode);
23405 rtx op1 = gen_reg_rtx (XFmode);
23406 rtx op2 = gen_reg_rtx (XFmode);
23407
23408 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
23409 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23410
23411 emit_insn (gen_mulxf3 (op1, op1, op1));
23412 emit_insn (gen_mulxf3 (op2, op2, op2));
23413 emit_insn (gen_addxf3 (op0, op2, op1));
23414 emit_insn (gen_sqrtxf2 (op0, op0));
23415
23416 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23417 DONE;
23418 })
23419
;; Copy the x87 status register (FPSR_REG, read in CCFPmode) into HImode
;; operand 0, which must be the "a" register, using fnstsw.  The length
;; is given explicitly as 2 and the unit as "i387".
23420 (define_insn "x86_fnstsw_1"
23421 [(set (match_operand:HI 0 "register_operand" "=a")
23422 (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
23423 "TARGET_80387"
23424 "fnstsw\t%0"
23425 [(set_attr "length" "2")
23426 (set_attr "mode" "SI")
23427 (set_attr "unit" "i387")])
23428
;; The fprem instruction modelled as three parallel sets:
;;   operand 0 = UNSPEC_FPREM_F of inputs 2 and 3,
;;   operand 1 = UNSPEC_FPREM_U of the same inputs,
;;   FPSR_REG  = UNSPEC_C2_FLAG of the same inputs.
;; Input 2 is tied to output 0 and input 3 to output 1.
23429 (define_insn "fpremxf4_i387"
23430 [(set (match_operand:XF 0 "register_operand" "=f")
23431 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
23432 (match_operand:XF 3 "register_operand" "1")]
23433 UNSPEC_FPREM_F))
23434 (set (match_operand:XF 1 "register_operand" "=f")
23435 (unspec:XF [(match_dup 2) (match_dup 3)]
23436 UNSPEC_FPREM_U))
23437 (set (reg:CCFP FPSR_REG)
23438 (unspec:CCFP [(match_dup 2) (match_dup 3)]
23439 UNSPEC_C2_FLAG))]
23440 "TARGET_USE_FANCY_MATH_387"
23441 "fprem"
23442 [(set_attr "type" "fpspc")
23443 (set_attr "znver1_decode" "vector")
23444 (set_attr "mode" "XF")])
23445
;; Named expander for XFmode fmod.  Both inputs are copied into fresh
;; XFmode pseudos, then a loop is emitted: a label, the fprem insn
;; operating in place on the two pseudos, and a conditional jump back to
;; the label generated by ix86_emit_fp_unordered_jump.  After the loop
;; the first pseudo is copied to operand 0.
;; NOTE(review): the jump presumably repeats while the C2 flag set by
;; fprem reports an incomplete reduction -- confirm in
;; ix86_emit_fp_unordered_jump.
23446 (define_expand "fmodxf3"
23447 [(use (match_operand:XF 0 "register_operand"))
23448 (use (match_operand:XF 1 "general_operand"))
23449 (use (match_operand:XF 2 "general_operand"))]
23450 "TARGET_USE_FANCY_MATH_387"
23451 {
23452 rtx_code_label *label = gen_label_rtx ();
23453
23454 rtx op1 = gen_reg_rtx (XFmode);
23455 rtx op2 = gen_reg_rtx (XFmode);
23456
23457 emit_move_insn (op2, operands[2]);
23458 emit_move_insn (op1, operands[1]);
23459
23460 emit_label (label);
23461 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
23462 ix86_emit_fp_unordered_jump (label);
23463 LABEL_NUSES (label) = 1;
23464
23465 emit_move_insn (operands[0], op1);
23466 DONE;
23467 })
23468
;; Named expander for MODEF fmod.  Both inputs are extended into fresh
;; XFmode pseudos and the same label / fprem / jump-back loop as in
;; fmodxf3 is emitted.  The result is then narrowed into operand 0: with
;; strict SSE math for the mode (SSE math and not TARGET_MIX_SSE_I387) by
;; truncxf<mode>2, otherwise by the no-op truncation unspec.
;; NOTE(review): the loop exit condition is decided inside
;; ix86_emit_fp_unordered_jump, which is not visible here.
23469 (define_expand "fmod<mode>3"
23470 [(use (match_operand:MODEF 0 "register_operand"))
23471 (use (match_operand:MODEF 1 "general_operand"))
23472 (use (match_operand:MODEF 2 "general_operand"))]
23473 "TARGET_USE_FANCY_MATH_387"
23474 {
23475 rtx (*gen_truncxf) (rtx, rtx);
23476
23477 rtx_code_label *label = gen_label_rtx ();
23478
23479 rtx op1 = gen_reg_rtx (XFmode);
23480 rtx op2 = gen_reg_rtx (XFmode);
23481
23482 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
23483 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23484
23485 emit_label (label);
23486 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
23487 ix86_emit_fp_unordered_jump (label);
23488 LABEL_NUSES (label) = 1;
23489
23490 /* Truncate the result properly for strict SSE math. */
23491 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23492 && !TARGET_MIX_SSE_I387)
23493 gen_truncxf = gen_truncxf<mode>2;
23494 else
23495 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
23496
23497 emit_insn (gen_truncxf (operands[0], op1));
23498 DONE;
23499 })
23500
;; The fprem1 instruction modelled as three parallel sets:
;;   operand 0 = UNSPEC_FPREM1_F of inputs 2 and 3,
;;   operand 1 = UNSPEC_FPREM1_U of the same inputs,
;;   FPSR_REG  = UNSPEC_C2_FLAG of the same inputs.
;; Input 2 is tied to output 0 and input 3 to output 1.
23501 (define_insn "fprem1xf4_i387"
23502 [(set (match_operand:XF 0 "register_operand" "=f")
23503 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
23504 (match_operand:XF 3 "register_operand" "1")]
23505 UNSPEC_FPREM1_F))
23506 (set (match_operand:XF 1 "register_operand" "=f")
23507 (unspec:XF [(match_dup 2) (match_dup 3)]
23508 UNSPEC_FPREM1_U))
23509 (set (reg:CCFP FPSR_REG)
23510 (unspec:CCFP [(match_dup 2) (match_dup 3)]
23511 UNSPEC_C2_FLAG))]
23512 "TARGET_USE_FANCY_MATH_387"
23513 "fprem1"
23514 [(set_attr "type" "fpspc")
23515 (set_attr "znver1_decode" "vector")
23516 (set_attr "mode" "XF")])
23517
;; Named expander for XFmode remainder.  Identical in shape to fmodxf3
;; but built on the fprem1 insn: the inputs are copied into fresh XFmode
;; pseudos, a label / fprem1 / jump-back loop is emitted, and the first
;; pseudo is then copied to operand 0.
;; NOTE(review): the loop exit condition is decided inside
;; ix86_emit_fp_unordered_jump, which is not visible here.
23518 (define_expand "remainderxf3"
23519 [(use (match_operand:XF 0 "register_operand"))
23520 (use (match_operand:XF 1 "general_operand"))
23521 (use (match_operand:XF 2 "general_operand"))]
23522 "TARGET_USE_FANCY_MATH_387"
23523 {
23524 rtx_code_label *label = gen_label_rtx ();
23525
23526 rtx op1 = gen_reg_rtx (XFmode);
23527 rtx op2 = gen_reg_rtx (XFmode);
23528
23529 emit_move_insn (op2, operands[2]);
23530 emit_move_insn (op1, operands[1]);
23531
23532 emit_label (label);
23533 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
23534 ix86_emit_fp_unordered_jump (label);
23535 LABEL_NUSES (label) = 1;
23536
23537 emit_move_insn (operands[0], op1);
23538 DONE;
23539 })
23540
;; Named expander for MODEF remainder.  Identical in shape to fmod<mode>3
;; but built on the fprem1 insn: the inputs are extended into XFmode
;; pseudos, a label / fprem1 / jump-back loop is emitted, and the result
;; is narrowed into operand 0 by truncxf<mode>2 under strict SSE math for
;; the mode, otherwise by the no-op truncation unspec.
;; NOTE(review): the loop exit condition is decided inside
;; ix86_emit_fp_unordered_jump, which is not visible here.
23541 (define_expand "remainder<mode>3"
23542 [(use (match_operand:MODEF 0 "register_operand"))
23543 (use (match_operand:MODEF 1 "general_operand"))
23544 (use (match_operand:MODEF 2 "general_operand"))]
23545 "TARGET_USE_FANCY_MATH_387"
23546 {
23547 rtx (*gen_truncxf) (rtx, rtx);
23548
23549 rtx_code_label *label = gen_label_rtx ();
23550
23551 rtx op1 = gen_reg_rtx (XFmode);
23552 rtx op2 = gen_reg_rtx (XFmode);
23553
23554 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
23555 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23556
23557 emit_label (label);
23558
23559 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
23560 ix86_emit_fp_unordered_jump (label);
23561 LABEL_NUSES (label) = 1;
23562
23563 /* Truncate the result properly for strict SSE math. */
23564 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23565 && !TARGET_MIX_SSE_I387)
23566 gen_truncxf = gen_truncxf<mode>2;
23567 else
23568 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
23569
23570 emit_insn (gen_truncxf (operands[0], op1));
23571 DONE;
23572 })
23573
;; Int iterator over the two unspec codes UNSPEC_SIN and UNSPEC_COS, so
;; that each pattern using SINCOS is instantiated once per code.
23574 (define_int_iterator SINCOS
23575 [UNSPEC_SIN
23576 UNSPEC_COS])
23577
;; Maps each SINCOS unspec code to the string "sin" or "cos"; it is
;; substituted for <sincos> in pattern names and output templates.
23578 (define_int_attr sincos
23579 [(UNSPEC_SIN "sin")
23580 (UNSPEC_COS "cos")])
23581
;; XFmode sine / cosine on the x87 (one insn per SINCOS code), available
;; only with flag_unsafe_math_optimizations.  The input is tied to the
;; output register; the template expands to fsin or fcos.
23582 (define_insn "<sincos>xf2"
23583 [(set (match_operand:XF 0 "register_operand" "=f")
23584 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
23585 SINCOS))]
23586 "TARGET_USE_FANCY_MATH_387
23587 && flag_unsafe_math_optimizations"
23588 "f<sincos>"
23589 [(set_attr "type" "fpspc")
23590 (set_attr "znver1_decode" "vector")
23591 (set_attr "mode" "XF")])
23592
;; Named expanders for MODEF sine / cosine via the x87, available when
;; x87 use is allowed for the mode and flag_unsafe_math_optimizations is
;; set.  The input is extended to XFmode, the XFmode sin/cos insn is
;; applied, and the result is truncated back into operand 0.
23593 (define_expand "<sincos><mode>2"
23594 [(set (match_operand:MODEF 0 "register_operand")
23595 (unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
23596 SINCOS))]
23597 "TARGET_USE_FANCY_MATH_387
23598 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23599 || TARGET_MIX_SSE_I387)
23600 && flag_unsafe_math_optimizations"
23601 {
23602 rtx op0 = gen_reg_rtx (XFmode);
23603 rtx op1 = gen_reg_rtx (XFmode);
23604
23605 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23606 emit_insn (gen_<sincos>xf2 (op0, op1));
23607 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23608 DONE;
23609 })
23610
;; The fsincos instruction modelled as two parallel sets from the same
;; input (operand 2, tied to output 0): operand 0 receives
;; UNSPEC_SINCOS_COS and operand 1 receives UNSPEC_SINCOS_SIN.
;; Available only with flag_unsafe_math_optimizations.
23611 (define_insn "sincosxf3"
23612 [(set (match_operand:XF 0 "register_operand" "=f")
23613 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
23614 UNSPEC_SINCOS_COS))
23615 (set (match_operand:XF 1 "register_operand" "=f")
23616 (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
23617 "TARGET_USE_FANCY_MATH_387
23618 && flag_unsafe_math_optimizations"
23619 "fsincos"
23620 [(set_attr "type" "fpspc")
23621 (set_attr "znver1_decode" "vector")
23622 (set_attr "mode" "XF")])
23623
;; Named expander for MODEF sincos via the x87.  Operand 2 is the input;
;; it is extended to XFmode and passed to sincosxf3, whose two XFmode
;; results are truncated into operands 0 and 1 respectively (operand 0
;; from the UNSPEC_SINCOS_COS output, operand 1 from the
;; UNSPEC_SINCOS_SIN output of sincosxf3).
23624 (define_expand "sincos<mode>3"
23625 [(use (match_operand:MODEF 0 "register_operand"))
23626 (use (match_operand:MODEF 1 "register_operand"))
23627 (use (match_operand:MODEF 2 "general_operand"))]
23628 "TARGET_USE_FANCY_MATH_387
23629 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23630 || TARGET_MIX_SSE_I387)
23631 && flag_unsafe_math_optimizations"
23632 {
23633 rtx op0 = gen_reg_rtx (XFmode);
23634 rtx op1 = gen_reg_rtx (XFmode);
23635 rtx op2 = gen_reg_rtx (XFmode);
23636
23637 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
23638 emit_insn (gen_sincosxf3 (op0, op1, op2));
23639 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23640 emit_insn (gen_truncxf<mode>2 (operands[1], op1));
23641 DONE;
23642 })
23643
;; The fptan instruction modelled as two parallel sets: SFmode operand 0
;; is set to operand 3, which must satisfy const1_operand, and XFmode
;; operand 1 is set to UNSPEC_TAN of operand 2 (tied to output 0).
;; Available only with flag_unsafe_math_optimizations.
23644 (define_insn "fptanxf4_i387"
23645 [(set (match_operand:SF 0 "register_operand" "=f")
23646 (match_operand:SF 3 "const1_operand"))
23647 (set (match_operand:XF 1 "register_operand" "=f")
23648 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
23649 UNSPEC_TAN))]
23650 "TARGET_USE_FANCY_MATH_387
23651 && flag_unsafe_math_optimizations"
23652 "fptan"
23653 [(set_attr "type" "fpspc")
23654 (set_attr "znver1_decode" "vector")
23655 (set_attr "mode" "XF")])
23656
;; Named expander for XFmode tan.  It emits fptanxf4_i387 with a fresh
;; SFmode pseudo as the first output (the value set from the constant
;; CONST1_RTX (SFmode), which is not used afterwards here), operand 0 as
;; the tangent result and operand 1 as the input.
23657 (define_expand "tanxf2"
23658 [(use (match_operand:XF 0 "register_operand"))
23659 (use (match_operand:XF 1 "register_operand"))]
23660 "TARGET_USE_FANCY_MATH_387
23661 && flag_unsafe_math_optimizations"
23662 {
23663 rtx one = gen_reg_rtx (SFmode);
23664 emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1],
23665 CONST1_RTX (SFmode)));
23666 DONE;
23667 })
23668
;; Named expander for MODEF tan via the x87, available when x87 use is
;; allowed for the mode and flag_unsafe_math_optimizations is set.  The
;; input is extended to XFmode, tanxf2 is applied, and the result is
;; truncated back into operand 0.
23669 (define_expand "tan<mode>2"
23670 [(use (match_operand:MODEF 0 "register_operand"))
23671 (use (match_operand:MODEF 1 "general_operand"))]
23672 "TARGET_USE_FANCY_MATH_387
23673 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23674 || TARGET_MIX_SSE_I387)
23675 && flag_unsafe_math_optimizations"
23676 {
23677 rtx op0 = gen_reg_rtx (XFmode);
23678 rtx op1 = gen_reg_rtx (XFmode);
23679
23680 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23681 emit_insn (gen_tanxf2 (op0, op1));
23682 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23683 DONE;
23684 })
23685
;; The 387 "fpatan" instruction: operand 0 is UNSPEC_FPATAN of operand 2
;; and operand 1.  Operand 2 is tied to the register of operand 0 ("0"),
;; operand 1 may be any "f" register, and the scratch operand 3 is tied to
;; operand 1 ("=1"), so that register is recorded as clobbered.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23686 (define_insn "atan2xf3"
23687 [(set (match_operand:XF 0 "register_operand" "=f")
23688 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
23689 (match_operand:XF 1 "register_operand" "f")]
23690 UNSPEC_FPATAN))
23691 (clobber (match_scratch:XF 3 "=1"))]
23692 "TARGET_USE_FANCY_MATH_387
23693 && flag_unsafe_math_optimizations"
23694 "fpatan"
23695 [(set_attr "type" "fpspc")
23696 (set_attr "znver1_decode" "vector")
23697 (set_attr "mode" "XF")])
23698
;; atan2 for MODEF modes through the 387: both inputs are widened to
;; XFmode (operand 2 first, then operand 1), atan2xf3 is emitted on the
;; widened values, and the XFmode result is narrowed into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
23699 (define_expand "atan2<mode>3"
23700 [(use (match_operand:MODEF 0 "register_operand"))
23701 (use (match_operand:MODEF 1 "general_operand"))
23702 (use (match_operand:MODEF 2 "general_operand"))]
23703 "TARGET_USE_FANCY_MATH_387
23704 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23705 || TARGET_MIX_SSE_I387)
23706 && flag_unsafe_math_optimizations"
23707 {
23708 rtx xf_res = gen_reg_rtx (XFmode);
23709 rtx xf_arg1 = gen_reg_rtx (XFmode);
23710 rtx xf_arg2 = gen_reg_rtx (XFmode);
23711
/* Widen both inputs; operand 2 is extended before operand 1.  */
23712 emit_insn (gen_extend<mode>xf2 (xf_arg2, operands[2]));
23713 emit_insn (gen_extend<mode>xf2 (xf_arg1, operands[1]));
23714
23715 emit_insn (gen_atan2xf3 (xf_res, xf_arg1, xf_arg2));
23716 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23717 DONE;
23718 })
23719
;; atan in XFmode, expressed as UNSPEC_FPATAN whose first argument is
;; operand 2 and whose second argument is the input operand 1.  The
;; preparation statement sets operand 2 to a register holding
;; CONST1_RTX (XFmode).  The parallel carries an XFmode scratch clobber.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23720 (define_expand "atanxf2"
23721 [(parallel [(set (match_operand:XF 0 "register_operand")
23722 (unspec:XF [(match_dup 2)
23723 (match_operand:XF 1 "register_operand")]
23724 UNSPEC_FPATAN))
23725 (clobber (scratch:XF))])]
23726 "TARGET_USE_FANCY_MATH_387
23727 && flag_unsafe_math_optimizations"
23728 "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
23729
;; atan for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; atanxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
23730 (define_expand "atan<mode>2"
23731 [(use (match_operand:MODEF 0 "register_operand"))
23732 (use (match_operand:MODEF 1 "general_operand"))]
23733 "TARGET_USE_FANCY_MATH_387
23734 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23735 || TARGET_MIX_SSE_I387)
23736 && flag_unsafe_math_optimizations"
23737 {
23738 rtx xf_res = gen_reg_rtx (XFmode);
23739 rtx xf_arg = gen_reg_rtx (XFmode);
23740
/* Widen, compute in XFmode, narrow.  */
23741 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23742 emit_insn (gen_atanxf2 (xf_res, xf_arg));
23743 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23744 DONE;
23745 })
23746
;; asin in XFmode as a four-step sequence on input x (operand 1):
;;   op2 = x * x;  op4 = op3 - op2 (op3 holds 1);  op5 = sqrt (op4);
;;   op0 = UNSPEC_FPATAN (op5, x), with an XFmode scratch clobber.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23747 (define_expand "asinxf2"
23748 [(set (match_dup 2)
23749 (mult:XF (match_operand:XF 1 "register_operand")
23750 (match_dup 1)))
23751 (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
23752 (set (match_dup 5) (sqrt:XF (match_dup 4)))
23753 (parallel [(set (match_operand:XF 0 "register_operand")
23754 (unspec:XF [(match_dup 5) (match_dup 1)]
23755 UNSPEC_FPATAN))
23756 (clobber (scratch:XF))])]
23757 "TARGET_USE_FANCY_MATH_387
23758 && flag_unsafe_math_optimizations"
23759 {
23760 int opno;
23761
/* Operands 2..5 are fresh XFmode temporaries used by the template.  */
23762 for (opno = 2; opno <= 5; ++opno)
23763 operands[opno] = gen_reg_rtx (XFmode);
23764
/* Operand 3 is the constant 1 subtracted from in the second step.  */
23765 emit_move_insn (operands[3], CONST1_RTX (XFmode));
23766 })
23767
;; asin for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; asinxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
23768 (define_expand "asin<mode>2"
23769 [(use (match_operand:MODEF 0 "register_operand"))
23770 (use (match_operand:MODEF 1 "general_operand"))]
23771 "TARGET_USE_FANCY_MATH_387
23772 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23773 || TARGET_MIX_SSE_I387)
23774 && flag_unsafe_math_optimizations"
23775 {
23776 rtx xf_res = gen_reg_rtx (XFmode);
23777 rtx xf_arg = gen_reg_rtx (XFmode);
23778
/* Widen, compute in XFmode, narrow.  */
23779 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23780 emit_insn (gen_asinxf2 (xf_res, xf_arg));
23781 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23782 DONE;
23783 })
23784
;; acos in XFmode as a four-step sequence on input x (operand 1):
;;   op2 = x * x;  op4 = op3 - op2 (op3 holds 1);  op5 = sqrt (op4);
;;   op0 = UNSPEC_FPATAN (x, op5), with an XFmode scratch clobber.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23785 (define_expand "acosxf2"
23786 [(set (match_dup 2)
23787 (mult:XF (match_operand:XF 1 "register_operand")
23788 (match_dup 1)))
23789 (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
23790 (set (match_dup 5) (sqrt:XF (match_dup 4)))
23791 (parallel [(set (match_operand:XF 0 "register_operand")
23792 (unspec:XF [(match_dup 1) (match_dup 5)]
23793 UNSPEC_FPATAN))
23794 (clobber (scratch:XF))])]
23795 "TARGET_USE_FANCY_MATH_387
23796 && flag_unsafe_math_optimizations"
23797 {
23798 int opno;
23799
/* Operands 2..5 are fresh XFmode temporaries used by the template.  */
23800 for (opno = 2; opno <= 5; ++opno)
23801 operands[opno] = gen_reg_rtx (XFmode);
23802
/* Operand 3 is the constant 1 subtracted from in the second step.  */
23803 emit_move_insn (operands[3], CONST1_RTX (XFmode));
23804 })
23805
;; acos for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; acosxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
23806 (define_expand "acos<mode>2"
23807 [(use (match_operand:MODEF 0 "register_operand"))
23808 (use (match_operand:MODEF 1 "general_operand"))]
23809 "TARGET_USE_FANCY_MATH_387
23810 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23811 || TARGET_MIX_SSE_I387)
23812 && flag_unsafe_math_optimizations"
23813 {
23814 rtx xf_res = gen_reg_rtx (XFmode);
23815 rtx xf_arg = gen_reg_rtx (XFmode);
23816
/* Widen, compute in XFmode, narrow.  */
23817 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23818 emit_insn (gen_acosxf2 (xf_res, xf_arg));
23819 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23820 DONE;
23821 })
23822
;; sinh in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_sinh (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387, flag_finite_math_only and
;; flag_unsafe_math_optimizations.
23823 (define_expand "sinhxf2"
23824 [(use (match_operand:XF 0 "register_operand"))
23825 (use (match_operand:XF 1 "register_operand"))]
23826 "TARGET_USE_FANCY_MATH_387
23827 && flag_finite_math_only
23828 && flag_unsafe_math_optimizations"
23829 {
23830 ix86_emit_i387_sinh (operands[0], operands[1]);
23831 DONE;
23832 })
23833
;; sinh for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; sinhxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387, flag_finite_math_only and
;; flag_unsafe_math_optimizations, and that <MODE> is not done with SSE
;; math unless TARGET_MIX_SSE_I387 is set.
23834 (define_expand "sinh<mode>2"
23835 [(use (match_operand:MODEF 0 "register_operand"))
23836 (use (match_operand:MODEF 1 "general_operand"))]
23837 "TARGET_USE_FANCY_MATH_387
23838 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23839 || TARGET_MIX_SSE_I387)
23840 && flag_finite_math_only
23841 && flag_unsafe_math_optimizations"
23842 {
23843 rtx xf_res = gen_reg_rtx (XFmode);
23844 rtx xf_arg = gen_reg_rtx (XFmode);
23845
/* Widen, compute in XFmode, narrow.  */
23846 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23847 emit_insn (gen_sinhxf2 (xf_res, xf_arg));
23848 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23849 DONE;
23850 })
23851
;; cosh in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_cosh (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23852 (define_expand "coshxf2"
23853 [(use (match_operand:XF 0 "register_operand"))
23854 (use (match_operand:XF 1 "register_operand"))]
23855 "TARGET_USE_FANCY_MATH_387
23856 && flag_unsafe_math_optimizations"
23857 {
23858 ix86_emit_i387_cosh (operands[0], operands[1]);
23859 DONE;
23860 })
23861
;; cosh for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; coshxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
23862 (define_expand "cosh<mode>2"
23863 [(use (match_operand:MODEF 0 "register_operand"))
23864 (use (match_operand:MODEF 1 "general_operand"))]
23865 "TARGET_USE_FANCY_MATH_387
23866 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23867 || TARGET_MIX_SSE_I387)
23868 && flag_unsafe_math_optimizations"
23869 {
23870 rtx xf_res = gen_reg_rtx (XFmode);
23871 rtx xf_arg = gen_reg_rtx (XFmode);
23872
/* Widen, compute in XFmode, narrow.  */
23873 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23874 emit_insn (gen_coshxf2 (xf_res, xf_arg));
23875 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23876 DONE;
23877 })
23878
;; tanh in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_tanh (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23879 (define_expand "tanhxf2"
23880 [(use (match_operand:XF 0 "register_operand"))
23881 (use (match_operand:XF 1 "register_operand"))]
23882 "TARGET_USE_FANCY_MATH_387
23883 && flag_unsafe_math_optimizations"
23884 {
23885 ix86_emit_i387_tanh (operands[0], operands[1]);
23886 DONE;
23887 })
23888
;; tanh for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; tanhxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
23889 (define_expand "tanh<mode>2"
23890 [(use (match_operand:MODEF 0 "register_operand"))
23891 (use (match_operand:MODEF 1 "general_operand"))]
23892 "TARGET_USE_FANCY_MATH_387
23893 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23894 || TARGET_MIX_SSE_I387)
23895 && flag_unsafe_math_optimizations"
23896 {
23897 rtx xf_res = gen_reg_rtx (XFmode);
23898 rtx xf_arg = gen_reg_rtx (XFmode);
23899
/* Widen, compute in XFmode, narrow.  */
23900 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23901 emit_insn (gen_tanhxf2 (xf_res, xf_arg));
23902 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23903 DONE;
23904 })
23905
;; asinh in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_asinh (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387, flag_finite_math_only and
;; flag_unsafe_math_optimizations.
23906 (define_expand "asinhxf2"
23907 [(use (match_operand:XF 0 "register_operand"))
23908 (use (match_operand:XF 1 "register_operand"))]
23909 "TARGET_USE_FANCY_MATH_387
23910 && flag_finite_math_only
23911 && flag_unsafe_math_optimizations"
23912 {
23913 ix86_emit_i387_asinh (operands[0], operands[1]);
23914 DONE;
23915 })
23916
;; asinh for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand asinhxf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387, flag_finite_math_only and
;; flag_unsafe_math_optimizations, and that <MODE> is not done with SSE
;; math unless TARGET_MIX_SSE_I387 is set.
23917 (define_expand "asinh<mode>2"
23918 [(use (match_operand:MODEF 0 "register_operand"))
23919 (use (match_operand:MODEF 1 "general_operand"))]
23920 "TARGET_USE_FANCY_MATH_387
23921 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23922 || TARGET_MIX_SSE_I387)
23923 && flag_finite_math_only
23924 && flag_unsafe_math_optimizations"
23925 {
23926 rtx xf_res = gen_reg_rtx (XFmode);
23927 rtx xf_arg = gen_reg_rtx (XFmode);
23928
/* Widen, compute in XFmode, narrow.  */
23929 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23930 emit_insn (gen_asinhxf2 (xf_res, xf_arg));
23931 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23932 DONE;
23933 })
23934
;; acosh in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_acosh (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23935 (define_expand "acoshxf2"
23936 [(use (match_operand:XF 0 "register_operand"))
23937 (use (match_operand:XF 1 "register_operand"))]
23938 "TARGET_USE_FANCY_MATH_387
23939 && flag_unsafe_math_optimizations"
23940 {
23941 ix86_emit_i387_acosh (operands[0], operands[1]);
23942 DONE;
23943 })
23944
;; acosh for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand acoshxf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
23945 (define_expand "acosh<mode>2"
23946 [(use (match_operand:MODEF 0 "register_operand"))
23947 (use (match_operand:MODEF 1 "general_operand"))]
23948 "TARGET_USE_FANCY_MATH_387
23949 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23950 || TARGET_MIX_SSE_I387)
23951 && flag_unsafe_math_optimizations"
23952 {
23953 rtx xf_res = gen_reg_rtx (XFmode);
23954 rtx xf_arg = gen_reg_rtx (XFmode);
23955
/* Widen, compute in XFmode, narrow.  */
23956 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23957 emit_insn (gen_acoshxf2 (xf_res, xf_arg));
23958 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23959 DONE;
23960 })
23961
;; atanh in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_atanh (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23962 (define_expand "atanhxf2"
23963 [(use (match_operand:XF 0 "register_operand"))
23964 (use (match_operand:XF 1 "register_operand"))]
23965 "TARGET_USE_FANCY_MATH_387
23966 && flag_unsafe_math_optimizations"
23967 {
23968 ix86_emit_i387_atanh (operands[0], operands[1]);
23969 DONE;
23970 })
23971
;; atanh for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand atanhxf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
23972 (define_expand "atanh<mode>2"
23973 [(use (match_operand:MODEF 0 "register_operand"))
23974 (use (match_operand:MODEF 1 "general_operand"))]
23975 "TARGET_USE_FANCY_MATH_387
23976 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23977 || TARGET_MIX_SSE_I387)
23978 && flag_unsafe_math_optimizations"
23979 {
23980 rtx xf_res = gen_reg_rtx (XFmode);
23981 rtx xf_arg = gen_reg_rtx (XFmode);
23982
/* Widen, compute in XFmode, narrow.  */
23983 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
23984 emit_insn (gen_atanhxf2 (xf_res, xf_arg));
23985 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
23986 DONE;
23987 })
23988
;; The 387 "fyl2x" instruction: operand 0 is UNSPEC_FYL2X of operand 1 and
;; operand 2.  Operand 1 is tied to the register of operand 0 ("0"),
;; operand 2 may be any "f" register, and the scratch operand 3 is tied to
;; operand 2 ("=2"), so that register is recorded as clobbered.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23989 (define_insn "fyl2xxf3_i387"
23990 [(set (match_operand:XF 0 "register_operand" "=f")
23991 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
23992 (match_operand:XF 2 "register_operand" "f")]
23993 UNSPEC_FYL2X))
23994 (clobber (match_scratch:XF 3 "=2"))]
23995 "TARGET_USE_FANCY_MATH_387
23996 && flag_unsafe_math_optimizations"
23997 "fyl2x"
23998 [(set_attr "type" "fpspc")
23999 (set_attr "znver1_decode" "vector")
24000 (set_attr "mode" "XF")])
24001
;; log in XFmode, expressed as UNSPEC_FYL2X of the input operand 1 and
;; operand 2, where operand 2 is a register loaded with 387 standard
;; constant number 4 (annotated "fldln2" below).
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24002 (define_expand "logxf2"
24003 [(parallel [(set (match_operand:XF 0 "register_operand")
24004 (unspec:XF [(match_operand:XF 1 "register_operand")
24005 (match_dup 2)] UNSPEC_FYL2X))
24006 (clobber (scratch:XF))])]
24007 "TARGET_USE_FANCY_MATH_387
24008 && flag_unsafe_math_optimizations"
24009 {
24010 rtx fldln2_cst = standard_80387_constant_rtx (4); /* fldln2 */
24011 operands[2] = force_reg (XFmode, fldln2_cst);
24012 })
24013
;; log for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; logxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
24014 (define_expand "log<mode>2"
24015 [(use (match_operand:MODEF 0 "register_operand"))
24016 (use (match_operand:MODEF 1 "general_operand"))]
24017 "TARGET_USE_FANCY_MATH_387
24018 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24019 || TARGET_MIX_SSE_I387)
24020 && flag_unsafe_math_optimizations"
24021 {
24022 rtx xf_res = gen_reg_rtx (XFmode);
24023 rtx xf_arg = gen_reg_rtx (XFmode);
24024
/* Widen, compute in XFmode, narrow.  */
24025 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24026 emit_insn (gen_logxf2 (xf_res, xf_arg));
24027 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24028 DONE;
24029 })
24030
;; log10 in XFmode, expressed as UNSPEC_FYL2X of the input operand 1 and
;; operand 2, where operand 2 is a register loaded with 387 standard
;; constant number 3 (annotated "fldlg2" below).
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24031 (define_expand "log10xf2"
24032 [(parallel [(set (match_operand:XF 0 "register_operand")
24033 (unspec:XF [(match_operand:XF 1 "register_operand")
24034 (match_dup 2)] UNSPEC_FYL2X))
24035 (clobber (scratch:XF))])]
24036 "TARGET_USE_FANCY_MATH_387
24037 && flag_unsafe_math_optimizations"
24038 {
24039 rtx fldlg2_cst = standard_80387_constant_rtx (3); /* fldlg2 */
24040 operands[2] = force_reg (XFmode, fldlg2_cst);
24041 })
24042
;; log10 for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand log10xf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
24043 (define_expand "log10<mode>2"
24044 [(use (match_operand:MODEF 0 "register_operand"))
24045 (use (match_operand:MODEF 1 "general_operand"))]
24046 "TARGET_USE_FANCY_MATH_387
24047 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24048 || TARGET_MIX_SSE_I387)
24049 && flag_unsafe_math_optimizations"
24050 {
24051 rtx xf_res = gen_reg_rtx (XFmode);
24052 rtx xf_arg = gen_reg_rtx (XFmode);
24053
/* Widen, compute in XFmode, narrow.  */
24054 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24055 emit_insn (gen_log10xf2 (xf_res, xf_arg));
24056 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24057 DONE;
24058 })
24059
;; log2 in XFmode, expressed as UNSPEC_FYL2X of the input operand 1 and
;; operand 2.  The preparation statement sets operand 2 to a register
;; holding CONST1_RTX (XFmode).  The parallel carries an XFmode scratch
;; clobber.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24060 (define_expand "log2xf2"
24061 [(parallel [(set (match_operand:XF 0 "register_operand")
24062 (unspec:XF [(match_operand:XF 1 "register_operand")
24063 (match_dup 2)] UNSPEC_FYL2X))
24064 (clobber (scratch:XF))])]
24065 "TARGET_USE_FANCY_MATH_387
24066 && flag_unsafe_math_optimizations"
24067 "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
24068
;; log2 for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; log2xf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
24069 (define_expand "log2<mode>2"
24070 [(use (match_operand:MODEF 0 "register_operand"))
24071 (use (match_operand:MODEF 1 "general_operand"))]
24072 "TARGET_USE_FANCY_MATH_387
24073 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24074 || TARGET_MIX_SSE_I387)
24075 && flag_unsafe_math_optimizations"
24076 {
24077 rtx xf_res = gen_reg_rtx (XFmode);
24078 rtx xf_arg = gen_reg_rtx (XFmode);
24079
/* Widen, compute in XFmode, narrow.  */
24080 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24081 emit_insn (gen_log2xf2 (xf_res, xf_arg));
24082 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24083 DONE;
24084 })
24085
;; The 387 "fyl2xp1" instruction: operand 0 is UNSPEC_FYL2XP1 of operand 1
;; and operand 2.  Operand 1 is tied to the register of operand 0 ("0"),
;; operand 2 may be any "f" register, and the scratch operand 3 is tied to
;; operand 2 ("=2"), so that register is recorded as clobbered.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24086 (define_insn "fyl2xp1xf3_i387"
24087 [(set (match_operand:XF 0 "register_operand" "=f")
24088 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
24089 (match_operand:XF 2 "register_operand" "f")]
24090 UNSPEC_FYL2XP1))
24091 (clobber (match_scratch:XF 3 "=2"))]
24092 "TARGET_USE_FANCY_MATH_387
24093 && flag_unsafe_math_optimizations"
24094 "fyl2xp1"
24095 [(set_attr "type" "fpspc")
24096 (set_attr "znver1_decode" "vector")
24097 (set_attr "mode" "XF")])
24098
;; log1p in XFmode: the whole expansion is delegated to
;; ix86_emit_i387_log1p (operands[0], operands[1]).  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24099 (define_expand "log1pxf2"
24100 [(use (match_operand:XF 0 "register_operand"))
24101 (use (match_operand:XF 1 "register_operand"))]
24102 "TARGET_USE_FANCY_MATH_387
24103 && flag_unsafe_math_optimizations"
24104 {
24105 ix86_emit_i387_log1p (operands[0], operands[1]);
24106 DONE;
24107 })
24108
;; log1p for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand log1pxf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
24109 (define_expand "log1p<mode>2"
24110 [(use (match_operand:MODEF 0 "register_operand"))
24111 (use (match_operand:MODEF 1 "general_operand"))]
24112 "TARGET_USE_FANCY_MATH_387
24113 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24114 || TARGET_MIX_SSE_I387)
24115 && flag_unsafe_math_optimizations"
24116 {
24117 rtx xf_res = gen_reg_rtx (XFmode);
24118 rtx xf_arg = gen_reg_rtx (XFmode);
24119
/* Widen, compute in XFmode, narrow.  */
24120 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24121 emit_insn (gen_log1pxf2 (xf_res, xf_arg));
24122 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24123 DONE;
24124 })
24125
;; The 387 "fxtract" instruction, with two results computed from operand 2:
;; operand 0 receives UNSPEC_XTRACT_FRACT and operand 1 receives
;; UNSPEC_XTRACT_EXP.  Operand 2 is tied to the register of operand 0 ("0").
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24126 (define_insn "fxtractxf3_i387"
24127 [(set (match_operand:XF 0 "register_operand" "=f")
24128 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
24129 UNSPEC_XTRACT_FRACT))
24130 (set (match_operand:XF 1 "register_operand" "=f")
24131 (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
24132 "TARGET_USE_FANCY_MATH_387
24133 && flag_unsafe_math_optimizations"
24134 "fxtract"
24135 [(set_attr "type" "fpspc")
24136 (set_attr "znver1_decode" "vector")
24137 (set_attr "mode" "XF")])
24138
;; logb in XFmode: a parallel of the two extraction unspecs on operand 1.
;; The UNSPEC_XTRACT_EXP part is the result, stored in operand 0; the
;; UNSPEC_XTRACT_FRACT part goes to operand 2, a fresh XFmode pseudo
;; created by the preparation statement.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24139 (define_expand "logbxf2"
24140 [(parallel [(set (match_dup 2)
24141 (unspec:XF [(match_operand:XF 1 "register_operand")]
24142 UNSPEC_XTRACT_FRACT))
24143 (set (match_operand:XF 0 "register_operand")
24144 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
24145 "TARGET_USE_FANCY_MATH_387
24146 && flag_unsafe_math_optimizations"
24147 "operands[2] = gen_reg_rtx (XFmode);")
24148
;; logb for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; logbxf2 on it (result in op0), and narrow that result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
24149 (define_expand "logb<mode>2"
24150 [(use (match_operand:MODEF 0 "register_operand"))
24151 (use (match_operand:MODEF 1 "general_operand"))]
24152 "TARGET_USE_FANCY_MATH_387
24153 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24154 || TARGET_MIX_SSE_I387)
24155 && flag_unsafe_math_optimizations"
24156 {
24157 rtx op0 = gen_reg_rtx (XFmode);
24158 rtx op1 = gen_reg_rtx (XFmode);
24159
24160 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
24161 emit_insn (gen_logbxf2 (op0, op1));
/* Narrow the logbxf2 result (op0), not the widened input (op1).  */
24162 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
24163 DONE;
24164 })
24165
;; ilogb of an XFmode value into SImode operand 0: emit fxtractxf3_i387 on
;; operand 1 and convert its UNSPEC_XTRACT_EXP result to SImode with
;; fix_truncxfsi2.  The expansion FAILs when optimize_insn_for_size_p ().
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24166 (define_expand "ilogbxf2"
24167 [(use (match_operand:SI 0 "register_operand"))
24168 (use (match_operand:XF 1 "register_operand"))]
24169 "TARGET_USE_FANCY_MATH_387
24170 && flag_unsafe_math_optimizations"
24171 {
24172 rtx fract, expo;
24173
24174 if (optimize_insn_for_size_p ())
24175 FAIL;
24176
24177 fract = gen_reg_rtx (XFmode);
24178 expo = gen_reg_rtx (XFmode);
24179
/* Only the exponent output of the extraction is consumed.  */
24180 emit_insn (gen_fxtractxf3_i387 (fract, expo, operands[1]));
24181 emit_insn (gen_fix_truncxfsi2 (operands[0], expo));
24182 DONE;
24183 })
24184
;; ilogb of a MODEF value into SImode operand 0: widen operand 1 to
;; XFmode, emit fxtractxf3_i387 on it, and convert the UNSPEC_XTRACT_EXP
;; result to SImode with fix_truncxfsi2.  The expansion FAILs when
;; optimize_insn_for_size_p ().  Requires TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations, and that <MODE> is not done with SSE
;; math unless TARGET_MIX_SSE_I387 is set.
24185 (define_expand "ilogb<mode>2"
24186 [(use (match_operand:SI 0 "register_operand"))
24187 (use (match_operand:MODEF 1 "general_operand"))]
24188 "TARGET_USE_FANCY_MATH_387
24189 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24190 || TARGET_MIX_SSE_I387)
24191 && flag_unsafe_math_optimizations"
24192 {
24193 rtx fract, expo, xf_arg;
24194
24195 if (optimize_insn_for_size_p ())
24196 FAIL;
24197
24198 fract = gen_reg_rtx (XFmode);
24199 expo = gen_reg_rtx (XFmode);
24200 xf_arg = gen_reg_rtx (XFmode);
24201
/* Widen, extract, then convert only the exponent output.  */
24202 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24203 emit_insn (gen_fxtractxf3_i387 (fract, expo, xf_arg));
24204 emit_insn (gen_fix_truncxfsi2 (operands[0], expo));
24205 DONE;
24206 })
24207
;; The 387 "f2xm1" instruction: operand 0 is UNSPEC_F2XM1 of operand 1,
;; which is tied to the register of operand 0 ("0").
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24208 (define_insn "*f2xm1xf2_i387"
24209 [(set (match_operand:XF 0 "register_operand" "=f")
24210 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
24211 UNSPEC_F2XM1))]
24212 "TARGET_USE_FANCY_MATH_387
24213 && flag_unsafe_math_optimizations"
24214 "f2xm1"
24215 [(set_attr "type" "fpspc")
24216 (set_attr "znver1_decode" "vector")
24217 (set_attr "mode" "XF")])
24218
;; The 387 "fscale" instruction, with two results computed from operands 2
;; and 3: operand 0 receives UNSPEC_FSCALE_FRACT and operand 1 receives
;; UNSPEC_FSCALE_EXP.  Operand 2 is tied to the register of operand 0 ("0")
;; and operand 3 to the register of operand 1 ("1").
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24219 (define_insn "fscalexf4_i387"
24220 [(set (match_operand:XF 0 "register_operand" "=f")
24221 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
24222 (match_operand:XF 3 "register_operand" "1")]
24223 UNSPEC_FSCALE_FRACT))
24224 (set (match_operand:XF 1 "register_operand" "=f")
24225 (unspec:XF [(match_dup 2) (match_dup 3)]
24226 UNSPEC_FSCALE_EXP))]
24227 "TARGET_USE_FANCY_MATH_387
24228 && flag_unsafe_math_optimizations"
24229 "fscale"
24230 [(set_attr "type" "fpspc")
24231 (set_attr "znver1_decode" "vector")
24232 (set_attr "mode" "XF")])
24233
;; Shared core of the XFmode exp expanders.  With inputs operand 1 and
;; operand 2 the template emits:
;;   op3 = op1 * op2;  op4 = UNSPEC_FRNDINT (op3);  op5 = op3 - op4;
;;   op6 = UNSPEC_F2XM1 (op5);  op8 = op6 + op7 (op7 holds 1);
;;   op0 = UNSPEC_FSCALE_FRACT (op8, op4), with the paired
;;   UNSPEC_FSCALE_EXP result going to op9.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24234 (define_expand "expNcorexf3"
24235 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
24236 (match_operand:XF 2 "register_operand")))
24237 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
24238 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
24239 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
24240 (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
24241 (parallel [(set (match_operand:XF 0 "register_operand")
24242 (unspec:XF [(match_dup 8) (match_dup 4)]
24243 UNSPEC_FSCALE_FRACT))
24244 (set (match_dup 9)
24245 (unspec:XF [(match_dup 8) (match_dup 4)]
24246 UNSPEC_FSCALE_EXP))])]
24247 "TARGET_USE_FANCY_MATH_387
24248 && flag_unsafe_math_optimizations"
24249 {
24250 int opno;
24251
/* Operands 3..9 are fresh XFmode temporaries used by the template.  */
24252 for (opno = 3; opno <= 9; ++opno)
24253 operands[opno] = gen_reg_rtx (XFmode);
24254
/* Operand 7 is the constant 1 added back after f2xm1.  */
24255 emit_move_insn (operands[7], CONST1_RTX (XFmode));
24256 })
24257
;; exp in XFmode: expands expNcorexf3 with operand 1 as the input and, as
;; the multiplier, a register loaded with 387 standard constant number 5
;; (annotated "fldl2e" below).
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24258 (define_expand "expxf2"
24259 [(use (match_operand:XF 0 "register_operand"))
24260 (use (match_operand:XF 1 "register_operand"))]
24261 "TARGET_USE_FANCY_MATH_387
24262 && flag_unsafe_math_optimizations"
24263 {
24264 rtx multiplier = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */
24265
24266 emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
24267 DONE;
24268 })
24269
;; exp for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; expxf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
24270 (define_expand "exp<mode>2"
24271 [(use (match_operand:MODEF 0 "register_operand"))
24272 (use (match_operand:MODEF 1 "general_operand"))]
24273 "TARGET_USE_FANCY_MATH_387
24274 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24275 || TARGET_MIX_SSE_I387)
24276 && flag_unsafe_math_optimizations"
24277 {
24278 rtx xf_res = gen_reg_rtx (XFmode);
24279 rtx xf_arg = gen_reg_rtx (XFmode);
24280
/* Widen, compute in XFmode, narrow.  */
24281 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24282 emit_insn (gen_expxf2 (xf_res, xf_arg));
24283 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24284 DONE;
24285 })
24286
;; exp10 in XFmode: expands expNcorexf3 with operand 1 as the input and,
;; as the multiplier, a register loaded with 387 standard constant number
;; 6 (annotated "fldl2t" below).
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24287 (define_expand "exp10xf2"
24288 [(use (match_operand:XF 0 "register_operand"))
24289 (use (match_operand:XF 1 "register_operand"))]
24290 "TARGET_USE_FANCY_MATH_387
24291 && flag_unsafe_math_optimizations"
24292 {
24293 rtx multiplier = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */
24294
24295 emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
24296 DONE;
24297 })
24298
;; exp10 for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand exp10xf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
24299 (define_expand "exp10<mode>2"
24300 [(use (match_operand:MODEF 0 "register_operand"))
24301 (use (match_operand:MODEF 1 "general_operand"))]
24302 "TARGET_USE_FANCY_MATH_387
24303 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24304 || TARGET_MIX_SSE_I387)
24305 && flag_unsafe_math_optimizations"
24306 {
24307 rtx xf_res = gen_reg_rtx (XFmode);
24308 rtx xf_arg = gen_reg_rtx (XFmode);
24309
/* Widen, compute in XFmode, narrow.  */
24310 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24311 emit_insn (gen_exp10xf2 (xf_res, xf_arg));
24312 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24313 DONE;
24314 })
24315
;; exp2 in XFmode: expands expNcorexf3 with operand 1 as the input and a
;; register holding CONST1_RTX (XFmode) as the multiplier.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24316 (define_expand "exp2xf2"
24317 [(use (match_operand:XF 0 "register_operand"))
24318 (use (match_operand:XF 1 "register_operand"))]
24319 "TARGET_USE_FANCY_MATH_387
24320 && flag_unsafe_math_optimizations"
24321 {
24322 rtx multiplier = force_reg (XFmode, CONST1_RTX (XFmode));
24323
24324 emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
24325 DONE;
24326 })
24327
;; exp2 for MODEF modes through the 387: widen operand 1 to XFmode, expand
;; exp2xf2 on it, and narrow the XFmode result into operand 0.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and that
;; <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387 is set.
24328 (define_expand "exp2<mode>2"
24329 [(use (match_operand:MODEF 0 "register_operand"))
24330 (use (match_operand:MODEF 1 "general_operand"))]
24331 "TARGET_USE_FANCY_MATH_387
24332 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24333 || TARGET_MIX_SSE_I387)
24334 && flag_unsafe_math_optimizations"
24335 {
24336 rtx xf_res = gen_reg_rtx (XFmode);
24337 rtx xf_arg = gen_reg_rtx (XFmode);
24338
/* Widen, compute in XFmode, narrow.  */
24339 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24340 emit_insn (gen_exp2xf2 (xf_res, xf_arg));
24341 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24342 DONE;
24343 })
24344
;; expm1 in XFmode.  With input operand 1 the template emits:
;;   op3 = op1 * op2 (op2 holds 387 standard constant 5, "fldl2e");
;;   op4 = UNSPEC_FRNDINT (op3);  op5 = op3 - op4;
;;   op6 = UNSPEC_F2XM1 (op5);
;;   (op7, op8)   = fscale FRACT/EXP results of (op6, op4);
;;   (op10, op11) = fscale FRACT/EXP results of (op9, op8), op9 holding 1;
;;   op12 = op10 - op9;  op0 = op12 + op7.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24345 (define_expand "expm1xf2"
24346 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
24347 (match_dup 2)))
24348 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
24349 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
24350 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
24351 (parallel [(set (match_dup 7)
24352 (unspec:XF [(match_dup 6) (match_dup 4)]
24353 UNSPEC_FSCALE_FRACT))
24354 (set (match_dup 8)
24355 (unspec:XF [(match_dup 6) (match_dup 4)]
24356 UNSPEC_FSCALE_EXP))])
24357 (parallel [(set (match_dup 10)
24358 (unspec:XF [(match_dup 9) (match_dup 8)]
24359 UNSPEC_FSCALE_FRACT))
24360 (set (match_dup 11)
24361 (unspec:XF [(match_dup 9) (match_dup 8)]
24362 UNSPEC_FSCALE_EXP))])
24363 (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
24364 (set (match_operand:XF 0 "register_operand")
24365 (plus:XF (match_dup 12) (match_dup 7)))]
24366 "TARGET_USE_FANCY_MATH_387
24367 && flag_unsafe_math_optimizations"
24368 {
24369 int opno;
24370
/* Operands 2..12 are fresh XFmode temporaries used by the template.  */
24371 for (opno = 2; opno <= 12; ++opno)
24372 operands[opno] = gen_reg_rtx (XFmode);
24373
/* Load the two constants the template reads: operand 2 and operand 9.  */
24374 emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
24375 emit_move_insn (operands[9], CONST1_RTX (XFmode));
24376 })
24377
;; expm1 for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand expm1xf2 on it, and narrow the XFmode result into operand 0.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations,
;; and that <MODE> is not done with SSE math unless TARGET_MIX_SSE_I387.
24378 (define_expand "expm1<mode>2"
24379 [(use (match_operand:MODEF 0 "register_operand"))
24380 (use (match_operand:MODEF 1 "general_operand"))]
24381 "TARGET_USE_FANCY_MATH_387
24382 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24383 || TARGET_MIX_SSE_I387)
24384 && flag_unsafe_math_optimizations"
24385 {
24386 rtx xf_res = gen_reg_rtx (XFmode);
24387 rtx xf_arg = gen_reg_rtx (XFmode);
24388
/* Widen, compute in XFmode, narrow.  */
24389 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24390 emit_insn (gen_expm1xf2 (xf_res, xf_arg));
24391 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24392 DONE;
24393 })
24394
;; Scalar "vscalef" for MODEF modes: operand 0 is UNSPEC_SCALEF of operand
;; 1 (a "v" register) and operand 2 (a "v" register or memory).  Emitted
;; with an EVEX prefix; requires TARGET_AVX512F.
24395 (define_insn "avx512f_scalef<mode>2"
24396 [(set (match_operand:MODEF 0 "register_operand" "=v")
24397 (unspec:MODEF
24398 [(match_operand:MODEF 1 "register_operand" "v")
24399 (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
24400 UNSPEC_SCALEF))]
24401 "TARGET_AVX512F"
24402 "vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24403 [(set_attr "prefix" "evex")
24404 (set_attr "mode" "<MODE>")])
24405
;; ldexp in XFmode: SImode operand 2 is converted to XFmode with
;; floatsixf2, then fscalexf4_i387 is emitted with operand 1 and that
;; converted value as inputs.  The UNSPEC_FSCALE_FRACT result is operand 0;
;; the paired UNSPEC_FSCALE_EXP result goes to a fresh pseudo.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24406 (define_expand "ldexpxf3"
24407 [(match_operand:XF 0 "register_operand")
24408 (match_operand:XF 1 "register_operand")
24409 (match_operand:SI 2 "register_operand")]
24410 "TARGET_USE_FANCY_MATH_387
24411 && flag_unsafe_math_optimizations"
24412 {
24413 rtx xf_scale = gen_reg_rtx (XFmode);
24414 rtx xf_aux = gen_reg_rtx (XFmode);
24415
24416 emit_insn (gen_floatsixf2 (xf_scale, operands[2]));
24417 emit_insn (gen_fscalexf4_i387 (operands[0], xf_aux,
24418 operands[1], xf_scale));
24419 DONE;
24420 })
24421
;; ldexp for MODEF modes, with two strategies:
;;  - TARGET_AVX512F && TARGET_SSE_MATH: convert SImode operand 2 to <MODE>
;;    with floatsi<mode>2 and emit avx512f_scalef<mode>2 directly;
;;  - otherwise: widen operand 1 to XFmode, expand ldexpxf3, and narrow
;;    the XFmode result into operand 0.
;; Available when either the 387 path (TARGET_USE_FANCY_MATH_387, with
;; <MODE> not done with SSE math unless TARGET_MIX_SSE_I387) or the
;; AVX512F path applies, and flag_unsafe_math_optimizations is set.
24422 (define_expand "ldexp<mode>3"
24423 [(use (match_operand:MODEF 0 "register_operand"))
24424 (use (match_operand:MODEF 1 "general_operand"))
24425 (use (match_operand:SI 2 "register_operand"))]
24426 "((TARGET_USE_FANCY_MATH_387
24427 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24428 || TARGET_MIX_SSE_I387))
24429 || (TARGET_AVX512F && TARGET_SSE_MATH))
24430 && flag_unsafe_math_optimizations"
24431 {
24432 /* Prefer avx512f version. */
24433 if (TARGET_AVX512F && TARGET_SSE_MATH)
24434 {
24435 rtx fp_scale = gen_reg_rtx (<MODE>mode);
24436 operands[1] = force_reg (<MODE>mode, operands[1]);
24437
24438 emit_insn (gen_floatsi<mode>2 (fp_scale, operands[2]));
24439 emit_insn (gen_avx512f_scalef<mode>2 (operands[0], operands[1], fp_scale));
24440 }
24441 else
24442 {
24443 rtx xf_res = gen_reg_rtx (XFmode);
24444 rtx xf_arg = gen_reg_rtx (XFmode);
24445
/* Widen, compute in XFmode, narrow.  */
24446 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24447 emit_insn (gen_ldexpxf3 (xf_res, xf_arg, operands[2]));
24448 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24449 }
24450 DONE;
24451 })
24452
;; scalb in XFmode: a parallel of the two fscale unspecs on operands 1 and
;; 2.  The UNSPEC_FSCALE_FRACT part is the result, stored in operand 0; the
;; UNSPEC_FSCALE_EXP part goes to operand 3, a fresh XFmode pseudo created
;; by the preparation statement.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24453 (define_expand "scalbxf3"
24454 [(parallel [(set (match_operand:XF 0 "register_operand")
24455 (unspec:XF [(match_operand:XF 1 "register_operand")
24456 (match_operand:XF 2 "register_operand")]
24457 UNSPEC_FSCALE_FRACT))
24458 (set (match_dup 3)
24459 (unspec:XF [(match_dup 1) (match_dup 2)]
24460 UNSPEC_FSCALE_EXP))])]
24461 "TARGET_USE_FANCY_MATH_387
24462 && flag_unsafe_math_optimizations"
24463 "operands[3] = gen_reg_rtx (XFmode);")
24464
;; scalb for MODEF modes through the 387: widen operands 1 and 2 to
;; XFmode, expand scalbxf3 on them, and narrow the XFmode result into
;; operand 0.  Requires TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations, and that <MODE> is not done with SSE
;; math unless TARGET_MIX_SSE_I387 is set.
24465 (define_expand "scalb<mode>3"
24466 [(use (match_operand:MODEF 0 "register_operand"))
24467 (use (match_operand:MODEF 1 "general_operand"))
24468 (use (match_operand:MODEF 2 "general_operand"))]
24469 "TARGET_USE_FANCY_MATH_387
24470 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24471 || TARGET_MIX_SSE_I387)
24472 && flag_unsafe_math_optimizations"
24473 {
24474 rtx xf_res = gen_reg_rtx (XFmode);
24475 rtx xf_arg1 = gen_reg_rtx (XFmode);
24476 rtx xf_arg2 = gen_reg_rtx (XFmode);
24477
/* Widen both inputs, compute in XFmode, narrow.  */
24478 emit_insn (gen_extend<mode>xf2 (xf_arg1, operands[1]));
24479 emit_insn (gen_extend<mode>xf2 (xf_arg2, operands[2]));
24480 emit_insn (gen_scalbxf3 (xf_res, xf_arg1, xf_arg2));
24481 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24482 DONE;
24483 })
24484
;; significand in XFmode: a parallel of the two extraction unspecs on
;; operand 1.  The UNSPEC_XTRACT_FRACT part is the result, stored in
;; operand 0; the UNSPEC_XTRACT_EXP part goes to operand 2, a fresh XFmode
;; pseudo created by the preparation statement.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
24485 (define_expand "significandxf2"
24486 [(parallel [(set (match_operand:XF 0 "register_operand")
24487 (unspec:XF [(match_operand:XF 1 "register_operand")]
24488 UNSPEC_XTRACT_FRACT))
24489 (set (match_dup 2)
24490 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
24491 "TARGET_USE_FANCY_MATH_387
24492 && flag_unsafe_math_optimizations"
24493 "operands[2] = gen_reg_rtx (XFmode);")
24494
;; significand for MODEF modes through the 387: widen operand 1 to XFmode,
;; expand significandxf2 on it, and narrow the XFmode result into operand
;; 0.  Requires TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations, and that <MODE> is not done with SSE
;; math unless TARGET_MIX_SSE_I387 is set.
24495 (define_expand "significand<mode>2"
24496 [(use (match_operand:MODEF 0 "register_operand"))
24497 (use (match_operand:MODEF 1 "general_operand"))]
24498 "TARGET_USE_FANCY_MATH_387
24499 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24500 || TARGET_MIX_SSE_I387)
24501 && flag_unsafe_math_optimizations"
24502 {
24503 rtx xf_res = gen_reg_rtx (XFmode);
24504 rtx xf_arg = gen_reg_rtx (XFmode);
24505
/* Widen, compute in XFmode, narrow.  */
24506 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24507 emit_insn (gen_significandxf2 (xf_res, xf_arg));
24508 emit_insn (gen_truncxf<mode>2 (operands[0], xf_res));
24509 DONE;
24510 })
24511 \f
24512
;; Scalar rounding insn for MODEFH modes: operand 0 is UNSPEC_ROUND of
;; operand 1 and the immediate operand 2 (a const_0_to_15_operand).
;; Requires TARGET_SSE4_1.  There are five alternatives:
;;   0-2: emit %vround<ssemodesuffix>, isa "noavx512f", prefix maybe_vex;
;;        alternative 2 takes a "jm" source and has addr "gpr16";
;;   3-4: emit vrndscale<ssemodesuffix>, isa "avx512f", prefix evex.
;; preferred_for_speed is true when TARGET_AVX; otherwise alternatives 1
;; and 2 are preferred only if !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and the
;; remaining alternatives are always preferred.
24513 (define_insn "sse4_1_round<mode>2"
24514 [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
24515 (unspec:MODEFH
24516 [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m")
24517 (match_operand:SI 2 "const_0_to_15_operand")]
24518 UNSPEC_ROUND))]
24519 "TARGET_SSE4_1"
24520 "@
24521 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
24522 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
24523 %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
24524 vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
24525 vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
24526 [(set_attr "type" "ssecvt")
24527 (set_attr "prefix_extra" "1,1,1,*,*")
24528 (set_attr "length_immediate" "1")
24529 (set_attr "addr" "*,*,gpr16,*,*")
24530 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
24531 (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
24532 (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
24533 (set_attr "mode" "<MODE>")
24534 (set (attr "preferred_for_speed")
24535 (cond [(match_test "TARGET_AVX")
24536 (symbol_ref "true")
24537 (eq_attr "alternative" "1,2")
24538 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
24539 ]
24540 (symbol_ref "true")))])
24541
;; The 387 "frndint" instruction: operand 0 is UNSPEC_FRNDINT of operand 1,
;; which is tied to the register of operand 0 ("0").  Requires only
;; TARGET_USE_FANCY_MATH_387.
24542 (define_insn "rintxf2"
24543 [(set (match_operand:XF 0 "register_operand" "=f")
24544 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
24545 UNSPEC_FRNDINT))]
24546 "TARGET_USE_FANCY_MATH_387"
24547 "frndint"
24548 [(set_attr "type" "fpspc")
24549 (set_attr "znver1_decode" "vector")
24550 (set_attr "mode" "XF")])
24551
;; rint for HFmode: emits sse4_1_roundhf2 on operands 0 and 1 with the
;; immediate ROUND_MXCSR.  Requires TARGET_AVX512FP16.
24552 (define_expand "rinthf2"
24553 [(match_operand:HF 0 "register_operand")
24554 (match_operand:HF 1 "nonimmediate_operand")]
24555 "TARGET_AVX512FP16"
24556 {
24557 rtx round_ctl = GEN_INT (ROUND_MXCSR);
24558
24559 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1], round_ctl));
24560 DONE;
24561 })
24562
;; rint for MODEF modes.  Available when TARGET_USE_FANCY_MATH_387 or when
;; <MODE> is done with SSE math.  Three strategies:
;;  - SSE math with TARGET_SSE4_1: sse4_1_round<mode>2 with ROUND_MXCSR;
;;  - SSE math without SSE4.1: ix86_expand_rint;
;;  - otherwise: widen operand 1 to XFmode, emit rintxf2, and convert back
;;    with truncxf<mode>2_i387_noop_unspec.
24563 (define_expand "rint<mode>2"
24564 [(use (match_operand:MODEF 0 "register_operand"))
24565 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
24566 "TARGET_USE_FANCY_MATH_387
24567 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
24568 {
24569 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24570 {
24571 if (TARGET_SSE4_1)
24572 emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1],
24573 GEN_INT (ROUND_MXCSR)));
24574 else
24575 ix86_expand_rint (operands[0], operands[1]);
24576 }
24577 else
24578 {
24579 rtx xf_res = gen_reg_rtx (XFmode);
24580 rtx xf_arg = gen_reg_rtx (XFmode);
24581
/* 387 path: widen, round in XFmode, convert back.  */
24582 emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
24583 emit_insn (gen_rintxf2 (xf_res, xf_arg));
24584 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], xf_res));
24585 }
24586 DONE;
24587 })
24588
;; nearbyint in XFmode: operand 0 is UNSPEC_FRNDINT of operand 1, the same
;; RTL that the rintxf2 insn describes.  Requires
;; TARGET_USE_FANCY_MATH_387 and !flag_trapping_math.
24589 (define_expand "nearbyintxf2"
24590 [(set (match_operand:XF 0 "register_operand")
24591 (unspec:XF [(match_operand:XF 1 "register_operand")]
24592 UNSPEC_FRNDINT))]
24593 "TARGET_USE_FANCY_MATH_387
24594 && !flag_trapping_math")
24595
;; nearbyint for HFmode: emits sse4_1_roundhf2 on operands 0 and 1 with the
;; immediate ROUND_MXCSR | ROUND_NO_EXC.  Requires TARGET_AVX512FP16.
24596 (define_expand "nearbyinthf2"
24597 [(match_operand:HF 0 "register_operand")
24598 (match_operand:HF 1 "nonimmediate_operand")]
24599 "TARGET_AVX512FP16"
24600 {
24601 rtx round_ctl = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);
24602
24603 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1], round_ctl));
24604 DONE;
24605 })
24606
;; nearbyint for the MODEF modes.  With TARGET_SSE4_1 && TARGET_SSE_MATH
;; use sse4_1_round<mode>2 with ROUND_MXCSR | ROUND_NO_EXC.  Otherwise
;; (x87 path, which the condition only admits when !flag_trapping_math)
;; extend to XFmode, apply nearbyintxf2, and convert the result back with
;; truncxf<mode>2_i387_noop_unspec.
24607 (define_expand "nearbyint<mode>2"
24608 [(use (match_operand:MODEF 0 "register_operand"))
24609 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
24610 "(TARGET_USE_FANCY_MATH_387
24611 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24612 || TARGET_MIX_SSE_I387)
24613 && !flag_trapping_math)
24614 || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
24615 {
24616 if (TARGET_SSE4_1 && TARGET_SSE_MATH)
24617 emit_insn (gen_sse4_1_round<mode>2
24618 (operands[0], operands[1], GEN_INT (ROUND_MXCSR
24619 | ROUND_NO_EXC)));
24620 else
24621 {
24622 rtx op0 = gen_reg_rtx (XFmode);
24623 rtx op1 = gen_reg_rtx (XFmode);
24624
24625 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
24626 emit_insn (gen_nearbyintxf2 (op0, op1));
24627 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
24628 }
24629 DONE;
24630 })
24631
;; round for HFmode: delegates to ix86_expand_round_sse4.  Requires
;; TARGET_AVX512FP16 and both !flag_trapping_math and !flag_rounding_math.
24632 (define_expand "roundhf2"
24633 [(match_operand:HF 0 "register_operand")
24634 (match_operand:HF 1 "register_operand")]
24635 "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
24636 {
24637 ix86_expand_round_sse4 (operands[0], operands[1]);
24638 DONE;
24639 })
24640
;; round for the X87MODEF modes.
;; SSE path (mode handled by SSE math, !flag_trapping_math and
;; !flag_rounding_math): with TARGET_SSE4_1 force the input into a
;; register and call ix86_expand_round_sse4; otherwise call
;; ix86_expand_round, except for DFmode on !TARGET_64BIT, which uses
;; ix86_expand_rounddf_32.
;; x87 path (gated on flag_unsafe_math_optimizations): force the input
;; into a register and call ix86_emit_i387_round.
24641 (define_expand "round<mode>2"
24642 [(match_operand:X87MODEF 0 "register_operand")
24643 (match_operand:X87MODEF 1 "nonimmediate_operand")]
24644 "(TARGET_USE_FANCY_MATH_387
24645 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24646 || TARGET_MIX_SSE_I387)
24647 && flag_unsafe_math_optimizations
24648 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
24649 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24650 && !flag_trapping_math && !flag_rounding_math)"
24651 {
24652 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24653 && !flag_trapping_math && !flag_rounding_math)
24654 {
24655 if (TARGET_SSE4_1)
24656 {
24657 operands[1] = force_reg (<MODE>mode, operands[1]);
24658 ix86_expand_round_sse4 (operands[0], operands[1]);
24659 }
24660 else if (TARGET_64BIT || (<MODE>mode != DFmode))
24661 ix86_expand_round (operands[0], operands[1]);
24662 else
24663 ix86_expand_rounddf_32 (operands[0], operands[1]);
24664 }
24665 else
24666 {
24667 operands[1] = force_reg (<MODE>mode, operands[1]);
24668 ix86_emit_i387_round (operands[0], operands[1]);
24669 }
24670 DONE;
24671 })
24672
;; lrint from XFmode to a DImode memory destination, expressed as
;; UNSPEC_FIST.  Requires an early-clobber XF scratch register
;; (operand 2).  The assembly text comes from output_fix_trunc, called
;; with false as its last argument.
24673 (define_insn "lrintxfdi2"
24674 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
24675 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
24676 UNSPEC_FIST))
24677 (clobber (match_scratch:XF 2 "=&f"))]
24678 "TARGET_USE_FANCY_MATH_387"
24679 "* return output_fix_trunc (insn, operands, false);"
24680 [(set_attr "type" "fpspc")
24681 (set_attr "mode" "DI")])
24682
;; lrint from XFmode to a memory destination in one of the SWI24 modes,
;; expressed as UNSPEC_FIST.  Unlike lrintxfdi2 there is no scratch
;; operand.  Output is produced by output_fix_trunc (last argument false).
24683 (define_insn "lrintxf<mode>2"
24684 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
24685 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
24686 UNSPEC_FIST))]
24687 "TARGET_USE_FANCY_MATH_387"
24688 "* return output_fix_trunc (insn, operands, false);"
24689 [(set_attr "type" "fpspc")
24690 (set_attr "mode" "<MODE>")])
24691
;; lround from HFmode to the SWI248 modes.  The body always calls
;; ix86_expand_lround and finishes with DONE, so the RTL template above
;; it only supplies the operand predicates and modes.
24692 (define_expand "lroundhf<mode>2"
24693 [(set (match_operand:SWI248 0 "register_operand")
24694 (unspec:SWI248 [(match_operand:HF 1 "nonimmediate_operand")]
24695 UNSPEC_FIX_NOTRUNC))]
24696 "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
24697 {
24698 ix86_expand_lround (operands[0], operands[1]);
24699 DONE;
24700 })
24701
;; lrint from HFmode to the SWI48 modes.  No preparation code: the
;; expander emits the UNSPEC_FIX_NOTRUNC set as written, to be matched by
;; an insn pattern defined elsewhere.
24702 (define_expand "lrinthf<mode>2"
24703 [(set (match_operand:SWI48 0 "register_operand")
24704 (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand")]
24705 UNSPEC_FIX_NOTRUNC))]
24706 "TARGET_AVX512FP16")
24707
;; lrint from the MODEF modes to the SWI48 modes when the source mode is
;; handled by SSE math.  No preparation code: emits the
;; UNSPEC_FIX_NOTRUNC set as written.
24708 (define_expand "lrint<MODEF:mode><SWI48:mode>2"
24709 [(set (match_operand:SWI48 0 "register_operand")
24710 (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
24711 UNSPEC_FIX_NOTRUNC))]
24712 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")
24713
;; lround from the X87MODEF modes to the SWI248x modes.
;; SSE path: source mode handled by SSE math, destination not HImode,
;; DImode destination only with TARGET_64BIT, and both
;; !flag_trapping_math and !flag_rounding_math -> ix86_expand_lround.
;; Otherwise (x87 path, gated on flag_unsafe_math_optimizations) ->
;; ix86_emit_i387_round.  The body repeats the SSE half of the insn
;; condition to choose between the two.
24714 (define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
24715 [(match_operand:SWI248x 0 "nonimmediate_operand")
24716 (match_operand:X87MODEF 1 "register_operand")]
24717 "(TARGET_USE_FANCY_MATH_387
24718 && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
24719 || TARGET_MIX_SSE_I387)
24720 && flag_unsafe_math_optimizations)
24721 || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
24722 && <SWI248x:MODE>mode != HImode
24723 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
24724 && !flag_trapping_math && !flag_rounding_math)"
24725 {
24726 if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
24727 && <SWI248x:MODE>mode != HImode
24728 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
24729 && !flag_trapping_math && !flag_rounding_math)
24730 ix86_expand_lround (operands[0], operands[1]);
24731 else
24732 ix86_emit_i387_round (operands[0], operands[1]);
24733 DONE;
24734 })
24735
;; Unspecs iterated over by the frndint-based rounding patterns:
;; roundeven, floor, ceil and trunc.
24736 (define_int_iterator FRNDINT_ROUNDING
24737 [UNSPEC_FRNDINT_ROUNDEVEN
24738 UNSPEC_FRNDINT_FLOOR
24739 UNSPEC_FRNDINT_CEIL
24740 UNSPEC_FRNDINT_TRUNC])
24741
;; Unspecs iterated over by the fist-based patterns: floor and ceil only.
24742 (define_int_iterator FIST_ROUNDING
24743 [UNSPEC_FIST_FLOOR
24744 UNSPEC_FIST_CEIL])
24745
24746 ;; Base name for define_insn
;; Used to build the expander names (<rounding_insn>xf2, l<rounding_insn>...).
;; Note that UNSPEC_FRNDINT_TRUNC maps to "btrunc" here, while the
;; "rounding" attribute maps the same unspec to "trunc".
24747 (define_int_attr rounding_insn
24748 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
24749 (UNSPEC_FRNDINT_FLOOR "floor")
24750 (UNSPEC_FRNDINT_CEIL "ceil")
24751 (UNSPEC_FRNDINT_TRUNC "btrunc")
24752 (UNSPEC_FIST_FLOOR "floor")
24753 (UNSPEC_FIST_CEIL "ceil")])
24754
;; Lower-case rounding name.  Used in internal pattern names such as
;; frndintxf2_<rounding> and fist<mode>2_<rounding>, and as the value of
;; the "i387_cw" insn attribute.
24755 (define_int_attr rounding
24756 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
24757 (UNSPEC_FRNDINT_FLOOR "floor")
24758 (UNSPEC_FRNDINT_CEIL "ceil")
24759 (UNSPEC_FRNDINT_TRUNC "trunc")
24760 (UNSPEC_FIST_FLOOR "floor")
24761 (UNSPEC_FIST_CEIL "ceil")])
24762
;; Upper-case rounding name.  Pasted into C identifiers in the patterns
;; that follow: I387_<ROUNDING>, SLOT_CW_<ROUNDING> and ROUND_<ROUNDING>.
24763 (define_int_attr ROUNDING
24764 [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
24765 (UNSPEC_FRNDINT_FLOOR "FLOOR")
24766 (UNSPEC_FRNDINT_CEIL "CEIL")
24767 (UNSPEC_FRNDINT_TRUNC "TRUNC")
24768 (UNSPEC_FIST_FLOOR "FLOOR")
24769 (UNSPEC_FIST_CEIL "CEIL")])
24770
24771 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; This pattern only matches while ix86_pre_reload_split () holds and is
;; always split (split condition "&& 1"; output template "#").  The split
;; code marks I387_<ROUNDING> in ix86_optimize_mode_switching, obtains
;; HImode stack slots SLOT_CW_STORED and SLOT_CW_<ROUNDING>, and emits
;; frndintxf2_<rounding>_i387 with those slots as operands 2 and 3.
24772 (define_insn_and_split "frndintxf2_<rounding>"
24773 [(set (match_operand:XF 0 "register_operand")
24774 (unspec:XF [(match_operand:XF 1 "register_operand")]
24775 FRNDINT_ROUNDING))
24776 (clobber (reg:CC FLAGS_REG))]
24777 "TARGET_USE_FANCY_MATH_387
24778 && (flag_fp_int_builtin_inexact || !flag_trapping_math)
24779 && ix86_pre_reload_split ()"
24780 "#"
24781 "&& 1"
24782 [(const_int 0)]
24783 {
24784 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
24785
24786 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
24787 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
24788
24789 emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
24790 operands[2], operands[3]));
24791 DONE;
24792 }
24793 [(set_attr "type" "frndint")
24794 (set_attr "i387_cw" "<rounding>")
24795 (set_attr "mode" "XF")])
24796
;; x87 round-to-integer under a specific control word.  The template
;; loads the control word from memory operand 3, executes frndint in
;; place (operand 1 is tied to operand 0), then loads the control word
;; from memory operand 2.
24797 (define_insn "frndintxf2_<rounding>_i387"
24798 [(set (match_operand:XF 0 "register_operand" "=f")
24799 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
24800 FRNDINT_ROUNDING))
24801 (use (match_operand:HI 2 "memory_operand" "m"))
24802 (use (match_operand:HI 3 "memory_operand" "m"))]
24803 "TARGET_USE_FANCY_MATH_387
24804 && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
24805 "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
24806 [(set_attr "type" "frndint")
24807 (set_attr "i387_cw" "<rounding>")
24808 (set_attr "mode" "XF")])
24809
;; XFmode roundeven/floor/ceil/btrunc expanders (one per FRNDINT_ROUNDING
;; unspec).  No preparation code: the parallel with the FLAGS_REG clobber
;; is emitted as written.
24810 (define_expand "<rounding_insn>xf2"
24811 [(parallel [(set (match_operand:XF 0 "register_operand")
24812 (unspec:XF [(match_operand:XF 1 "register_operand")]
24813 FRNDINT_ROUNDING))
24814 (clobber (reg:CC FLAGS_REG))])]
24815 "TARGET_USE_FANCY_MATH_387
24816 && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
24817
;; HFmode roundeven/floor/ceil/btrunc expanders, enabled by
;; TARGET_AVX512FP16.  The body always emits sse4_1_roundhf2 with
;; ROUND_<ROUNDING> | ROUND_NO_EXC and ends with DONE.
24818 (define_expand "<rounding_insn>hf2"
24819 [(parallel [(set (match_operand:HF 0 "register_operand")
24820 (unspec:HF [(match_operand:HF 1 "register_operand")]
24821 FRNDINT_ROUNDING))
24822 (clobber (reg:CC FLAGS_REG))])]
24823 "TARGET_AVX512FP16"
24824 {
24825 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1],
24826 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
24827 DONE;
24828 })
24829
;; roundeven/floor/ceil/btrunc expanders for the MODEF modes.
;; SSE path (mode handled by SSE math):
;;   - TARGET_SSE4_1: sse4_1_round<mode>2 with
;;     ROUND_<ROUNDING> | ROUND_NO_EXC.
;;   - otherwise (roundeven is excluded by the condition, hence the
;;     gcc_unreachable arms): ix86_expand_floorceil / ix86_expand_trunc,
;;     or their *df_32 variants for DFmode on !TARGET_64BIT.  The boolean
;;     passed to the floorceil helpers is true for floor, false for ceil.
;; x87 path: extend to XFmode, apply frndintxf2_<rounding>, and convert
;; back with truncxf<mode>2_i387_noop_unspec.
;; The body repeats the SSE half of the insn condition to pick the path.
24830 (define_expand "<rounding_insn><mode>2"
24831 [(parallel [(set (match_operand:MODEF 0 "register_operand")
24832 (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
24833 FRNDINT_ROUNDING))
24834 (clobber (reg:CC FLAGS_REG))])]
24835 "(TARGET_USE_FANCY_MATH_387
24836 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
24837 || TARGET_MIX_SSE_I387)
24838 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
24839 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24840 && (TARGET_SSE4_1
24841 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
24842 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
24843 {
24844 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24845 && (TARGET_SSE4_1
24846 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
24847 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))
24848 {
24849 if (TARGET_SSE4_1)
24850 emit_insn (gen_sse4_1_round<mode>2
24851 (operands[0], operands[1],
24852 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
24853 else if (TARGET_64BIT || (<MODE>mode != DFmode))
24854 {
24855 if (ROUND_<ROUNDING> == ROUND_FLOOR)
24856 ix86_expand_floorceil (operands[0], operands[1], true);
24857 else if (ROUND_<ROUNDING> == ROUND_CEIL)
24858 ix86_expand_floorceil (operands[0], operands[1], false);
24859 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
24860 ix86_expand_trunc (operands[0], operands[1]);
24861 else
24862 gcc_unreachable ();
24863 }
24864 else
24865 {
24866 if (ROUND_<ROUNDING> == ROUND_FLOOR)
24867 ix86_expand_floorceildf_32 (operands[0], operands[1], true);
24868 else if (ROUND_<ROUNDING> == ROUND_CEIL)
24869 ix86_expand_floorceildf_32 (operands[0], operands[1], false);
24870 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
24871 ix86_expand_truncdf_32 (operands[0], operands[1]);
24872 else
24873 gcc_unreachable ();
24874 }
24875 }
24876 else
24877 {
24878 rtx op0 = gen_reg_rtx (XFmode);
24879 rtx op1 = gen_reg_rtx (XFmode);
24880
24881 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
24882 emit_insn (gen_frndintxf2_<rounding> (op0, op1));
24883 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
24884 }
24885 DONE;
24886 })
24887
24888 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Pre-reload form of the floor/ceil fist patterns for the SWI248x modes.
;; It only matches while ix86_pre_reload_split () holds and is always
;; split.  The split code marks I387_<ROUNDING> in
;; ix86_optimize_mode_switching, obtains HImode stack slots
;; SLOT_CW_STORED and SLOT_CW_<ROUNDING>, and emits
;; fist<mode>2_<rounding> with those slots as operands 2 and 3.
24889 (define_insn_and_split "*fist<mode>2_<rounding>_1"
24890 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
24891 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
24892 FIST_ROUNDING))
24893 (clobber (reg:CC FLAGS_REG))]
24894 "TARGET_USE_FANCY_MATH_387
24895 && flag_unsafe_math_optimizations
24896 && ix86_pre_reload_split ()"
24897 "#"
24898 "&& 1"
24899 [(const_int 0)]
24900 {
24901 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
24902
24903 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
24904 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
24905
24906 emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
24907 operands[2], operands[3]));
24908 DONE;
24909 }
24910 [(set_attr "type" "fistp")
24911 (set_attr "i387_cw" "<rounding>")
24912 (set_attr "mode" "<MODE>")])
24913
;; DImode floor/ceil store of an XFmode value.  Operands 2 and 3 are the
;; HImode control-word memory slots; operand 4 is an early-clobber XF
;; scratch register.  Output is produced by output_fix_trunc (last
;; argument false).
24914 (define_insn "fistdi2_<rounding>"
24915 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
24916 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
24917 FIST_ROUNDING))
24918 (use (match_operand:HI 2 "memory_operand" "m"))
24919 (use (match_operand:HI 3 "memory_operand" "m"))
24920 (clobber (match_scratch:XF 4 "=&f"))]
24921 "TARGET_USE_FANCY_MATH_387
24922 && flag_unsafe_math_optimizations"
24923 "* return output_fix_trunc (insn, operands, false);"
24924 [(set_attr "type" "fistp")
24925 (set_attr "i387_cw" "<rounding>")
24926 (set_attr "mode" "DI")])
24927
;; SWI24 floor/ceil store of an XFmode value.  Operands 2 and 3 are the
;; HImode control-word memory slots; unlike fistdi2_<rounding> there is
;; no scratch operand.  Output is produced by output_fix_trunc (last
;; argument false).
24928 (define_insn "fist<mode>2_<rounding>"
24929 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
24930 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
24931 FIST_ROUNDING))
24932 (use (match_operand:HI 2 "memory_operand" "m"))
24933 (use (match_operand:HI 3 "memory_operand" "m"))]
24934 "TARGET_USE_FANCY_MATH_387
24935 && flag_unsafe_math_optimizations"
24936 "* return output_fix_trunc (insn, operands, false);"
24937 [(set_attr "type" "fistp")
24938 (set_attr "i387_cw" "<rounding>")
24939 (set_attr "mode" "<MODE>")])
24940
;; lfloor/lceil from XFmode to the SWI248x modes on the x87 unit.  No
;; preparation code: the parallel (FIST_ROUNDING unspec plus FLAGS_REG
;; clobber) is emitted as written.
24941 (define_expand "l<rounding_insn>xf<mode>2"
24942 [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
24943 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
24944 FIST_ROUNDING))
24945 (clobber (reg:CC FLAGS_REG))])]
24946 "TARGET_USE_FANCY_MATH_387
24947 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
24948 && flag_unsafe_math_optimizations")
24949
;; lfloor/lceil from HFmode to the SWI48 modes.  Two steps: round into a
;; fresh HFmode register with sse4_1_roundhf2
;; (ROUND_<ROUNDING> | ROUND_NO_EXC), then convert that register with
;; fix_trunchf<mode>2.
24950 (define_expand "l<rounding_insn>hf<mode>2"
24951 [(set (match_operand:SWI48 0 "nonimmediate_operand")
24952 (unspec:SWI48 [(match_operand:HF 1 "register_operand")]
24953 FIST_ROUNDING))]
24954 "TARGET_AVX512FP16"
24955 {
24956 rtx tmp = gen_reg_rtx (HFmode);
24957 emit_insn (gen_sse4_1_roundhf2 (tmp, operands[1],
24958 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
24959 emit_insn (gen_fix_trunchf<mode>2 (operands[0], tmp));
24960 DONE;
24961 })
24962
;; lfloor/lceil from the MODEF modes to the SWI48 modes under SSE math.
;; With TARGET_SSE4_1: round into a fresh register with
;; sse4_1_round<mode>2 (ROUND_<ROUNDING> | ROUND_NO_EXC), then convert
;; with fix_trunc.  Without it (the condition then requires
;; !flag_trapping_math): ix86_expand_lfloorceil, passing true for floor
;; and false for ceil.
24963 (define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
24964 [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
24965 (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
24966 FIST_ROUNDING))
24967 (clobber (reg:CC FLAGS_REG))])]
24968 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
24969 && (TARGET_SSE4_1 || !flag_trapping_math)"
24970 {
24971 if (TARGET_SSE4_1)
24972 {
24973 rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
24974
24975 emit_insn (gen_sse4_1_round<MODEF:mode>2
24976 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
24977 | ROUND_NO_EXC)));
24978 emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
24979 (operands[0], tmp));
24980 }
24981 else if (ROUND_<ROUNDING> == ROUND_FLOOR)
24982 ix86_expand_lfloorceil (operands[0], operands[1], true);
24983 else if (ROUND_<ROUNDING> == ROUND_CEIL)
24984 ix86_expand_lfloorceil (operands[0], operands[1], false);
24985 else
24986 gcc_unreachable ();
24987
24988 DONE;
24989 })
24990
;; Emits "fxam" followed by "fnstsw" into HImode operand 0 (constraint
;; "=a") for an x87 register operand in one of the X87MODEF modes.  The
;; x87 signbit expanders in this file mask the result with 0x200.
24991 (define_insn "fxam<mode>2_i387"
24992 [(set (match_operand:HI 0 "register_operand" "=a")
24993 (unspec:HI
24994 [(match_operand:X87MODEF 1 "register_operand" "f")]
24995 UNSPEC_FXAM))]
24996 "TARGET_USE_FANCY_MATH_387"
24997 "fxam\n\tfnstsw\t%0"
24998 [(set_attr "type" "multi")
24999 (set_attr "length" "4")
25000 (set_attr "unit" "i387")
25001 (set_attr "mode" "<MODE>")])
25002
;; signbit for TFmode.
;; With TARGET_SSE4_1: ptest operand 1 against the mask returned by
;; ix86_build_signbit_mask, set a QImode scratch from the NE condition on
;; the flags, and zero-extend the scratch into operand 0.
;; Otherwise: movmskps on the V4SFmode view of operand 1, then mask the
;; result with 0x8.
25003 (define_expand "signbittf2"
25004 [(use (match_operand:SI 0 "register_operand"))
25005 (use (match_operand:TF 1 "register_operand"))]
25006 "TARGET_SSE"
25007 {
25008 if (TARGET_SSE4_1)
25009 {
25010 rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
25011 rtx scratch = gen_reg_rtx (QImode);
25012
25013 emit_insn (gen_ptesttf2 (operands[1], mask));
25014 ix86_expand_setcc (scratch, NE,
25015 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
25016
25017 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
25018 }
25019 else
25020 {
25021 emit_insn (gen_sse_movmskps (operands[0],
25022 gen_lowpart (V4SFmode, operands[1])));
25023 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
25024 }
25025 DONE;
25026 })
25027
;; signbit for XFmode on the x87 unit: run fxamxf2_i387 into an HImode
;; scratch, then AND its SImode lowpart with 0x200 into operand 0.
25028 (define_expand "signbitxf2"
25029 [(use (match_operand:SI 0 "register_operand"))
25030 (use (match_operand:XF 1 "register_operand"))]
25031 "TARGET_USE_FANCY_MATH_387"
25032 {
25033 rtx scratch = gen_reg_rtx (HImode);
25034
25035 emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
25036 emit_insn (gen_andsi3 (operands[0],
25037 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
25038 DONE;
25039 })
25040
;; movmskpd of a DFmode SSE register into an SImode integer register.
;; Two alternatives, selected by the "isa" attribute (noavx / avx); the
;; "%v" in the template prints the mnemonic for either.  Used by
;; signbitdf2.
25041 (define_insn "movmsk_df"
25042 [(set (match_operand:SI 0 "register_operand" "=r,jr")
25043 (unspec:SI
25044 [(match_operand:DF 1 "register_operand" "x,x")]
25045 UNSPEC_MOVMSK))]
25046 "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
25047 "%vmovmskpd\t{%1, %0|%0, %1}"
25048 [(set_attr "isa" "noavx,avx")
25049 (set_attr "type" "ssemov")
25050 (set_attr "prefix" "maybe_evex")
25051 (set_attr "mode" "DF")])
25052
25053 ;; Use movmskpd in SSE mode to avoid store forwarding stall
25054 ;; for 32bit targets and movq+shrq sequence for 64bit targets.
;; SSE path: movmsk_df into operand 0, then AND with 1.
;; x87 path: fxamdf2_i387 into an HImode scratch, then AND its SImode
;; lowpart with 0x200.
25055 (define_expand "signbitdf2"
25056 [(use (match_operand:SI 0 "register_operand"))
25057 (use (match_operand:DF 1 "register_operand"))]
25058 "TARGET_USE_FANCY_MATH_387
25059 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
25060 {
25061 if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
25062 {
25063 emit_insn (gen_movmsk_df (operands[0], operands[1]));
25064 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
25065 }
25066 else
25067 {
25068 rtx scratch = gen_reg_rtx (HImode);
25069
25070 emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
25071 emit_insn (gen_andsi3 (operands[0],
25072 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
25073 }
25074 DONE;
25075 })
25076
;; signbit for SFmode, x87 only: the condition disables this expander
;; when SFmode is handled by SSE math.  Runs fxamsf2_i387 into an HImode
;; scratch, then ANDs its SImode lowpart with 0x200 into operand 0.
25077 (define_expand "signbitsf2"
25078 [(use (match_operand:SI 0 "register_operand"))
25079 (use (match_operand:SF 1 "register_operand"))]
25080 "TARGET_USE_FANCY_MATH_387
25081 && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
25082 {
25083 rtx scratch = gen_reg_rtx (HImode);
25084
25085 emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
25086 emit_insn (gen_andsi3 (operands[0],
25087 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
25088 DONE;
25089 })
25090 \f
25091 ;; Block operation instructions
25092
;; Emits a "cld" instruction.  Modelled as an unspec_volatile
;; (UNSPECV_CLD) with an empty condition, so it is always available.
25093 (define_insn "cld"
25094 [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
25095 ""
25096 "cld"
25097 [(set_attr "length" "1")
25098 (set_attr "length_immediate" "0")
25099 (set_attr "modrm" "0")])
25100
;; Block copy expander for the SWI48 length modes.  All work is done by
;; ix86_expand_set_or_cpymem (called with NULL in the fourth argument and
;; false as the final argument); the expansion FAILs when that helper
;; returns false.
25101 (define_expand "cpymem<mode>"
25102 [(use (match_operand:BLK 0 "memory_operand"))
25103 (use (match_operand:BLK 1 "memory_operand"))
25104 (use (match_operand:SWI48 2 "nonmemory_operand"))
25105 (use (match_operand:SWI48 3 "const_int_operand"))
25106 (use (match_operand:SI 4 "const_int_operand"))
25107 (use (match_operand:SI 5 "const_int_operand"))
25108 (use (match_operand:SI 6 ""))
25109 (use (match_operand:SI 7 ""))
25110 (use (match_operand:SI 8 ""))]
25111 ""
25112 {
25113 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
25114 operands[2], NULL, operands[3],
25115 operands[4], operands[5],
25116 operands[6], operands[7],
25117 operands[8], false))
25118 DONE;
25119 else
25120 FAIL;
25121 })
25122
25123 ;; Most CPUs don't like single string operations
25124 ;; Handle this case here to simplify previous expander.
25125
;; Copies one piece from memory operand 3 to memory operand 1 and
;; advances the address registers (operands 0 and 2) by the piece size,
;; which is taken from the mode of operand 1.  Emits strmov_singleop when
;; TARGET_SINGLE_STRINGOP or optimizing for size, provided neither SI_REG
;; nor DI_REG is fixed; otherwise falls through to the template, which
;; moves the piece through a fresh temporary register (operand 4).
;; FAILs if operand 3 is in a non-generic address space.
25126 (define_expand "strmov"
25127 [(set (match_dup 4) (match_operand 3 "memory_operand"))
25128 (set (match_operand 1 "memory_operand") (match_dup 4))
25129 (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
25130 (clobber (reg:CC FLAGS_REG))])
25131 (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
25132 (clobber (reg:CC FLAGS_REG))])]
25133 ""
25134 {
25135 /* Can't use this for non-default address spaces. */
25136 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
25137 FAIL;
25138
25139 int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
25140
25141 /* If .md ever supports :P for Pmode, these can be directly
25142 in the pattern above. */
25143 operands[5] = plus_constant (Pmode, operands[0], piece_size);
25144 operands[6] = plus_constant (Pmode, operands[2], piece_size);
25145
25146 /* Can't use this if the user has appropriated esi or edi. */
25147 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
25148 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
25149 {
25150 emit_insn (gen_strmov_singleop (operands[0], operands[1],
25151 operands[2], operands[3],
25152 operands[5], operands[6]));
25153 DONE;
25154 }
25155
25156 operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
25157 })
25158
;; Builds the three-set parallel (memory move plus both pointer updates)
;; that the *strmov*_1 insn patterns match.  The only preparation is to
;; set ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD.
25159 (define_expand "strmov_singleop"
25160 [(parallel [(set (match_operand 1 "memory_operand")
25161 (match_operand 3 "memory_operand"))
25162 (set (match_operand 0 "register_operand")
25163 (match_operand 4))
25164 (set (match_operand 2 "register_operand")
25165 (match_operand 5))])]
25166 ""
25167 {
25168 if (TARGET_CLD)
25169 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25170 })
25171
;; Single "movsq": DImode memory-to-memory move; destination pointer
;; (constraint "D") and source pointer (constraint "S") each advance by
;; 8.  64-bit only; rejected if SI_REG or DI_REG is fixed or if
;; ix86_check_no_addr_space fails.
25172 (define_insn "*strmovdi_rex_1"
25173 [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
25174 (mem:DI (match_operand:P 3 "register_operand" "1")))
25175 (set (match_operand:P 0 "register_operand" "=D")
25176 (plus:P (match_dup 2)
25177 (const_int 8)))
25178 (set (match_operand:P 1 "register_operand" "=S")
25179 (plus:P (match_dup 3)
25180 (const_int 8)))]
25181 "TARGET_64BIT
25182 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
25183 && ix86_check_no_addr_space (insn)"
25184 "%^movsq"
25185 [(set_attr "type" "str")
25186 (set_attr "memory" "both")
25187 (set_attr "mode" "DI")])
25188
;; Single "movsl"/"movsd" (AT&T/Intel spelling): SImode memory-to-memory
;; move; destination ("D") and source ("S") pointers each advance by 4.
;; Rejected if SI_REG or DI_REG is fixed or if ix86_check_no_addr_space
;; fails.
25189 (define_insn "*strmovsi_1"
25190 [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
25191 (mem:SI (match_operand:P 3 "register_operand" "1")))
25192 (set (match_operand:P 0 "register_operand" "=D")
25193 (plus:P (match_dup 2)
25194 (const_int 4)))
25195 (set (match_operand:P 1 "register_operand" "=S")
25196 (plus:P (match_dup 3)
25197 (const_int 4)))]
25198 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
25199 && ix86_check_no_addr_space (insn)"
25200 "%^movs{l|d}"
25201 [(set_attr "type" "str")
25202 (set_attr "memory" "both")
25203 (set_attr "mode" "SI")])
25204
;; Single "movsw": HImode memory-to-memory move; destination ("D") and
;; source ("S") pointers each advance by 2.  Rejected if SI_REG or DI_REG
;; is fixed or if ix86_check_no_addr_space fails.
25205 (define_insn "*strmovhi_1"
25206 [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
25207 (mem:HI (match_operand:P 3 "register_operand" "1")))
25208 (set (match_operand:P 0 "register_operand" "=D")
25209 (plus:P (match_dup 2)
25210 (const_int 2)))
25211 (set (match_operand:P 1 "register_operand" "=S")
25212 (plus:P (match_dup 3)
25213 (const_int 2)))]
25214 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
25215 && ix86_check_no_addr_space (insn)"
25216 "%^movsw"
25217 [(set_attr "type" "str")
25218 (set_attr "memory" "both")
25219 (set_attr "mode" "HI")])
25220
;; Single "movsb": QImode memory-to-memory move; destination ("D") and
;; source ("S") pointers each advance by 1.  Rejected if SI_REG or DI_REG
;; is fixed or if ix86_check_no_addr_space fails.  The prefix_rex
;; attribute is forced to "0" when the pointer mode is DImode and left
;; at its default ("*") otherwise.
25221 (define_insn "*strmovqi_1"
25222 [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
25223 (mem:QI (match_operand:P 3 "register_operand" "1")))
25224 (set (match_operand:P 0 "register_operand" "=D")
25225 (plus:P (match_dup 2)
25226 (const_int 1)))
25227 (set (match_operand:P 1 "register_operand" "=S")
25228 (plus:P (match_dup 3)
25229 (const_int 1)))]
25230 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
25231 && ix86_check_no_addr_space (insn)"
25232 "%^movsb"
25233 [(set_attr "type" "str")
25234 (set_attr "memory" "both")
25235 (set (attr "prefix_rex")
25236 (if_then_else
25237 (match_test "<P:MODE>mode == DImode")
25238 (const_string "0")
25239 (const_string "*")))
25240 (set_attr "mode" "QI")])
25241
;; Builds the parallel matched by the *rep_mov* insn patterns: count
;; register (operand 4) set to 0, both pointer updates, the BLK memory
;; move, and a use of the count.  The only preparation is to set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD.
25242 (define_expand "rep_mov"
25243 [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
25244 (set (match_operand 0 "register_operand")
25245 (match_operand 5))
25246 (set (match_operand 2 "register_operand")
25247 (match_operand 6))
25248 (set (match_operand 1 "memory_operand")
25249 (match_operand 3 "memory_operand"))
25250 (use (match_dup 4))])]
25251 ""
25252 {
25253 if (TARGET_CLD)
25254 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25255 })
25256
;; "rep movsq".  The count register (constraint "c", operand 2, tied to
;; input operand 5) ends as 0; the destination ("D") and source ("S")
;; pointers each advance by count << 3.  64-bit only; rejected if CX_REG,
;; SI_REG or DI_REG is fixed or if ix86_check_no_addr_space fails.
25257 (define_insn "*rep_movdi_rex64"
25258 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
25259 (set (match_operand:P 0 "register_operand" "=D")
25260 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
25261 (const_int 3))
25262 (match_operand:P 3 "register_operand" "0")))
25263 (set (match_operand:P 1 "register_operand" "=S")
25264 (plus:P (ashift:P (match_dup 5) (const_int 3))
25265 (match_operand:P 4 "register_operand" "1")))
25266 (set (mem:BLK (match_dup 3))
25267 (mem:BLK (match_dup 4)))
25268 (use (match_dup 5))]
25269 "TARGET_64BIT
25270 && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
25271 && ix86_check_no_addr_space (insn)"
25272 "%^rep{%;} movsq"
25273 [(set_attr "type" "str")
25274 (set_attr "prefix_rep" "1")
25275 (set_attr "memory" "both")
25276 (set_attr "mode" "DI")])
25277
;; "rep movsl"/"rep movsd" (AT&T/Intel spelling).  The count register
;; ("c", operand 2, tied to input operand 5) ends as 0; the destination
;; ("D") and source ("S") pointers each advance by count << 2.  Rejected
;; if CX_REG, SI_REG or DI_REG is fixed or if ix86_check_no_addr_space
;; fails.
25278 (define_insn "*rep_movsi"
25279 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
25280 (set (match_operand:P 0 "register_operand" "=D")
25281 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
25282 (const_int 2))
25283 (match_operand:P 3 "register_operand" "0")))
25284 (set (match_operand:P 1 "register_operand" "=S")
25285 (plus:P (ashift:P (match_dup 5) (const_int 2))
25286 (match_operand:P 4 "register_operand" "1")))
25287 (set (mem:BLK (match_dup 3))
25288 (mem:BLK (match_dup 4)))
25289 (use (match_dup 5))]
25290 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
25291 && ix86_check_no_addr_space (insn)"
25292 "%^rep{%;} movs{l|d}"
25293 [(set_attr "type" "str")
25294 (set_attr "prefix_rep" "1")
25295 (set_attr "memory" "both")
25296 (set_attr "mode" "SI")])
25297
;; "rep movsb".  The count register ("c", operand 2, tied to input
;; operand 5) ends as 0; the destination ("D") and source ("S") pointers
;; each advance by the unshifted count.  Rejected if CX_REG, SI_REG or
;; DI_REG is fixed or if ix86_check_no_addr_space fails.
25298 (define_insn "*rep_movqi"
25299 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
25300 (set (match_operand:P 0 "register_operand" "=D")
25301 (plus:P (match_operand:P 3 "register_operand" "0")
25302 (match_operand:P 5 "register_operand" "2")))
25303 (set (match_operand:P 1 "register_operand" "=S")
25304 (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
25305 (set (mem:BLK (match_dup 3))
25306 (mem:BLK (match_dup 4)))
25307 (use (match_dup 5))]
25308 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
25309 && ix86_check_no_addr_space (insn)"
25310 "%^rep{%;} movsb"
25311 [(set_attr "type" "str")
25312 (set_attr "prefix_rep" "1")
25313 (set_attr "memory" "both")
25314 (set_attr "mode" "QI")])
25315
;; Block set expander for the SWI48 length modes.  All work is done by
;; ix86_expand_set_or_cpymem (called with NULL as the source operand and
;; true as the final argument); the expansion FAILs when that helper
;; returns false.
25316 (define_expand "setmem<mode>"
25317 [(use (match_operand:BLK 0 "memory_operand"))
25318 (use (match_operand:SWI48 1 "nonmemory_operand"))
25319 (use (match_operand:QI 2 "nonmemory_operand"))
25320 (use (match_operand 3 "const_int_operand"))
25321 (use (match_operand:SI 4 "const_int_operand"))
25322 (use (match_operand:SI 5 "const_int_operand"))
25323 (use (match_operand:SI 6 ""))
25324 (use (match_operand:SI 7 ""))
25325 (use (match_operand:SI 8 ""))]
25326 ""
25327 {
25328 if (ix86_expand_set_or_cpymem (operands[0], NULL,
25329 operands[1], operands[2],
25330 operands[3], operands[4],
25331 operands[5], operands[6],
25332 operands[7], operands[8], true))
25333 DONE;
25334 else
25335 FAIL;
25336 })
25337
25338 ;; Most CPUs don't like single string operations
25339 ;; Handle this case here to simplify previous expander.
25340
;; Stores register operand 2 to memory operand 1 and advances address
;; register operand 0 by the size of operand 2's mode.  If the modes of
;; operands 1 and 2 differ, operand 1 is re-typed to operand 2's mode.
;; Emits strset_singleop when TARGET_SINGLE_STRINGOP or optimizing for
;; size, provided neither AX_REG nor DI_REG is fixed; otherwise falls
;; through to the template.  FAILs if operand 1 is in a non-generic
;; address space.
25341 (define_expand "strset"
25342 [(set (match_operand 1 "memory_operand")
25343 (match_operand 2 "register_operand"))
25344 (parallel [(set (match_operand 0 "register_operand")
25345 (match_dup 3))
25346 (clobber (reg:CC FLAGS_REG))])]
25347 ""
25348 {
25349 /* Can't use this for non-default address spaces. */
25350 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
25351 FAIL;
25352
25353 if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
25354 operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
25355
25356 /* If .md ever supports :P for Pmode, this can be directly
25357 in the pattern above. */
25358 operands[3] = plus_constant (Pmode, operands[0],
25359 GET_MODE_SIZE (GET_MODE (operands[2])));
25360
25361 /* Can't use this if the user has appropriated eax or edi. */
25362 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
25363 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
25364 {
25365 emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
25366 operands[3]));
25367 DONE;
25368 }
25369 })
25370
;; Builds the parallel (memory store, pointer update, UNSPEC_STOS marker)
;; that the *strset*_1 insn patterns match.  The only preparation is to
;; set ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD.
25371 (define_expand "strset_singleop"
25372 [(parallel [(set (match_operand 1 "memory_operand")
25373 (match_operand 2 "register_operand"))
25374 (set (match_operand 0 "register_operand")
25375 (match_operand 3))
25376 (unspec [(const_int 0)] UNSPEC_STOS)])]
25377 ""
25378 {
25379 if (TARGET_CLD)
25380 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25381 })
25382
;; Single "stosq": store DImode operand 2 (constraint "a") through the
;; destination pointer (constraint "D"), which advances by 8.  64-bit
;; only; rejected if AX_REG or DI_REG is fixed or if
;; ix86_check_no_addr_space fails.
25383 (define_insn "*strsetdi_rex_1"
25384 [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
25385 (match_operand:DI 2 "register_operand" "a"))
25386 (set (match_operand:P 0 "register_operand" "=D")
25387 (plus:P (match_dup 1)
25388 (const_int 8)))
25389 (unspec [(const_int 0)] UNSPEC_STOS)]
25390 "TARGET_64BIT
25391 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
25392 && ix86_check_no_addr_space (insn)"
25393 "%^stosq"
25394 [(set_attr "type" "str")
25395 (set_attr "memory" "store")
25396 (set_attr "mode" "DI")])
25397
;; Single "stosl"/"stosd" (AT&T/Intel spelling): store SImode operand 2
;; ("a") through the destination pointer ("D"), which advances by 4.
;; Rejected if AX_REG or DI_REG is fixed or if ix86_check_no_addr_space
;; fails.
25398 (define_insn "*strsetsi_1"
25399 [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
25400 (match_operand:SI 2 "register_operand" "a"))
25401 (set (match_operand:P 0 "register_operand" "=D")
25402 (plus:P (match_dup 1)
25403 (const_int 4)))
25404 (unspec [(const_int 0)] UNSPEC_STOS)]
25405 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
25406 && ix86_check_no_addr_space (insn)"
25407 "%^stos{l|d}"
25408 [(set_attr "type" "str")
25409 (set_attr "memory" "store")
25410 (set_attr "mode" "SI")])
25411
;; Single "stosw": store HImode operand 2 ("a") through the destination
;; pointer ("D"), which advances by 2.  Rejected if AX_REG or DI_REG is
;; fixed or if ix86_check_no_addr_space fails.
25412 (define_insn "*strsethi_1"
25413 [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
25414 (match_operand:HI 2 "register_operand" "a"))
25415 (set (match_operand:P 0 "register_operand" "=D")
25416 (plus:P (match_dup 1)
25417 (const_int 2)))
25418 (unspec [(const_int 0)] UNSPEC_STOS)]
25419 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
25420 && ix86_check_no_addr_space (insn)"
25421 "%^stosw"
25422 [(set_attr "type" "str")
25423 (set_attr "memory" "store")
25424 (set_attr "mode" "HI")])
25425
;; Single "stosb": store QImode operand 2 ("a") through the destination
;; pointer ("D"), which advances by 1.  Rejected if AX_REG or DI_REG is
;; fixed or if ix86_check_no_addr_space fails.  The prefix_rex attribute
;; is forced to "0" when the pointer mode is DImode and left at its
;; default ("*") otherwise.
25426 (define_insn "*strsetqi_1"
25427 [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
25428 (match_operand:QI 2 "register_operand" "a"))
25429 (set (match_operand:P 0 "register_operand" "=D")
25430 (plus:P (match_dup 1)
25431 (const_int 1)))
25432 (unspec [(const_int 0)] UNSPEC_STOS)]
25433 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
25434 && ix86_check_no_addr_space (insn)"
25435 "%^stosb"
25436 [(set_attr "type" "str")
25437 (set_attr "memory" "store")
25438 (set (attr "prefix_rex")
25439 (if_then_else
25440 (match_test "<P:MODE>mode == DImode")
25441 (const_string "0")
25442 (const_string "*")))
25443 (set_attr "mode" "QI")])
25444
;; Builds the parallel matched by the *rep_stos* insn patterns: count
;; register (operand 1) set to 0, the pointer update, the memory operand
;; set to (const_int 0), a use of the value register (operand 3) and a
;; use of the count.  The only preparation is to set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD.
25445 (define_expand "rep_stos"
25446 [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
25447 (set (match_operand 0 "register_operand")
25448 (match_operand 4))
25449 (set (match_operand 2 "memory_operand") (const_int 0))
25450 (use (match_operand 3 "register_operand"))
25451 (use (match_dup 1))])]
25452 ""
25453 {
25454 if (TARGET_CLD)
25455 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25456 })
25457
;; "rep stosq".  The count register ("c", operand 1, tied to input
;; operand 4) ends as 0; the destination pointer ("D") advances by
;; count << 3; the DImode value to store is operand 2 ("a").  64-bit
;; only; rejected if AX_REG, CX_REG or DI_REG is fixed or if
;; ix86_check_no_addr_space fails.
25458 (define_insn "*rep_stosdi_rex64"
25459 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
25460 (set (match_operand:P 0 "register_operand" "=D")
25461 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
25462 (const_int 3))
25463 (match_operand:P 3 "register_operand" "0")))
25464 (set (mem:BLK (match_dup 3))
25465 (const_int 0))
25466 (use (match_operand:DI 2 "register_operand" "a"))
25467 (use (match_dup 4))]
25468 "TARGET_64BIT
25469 && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25470 && ix86_check_no_addr_space (insn)"
25471 "%^rep{%;} stosq"
25472 [(set_attr "type" "str")
25473 (set_attr "prefix_rep" "1")
25474 (set_attr "memory" "store")
25475 (set_attr "mode" "DI")])
25476
;; "rep stosl"/"rep stosd" (AT&T/Intel spelling).  The count register
;; ("c", operand 1, tied to input operand 4) ends as 0; the destination
;; pointer ("D") advances by count << 2; the SImode value to store is
;; operand 2 ("a").  Rejected if AX_REG, CX_REG or DI_REG is fixed or if
;; ix86_check_no_addr_space fails.
25477 (define_insn "*rep_stossi"
25478 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
25479 (set (match_operand:P 0 "register_operand" "=D")
25480 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
25481 (const_int 2))
25482 (match_operand:P 3 "register_operand" "0")))
25483 (set (mem:BLK (match_dup 3))
25484 (const_int 0))
25485 (use (match_operand:SI 2 "register_operand" "a"))
25486 (use (match_dup 4))]
25487 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25488 && ix86_check_no_addr_space (insn)"
25489 "%^rep{%;} stos{l|d}"
25490 [(set_attr "type" "str")
25491 (set_attr "prefix_rep" "1")
25492 (set_attr "memory" "store")
25493 (set_attr "mode" "SI")])
25494
;; "rep stosb".  The count register ("c", operand 1, tied to input
;; operand 4) ends as 0; the destination pointer ("D") advances by the
;; unshifted count; the QImode value to store is operand 2 ("a").
;; Rejected if AX_REG, CX_REG or DI_REG is fixed or if
;; ix86_check_no_addr_space fails.  The prefix_rex attribute is forced to
;; "0" when the pointer mode is DImode and left at its default ("*")
;; otherwise.
25495 (define_insn "*rep_stosqi"
25496 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
25497 (set (match_operand:P 0 "register_operand" "=D")
25498 (plus:P (match_operand:P 3 "register_operand" "0")
25499 (match_operand:P 4 "register_operand" "1")))
25500 (set (mem:BLK (match_dup 3))
25501 (const_int 0))
25502 (use (match_operand:QI 2 "register_operand" "a"))
25503 (use (match_dup 4))]
25504 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25505 && ix86_check_no_addr_space (insn)"
25506 "%^rep{%;} stosb"
25507 [(set_attr "type" "str")
25508 (set_attr "prefix_rep" "1")
25509 (set_attr "memory" "store")
25510 (set (attr "prefix_rex")
25511 (if_then_else
25512 (match_test "<P:MODE>mode == DImode")
25513 (const_string "0")
25514 (const_string "*")))
25515 (set_attr "mode" "QI")])
25516
;; Expander for cmpmemsi.  Operands 1 and 2 must satisfy memory_operand.
;; All the work is delegated to ix86_expand_cmpstrn_or_cmpmem, called
;; with operands 0-4 and a final argument of false; the expansion is
;; DONE when that call returns nonzero and FAILs otherwise.
25517 (define_expand "cmpmemsi"
25518 [(set (match_operand:SI 0 "register_operand" "")
25519 (compare:SI (match_operand:BLK 1 "memory_operand" "")
25520 (match_operand:BLK 2 "memory_operand" "") ) )
25521 (use (match_operand 3 "general_operand"))
25522 (use (match_operand 4 "immediate_operand"))]
25523 ""
25524 {
25525 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
25526 operands[2], operands[3],
25527 operands[4], false))
25528 DONE;
25529 else
25530 FAIL;
25531 })
25532
;; Expander for cmpstrnsi.  Operands 1 and 2 are BLKmode general_operands.
;; All the work is delegated to ix86_expand_cmpstrn_or_cmpmem, called
;; with operands 0-4 and a final argument of true; the expansion is
;; DONE when that call returns nonzero and FAILs otherwise.
25533 (define_expand "cmpstrnsi"
25534 [(set (match_operand:SI 0 "register_operand")
25535 (compare:SI (match_operand:BLK 1 "general_operand")
25536 (match_operand:BLK 2 "general_operand")))
25537 (use (match_operand 3 "general_operand"))
25538 (use (match_operand 4 "immediate_operand"))]
25539 ""
25540 {
25541 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
25542 operands[2], operands[3],
25543 operands[4], true))
25544 DONE;
25545 else
25546 FAIL;
25547 })
25548
25549 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
25550
;; Emits three insns: operand 1 = gtu of FLAGS_REG, operand 2 = ltu of
;; FLAGS_REG, then operand 0 = operand 1 - operand 2 (clobbering the
;; flags).  Operands 1 and 2 are fresh QImode pseudos allocated by the
;; preparation code; only operand 0 is supplied by the caller.
25551 (define_expand "cmpintqi"
25552 [(set (match_dup 1)
25553 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
25554 (set (match_dup 2)
25555 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
25556 (parallel [(set (match_operand:QI 0 "register_operand")
25557 (minus:QI (match_dup 1)
25558 (match_dup 2)))
25559 (clobber (reg:CC FLAGS_REG))])]
25560 ""
25561 {
25562 operands[1] = gen_reg_rtx (QImode);
25563 operands[2] = gen_reg_rtx (QImode);
25564 })
25565
25566 ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
25567 ;; zero. Emit extra code to make sure that a zero-length compare is EQ.
25568
;; Expander producing a parallel that sets FLAGS_REG to the comparison of
;; memory operands 4 and 5, uses operands 2 and 3, and clobbers
;; operands 0, 1 and 2.  When TARGET_CLD is set, the preparation code
;; records X86_DIRFLAG in ix86_optimize_mode_switching.
25569 (define_expand "cmpstrnqi_nz_1"
25570 [(parallel [(set (reg:CC FLAGS_REG)
25571 (compare:CC (match_operand 4 "memory_operand")
25572 (match_operand 5 "memory_operand")))
25573 (use (match_operand 2 "register_operand"))
25574 (use (match_operand:SI 3 "immediate_operand"))
25575 (clobber (match_operand 0 "register_operand"))
25576 (clobber (match_operand 1 "register_operand"))
25577 (clobber (match_dup 2))])]
25578 ""
25579 {
25580 if (TARGET_CLD)
25581 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25582 })
25583
;; "repz cmpsb" recognizer for the unconditional compare.  The two BLK
;; memory references are addressed by operands 4 and 5, which are tied to
;; the clobbered operands 0 (constraint "S") and 1 (constraint "D");
;; operand 6 is tied to the clobbered operand 2 (constraint "c").
;; The prefix_rex attribute is "0" when P is DImode and "*" otherwise.
;; Rejected when CX, SI or DI is a fixed register or when
;; ix86_check_no_addr_space (insn) fails.
25584 (define_insn "*cmpstrnqi_nz_1"
25585 [(set (reg:CC FLAGS_REG)
25586 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
25587 (mem:BLK (match_operand:P 5 "register_operand" "1"))))
25588 (use (match_operand:P 6 "register_operand" "2"))
25589 (use (match_operand:SI 3 "immediate_operand" "i"))
25590 (clobber (match_operand:P 0 "register_operand" "=S"))
25591 (clobber (match_operand:P 1 "register_operand" "=D"))
25592 (clobber (match_operand:P 2 "register_operand" "=c"))]
25593 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
25594 && ix86_check_no_addr_space (insn)"
25595 "%^repz{%;} cmpsb"
25596 [(set_attr "type" "str")
25597 (set_attr "mode" "QI")
25598 (set (attr "prefix_rex")
25599 (if_then_else
25600 (match_test "<P:MODE>mode == DImode")
25601 (const_string "0")
25602 (const_string "*")))
25603 (set_attr "prefix_rep" "1")])
25604
25605 ;; The same, but the count is not known to be nonzero.
25606
;; Expander producing a parallel in which FLAGS_REG receives the
;; comparison of memory operands 4 and 5 when operand 2 is nonzero and
;; keeps its previous value otherwise.  The parallel also uses operand 3
;; and clobbers operands 0, 1 and 2.  When TARGET_CLD is set, the
;; preparation code records X86_DIRFLAG in ix86_optimize_mode_switching.
25607 (define_expand "cmpstrnqi_1"
25608 [(parallel [(set (reg:CC FLAGS_REG)
25609 (if_then_else:CC (ne (match_operand 2 "register_operand")
25610 (const_int 0))
25611 (compare:CC (match_operand 4 "memory_operand")
25612 (match_operand 5 "memory_operand"))
25613 (reg:CC FLAGS_REG)))
25614 (use (match_operand:SI 3 "immediate_operand"))
25615 (clobber (match_operand 0 "register_operand"))
25616 (clobber (match_operand 1 "register_operand"))
25617 (clobber (match_dup 2))])]
25618 ""
25619 {
25620 if (TARGET_CLD)
25621 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25622 })
25623
;; "repz cmpsb" recognizer for the conditional compare: FLAGS_REG receives
;; the comparison of the two BLK memory references when operand 6 is
;; nonzero and keeps its previous value otherwise.  Operands 4, 5 and 6
;; are tied to the clobbered operands 0 (constraint "S"), 1 (constraint
;; "D") and 2 (constraint "c").  The prefix_rex attribute is "0" when P
;; is DImode and "*" otherwise.  Rejected when CX, SI or DI is a fixed
;; register or when ix86_check_no_addr_space (insn) fails.
25624 (define_insn "*cmpstrnqi_1"
25625 [(set (reg:CC FLAGS_REG)
25626 (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
25627 (const_int 0))
25628 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
25629 (mem:BLK (match_operand:P 5 "register_operand" "1")))
25630 (reg:CC FLAGS_REG)))
25631 (use (match_operand:SI 3 "immediate_operand" "i"))
25632 (clobber (match_operand:P 0 "register_operand" "=S"))
25633 (clobber (match_operand:P 1 "register_operand" "=D"))
25634 (clobber (match_operand:P 2 "register_operand" "=c"))]
25635 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
25636 && ix86_check_no_addr_space (insn)"
25637 "%^repz{%;} cmpsb"
25638 [(set_attr "type" "str")
25639 (set_attr "mode" "QI")
25640 (set (attr "prefix_rex")
25641 (if_then_else
25642 (match_test "<P:MODE>mode == DImode")
25643 (const_string "0")
25644 (const_string "*")))
25645 (set_attr "prefix_rep" "1")])
25646
;; Expander for strlen in each P mode.  The pattern is an UNSPEC_SCAS of
;; a BLKmode operand 1, a QImode immediate operand 2 and an immediate
;; operand 3.  The expansion is delegated to ix86_expand_strlen, called
;; with operands 0-3; it is DONE when that call returns nonzero and
;; FAILs otherwise.
25647 (define_expand "strlen<mode>"
25648 [(set (match_operand:P 0 "register_operand")
25649 (unspec:P [(match_operand:BLK 1 "general_operand")
25650 (match_operand:QI 2 "immediate_operand")
25651 (match_operand 3 "immediate_operand")]
25652 UNSPEC_SCAS))]
25653 ""
25654 {
25655 if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
25656 DONE;
25657 else
25658 FAIL;
25659 })
25660
;; Expander producing a parallel that sets operand 0 to operand 2 and
;; clobbers operand 1 and FLAGS_REG.  When TARGET_CLD is set, the
;; preparation code records X86_DIRFLAG in ix86_optimize_mode_switching.
25661 (define_expand "strlenqi_1"
25662 [(parallel [(set (match_operand 0 "register_operand")
25663 (match_operand 2))
25664 (clobber (match_operand 1 "register_operand"))
25665 (clobber (reg:CC FLAGS_REG))])]
25666 ""
25667 {
25668 if (TARGET_CLD)
25669 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
25670 })
25671
;; "repnz scasb" recognizer.  Operand 0 (earlyclobber, constraint "c")
;; receives an UNSPEC_SCAS of: the BLK memory addressed by operand 5
;; (tied to the clobbered operand 1, constraint "D"), the QImode operand 2
;; (constraint "a"), the immediate operand 3, and operand 4 (tied to
;; operand 0).  FLAGS_REG is clobbered.  The prefix_rex attribute is "0"
;; when P is DImode and "*" otherwise.  Rejected when AX, CX or DI is a
;; fixed register or when ix86_check_no_addr_space (insn) fails.
25672 (define_insn "*strlenqi_1"
25673 [(set (match_operand:P 0 "register_operand" "=&c")
25674 (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
25675 (match_operand:QI 2 "register_operand" "a")
25676 (match_operand:P 3 "immediate_operand" "i")
25677 (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
25678 (clobber (match_operand:P 1 "register_operand" "=D"))
25679 (clobber (reg:CC FLAGS_REG))]
25680 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25681 && ix86_check_no_addr_space (insn)"
25682 "%^repnz{%;} scasb"
25683 [(set_attr "type" "str")
25684 (set_attr "mode" "QI")
25685 (set (attr "prefix_rex")
25686 (if_then_else
25687 (match_test "<P:MODE>mode == DImode")
25688 (const_string "0")
25689 (const_string "*")))
25690 (set_attr "prefix_rep" "1")])
25691
25692 ;; Peephole optimizations to clean up after cmpstrn*. This should be
25693 ;; handled in combine, but it is not currently up to the task.
25694 ;; When used for their truth value, the cmpstrn* expanders generate
25695 ;; code like this:
25696 ;;
25697 ;; repz cmpsb
25698 ;; seta %al
25699 ;; setb %dl
25700 ;; cmpb %al, %dl
25701 ;; jcc label
25702 ;;
25703 ;; The intermediate three instructions are unnecessary.
25704
25705 ;; This one handles cmpstrn*_nz_1...
;; Matches four insns: the unconditional string-compare parallel, a gtu
;; set of QImode operand 7, an ltu set of QImode operand 8, and a compare
;; of operand 7 with operand 8 into FLAGS_REG.  When peep2_reg_dead_p
;; (4, ...) holds for both operands 7 and 8, the whole sequence is
;; replaced by the string-compare parallel alone.
25706 (define_peephole2
25707 [(parallel[
25708 (set (reg:CC FLAGS_REG)
25709 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
25710 (mem:BLK (match_operand 5 "register_operand"))))
25711 (use (match_operand 6 "register_operand"))
25712 (use (match_operand:SI 3 "immediate_operand"))
25713 (clobber (match_operand 0 "register_operand"))
25714 (clobber (match_operand 1 "register_operand"))
25715 (clobber (match_operand 2 "register_operand"))])
25716 (set (match_operand:QI 7 "register_operand")
25717 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
25718 (set (match_operand:QI 8 "register_operand")
25719 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
25720 (set (reg FLAGS_REG)
25721 (compare (match_dup 7) (match_dup 8)))
25722 ]
25723 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
25724 [(parallel[
25725 (set (reg:CC FLAGS_REG)
25726 (compare:CC (mem:BLK (match_dup 4))
25727 (mem:BLK (match_dup 5))))
25728 (use (match_dup 6))
25729 (use (match_dup 3))
25730 (clobber (match_dup 0))
25731 (clobber (match_dup 1))
25732 (clobber (match_dup 2))])])
25733
25734 ;; ...and this one handles cmpstrn*_1.
;; Matches four insns: the conditional string-compare parallel (flags
;; updated only when operand 6 is nonzero), a gtu set of QImode operand 7,
;; an ltu set of QImode operand 8, and a compare of operand 7 with
;; operand 8 into FLAGS_REG.  When peep2_reg_dead_p (4, ...) holds for
;; both operands 7 and 8, the whole sequence is replaced by the
;; string-compare parallel alone.
25735 (define_peephole2
25736 [(parallel[
25737 (set (reg:CC FLAGS_REG)
25738 (if_then_else:CC (ne (match_operand 6 "register_operand")
25739 (const_int 0))
25740 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
25741 (mem:BLK (match_operand 5 "register_operand")))
25742 (reg:CC FLAGS_REG)))
25743 (use (match_operand:SI 3 "immediate_operand"))
25744 (clobber (match_operand 0 "register_operand"))
25745 (clobber (match_operand 1 "register_operand"))
25746 (clobber (match_operand 2 "register_operand"))])
25747 (set (match_operand:QI 7 "register_operand")
25748 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
25749 (set (match_operand:QI 8 "register_operand")
25750 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
25751 (set (reg FLAGS_REG)
25752 (compare (match_dup 7) (match_dup 8)))
25753 ]
25754 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
25755 [(parallel[
25756 (set (reg:CC FLAGS_REG)
25757 (if_then_else:CC (ne (match_dup 6)
25758 (const_int 0))
25759 (compare:CC (mem:BLK (match_dup 4))
25760 (mem:BLK (match_dup 5)))
25761 (reg:CC FLAGS_REG)))
25762 (use (match_dup 3))
25763 (clobber (match_dup 0))
25764 (clobber (match_dup 1))
25765 (clobber (match_dup 2))])])
25766 \f
25767 ;; Conditional move instructions.
25768
;; Integer conditional-move expander for the SWIM modes.  The expansion
;; is delegated to ix86_expand_int_movcc (operands); it is DONE when that
;; call returns nonzero and FAILs otherwise.
25769 (define_expand "mov<mode>cc"
25770 [(set (match_operand:SWIM 0 "register_operand")
25771 (if_then_else:SWIM (match_operand 1 "comparison_operator")
25772 (match_operand:SWIM 2 "<general_operand>")
25773 (match_operand:SWIM 3 "<general_operand>")))]
25774 ""
25775 "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
25776
25777 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
25778 ;; the register first winds up with `sbbl $0,reg', which is also weird.
25779 ;; So just document what we're doing explicitly.
25780
;; Expander (SWI48 modes) for "operand 0 = condition ? -1 : 0", where the
;; condition is carry-flag operator 2 applied to flags operand 1 and
;; zero.  The emitted parallel also clobbers FLAGS_REG.
25781 (define_expand "x86_mov<mode>cc_0_m1"
25782 [(parallel
25783 [(set (match_operand:SWI48 0 "register_operand")
25784 (if_then_else:SWI48
25785 (match_operator:SWI48 2 "ix86_carry_flag_operator"
25786 [(match_operand 1 "flags_reg_operand")
25787 (const_int 0)])
25788 (const_int -1)
25789 (const_int 0)))
25790 (clobber (reg:CC FLAGS_REG))])])
25791
;; Recognizer for "operand 0 = carry condition ? -1 : 0" in the SWI48
;; modes, written as an if_then_else.  It is emitted as "sbb %0, %0"
;; and clobbers FLAGS_REG.
25792 (define_insn "*x86_mov<mode>cc_0_m1"
25793 [(set (match_operand:SWI48 0 "register_operand" "=r")
25794 (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
25795 [(reg FLAGS_REG) (const_int 0)])
25796 (const_int -1)
25797 (const_int 0)))
25798 (clobber (reg:CC FLAGS_REG))]
25799 ""
25800 "sbb{<imodesuffix>}\t%0, %0"
25801 [(set_attr "type" "alu1")
25802 (set_attr "use_carry" "1")
25803 (set_attr "pent_pair" "pu")
25804 (set_attr "mode" "<MODE>")
25805 (set_attr "length_immediate" "0")])
25806
;; Same "sbb %0, %0" instruction, recognized when the RTL is written as a
;; one-bit sign_extract (width 1, position 0) of the carry-flag
;; condition.  SWI48 modes; clobbers FLAGS_REG.
25807 (define_insn "*x86_mov<mode>cc_0_m1_se"
25808 [(set (match_operand:SWI48 0 "register_operand" "=r")
25809 (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
25810 [(reg FLAGS_REG) (const_int 0)])
25811 (const_int 1)
25812 (const_int 0)))
25813 (clobber (reg:CC FLAGS_REG))]
25814 ""
25815 "sbb{<imodesuffix>}\t%0, %0"
25816 [(set_attr "type" "alu1")
25817 (set_attr "use_carry" "1")
25818 (set_attr "pent_pair" "pu")
25819 (set_attr "mode" "<MODE>")
25820 (set_attr "length_immediate" "0")])
25821
;; Same "sbb %0, %0" instruction, recognized when the RTL is written as
;; the negation of the carry-flag condition.  This form covers all SWI
;; modes (register constraint "<r>"); clobbers FLAGS_REG.
25822 (define_insn "*x86_mov<mode>cc_0_m1_neg"
25823 [(set (match_operand:SWI 0 "register_operand" "=<r>")
25824 (neg:SWI (match_operator 1 "ix86_carry_flag_operator"
25825 [(reg FLAGS_REG) (const_int 0)])))
25826 (clobber (reg:CC FLAGS_REG))]
25827 ""
25828 "sbb{<imodesuffix>}\t%0, %0"
25829 [(set_attr "type" "alu1")
25830 (set_attr "use_carry" "1")
25831 (set_attr "pent_pair" "pu")
25832 (set_attr "mode" "<MODE>")
25833 (set_attr "length_immediate" "0")])
25834
;; Named expander for the negated form: operand 0 = -(ltu of the CCCmode
;; FLAGS_REG against zero), in a parallel that clobbers FLAGS_REG.
25835 (define_expand "x86_mov<mode>cc_0_m1_neg"
25836 [(parallel
25837 [(set (match_operand:SWI 0 "register_operand")
25838 (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))
25839 (clobber (reg:CC FLAGS_REG))])])
25840
;; Splits -(operand 1 <=u C) into a compare of operand 1 against C + 1
;; followed by -(ltu FLAGS_REG).  The preparation statement replaces
;; operand 2 by C + 1 before the new pattern is generated.  The split
;; applies only when C satisfies x86_64_immediate_operand and is
;; neither -1 nor 2147483647.
25841 (define_split
25842 [(set (match_operand:SWI48 0 "register_operand")
25843 (neg:SWI48
25844 (leu:SWI48
25845 (match_operand 1 "int_nonimmediate_operand")
25846 (match_operand 2 "const_int_operand"))))]
25847 "x86_64_immediate_operand (operands[2], VOIDmode)
25848 && INTVAL (operands[2]) != -1
25849 && INTVAL (operands[2]) != 2147483647"
25850 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
25851 (set (match_dup 0)
25852 (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))]
25853 "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
25854
;; Splits -(operand 1 == 0) into a compare of operand 1 against the
;; constant 1 followed by -(ltu FLAGS_REG).  No extra condition.
25855 (define_split
25856 [(set (match_operand:SWI 0 "register_operand")
25857 (neg:SWI
25858 (eq:SWI
25859 (match_operand 1 "int_nonimmediate_operand")
25860 (const_int 0))))]
25861 ""
25862 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
25863 (set (match_dup 0)
25864 (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))])
25865
;; Splits -(operand 1 != 0) into a CCCmode FLAGS_REG set from an
;; UNSPEC_CC_NE of operand 1 and zero, followed by -(ltu FLAGS_REG).
;; No extra condition.
25866 (define_split
25867 [(set (match_operand:SWI 0 "register_operand")
25868 (neg:SWI
25869 (ne:SWI
25870 (match_operand 1 "int_nonimmediate_operand")
25871 (const_int 0))))]
25872 ""
25873 [(set (reg:CCC FLAGS_REG)
25874 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
25875 (set (match_dup 0)
25876 (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
25877
;; Integer cmov recognizer for the SWI248 modes; requires TARGET_CMOVE
;; and rejects the case where operands 2 and 3 are both memory.
;; Alternatives 0 and 1 are two-operand forms in which one input is tied
;; to the destination; alternative 1 prints the reversed condition (%c1).
;; Alternatives 2 and 3 are three-operand forms enabled only for the
;; apx_ndd isa.
25878 (define_insn "*mov<mode>cc_noc"
25879 [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
25880 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
25881 [(reg FLAGS_REG) (const_int 0)])
25882 (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
25883 (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
25884 "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
25885 "@
25886 cmov%O2%C1\t{%2, %0|%0, %2}
25887 cmov%O2%c1\t{%3, %0|%0, %3}
25888 cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
25889 cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
25890 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
25891 (set_attr "type" "icmov")
25892 (set_attr "mode" "<MODE>")])
25893
;; SImode cmov whose result is a DImode register: each arm of the
;; if_then_else is a zero_extend of an SImode operand.  The output
;; templates print the destination with the %k modifier.  Requires
;; TARGET_64BIT and TARGET_CMOVE, and rejects two memory inputs;
;; alternatives 2 and 3 are enabled only for the apx_ndd isa.
25894 (define_insn "*movsicc_noc_zext"
25895 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
25896 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
25897 [(reg FLAGS_REG) (const_int 0)])
25898 (zero_extend:DI
25899 (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r"))
25900 (zero_extend:DI
25901 (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
25902 "TARGET_64BIT
25903 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
25904 "@
25905 cmov%O2%C1\t{%2, %k0|%k0, %2}
25906 cmov%O2%c1\t{%3, %k0|%k0, %3}
25907 cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
25908 cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
25909 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
25910 (set_attr "type" "icmov")
25911 (set_attr "mode" "SI")])
25912
;; SImode cmov with a DImode result, recognized when the zero_extend
;; wraps the whole SImode if_then_else rather than each arm.  The output
;; templates print the destination with the %k modifier.  Requires
;; TARGET_64BIT and TARGET_CMOVE, and rejects two memory inputs;
;; alternatives 2 and 3 are enabled only for the apx_ndd isa.
25913 (define_insn "*movsicc_noc_zext_1"
25914 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r")
25915 (zero_extend:DI
25916 (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
25917 [(reg FLAGS_REG) (const_int 0)])
25918 (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")
25919 (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
25920 "TARGET_64BIT
25921 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
25922 "@
25923 cmov%O2%C1\t{%2, %k0|%k0, %2}
25924 cmov%O2%c1\t{%3, %k0|%k0, %3}
25925 cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
25926 cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
25927 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
25928 (set_attr "type" "icmov")
25929 (set_attr "mode" "SI")])
25930
25931
25932 ;; Don't do conditional moves with memory inputs. This splitter helps
25933 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Splitter for SWI248 cmovs with at least one memory input.  It re-emits
;; the same if_then_else after passing operands 2 and 3 through
;; force_reg.  It applies only on !TARGET_64BIT with TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, while pseudos can still be created,
;; and when the insn is optimized for speed.
25934 (define_split
25935 [(set (match_operand:SWI248 0 "register_operand")
25936 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
25937 [(reg FLAGS_REG) (const_int 0)])
25938 (match_operand:SWI248 2 "nonimmediate_operand")
25939 (match_operand:SWI248 3 "nonimmediate_operand")))]
25940 "!TARGET_64BIT && TARGET_CMOVE
25941 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
25942 && (MEM_P (operands[2]) || MEM_P (operands[3]))
25943 && can_create_pseudo_p ()
25944 && optimize_insn_for_speed_p ()"
25945 [(set (match_dup 0)
25946 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
25947 {
25948 operands[2] = force_reg (<MODE>mode, operands[2]);
25949 operands[3] = force_reg (<MODE>mode, operands[3]);
25950 })
25951
;; QImode register-only conditional move.  The output template is "#",
;; so no assembly is printed directly and the insn has to be split before
;; output.  Requires TARGET_CMOVE and !TARGET_PARTIAL_REG_STALL;
;; alternative 2 is enabled only for the apx_ndd isa.
25952 (define_insn "*movqicc_noc"
25953 [(set (match_operand:QI 0 "register_operand" "=r,r,r")
25954 (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
25955 [(reg FLAGS_REG) (const_int 0)])
25956 (match_operand:QI 2 "register_operand" "r,0,r")
25957 (match_operand:QI 3 "register_operand" "0,r,r")))]
25958 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
25959 "#"
25960 [(set_attr "isa" "*,*,apx_ndd")
25961 (set_attr "type" "icmov")
25962 (set_attr "mode" "QI")])
25963
;; After reload, rewrites a register-only SWI12 conditional move as an
;; SImode conditional move: operands 0, 2 and 3 are replaced by their
;; SImode gen_lowpart.  Requires TARGET_CMOVE and
;; !TARGET_PARTIAL_REG_STALL.
25964 (define_split
25965 [(set (match_operand:SWI12 0 "register_operand")
25966 (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
25967 [(reg FLAGS_REG) (const_int 0)])
25968 (match_operand:SWI12 2 "register_operand")
25969 (match_operand:SWI12 3 "register_operand")))]
25970 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
25971 && reload_completed"
25972 [(set (match_dup 0)
25973 (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
25974 {
25975 operands[0] = gen_lowpart (SImode, operands[0]);
25976 operands[2] = gen_lowpart (SImode, operands[2]);
25977 operands[3] = gen_lowpart (SImode, operands[3]);
25978 })
25979
25980 ;; Don't do conditional moves with memory inputs
;; Needs a scratch register (operand 4).  One memory input is first
;; copied into the scratch, then the cmov uses the scratch in its place.
;; Operand 2 is the one replaced when it is memory; otherwise operand 3
;; is.  Operand 5 holds the original memory reference for the load.
;; The condition guarantees at least one of the two is memory, so the
;; final else branch is unreachable.
25981 (define_peephole2
25982 [(match_scratch:SWI248 4 "r")
25983 (set (match_operand:SWI248 0 "register_operand")
25984 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
25985 [(reg FLAGS_REG) (const_int 0)])
25986 (match_operand:SWI248 2 "nonimmediate_operand")
25987 (match_operand:SWI248 3 "nonimmediate_operand")))]
25988 "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
25989 && (MEM_P (operands[2]) || MEM_P (operands[3]))
25990 && optimize_insn_for_speed_p ()"
25991 [(set (match_dup 4) (match_dup 5))
25992 (set (match_dup 0)
25993 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
25994 {
25995 if (MEM_P (operands[2]))
25996 {
25997 operands[5] = operands[2];
25998 operands[2] = operands[4];
25999 }
26000 else if (MEM_P (operands[3]))
26001 {
26002 operands[5] = operands[3];
26003 operands[3] = operands[4];
26004 }
26005 else
26006 gcc_unreachable ();
26007 })
26008
;; Zero-extending counterpart of the scratch-register cmov peephole:
;; the DImode if_then_else has zero_extend:DI of SImode operands 2 and 3.
;; With an SImode scratch (operand 4) available, one memory input
;; (operand 2 if it is memory, otherwise operand 3) is first copied into
;; the scratch and the cmov then uses the scratch in its place.
;; Requires TARGET_64BIT in addition to the cmov/avoid-memory conditions.
26009 (define_peephole2
26010 [(match_scratch:SI 4 "r")
26011 (set (match_operand:DI 0 "register_operand")
26012 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
26013 [(reg FLAGS_REG) (const_int 0)])
26014 (zero_extend:DI
26015 (match_operand:SI 2 "nonimmediate_operand"))
26016 (zero_extend:DI
26017 (match_operand:SI 3 "nonimmediate_operand"))))]
26018 "TARGET_64BIT
26019 && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
26020 && (MEM_P (operands[2]) || MEM_P (operands[3]))
26021 && optimize_insn_for_speed_p ()"
26022 [(set (match_dup 4) (match_dup 5))
26023 (set (match_dup 0)
26024 (if_then_else:DI (match_dup 1)
26025 (zero_extend:DI (match_dup 2))
26026 (zero_extend:DI (match_dup 3))))]
26027 {
26028 if (MEM_P (operands[2]))
26029 {
26030 operands[5] = operands[2];
26031 operands[2] = operands[4];
26032 }
26033 else if (MEM_P (operands[3]))
26034 {
26035 operands[5] = operands[3];
26036 operands[3] = operands[4];
26037 }
26038 else
26039 gcc_unreachable ();
26040 })
26041
26042 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
26043 ;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Matched sequence: op0 = op1; parallel {FLAGS_REG = op5, op0 = op6};
;; op2 = op3; op0 = cond ? op0 : op2.
;; Replacement: parallel {op7 = op8, op1 = op9}; op0 = op3;
;; op0 = cond ? op1 : op0.
;; Operand 7 is the destination of the first SET in the matched parallel
;; (insn 1 of the window).  Operands 8 and 9 are operands 5 and 6 passed
;; through ix86_replace_reg_with_reg (..., operands[0], operands[1]).
;; Conditions: op2 differs from op0 and op1, peep2_reg_dead_p holds for
;; op1 at offset 1 and for op2 at offset 4, and op0 does not overlap
;; anything mentioned in op3.
26044 (define_peephole2
26045 [(set (match_operand:SWI248 0 "general_reg_operand")
26046 (match_operand:SWI248 1 "general_reg_operand"))
26047 (parallel [(set (reg FLAGS_REG) (match_operand 5))
26048 (set (match_dup 0) (match_operand:SWI248 6))])
26049 (set (match_operand:SWI248 2 "general_reg_operand")
26050 (match_operand:SWI248 3 "general_gr_operand"))
26051 (set (match_dup 0)
26052 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
26053 [(reg FLAGS_REG) (const_int 0)])
26054 (match_dup 0)
26055 (match_dup 2)))]
26056 "TARGET_CMOVE
26057 && REGNO (operands[2]) != REGNO (operands[0])
26058 && REGNO (operands[2]) != REGNO (operands[1])
26059 && peep2_reg_dead_p (1, operands[1])
26060 && peep2_reg_dead_p (4, operands[2])
26061 && !reg_overlap_mentioned_p (operands[0], operands[3])"
26062 [(parallel [(set (match_dup 7) (match_dup 8))
26063 (set (match_dup 1) (match_dup 9))])
26064 (set (match_dup 0) (match_dup 3))
26065 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
26066 (match_dup 1)
26067 (match_dup 0)))]
26068 {
26069 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
26070 operands[8]
26071 = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
26072 operands[9]
26073 = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
26074 })
26075
26076 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
26077 ;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Matched sequence: op2 = op3; op0 = op1;
;; parallel {FLAGS_REG = op5, op0 = op6}; op0 = cond ? op0 : op2.
;; Replacement: parallel {op7 = op8, op1 = op9}; op0 = op3;
;; op0 = cond ? op1 : op0.
;; Operand 7 is the destination of the first SET in the matched parallel
;; (insn 2 of the window).  Operands 8 and 9 are operands 5 and 6 passed
;; through ix86_replace_reg_with_reg (..., operands[0], operands[1]).
;; Conditions: op2 differs from op0 and op1, peep2_reg_dead_p holds for
;; op1 at offset 2 and for op2 at offset 4, op0 does not overlap anything
;; mentioned in op3, and op2 is not mentioned in op6.
26078 (define_peephole2
26079 [(set (match_operand:SWI248 2 "general_reg_operand")
26080 (match_operand:SWI248 3 "general_gr_operand"))
26081 (set (match_operand:SWI248 0 "general_reg_operand")
26082 (match_operand:SWI248 1 "general_reg_operand"))
26083 (parallel [(set (reg FLAGS_REG) (match_operand 5))
26084 (set (match_dup 0) (match_operand:SWI248 6))])
26085 (set (match_dup 0)
26086 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
26087 [(reg FLAGS_REG) (const_int 0)])
26088 (match_dup 0)
26089 (match_dup 2)))]
26090 "TARGET_CMOVE
26091 && REGNO (operands[2]) != REGNO (operands[0])
26092 && REGNO (operands[2]) != REGNO (operands[1])
26093 && peep2_reg_dead_p (2, operands[1])
26094 && peep2_reg_dead_p (4, operands[2])
26095 && !reg_overlap_mentioned_p (operands[0], operands[3])
26096 && !reg_mentioned_p (operands[2], operands[6])"
26097 [(parallel [(set (match_dup 7) (match_dup 8))
26098 (set (match_dup 1) (match_dup 9))])
26099 (set (match_dup 0) (match_dup 3))
26100 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
26101 (match_dup 1)
26102 (match_dup 0)))]
26103 {
26104 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
26105 operands[8]
26106 = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
26107 operands[9]
26108 = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
26109 })
26110
;; Masked HFmode move (UNSPEC_MOVCC_MASK), requires TARGET_AVX512FP16.
;; Operand 1 is the source, operand 2 (constraint "0C") the merge value,
;; and operand 3 the QImode mask (constraint "Yk").  The three
;; alternatives are memory-to-register, register-to-memory and
;; register-to-register, all emitted as vmovsh; the register-to-register
;; alternative prints the source with %d1.
26111 (define_insn "movhf_mask"
26112 [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
26113 (unspec:HF
26114 [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
26115 (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
26116 (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
26117 UNSPEC_MOVCC_MASK))]
26118 "TARGET_AVX512FP16"
26119 "@
26120 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
26121 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
26122 vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
26123 [(set_attr "type" "ssemov")
26124 (set_attr "prefix" "evex")
26125 (set_attr "mode" "HF")])
26126
;; HFmode conditional-move expander, available with TARGET_AVX512FP16.
;; The expansion is delegated to ix86_expand_fp_movcc (operands); it is
;; DONE when that call returns nonzero and FAILs otherwise.
26127 (define_expand "movhfcc"
26128 [(set (match_operand:HF 0 "register_operand")
26129 (if_then_else:HF
26130 (match_operand 1 "comparison_operator")
26131 (match_operand:HF 2 "register_operand")
26132 (match_operand:HF 3 "register_operand")))]
26133 "TARGET_AVX512FP16"
26134 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
26135
;; Floating-point conditional-move expander for the X87MODEF modes.
;; Enabled when either (TARGET_80387 && TARGET_CMOVE) holds or the mode
;; is an SSE float mode with TARGET_SSE_MATH.  The expansion is delegated
;; to ix86_expand_fp_movcc (operands); it is DONE when that call returns
;; nonzero and FAILs otherwise.
26136 (define_expand "mov<mode>cc"
26137 [(set (match_operand:X87MODEF 0 "register_operand")
26138 (if_then_else:X87MODEF
26139 (match_operand 1 "comparison_operator")
26140 (match_operand:X87MODEF 2 "register_operand")
26141 (match_operand:X87MODEF 3 "register_operand")))]
26142 "(TARGET_80387 && TARGET_CMOVE)
26143 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
26144 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
26145
;; XFmode fcmov recognizer, requires TARGET_80387 and TARGET_CMOVE.
;; All operands use constraint "f".  In each alternative one input is
;; tied to the destination; alternative 1 prints the reversed condition
;; (%f1) and moves operand 3.
26146 (define_insn "*movxfcc_1"
26147 [(set (match_operand:XF 0 "register_operand" "=f,f")
26148 (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
26149 [(reg FLAGS_REG) (const_int 0)])
26150 (match_operand:XF 2 "register_operand" "f,0")
26151 (match_operand:XF 3 "register_operand" "0,f")))]
26152 "TARGET_80387 && TARGET_CMOVE"
26153 "@
26154 fcmov%F1\t{%2, %0|%0, %2}
26155 fcmov%f1\t{%3, %0|%0, %3}"
26156 [(set_attr "type" "fcmov")
26157 (set_attr "mode" "XF")])
26158
;; DFmode conditional move, requires TARGET_80387 and TARGET_CMOVE and
;; rejects two memory inputs.  Alternatives 0-1 use "f" registers and
;; emit fcmov.  Alternatives 2-3 (isa nox64, earlyclobber "r"
;; destination) output "#" and so must be split.  Alternatives 4-5 (isa
;; x64) emit an integer cmov on "r" registers.
26159 (define_insn "*movdfcc_1"
26160 [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
26161 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
26162 [(reg FLAGS_REG) (const_int 0)])
26163 (match_operand:DF 2 "nonimmediate_operand"
26164 "f ,0,rm,0 ,rm,0")
26165 (match_operand:DF 3 "nonimmediate_operand"
26166 "0 ,f,0 ,rm,0, rm")))]
26167 "TARGET_80387 && TARGET_CMOVE
26168 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
26169 "@
26170 fcmov%F1\t{%2, %0|%0, %2}
26171 fcmov%f1\t{%3, %0|%0, %3}
26172 #
26173 #
26174 cmov%O2%C1\t{%2, %0|%0, %2}
26175 cmov%O2%c1\t{%3, %0|%0, %3}"
26176 [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
26177 (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
26178 (set_attr "mode" "DF,DF,DI,DI,DI,DI")])
26179
;; After reload on !TARGET_64BIT, splits a DFmode conditional move whose
;; destination is a general register into two SImode conditional moves.
;; The first split_double_mode call splits the two inputs (operands 2
;; and 3) into operands 4-5 and 6-7; the second splits the destination
;; (operand 0) into operands 2 and 3, overwriting the original inputs.
;; The call order therefore matters.
26180 (define_split
26181 [(set (match_operand:DF 0 "general_reg_operand")
26182 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
26183 [(reg FLAGS_REG) (const_int 0)])
26184 (match_operand:DF 2 "nonimmediate_operand")
26185 (match_operand:DF 3 "nonimmediate_operand")))]
26186 "!TARGET_64BIT && reload_completed"
26187 [(set (match_dup 2)
26188 (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
26189 (set (match_dup 3)
26190 (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
26191 {
26192 split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
26193 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
26194 })
26195
;; SFmode conditional move, requires TARGET_80387 and TARGET_CMOVE and
;; rejects two memory inputs.  Alternatives 0-1 use "f" registers and
;; emit fcmov (mode SF).  Alternatives 2-3 use "r" registers and emit an
;; integer cmov (mode SI).
26196 (define_insn "*movsfcc_1_387"
26197 [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
26198 (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
26199 [(reg FLAGS_REG) (const_int 0)])
26200 (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
26201 (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
26202 "TARGET_80387 && TARGET_CMOVE
26203 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
26204 "@
26205 fcmov%F1\t{%2, %0|%0, %2}
26206 fcmov%f1\t{%3, %0|%0, %3}
26207 cmov%O2%C1\t{%2, %0|%0, %2}
26208 cmov%O2%c1\t{%3, %0|%0, %3}"
26209 [(set_attr "type" "fcmov,fcmov,icmov,icmov")
26210 (set_attr "mode" "SF,SF,SI,SI")])
26211
26212 ;; Don't do conditional moves with memory inputs. This splitter helps
26213 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Splitter for MODEF conditional moves with at least one memory input.
;; It re-emits the same if_then_else after passing operands 2 and 3
;; through force_reg.  It applies only on !TARGET_64BIT with
;; TARGET_80387, TARGET_CMOVE and TARGET_AVOID_MEM_OPND_FOR_CMOVE, while
;; pseudos can still be created, and when the insn is optimized for
;; speed.
26214 (define_split
26215 [(set (match_operand:MODEF 0 "register_operand")
26216 (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
26217 [(reg FLAGS_REG) (const_int 0)])
26218 (match_operand:MODEF 2 "nonimmediate_operand")
26219 (match_operand:MODEF 3 "nonimmediate_operand")))]
26220 "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
26221 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
26222 && (MEM_P (operands[2]) || MEM_P (operands[3]))
26223 && can_create_pseudo_p ()
26224 && optimize_insn_for_speed_p ()"
26225 [(set (match_dup 0)
26226 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
26227 {
26228 operands[2] = force_reg (<MODE>mode, operands[2]);
26229 operands[3] = force_reg (<MODE>mode, operands[3]);
26230 })
26231
26232 ;; Don't do conditional moves with memory inputs
;; Needs a MODEF scratch in a general register (operand 4, constraint
;; "r"), and the destination must be a general register.  One memory
;; input is first copied into the scratch, then the conditional move
;; uses the scratch in its place.  Operand 2 is the one replaced when it
;; is memory; otherwise operand 3 is.  Not applied to DFmode unless
;; TARGET_64BIT.
26233 (define_peephole2
26234 [(match_scratch:MODEF 4 "r")
26235 (set (match_operand:MODEF 0 "general_reg_operand")
26236 (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
26237 [(reg FLAGS_REG) (const_int 0)])
26238 (match_operand:MODEF 2 "nonimmediate_operand")
26239 (match_operand:MODEF 3 "nonimmediate_operand")))]
26240 "(<MODE>mode != DFmode || TARGET_64BIT)
26241 && TARGET_80387 && TARGET_CMOVE
26242 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
26243 && (MEM_P (operands[2]) || MEM_P (operands[3]))
26244 && optimize_insn_for_speed_p ()"
26245 [(set (match_dup 4) (match_dup 5))
26246 (set (match_dup 0)
26247 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
26248 {
26249 if (MEM_P (operands[2]))
26250 {
26251 operands[5] = operands[2];
26252 operands[2] = operands[4];
26253 }
26254 else if (MEM_P (operands[3]))
26255 {
26256 operands[5] = operands[3];
26257 operands[3] = operands[4];
26258 }
26259 else
26260 gcc_unreachable ();
26261 })
26262
26263 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
26264 ;; the scalar versions to have only XMM registers as operands.
26265
26266 ;; XOP conditional move
;; Scalar MODEF if_then_else whose selector (operand 1) is itself a
;; MODEF register; all four operands use constraint "x".  Emitted as
;; vpcmov with the "mode" attribute set to TI.  Requires TARGET_XOP.
26267 (define_insn "*xop_pcmov_<mode>"
26268 [(set (match_operand:MODEF 0 "register_operand" "=x")
26269 (if_then_else:MODEF
26270 (match_operand:MODEF 1 "register_operand" "x")
26271 (match_operand:MODEF 2 "register_operand" "x")
26272 (match_operand:MODEF 3 "register_operand" "x")))]
26273 "TARGET_XOP"
26274 "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
26275 [(set_attr "type" "sseadd")
26276 (set_attr "mode" "TI")])
26277
26278 ;; These versions of the min/max patterns are intentionally ignorant of
26279 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
26280 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
26281 ;; are undefined in this condition, we're certain this is correct.
26282
;; Scalar SSE min/max (smaxmin code iterator) for the MODEF modes when
;; the mode is an SSE float mode and TARGET_SSE_MATH is on.  Operand 1
;; carries the "%" commutative marker.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (isa avx) is the three-operand v-prefixed form.
26283 (define_insn "<code><mode>3"
26284 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
26285 (smaxmin:MODEF
26286 (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
26287 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
26288 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
26289 "@
26290 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
26291 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
26292 [(set_attr "isa" "noavx,avx")
26293 (set_attr "prefix" "orig,vex")
26294 (set_attr "type" "sseadd")
26295 (set_attr "mode" "<MODE>")])
26296
;; HFmode min/max (smaxmin code iterator), requires TARGET_AVX512FP16.
;; Operand 1 carries the "%" commutative marker.  Emitted as the
;; three-operand v<maxmin_float>sh instruction.
26297 (define_insn "<code>hf3"
26298 [(set (match_operand:HF 0 "register_operand" "=v")
26299 (smaxmin:HF
26300 (match_operand:HF 1 "nonimmediate_operand" "%v")
26301 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
26302 "TARGET_AVX512FP16"
26303 "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
26304 [(set_attr "prefix" "evex")
26305 (set_attr "type" "sseadd")
26306 (set_attr "mode" "HF")])
26307
26308 ;; These versions of the min/max patterns implement exactly the operations
26309 ;; min = (op1 < op2 ? op1 : op2)
26310 ;; max = (!(op1 < op2) ? op1 : op2)
26311 ;; Their operands are not commutative, and thus they may be used in the
26312 ;; presence of -0.0 and NaN.
26313
;; HFmode min/max expressed as an IEEE_MAXMIN unspec, so the operand
;; order is fixed (operand 1 has no "%" commutative marker).  Requires
;; TARGET_AVX512FP16; emitted as the three-operand v<ieee_maxmin>sh
;; instruction.
26314 (define_insn "*ieee_s<ieee_maxmin>hf3"
26315 [(set (match_operand:HF 0 "register_operand" "=v")
26316 (unspec:HF
26317 [(match_operand:HF 1 "register_operand" "v")
26318 (match_operand:HF 2 "nonimmediate_operand" "vm")]
26319 IEEE_MAXMIN))]
26320 "TARGET_AVX512FP16"
26321 "v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
26322 [(set_attr "prefix" "evex")
26323 (set_attr "type" "sseadd")
26324 (set_attr "mode" "HF")])
26325
;; MODEF min/max expressed as an IEEE_MAXMIN unspec, so the operand order
;; is fixed (operand 1 has no "%" commutative marker).  Enabled when the
;; mode is an SSE float mode and TARGET_SSE_MATH is on.  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx)
;; is the three-operand v-prefixed form.
26326 (define_insn "*ieee_s<ieee_maxmin><mode>3"
26327 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
26328 (unspec:MODEF
26329 [(match_operand:MODEF 1 "register_operand" "0,v")
26330 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
26331 IEEE_MAXMIN))]
26332 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
26333 "@
26334 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
26335 v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
26336 [(set_attr "isa" "noavx,avx")
26337 (set_attr "prefix" "orig,maybe_evex")
26338 (set_attr "type" "sseadd")
26339 (set_attr "mode" "<MODE>")])
26340
26341 ;; Operands order in min/max instruction matters for signed zero and NANs.
;; Pre-reload splitter: recognizes an UNSPEC_BLENDV of (op1, op2, op3 < op4)
;; in which op1 equals op3 and op2 equals op4, and rewrites it as
;; UNSPEC_IEEE_MAX with the operands in the order (op2, op1).  The insn is
;; never output directly ("#"); it always splits ("&& 1").
26342 (define_insn_and_split "*ieee_max<mode>3_1"
26343 [(set (match_operand:MODEF 0 "register_operand")
26344 (unspec:MODEF
26345 [(match_operand:MODEF 1 "register_operand")
26346 (match_operand:MODEF 2 "register_operand")
26347 (lt:MODEF
26348 (match_operand:MODEF 3 "register_operand")
26349 (match_operand:MODEF 4 "register_operand"))]
26350 UNSPEC_BLENDV))]
26351 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
26352 && (rtx_equal_p (operands[1], operands[3])
26353 && rtx_equal_p (operands[2], operands[4]))
26354 && ix86_pre_reload_split ()"
26355 "#"
26356 "&& 1"
26357 [(set (match_dup 0)
26358 (unspec:MODEF
26359 [(match_dup 2)
26360 (match_dup 1)]
26361 UNSPEC_IEEE_MAX))])
26362
;; Counterpart of the max splitter for min: here the blend operands must
;; match the comparison crosswise (op1 equals op4 and op2 equals op3).
;; The result is UNSPEC_IEEE_MIN with the operands in the order (op2, op1).
26363 (define_insn_and_split "*ieee_min<mode>3_1"
26364 [(set (match_operand:MODEF 0 "register_operand")
26365 (unspec:MODEF
26366 [(match_operand:MODEF 1 "register_operand")
26367 (match_operand:MODEF 2 "register_operand")
26368 (lt:MODEF
26369 (match_operand:MODEF 3 "register_operand")
26370 (match_operand:MODEF 4 "register_operand"))]
26371 UNSPEC_BLENDV))]
26372 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
26373 && (rtx_equal_p (operands[1], operands[4])
26374 && rtx_equal_p (operands[2], operands[3]))
26375 && ix86_pre_reload_split ()"
26376 "#"
26377 "&& 1"
26378 [(set (match_dup 0)
26379 (unspec:MODEF
26380 [(match_dup 2)
26381 (match_dup 1)]
26382 UNSPEC_IEEE_MIN))])
26383
26384 ;; Make two stack loads independent:
26385 ;; fld aa fld aa
26386 ;; fld %st(0) -> fld bb
26387 ;; fmul bb fmul %st(1), %st
26388 ;;
26389 ;; Actually we only match the last two instructions for simplicity.
26390
;; Variant with the memory operand second:
;;   op0 = op1; op0 = op0 OP mem   ->   op0 = mem; op0 = op1 OP op0
;; Operands 4 and 5 are aliases of operands 0 and 1 created by the
;; preparation code; they are exchanged for commutative operators.
;; Only applies when op0 and op1 are different registers.
26391 (define_peephole2
26392 [(set (match_operand 0 "fp_register_operand")
26393 (match_operand 1 "fp_register_operand"))
26394 (set (match_dup 0)
26395 (match_operator 2 "binary_fp_operator"
26396 [(match_dup 0)
26397 (match_operand 3 "memory_operand")]))]
26398 "REGNO (operands[0]) != REGNO (operands[1])"
26399 [(set (match_dup 0) (match_dup 3))
26400 (set (match_dup 0)
26401 (match_op_dup 2
26402 [(match_dup 5) (match_dup 4)]))]
26403 {
26404 operands[4] = operands[0];
26405 operands[5] = operands[1];
26406
26407 /* The % modifier is not operational anymore in peephole2's, so we have to
26408 swap the operands manually in the case of addition and multiplication. */
26409 if (COMMUTATIVE_ARITH_P (operands[2]))
26410 std::swap (operands[4], operands[5]);
26411 })
26412
;; Variant with the memory operand first:
;;   op0 = op1; op0 = mem OP op0   ->   op0 = mem; op0 = op0 OP op1
;; Operands 4 and 5 are aliases of operands 0 and 1 created by the
;; preparation code; they are exchanged for commutative operators.
;; Only applies when op0 and op1 are different registers.
26413 (define_peephole2
26414 [(set (match_operand 0 "fp_register_operand")
26415 (match_operand 1 "fp_register_operand"))
26416 (set (match_dup 0)
26417 (match_operator 2 "binary_fp_operator"
26418 [(match_operand 3 "memory_operand")
26419 (match_dup 0)]))]
26420 "REGNO (operands[0]) != REGNO (operands[1])"
26421 [(set (match_dup 0) (match_dup 3))
26422 (set (match_dup 0)
26423 (match_op_dup 2
26424 [(match_dup 4) (match_dup 5)]))]
26425 {
26426 operands[4] = operands[0];
26427 operands[5] = operands[1];
26428
26429 /* The % modifier is not operational anymore in peephole2's, so we have to
26430 swap the operands manually in the case of addition and multiplication. */
26431 if (COMMUTATIVE_ARITH_P (operands[2]))
26432 std::swap (operands[4], operands[5]);
26433 })
26434
26435 ;; Conditional addition patterns
;; Expander for conditional addition in the SWI modes.  Operand 1 is the
;; comparison, operand 2 a register and operand 3 a constant integer.
;; All work is delegated to ix86_expand_int_addcc: the expansion is DONE
;; when it returns nonzero and FAILs otherwise.
26436 (define_expand "add<mode>cc"
26437 [(match_operand:SWI 0 "register_operand")
26438 (match_operand 1 "ordered_comparison_operator")
26439 (match_operand:SWI 2 "register_operand")
26440 (match_operand:SWI 3 "const_int_operand")]
26441 ""
26442 "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
26443
26444 ;; min/max patterns
26445
;; Maps each integer max/min code to the rtx comparison code that the
;; splitters below use as "op1 REL op2 ? op1 : op2".
26446 (define_code_attr maxmin_rel
26447 [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
26448
;; Named expander for integer max/min in the SDWIM modes.  It has no
;; preparation code and simply produces the (maxmin ...) set together with
;; a flags clobber.  Requires TARGET_CMOVE; QImode is excluded when
;; TARGET_PARTIAL_REG_STALL is set.
26449 (define_expand "<code><mode>3"
26450 [(parallel
26451 [(set (match_operand:SDWIM 0 "register_operand")
26452 (maxmin:SDWIM
26453 (match_operand:SDWIM 1 "register_operand")
26454 (match_operand:SDWIM 2 "general_operand")))
26455 (clobber (reg:CC FLAGS_REG))])]
26456 "TARGET_CMOVE
26457 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)")
26458
;; Pre-reload splitter for double-word (<DWI>) max/min.  The result is two
;; DWIH-mode conditional moves (operands 0/1/2 and operands 3/4/5) that
;; share the condition built in operand 6.  The preparation code:
;;   - forces operand 2 into a register and splits operands 0-2 with
;;     split_double_mode, filling operands[0..2] and operands[3..5];
;;   - for LE/LEU exchanges both compare pairs, swaps the condition and
;;     falls through to the GE/GEU handling;
;;   - emits gen_cmp_1 on the cmplo pair, then a subtract-with-carry of the
;;     cmphi pair whose destination is a SCRATCH;
;;   - builds the condition on FLAGS_REG in CCCmode for GEU and in
;;     CCGZmode otherwise.
;; Any other code reaching the switch is a bug (gcc_unreachable).
26459 (define_insn_and_split "*<code><dwi>3_doubleword"
26460 [(set (match_operand:<DWI> 0 "register_operand")
26461 (maxmin:<DWI>
26462 (match_operand:<DWI> 1 "register_operand")
26463 (match_operand:<DWI> 2 "general_operand")))
26464 (clobber (reg:CC FLAGS_REG))]
26465 "TARGET_CMOVE
26466 && ix86_pre_reload_split ()"
26467 "#"
26468 "&& 1"
26469 [(set (match_dup 0)
26470 (if_then_else:DWIH (match_dup 6)
26471 (match_dup 1)
26472 (match_dup 2)))
26473 (set (match_dup 3)
26474 (if_then_else:DWIH (match_dup 6)
26475 (match_dup 4)
26476 (match_dup 5)))]
26477 {
26478 operands[2] = force_reg (<DWI>mode, operands[2]);
26479
26480 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
26481
26482 rtx cmplo[2] = { operands[1], operands[2] };
26483 rtx cmphi[2] = { operands[4], operands[5] };
26484
26485 enum rtx_code code = <maxmin_rel>;
26486
26487 switch (code)
26488 {
26489 case LE: case LEU:
26490 std::swap (cmplo[0], cmplo[1]);
26491 std::swap (cmphi[0], cmphi[1]);
26492 code = swap_condition (code);
26493 /* FALLTHRU */
26494
26495 case GE: case GEU:
26496 {
26497 bool uns = (code == GEU);
26498 rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
26499 = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
26500
26501 emit_insn (gen_cmp_1 (<MODE>mode, cmplo[0], cmplo[1]));
26502
26503 rtx tmp = gen_rtx_SCRATCH (<MODE>mode);
26504 emit_insn (sbb_insn (<MODE>mode, tmp, cmphi[0], cmphi[1]));
26505
26506 rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
26507 operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
26508
26509 break;
26510 }
26511
26512 default:
26513 gcc_unreachable ();
26514 }
26515 })
26516
;; Pre-reload splitter for single-word (SWI) integer max/min.  It emits a
;; flags-setting compare and leaves a single conditional move
;; (if_then_else on operand 3) selecting operand 1 or operand 2.
;; Operand 2 is always forced into a register for the conditional move,
;; but the compare uses the original constant where that is cheaper:
;;   - operand 2 == 1:  compare against 0 (GE becomes GT, GEU becomes NE);
;;   - operand 2 == -1: compare against 0 for smin (LT) and smax (GE);
;;   - operand 2 == 0:  compare against the constant 0 itself;
;;   - anything else:   compare against the forced register.
;; The flags mode is chosen by SELECT_CC_MODE for the final comparison.
26517 (define_insn_and_split "*<code><mode>3_1"
26518 [(set (match_operand:SWI 0 "register_operand")
26519 (maxmin:SWI
26520 (match_operand:SWI 1 "register_operand")
26521 (match_operand:SWI 2 "general_operand")))
26522 (clobber (reg:CC FLAGS_REG))]
26523 "TARGET_CMOVE
26524 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
26525 && ix86_pre_reload_split ()"
26526 "#"
26527 "&& 1"
26528 [(set (match_dup 0)
26529 (if_then_else:SWI (match_dup 3)
26530 (match_dup 1)
26531 (match_dup 2)))]
26532 {
26533 machine_mode mode = <MODE>mode;
26534 rtx cmp_op = operands[2];
26535
26536 operands[2] = force_reg (mode, cmp_op);
26537
26538 enum rtx_code code = <maxmin_rel>;
26539
26540 if (cmp_op == const1_rtx)
26541 {
26542 /* Convert smax (x, 1) into (x > 0 ? x : 1).
26543 Convert umax (x, 1) into (x != 0 ? x : 1).
26544 Convert ?min (x, 1) into (x <= 0 ? x : 1). */
26545 cmp_op = const0_rtx;
26546 if (code == GE)
26547 code = GT;
26548 else if (code == GEU)
26549 code = NE;
26550 }
26551 /* Convert smin (x, -1) into (x < 0 ? x : -1). */
26552 else if (cmp_op == constm1_rtx && code == LE)
26553 {
26554 cmp_op = const0_rtx;
26555 code = LT;
26556 }
26557 /* Convert smax (x, -1) into (x >= 0 ? x : -1). */
26558 else if (cmp_op == constm1_rtx && code == GE)
26559 cmp_op = const0_rtx;
26560 else if (cmp_op != const0_rtx)
26561 cmp_op = operands[2];
26562
26563 machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
26564 rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
26565
26566 rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);
26567 emit_insn (gen_rtx_SET (flags, tmp));
26568
26569 operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
26570 })
26571
26572 ;; Avoid clearing a register between a flags setting comparison and its use,
26573 ;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
;; Matches a flags set (operand 0 is its source) followed by "reg = 0".
;; Requires peep2_regno_dead_p (0, FLAGS_REG) and that the cleared register
;; is not mentioned in the flags source.  The preparation code emits the
;; clear through ix86_expand_clear; the replacement pattern then re-emits
;; the flags set on a FLAGS_REG rtx built in the mode of operand 0.
26574 (define_peephole2
26575 [(set (reg FLAGS_REG) (match_operand 0))
26576 (set (match_operand:SWI 1 "general_reg_operand") (const_int 0))]
26577 "peep2_regno_dead_p (0, FLAGS_REG)
26578 && !reg_overlap_mentioned_p (operands[1], operands[0])"
26579 [(set (match_dup 2) (match_dup 0))]
26580 {
26581 operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
26582 ix86_expand_clear (operands[1]);
26583 })
26584
26585 ;; When optimizing for size, zeroing memory should use a register.
;; Four consecutive stores of constant 0 to memory, with a scratch register
;; available: clear the scratch with ix86_expand_clear and store it four
;; times instead.  Only when optimizing the insn for size and when
;; peep2_regno_dead_p (0, FLAGS_REG) holds.  The UID of the last emitted
;; store is recorded in ix86_last_zero_store_uid so that the continuation
;; peepholes further down can recognize it.
26586 (define_peephole2
26587 [(match_scratch:SWI48 0 "r")
26588 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
26589 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
26590 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
26591 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
26592 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
26593 [(const_int 0)]
26594 {
26595 ix86_expand_clear (operands[0]);
26596 emit_move_insn (operands[1], operands[0]);
26597 emit_move_insn (operands[2], operands[0]);
26598 emit_move_insn (operands[3], operands[0]);
26599 ix86_last_zero_store_uid
26600 = INSN_UID (emit_move_insn (operands[4], operands[0]));
26601 DONE;
26602 })
26603
;; Two-store version of the zero-store peephole: clear the scratch, store
;; it to both memory operands, and record the UID of the last store in
;; ix86_last_zero_store_uid.
26604 (define_peephole2
26605 [(match_scratch:SWI48 0 "r")
26606 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
26607 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
26608 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
26609 [(const_int 0)]
26610 {
26611 ix86_expand_clear (operands[0]);
26612 emit_move_insn (operands[1], operands[0]);
26613 ix86_last_zero_store_uid
26614 = INSN_UID (emit_move_insn (operands[2], operands[0]));
26615 DONE;
26616 })
26617
;; Single-store version of the zero-store peephole: clear the scratch,
;; store it to the memory operand, and record that store's UID in
;; ix86_last_zero_store_uid.
26618 (define_peephole2
26619 [(match_scratch:SWI48 0 "r")
26620 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
26621 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
26622 [(const_int 0)]
26623 {
26624 ix86_expand_clear (operands[0]);
26625 ix86_last_zero_store_uid
26626 = INSN_UID (emit_move_insn (operands[1], operands[0]));
26627 DONE;
26628 })
26629
;; Continuation of a zero-store run.  The first insn of the window is a
;; register-to-memory store whose UID equals ix86_last_zero_store_uid, i.e.
;; the final store emitted by one of the zero-store peepholes.  The four
;; const-0 stores that follow are rewritten to store that same register
;; (operand 0); the leading store is re-emitted unchanged.  The UID of the
;; new last store is recorded again so that the run can keep growing.
26630 (define_peephole2
26631 [(set (match_operand:SWI48 5 "memory_operand")
26632 (match_operand:SWI48 0 "general_reg_operand"))
26633 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
26634 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
26635 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
26636 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
26637 "optimize_insn_for_size_p ()
26638 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
26639 [(const_int 0)]
26640 {
26641 emit_move_insn (operands[5], operands[0]);
26642 emit_move_insn (operands[1], operands[0]);
26643 emit_move_insn (operands[2], operands[0]);
26644 emit_move_insn (operands[3], operands[0]);
26645 ix86_last_zero_store_uid
26646 = INSN_UID (emit_move_insn (operands[4], operands[0]));
26647 DONE;
26648 })
26649
;; Continuation of a zero-store run, two following stores.  The leading
;; register store (operand 3) must be the insn recorded in
;; ix86_last_zero_store_uid; the two const-0 stores then reuse operand 0
;; and the UID of the new last store is recorded.
26650 (define_peephole2
26651 [(set (match_operand:SWI48 3 "memory_operand")
26652 (match_operand:SWI48 0 "general_reg_operand"))
26653 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
26654 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
26655 "optimize_insn_for_size_p ()
26656 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
26657 [(const_int 0)]
26658 {
26659 emit_move_insn (operands[3], operands[0]);
26660 emit_move_insn (operands[1], operands[0]);
26661 ix86_last_zero_store_uid
26662 = INSN_UID (emit_move_insn (operands[2], operands[0]));
26663 DONE;
26664 })
26665
;; Continuation of a zero-store run, one following store.  The leading
;; register store (operand 2) must be the insn recorded in
;; ix86_last_zero_store_uid; the const-0 store then reuses operand 0 and
;; its UID becomes the new ix86_last_zero_store_uid.
26666 (define_peephole2
26667 [(set (match_operand:SWI48 2 "memory_operand")
26668 (match_operand:SWI48 0 "general_reg_operand"))
26669 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
26670 "optimize_insn_for_size_p ()
26671 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
26672 [(const_int 0)]
26673 {
26674 emit_move_insn (operands[2], operands[0]);
26675 ix86_last_zero_store_uid
26676 = INSN_UID (emit_move_insn (operands[1], operands[0]));
26677 DONE;
26678 })
26679
26680 ;; Reload dislikes loading constants directly into class_likely_spilled
26681 ;; hard registers. Try to tidy things up here.
;; Matches "op0 = op1; op2 = op0" and replaces it with "op2 = op1".
;; Requires peep2_reg_dead_p (2, operands[0]), i.e. the intermediate
;; register is not needed after the second insn.
26682 (define_peephole2
26683 [(set (match_operand:SWI 0 "general_reg_operand")
26684 (match_operand:SWI 1 "x86_64_general_operand"))
26685 (set (match_operand:SWI 2 "general_reg_operand")
26686 (match_dup 0))]
26687 "peep2_reg_dead_p (2, operands[0])"
26688 [(set (match_dup 2) (match_dup 1))])
26689 \f
26690 ;; Misc patterns (?)
26691
26692 ;; This pattern exists to put a dependency on all ebp-based memory accesses.
26693 ;; Otherwise there will be nothing to keep
26694 ;;
26695 ;; [(set (reg ebp) (reg esp))]
26696 ;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
26697 ;; (clobber (eflags))]
26698 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
26699 ;;
26700 ;; in proper program order.
26701
;; op0 = op1 + op2 in Pmode, carrying a flags clobber and a clobber of
;; (mem:BLK (scratch)).  The "type" attribute selects the output:
;;   - "alu":  alternative 0 (op1 tied to op0) unless TARGET_OPT_AGU.
;;             Emits add, or sub when x86_maybe_negate_const_int negated
;;             the constant; asserts that op0 and op1 are the same.
;;   - "imov": operand 2 is const0; emits a plain mov of op1 into op0.
;;   - "lea":  everything else; operand 2 is replaced by the SET_SRC of the
;;             first element of the insn's PARALLEL and printed with %E.
;; length_immediate is 0 for imov and 1 for alu with a const128_operand
;; as operand 2; otherwise the default ("*") applies.
26702 (define_insn "@pro_epilogue_adjust_stack_add_<mode>"
26703 [(set (match_operand:P 0 "register_operand" "=r,r")
26704 (plus:P (match_operand:P 1 "register_operand" "0,r")
26705 (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
26706 (clobber (reg:CC FLAGS_REG))
26707 (clobber (mem:BLK (scratch)))]
26708 ""
26709 {
26710 switch (get_attr_type (insn))
26711 {
26712 case TYPE_IMOV:
26713 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
26714
26715 case TYPE_ALU:
26716 gcc_assert (rtx_equal_p (operands[0], operands[1]));
26717 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
26718 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
26719
26720 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
26721
26722 default:
26723 operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
26724 return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
26725 }
26726 }
26727 [(set (attr "type")
26728 (cond [(and (eq_attr "alternative" "0")
26729 (not (match_test "TARGET_OPT_AGU")))
26730 (const_string "alu")
26731 (match_operand:<MODE> 2 "const0_operand")
26732 (const_string "imov")
26733 ]
26734 (const_string "lea")))
26735 (set (attr "length_immediate")
26736 (cond [(eq_attr "type" "imov")
26737 (const_string "0")
26738 (and (eq_attr "type" "alu")
26739 (match_operand 2 "const128_operand"))
26740 (const_string "1")
26741 ]
26742 (const_string "*")))
26743 (set_attr "mode" "<MODE>")])
26744
;; op0 = op1 - op2 in Pmode with op1 tied to op0 and op2 in a register.
;; Like the add variant it clobbers the flags and (mem:BLK (scratch)).
;; Always emitted as a two-operand sub.
26745 (define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
26746 [(set (match_operand:P 0 "register_operand" "=r")
26747 (minus:P (match_operand:P 1 "register_operand" "0")
26748 (match_operand:P 2 "register_operand" "r")))
26749 (clobber (reg:CC FLAGS_REG))
26750 (clobber (mem:BLK (scratch)))]
26751 ""
26752 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
26753 [(set_attr "type" "alu")
26754 (set_attr "mode" "<MODE>")])
26755
;; Stack probe worker, available when ix86_target_stack_probe () is true.
;; Modelled as an UNSPECV_STACK_PROBE on operand 1, which is tied to
;; operand 0 under the "a" constraint, plus a flags clobber.  Output is a
;; call to ___chkstk_ms; the length attribute is fixed at 5 bytes.
26756 (define_insn "@allocate_stack_worker_probe_<mode>"
26757 [(set (match_operand:P 0 "register_operand" "=a")
26758 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
26759 UNSPECV_STACK_PROBE))
26760 (clobber (reg:CC FLAGS_REG))]
26761 "ix86_target_stack_probe ()"
26762 "call\t___chkstk_ms"
26763 [(set_attr "type" "multi")
26764 (set_attr "length" "5")])
26765
;; Dynamic stack allocation expander, used when ix86_target_stack_probe ()
;; is true.  Operand 1 is the size, operand 0 receives the address of the
;; new area (virtual_stack_dynamic_rtx).
;; If CHECK_STACK_LIMIT is nonzero and the size is a constant below it, the
;; size is used directly with no probe.  Otherwise the size is copied into
;; a Pmode register and passed through allocate_stack_worker_probe.
;; In both cases the size is then subtracted from the stack pointer.
;; CHECK_STACK_LIMIT defaults to 0 here when it is not already defined.
26766 (define_expand "allocate_stack"
26767 [(match_operand 0 "register_operand")
26768 (match_operand 1 "general_operand")]
26769 "ix86_target_stack_probe ()"
26770 {
26771 rtx x;
26772
26773 #ifndef CHECK_STACK_LIMIT
26774 #define CHECK_STACK_LIMIT 0
26775 #endif
26776
26777 if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
26778 && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
26779 x = operands[1];
26780 else
26781 {
26782 x = copy_to_mode_reg (Pmode, operands[1]);
26783
26784 emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
26785 }
26786
26787 x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
26788 stack_pointer_rtx, 0, OPTAB_DIRECT);
26789
26790 if (x != stack_pointer_rtx)
26791 emit_move_insn (stack_pointer_rtx, x);
26792
26793 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
26794 DONE;
26795 })
26796
;; Expander for probing a stack location (operand 0, a memory operand).
;; Emits the word_mode probe_stack_1 insn with const0_rtx as the value.
26797 (define_expand "probe_stack"
26798 [(match_operand 0 "memory_operand")]
26799 ""
26800 {
26801 emit_insn (gen_probe_stack_1
26802 (word_mode, operands[0], const0_rtx));
26803 DONE;
26804 })
26805
26806 ;; Use OR for stack probes, this is shorter.
;; The probe itself: an UNSPEC_PROBE_STACK set of the memory operand from a
;; constant-zero operand, output as an "or" of operand 1 into operand 0.
;; Clobbers the flags; length_immediate is fixed at 1.
26807 (define_insn "@probe_stack_1_<mode>"
26808 [(set (match_operand:W 0 "memory_operand" "=m")
26809 (unspec:W [(match_operand:W 1 "const0_operand")]
26810 UNSPEC_PROBE_STACK))
26811 (clobber (reg:CC FLAGS_REG))]
26812 ""
26813 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
26814 [(set_attr "type" "alu1")
26815 (set_attr "mode" "<MODE>")
26816 (set_attr "length_immediate" "1")])
26817
;; Combined stack adjustment and probing.  The pattern updates operand 0
;; (tied to operand 1) through UNSPECV_PROBE_STACK_RANGE, decrements SP_REG
;; by the constant operand 2, and clobbers the flags and (mem:BLK (scratch)).
;; The assembly comes from output_adjust_stack_and_probe (operands[0]).
26818 (define_insn "@adjust_stack_and_probe_<mode>"
26819 [(set (match_operand:P 0 "register_operand" "=r")
26820 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
26821 UNSPECV_PROBE_STACK_RANGE))
26822 (set (reg:P SP_REG)
26823 (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand")))
26824 (clobber (reg:CC FLAGS_REG))
26825 (clobber (mem:BLK (scratch)))]
26826 ""
26827 "* return output_adjust_stack_and_probe (operands[0]);"
26828 [(set_attr "type" "multi")])
26829
;; Probes a range of the stack without adjusting SP_REG in the pattern.
;; Operand 0 is tied to operand 1; operand 2 is a constant integer.
;; The assembly comes from output_probe_stack_range (operands[0],
;; operands[2]).  Clobbers the flags.
26830 (define_insn "@probe_stack_range_<mode>"
26831 [(set (match_operand:P 0 "register_operand" "=r")
26832 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
26833 (match_operand:P 2 "const_int_operand")]
26834 UNSPECV_PROBE_STACK_RANGE))
26835 (clobber (reg:CC FLAGS_REG))]
26836 ""
26837 "* return output_probe_stack_range (operands[0], operands[2]);"
26838 [(set_attr "type" "multi")])
26839
;; Receiver side of __builtin_setjmp, only for 32-bit PIC code
;; (!TARGET_64BIT && flag_pic).  It re-establishes pic_offset_table_rtx:
;;   - normally through gen_set_got;
;;   - on TARGET_MACHO through set_got_labelled on a fresh label, followed
;;     by subtracting machopic_gen_offset of that label from the register.
26840 (define_expand "builtin_setjmp_receiver"
26841 [(label_ref (match_operand 0))]
26842 "!TARGET_64BIT && flag_pic"
26843 {
26844 #if TARGET_MACHO
26845 if (TARGET_MACHO)
26846 {
26847 rtx xops[3];
26848 rtx_code_label *label_rtx = gen_label_rtx ();
26849 emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
26850 xops[0] = xops[1] = pic_offset_table_rtx;
26851 xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
26852 ix86_expand_binary_operator (MINUS, SImode, xops);
26853 }
26854 else
26855 #endif
26856 emit_insn (gen_set_got (pic_offset_table_rtx));
26857 DONE;
26858 })
26859
;; Saves the stack pointer (operand 1) into the nonlocal save area
;; (operand 0).  Without CF_RETURN in flag_cf_protection the value goes to
;; offset 0.  With CF_RETURN the first word receives the shadow stack
;; pointer, read with rdssp into a register preloaded with 0, and the stack
;; pointer is stored at offset UNITS_PER_WORD.
26860 (define_expand "save_stack_nonlocal"
26861 [(set (match_operand 0 "memory_operand")
26862 (match_operand 1 "register_operand"))]
26863 ""
26864 {
26865 rtx stack_slot;
26866
26867 if (flag_cf_protection & CF_RETURN)
26868 {
26869 /* Copy shadow stack pointer to the first slot
26870 and stack pointer to the second slot. */
26871 rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
26872 stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);
26873
26874 rtx reg_ssp = force_reg (word_mode, const0_rtx);
26875 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
26876 emit_move_insn (ssp_slot, reg_ssp);
26877 }
26878 else
26879 stack_slot = adjust_address (operands[0], Pmode, 0);
26880 emit_move_insn (stack_slot, operands[1]);
26881 DONE;
26882 })
26883
;; Restores the stack pointer (operand 0) from the nonlocal save area
;; (operand 1).  Without CF_RETURN in flag_cf_protection this is a plain
;; load from offset 0.  With CF_RETURN the area holds the saved shadow
;; stack pointer in its first word and the stack pointer at UNITS_PER_WORD,
;; and the shadow stack is unwound first:
;;   1. read the current SSP with rdssp and subtract the saved SSP;
;;   2. if the difference is zero, skip to noadj_label;
;;   3. otherwise negate the ptr_mode low part of the difference and shift
;;      it right by log2 (UNITS_PER_WORD) to get a count in reg_adj;
;;   4. while the count is above 255 (unsigned), issue incssp with 255 and
;;      subtract 255 from the count;
;;   5. issue a final incssp and fall into noadj_label.
;; NOTE(review): the final incssp is given reg_ssp, not reg_adj.  reg_adj
;; starts as gen_lowpart (ptr_mode, reg_ssp) and is passed as the target of
;; the shift and subtract, so this appears to rely on those results landing
;; in that target -- confirm before restructuring this code.
26884 (define_expand "restore_stack_nonlocal"
26885 [(set (match_operand 0 "register_operand" "")
26886 (match_operand 1 "memory_operand" ""))]
26887 ""
26888 {
26889 rtx stack_slot;
26890
26891 if (flag_cf_protection & CF_RETURN)
26892 {
26893 /* Restore shadow stack pointer from the first slot
26894 and stack pointer from the second slot. */
26895 rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
26896 stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);
26897
26898 /* Get the current shadow stack pointer. The code below will check if
26899 SHSTK feature is enabled. If it is not enabled the RDSSP instruction
26900 is a NOP. */
26901 rtx reg_ssp = force_reg (word_mode, const0_rtx);
26902 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
26903
26904 /* Compare through subtraction the saved and the current ssp
26905 to decide if ssp has to be adjusted. */
26906 reg_ssp = expand_simple_binop (word_mode, MINUS,
26907 reg_ssp, ssp_slot,
26908 reg_ssp, 1, OPTAB_DIRECT);
26909
26910 /* Compare and jump over adjustment code. */
26911 rtx noadj_label = gen_label_rtx ();
26912 emit_cmp_and_jump_insns (reg_ssp, const0_rtx, EQ, NULL_RTX,
26913 word_mode, 1, noadj_label);
26914
26915 /* Compute the number of frames to adjust. */
26916 rtx reg_adj = gen_lowpart (ptr_mode, reg_ssp);
26917 rtx reg_adj_neg = expand_simple_unop (ptr_mode, NEG, reg_adj,
26918 NULL_RTX, 1);
26919
26920 reg_adj = expand_simple_binop (ptr_mode, LSHIFTRT, reg_adj_neg,
26921 GEN_INT (exact_log2 (UNITS_PER_WORD)),
26922 reg_adj, 1, OPTAB_DIRECT);
26923
26924 /* Check if number of frames <= 255 so no loop is needed. */
26925 rtx inc_label = gen_label_rtx ();
26926 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), LEU, NULL_RTX,
26927 ptr_mode, 1, inc_label);
26928
26929 /* Adjust the ssp in a loop. */
26930 rtx loop_label = gen_label_rtx ();
26931 emit_label (loop_label);
26932 LABEL_NUSES (loop_label) = 1;
26933
26934 rtx reg_255 = force_reg (word_mode, GEN_INT (255));
26935 emit_insn (gen_incssp (word_mode, reg_255));
26936
26937 reg_adj = expand_simple_binop (ptr_mode, MINUS,
26938 reg_adj, GEN_INT (255),
26939 reg_adj, 1, OPTAB_DIRECT);
26940
26941 /* Compare and jump to the loop label. */
26942 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), GTU, NULL_RTX,
26943 ptr_mode, 1, loop_label);
26944
26945 emit_label (inc_label);
26946 LABEL_NUSES (inc_label) = 1;
26947
26948 emit_insn (gen_incssp (word_mode, reg_ssp));
26949
26950 emit_label (noadj_label);
26951 LABEL_NUSES (noadj_label) = 1;
26952 }
26953 else
26954 stack_slot = adjust_address (operands[1], Pmode, 0);
26955 emit_move_insn (operands[0], stack_slot);
26956 DONE;
26957 })
26958
;; Expander for copying the stack protector guard (operand 1) into its
;; stack slot (operand 0).  Allocates a fresh word_mode pseudo as the
;; scratch and emits stack_protect_set_1 for (ptr_mode, word_mode).
26959 (define_expand "stack_protect_set"
26960 [(match_operand 0 "memory_operand")
26961 (match_operand 1 "memory_operand")]
26962 ""
26963 {
26964 rtx scratch = gen_reg_rtx (word_mode);
26965
26966 emit_insn (gen_stack_protect_set_1
26967 (ptr_mode, word_mode, operands[0], operands[1], scratch));
26968 DONE;
26969 })
26970
;; Memory-to-memory copy of the guard value through scratch register
;; operand 2, emitted as one insn: load operand 1 into the scratch, store
;; the scratch to operand 0, then zero the scratch.  The zeroing uses
;; xor unless TARGET_USE_MOV0 is set and the insn is not optimized for
;; size, in which case "mov $0" is used.  The scratch is early-clobber
;; ("=&r") and the pattern also clobbers the flags.
26971 (define_insn "@stack_protect_set_1_<PTR:mode>_<W:mode>"
26972 [(set (match_operand:PTR 0 "memory_operand" "=m")
26973 (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
26974 UNSPEC_SP_SET))
26975 (set (match_operand:W 2 "register_operand" "=&r") (const_int 0))
26976 (clobber (reg:CC FLAGS_REG))]
26977 ""
26978 {
26979 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%1, %<PTR:k>2|%<PTR:k>2, %1}",
26980 operands);
26981 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>2, %0|%0, %<PTR:k>2}",
26982 operands);
26983 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
26984 return "xor{l}\t%k2, %k2";
26985 else
26986 return "mov{l}\t{$0, %k2|%k2, 0}";
26987 }
26988 [(set_attr "type" "multi")])
26989
26990 ;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
26991 ;; immediately followed by *mov{s,d}i_internal, where we can avoid
26992 ;; the xor{l} above. We don't split this, so that scheduling or
26993 ;; anything else doesn't separate the *stack_protect_set* pattern from
26994 ;; the set of the register that overwrites the register with a new value.
26995
;; stack_protect_set followed by zeroing a different register (operand 3).
;; The separate zeroing insn is dropped and operand 3, converted to
;; word_mode with gen_lowpart, takes the place of the scratch, so the
;; combined insn leaves it zeroed.  Requires that the original scratch is
;; word_mode, that operand 3 is no wider than a word, and the two
;; peep2_reg_dead_p conditions on operand 3 (position 0) and operand 2
;; (position 1).
26996 (define_peephole2
26997 [(parallel [(set (match_operand:PTR 0 "memory_operand")
26998 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
26999 UNSPEC_SP_SET))
27000 (set (match_operand 2 "general_reg_operand") (const_int 0))
27001 (clobber (reg:CC FLAGS_REG))])
27002 (set (match_operand 3 "general_reg_operand")
27003 (match_operand 4 "const0_operand"))]
27004 "GET_MODE (operands[2]) == word_mode
27005 && GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD
27006 && peep2_reg_dead_p (0, operands[3])
27007 && peep2_reg_dead_p (1, operands[2])"
27008 [(parallel [(set (match_dup 0)
27009 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
27010 (set (match_dup 3) (const_int 0))
27011 (clobber (reg:CC FLAGS_REG))])]
27012 "operands[3] = gen_lowpart (word_mode, operands[3]);")
27013
;; Guard copy fused with an SImode register load, valid after reload.
;; Register operand 1 first serves as the scratch for the copy of operand 3
;; to operand 0 and is then loaded with operand 2.  The final load is an
;; lea when operand 2 is a pic_32bit_operand or ix86_use_lea_for_mov says
;; so, and a mov otherwise.  Unlike stack_protect_set_1 there is no flags
;; clobber in the pattern.
27014 (define_insn "*stack_protect_set_2_<mode>_si"
27015 [(set (match_operand:PTR 0 "memory_operand" "=m")
27016 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
27017 UNSPEC_SP_SET))
27018 (set (match_operand:SI 1 "register_operand" "=&r")
27019 (match_operand:SI 2 "general_operand" "g"))]
27020 "reload_completed"
27021 {
27022 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
27023 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
27024 if (pic_32bit_operand (operands[2], SImode)
27025 || ix86_use_lea_for_mov (insn, operands + 1))
27026 return "lea{l}\t{%E2, %1|%1, %E2}";
27027 else
27028 return "mov{l}\t{%2, %1|%1, %2}";
27029 }
27030 [(set_attr "type" "multi")
27031 (set_attr "length" "24")])
27032
;; DImode counterpart of the fused guard copy, for TARGET_64BIT after
;; reload.  After the copy through operand 1, the final load is chosen in
;; this order:
;;   - lea{q} if operand 2 is a pic_32bit_operand;
;;   - alternative 0 ("Z"):  32-bit mov on the %k forms of the operands;
;;   - alternative 2 ("i"):  movabs{q};
;;   - lea{q} if ix86_use_lea_for_mov says so;
;;   - otherwise a plain mov{q}.
27033 (define_insn "*stack_protect_set_2_<mode>_di"
27034 [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
27035 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
27036 UNSPEC_SP_SET))
27037 (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
27038 (match_operand:DI 2 "general_operand" "Z,rem,i"))]
27039 "TARGET_64BIT && reload_completed"
27040 {
27041 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
27042 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
27043 if (pic_32bit_operand (operands[2], DImode))
27044 return "lea{q}\t{%E2, %1|%1, %E2}";
27045 else if (which_alternative == 0)
27046 return "mov{l}\t{%k2, %k1|%k1, %k2}";
27047 else if (which_alternative == 2)
27048 return "movabs{q}\t{%2, %1|%1, %2}";
27049 else if (ix86_use_lea_for_mov (insn, operands + 1))
27050 return "lea{q}\t{%E2, %1|%1, %E2}";
27051 else
27052 return "mov{q}\t{%2, %1|%1, %2}";
27053 }
27054 [(set_attr "type" "multi")
27055 (set_attr "length" "24")])
27056
;; stack_protect_set followed by a load of register operand 3 from a
;; general_gr_operand.  Both insns are merged into one parallel of the
;; guard copy plus "op3 = op4", with no zeroing set and no flags clobber.
;; The result has the shape of the *stack_protect_set_2 patterns.
;; Requires a word_mode scratch and the peep2_reg_dead_p conditions on
;; operand 3 (position 0) and operand 2 (position 1).
27057 (define_peephole2
27058 [(parallel [(set (match_operand:PTR 0 "memory_operand")
27059 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
27060 UNSPEC_SP_SET))
27061 (set (match_operand 2 "general_reg_operand") (const_int 0))
27062 (clobber (reg:CC FLAGS_REG))])
27063 (set (match_operand:SWI48 3 "general_reg_operand")
27064 (match_operand:SWI48 4 "general_gr_operand"))]
27065 "GET_MODE (operands[2]) == word_mode
27066 && peep2_reg_dead_p (0, operands[3])
27067 && peep2_reg_dead_p (1, operands[2])"
27068 [(parallel [(set (match_dup 0)
27069 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
27070 (set (match_dup 3) (match_dup 4))])])
27071
;; Same merge as the previous peephole, but with the register load coming
;; before the stack_protect_set.  Because the load is effectively moved
;; after the guard copy, operand 3 must additionally not be mentioned in
;; either guard memory operand (0 or 1).  The scratch deadness is checked
;; at position 2 here.
27072 (define_peephole2
27073 [(set (match_operand:SWI48 3 "general_reg_operand")
27074 (match_operand:SWI48 4 "general_gr_operand"))
27075 (parallel [(set (match_operand:PTR 0 "memory_operand")
27076 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
27077 UNSPEC_SP_SET))
27078 (set (match_operand 2 "general_reg_operand") (const_int 0))
27079 (clobber (reg:CC FLAGS_REG))])]
27080 "GET_MODE (operands[2]) == word_mode
27081 && peep2_reg_dead_p (0, operands[3])
27082 && peep2_reg_dead_p (2, operands[2])
27083 && !reg_mentioned_p (operands[3], operands[0])
27084 && !reg_mentioned_p (operands[3], operands[1])"
27085 [(parallel [(set (match_dup 0)
27086 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
27087 (set (match_dup 3) (match_dup 4))])])
27088
;; Guard copy fused with an address computation.  Operand 2 is an
;; address_no_seg_operand ("Ts"), so the final instruction is always an
;; lea.  When operand 2 is an SImode_address_operand the 32-bit lea on the
;; %k form of operand 1 is used; that case asserts TARGET_64BIT.
27089 (define_insn "*stack_protect_set_3_<PTR:mode>_<SWI48:mode>"
27090 [(set (match_operand:PTR 0 "memory_operand" "=m")
27091 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
27092 UNSPEC_SP_SET))
27093 (set (match_operand:SWI48 1 "register_operand" "=&r")
27094 (match_operand:SWI48 2 "address_no_seg_operand" "Ts"))]
27095 ""
27096 {
27097 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%3, %<PTR:k>1|%<PTR:k>1, %3}",
27098 operands);
27099 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>1, %0|%0, %<PTR:k>1}",
27100 operands);
27101 if (SImode_address_operand (operands[2], VOIDmode))
27102 {
27103 gcc_assert (TARGET_64BIT);
27104 return "lea{l}\t{%E2, %k1|%k1, %E2}";
27105 }
27106 else
27107 return "lea{<SWI48:imodesuffix>}\t{%E2, %1|%1, %E2}";
27108 }
27109 [(set_attr "type" "multi")
27110 (set_attr "length" "24")])
27111
;; stack_protect_set followed by loading register operand 3 with an
;; address (address_no_seg_operand).  Merged into the guard copy plus
;; "op3 = op4", the shape matched by *stack_protect_set_3.  Same
;; conditions as the general-operand peephole: word_mode scratch and the
;; peep2_reg_dead_p checks on operand 3 (position 0) and operand 2
;; (position 1).
27112 (define_peephole2
27113 [(parallel [(set (match_operand:PTR 0 "memory_operand")
27114 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
27115 UNSPEC_SP_SET))
27116 (set (match_operand 2 "general_reg_operand") (const_int 0))
27117 (clobber (reg:CC FLAGS_REG))])
27118 (set (match_operand:SWI48 3 "general_reg_operand")
27119 (match_operand:SWI48 4 "address_no_seg_operand"))]
27120 "GET_MODE (operands[2]) == word_mode
27121 && peep2_reg_dead_p (0, operands[3])
27122 && peep2_reg_dead_p (1, operands[2])"
27123 [(parallel [(set (match_dup 0)
27124 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
27125 (set (match_dup 3) (match_dup 4))])])
27126
;; Guard copy fused with a zero-extending SImode-to-DImode load, for
;; TARGET_64BIT after reload.  The extension is done by a 32-bit
;; instruction writing the %k form of operand 1: lea{l} when
;; ix86_use_lea_for_mov says so, mov{l} otherwise.
27127 (define_insn "*stack_protect_set_4z_<mode>_di"
27128 [(set (match_operand:PTR 0 "memory_operand" "=m")
27129 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
27130 UNSPEC_SP_SET))
27131 (set (match_operand:DI 1 "register_operand" "=&r")
27132 (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
27133 "TARGET_64BIT && reload_completed"
27134 {
27135 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
27136 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
27137 if (ix86_use_lea_for_mov (insn, operands + 1))
27138 return "lea{l}\t{%E2, %k1|%k1, %E2}";
27139 else
27140 return "mov{l}\t{%2, %k1|%k1, %2}";
27141 }
27142 [(set_attr "type" "multi")
27143 (set_attr "length" "24")])
27144
;; Guard copy fused with a sign-extending SImode-to-DImode load, for
;; TARGET_64BIT after reload.  The final instruction is always the
;; sign-extending move (movslq in AT&T syntax, movsx in Intel syntax).
27145 (define_insn "*stack_protect_set_4s_<mode>_di"
27146 [(set (match_operand:PTR 0 "memory_operand" "=m")
27147 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
27148 UNSPEC_SP_SET))
27149 (set (match_operand:DI 1 "register_operand" "=&r")
27150 (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
27151 "TARGET_64BIT && reload_completed"
27152 {
27153 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
27154 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
27155 return "movs{lq|x}\t{%2, %1|%1, %2}";
27156 }
27157 [(set_attr "type" "multi")
27158 (set_attr "length" "24")])
27159
;; stack_protect_set followed by a zero- or sign-extending load of DImode
;; register operand 3 from an SImode operand.  Merged into the guard copy
;; plus the extension, the shape matched by the *stack_protect_set_4z/4s
;; patterns.  TARGET_64BIT only; otherwise the same conditions as the
;; other merging peepholes.
27160 (define_peephole2
27161 [(parallel [(set (match_operand:PTR 0 "memory_operand")
27162 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
27163 UNSPEC_SP_SET))
27164 (set (match_operand 2 "general_reg_operand") (const_int 0))
27165 (clobber (reg:CC FLAGS_REG))])
27166 (set (match_operand:DI 3 "general_reg_operand")
27167 (any_extend:DI
27168 (match_operand:SI 4 "nonimmediate_gr_operand")))]
27169 "TARGET_64BIT
27170 && GET_MODE (operands[2]) == word_mode
27171 && peep2_reg_dead_p (0, operands[3])
27172 && peep2_reg_dead_p (1, operands[2])"
27173 [(parallel [(set (match_dup 0)
27174 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
27175 (set (match_dup 3)
27176 (any_extend:DI (match_dup 4)))])])
27177
;; Expander for checking the stack protector guard.  Operands 0 and 1 are
;; the two memory locations to compare and operand 2 is the branch target.
;; Emits stack_protect_test_1 to set FLAGS_REG in CCZmode, then a
;; conditional jump to operand 2 taken on EQ.
27178 (define_expand "stack_protect_test"
27179 [(match_operand 0 "memory_operand")
27180 (match_operand 1 "memory_operand")
27181 (match_operand 2)]
27182 ""
27183 {
27184 rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
27185
27186 emit_insn (gen_stack_protect_test_1
27187 (ptr_mode, flags, operands[0], operands[1]));
27188
27189 emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
27190 flags, const0_rtx, operands[2]));
27191 DONE;
27192 })
27193
;; The guard comparison itself, modelled as an UNSPEC_SP_TEST that sets the
;; flags in CCZmode.  Loads operand 1 into the early-clobber scratch
;; (operand 3) and subtracts operand 2 from it, so the flags reflect
;; whether the two memory values are equal.
27194 (define_insn "@stack_protect_test_1_<mode>"
27195 [(set (match_operand:CCZ 0 "flags_reg_operand")
27196 (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
27197 (match_operand:PTR 2 "memory_operand" "m")]
27198 UNSPEC_SP_TEST))
27199 (clobber (match_scratch:PTR 3 "=&r"))]
27200 ""
27201 {
27202 output_asm_insn ("mov{<imodesuffix>}\t{%1, %3|%3, %1}", operands);
27203 return "sub{<imodesuffix>}\t{%2, %3|%3, %2}";
27204 }
27205 [(set_attr "type" "multi")])
27206
27207 ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
27208 ;; Do not split instructions with mask registers.
;; Post-reload split that re-expresses a promotable binary operation on
;; general registers in SImode.  Applies when !TARGET_PARTIAL_REG_STALL and:
;;   - the destination is HImode and either the function is optimized for
;;     speed without TARGET_FAST_PREFIX, or operand 2 is not a constant, or
;;     it satisfies constraint K; or
;;   - the destination is QImode and TARGET_PROMOTE_QImode is set or the
;;     function is optimized for size.
;; Operands 0 and 1 are replaced by their SImode low parts.  Operand 2 is
;; converted too, except for ASHIFT.  The operator rtx is shallow-copied
;; before its mode is changed, so the original rtx is left untouched.
27209 (define_split
27210 [(set (match_operand 0 "general_reg_operand")
27211 (match_operator 3 "promotable_binary_operator"
27212 [(match_operand 1 "general_reg_operand")
27213 (match_operand 2 "aligned_operand")]))
27214 (clobber (reg:CC FLAGS_REG))]
27215 "! TARGET_PARTIAL_REG_STALL && reload_completed
27216 && ((GET_MODE (operands[0]) == HImode
27217 && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
27218 /* ??? next two lines just !satisfies_constraint_K (...) */
27219 || !CONST_INT_P (operands[2])
27220 || satisfies_constraint_K (operands[2])))
27221 || (GET_MODE (operands[0]) == QImode
27222 && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
27223 [(parallel [(set (match_dup 0)
27224 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
27225 (clobber (reg:CC FLAGS_REG))])]
27226 {
27227 operands[0] = gen_lowpart (SImode, operands[0]);
27228 operands[1] = gen_lowpart (SImode, operands[1]);
27229 if (GET_CODE (operands[3]) != ASHIFT)
27230 operands[2] = gen_lowpart (SImode, operands[2]);
27231 operands[3] = shallow_copy_rtx (operands[3]);
27232 PUT_MODE (operands[3], SImode);
27233 })
27234
27235 ; Promote the QImode tests, as i386 has encoding of the AND
27236 ; instruction with 32-bit sign-extended immediate and thus the
27237 ; instruction size is unchanged, except in the %eax case for
27238 ; which it is increased by one byte, hence the ! optimize_size.
;
; The pattern matches a compare against zero of (and op3 const_int) that
; is paired with a set of register operand 1 to the same AND.  After
; reload, with ! TARGET_PARTIAL_REG_STALL and the insn optimized for
; speed, HImode (without TARGET_FAST_PREFIX) and QImode (with
; TARGET_PROMOTE_QImode) forms are rewritten as an SImode AND.
; ix86_match_ccmode is asked for CCNOmode when the constant is
; non-negative and for CCZmode otherwise.
; The preparation code masks the constant with the mode mask of operand 1
; and re-creates it in SImode, then replaces operands 1 and 3 with their
; SImode lowparts.
27239 (define_split
27240 [(set (match_operand 0 "flags_reg_operand")
27241 (match_operator 2 "compare_operator"
27242 [(and (match_operand 3 "aligned_operand")
27243 (match_operand 4 "const_int_operand"))
27244 (const_int 0)]))
27245 (set (match_operand 1 "register_operand")
27246 (and (match_dup 3) (match_dup 4)))]
27247 "! TARGET_PARTIAL_REG_STALL && reload_completed
27248 && optimize_insn_for_speed_p ()
27249 && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
27250 || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
27251 /* Ensure that the operand will remain sign-extended immediate. */
27252 && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
27253 [(parallel [(set (match_dup 0)
27254 (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
27255 (const_int 0)]))
27256 (set (match_dup 1)
27257 (and:SI (match_dup 3) (match_dup 4)))])]
27258 {
27259 operands[4]
27260 = gen_int_mode (INTVAL (operands[4])
27261 & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
27262 operands[1] = gen_lowpart (SImode, operands[1]);
27263 operands[3] = gen_lowpart (SImode, operands[3]);
27264 })
27265
27266 ; Don't promote the QImode tests, as i386 doesn't have encoding of
27267 ; the TEST instruction with 32-bit sign-extended immediate and thus
27268 ; the instruction size would at least double, which is not what we
27269 ; want even with ! optimize_size.
;
; Only the HImode flags-setting test (and:HI op2 const_int) compared with
; zero is matched here; there is no AND result operand.  After reload,
; with ! TARGET_PARTIAL_REG_STALL, ! TARGET_FAST_PREFIX and the insn
; optimized for speed, it becomes the same test on and:SI.
; The preparation code masks the constant with the mode mask of operand 2,
; re-creates it in SImode, and replaces operand 2 with its SImode lowpart.
27270 (define_split
27271 [(set (match_operand 0 "flags_reg_operand")
27272 (match_operator 1 "compare_operator"
27273 [(and (match_operand:HI 2 "aligned_operand")
27274 (match_operand:HI 3 "const_int_operand"))
27275 (const_int 0)]))]
27276 "! TARGET_PARTIAL_REG_STALL && reload_completed
27277 && ! TARGET_FAST_PREFIX
27278 && optimize_insn_for_speed_p ()
27279 /* Ensure that the operand will remain sign-extended immediate. */
27280 && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
27281 [(set (match_dup 0)
27282 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
27283 (const_int 0)]))]
27284 {
27285 operands[3]
27286 = gen_int_mode (INTVAL (operands[3])
27287 & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
27288 operands[2] = gen_lowpart (SImode, operands[2]);
27289 })
27290
;; Widen a register NEG (with its FLAGS_REG clobber) to SImode after
;; reload.  Applies when ! TARGET_PARTIAL_REG_STALL to HImode destinations,
;; and to QImode destinations when TARGET_PROMOTE_QImode.  Both operands
;; are replaced by their SImode lowparts.
27291 (define_split
27292 [(set (match_operand 0 "register_operand")
27293 (neg (match_operand 1 "register_operand")))
27294 (clobber (reg:CC FLAGS_REG))]
27295 "! TARGET_PARTIAL_REG_STALL && reload_completed
27296 && (GET_MODE (operands[0]) == HImode
27297 || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
27298 [(parallel [(set (match_dup 0)
27299 (neg:SI (match_dup 1)))
27300 (clobber (reg:CC FLAGS_REG))])]
27301 {
27302 operands[0] = gen_lowpart (SImode, operands[0]);
27303 operands[1] = gen_lowpart (SImode, operands[1]);
27304 })
27305
27306 ;; Do not split instructions with mask regs.
;; Widen a NOT between general registers to SImode after reload, under
;; the same mode conditions as the NEG split (HImode, or QImode with
;; TARGET_PROMOTE_QImode, and ! TARGET_PARTIAL_REG_STALL).  This pattern
;; carries no FLAGS_REG clobber.  Both operands become SImode lowparts.
27307 (define_split
27308 [(set (match_operand 0 "general_reg_operand")
27309 (not (match_operand 1 "general_reg_operand")))]
27310 "! TARGET_PARTIAL_REG_STALL && reload_completed
27311 && (GET_MODE (operands[0]) == HImode
27312 || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
27313 [(set (match_dup 0)
27314 (not:SI (match_dup 1)))]
27315 {
27316 operands[0] = gen_lowpart (SImode, operands[0]);
27317 operands[1] = gen_lowpart (SImode, operands[1]);
27318 })
27319
;; Widen the negation of a carry-flag operator (an
;; ix86_carry_flag_operator applied to FLAGS_REG and zero) to SImode after
;; reload, for HImode destinations or QImode ones with
;; TARGET_PROMOTE_QImode, when ! TARGET_PARTIAL_REG_STALL.
;; Operand 0 becomes its SImode lowpart; operand 1 is replaced by a shallow
;; copy of the operator retagged as SImode, since it is not a register
;; that gen_lowpart could narrow or widen.
27320 (define_split
27321 [(set (match_operand 0 "general_reg_operand")
27322 (neg (match_operator 1 "ix86_carry_flag_operator"
27323 [(reg FLAGS_REG) (const_int 0)])))
27324 (clobber (reg:CC FLAGS_REG))]
27325 "! TARGET_PARTIAL_REG_STALL && reload_completed
27326 && (GET_MODE (operands[0]) == HImode
27327 || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
27328 [(parallel [(set (match_dup 0)
27329 (neg:SI (match_dup 1)))
27330 (clobber (reg:CC FLAGS_REG))])]
27331 {
27332 operands[0] = gen_lowpart (SImode, operands[0]);
27333 operands[1] = shallow_copy_rtx (operands[1]);
27334 PUT_MODE (operands[1], SImode);
27335 })
27336 \f
27337 ;; RTL Peephole optimizations, run before sched2. These primarily look to
27338 ;; transform a complex memory operation into two memory to register operations.
27339
27340 ;; Don't push memory operands
;; When a scratch register is available, a push of a memory operand is
;; replaced by a load into the scratch followed by a push of the scratch.
;; Skipped when TARGET_PUSH_MEMORY is set, when the insn is optimized for
;; size, or when the push insn is marked RTX_FRAME_RELATED_P.
27341 (define_peephole2
27342 [(set (match_operand:SWI 0 "push_operand")
27343 (match_operand:SWI 1 "memory_operand"))
27344 (match_scratch:SWI 2 "<r>")]
27345 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
27346 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
27347 [(set (match_dup 2) (match_dup 1))
27348 (set (match_dup 0) (match_dup 2))])
27349
27350 ;; We need to handle SFmode only, because DFmode and XFmode are split to
27351 ;; SImode pushes.
;; Same transformation as the integer push peephole, for an SFmode push
;; from memory: load into an r-class scratch, then push the scratch.
;; The enabling condition is identical.
27352 (define_peephole2
27353 [(set (match_operand:SF 0 "push_operand")
27354 (match_operand:SF 1 "memory_operand"))
27355 (match_scratch:SF 2 "r")]
27356 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
27357 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
27358 [(set (match_dup 2) (match_dup 1))
27359 (set (match_dup 0) (match_dup 2))])
27360
27361 ;; Don't move an immediate directly to memory when the instruction
27362 ;; gets too big, or if LCP stalls are a problem for 16-bit moves.
;; This variant handles a store of constant zero.  It fires when the insn
;; is optimized for speed, FLAGS_REG is dead at the insn, and either the
;; mode is HImode with TARGET_LCP_STALL, or ! TARGET_USE_MOV0 with
;; TARGET_SPLIT_LONG_MOVES and an insn length of at least the current
;; cost table's large_insn.
;; The replacement zeroes the scratch through an SImode set that clobbers
;; FLAGS_REG (operand 2 is the SImode lowpart of scratch operand 1), then
;; stores the scratch.  The flags clobber presumably lets the zeroing be
;; emitted as an XOR -- confirm against the move-zero insn patterns.
27363 (define_peephole2
27364 [(match_scratch:SWI124 1 "<r>")
27365 (set (match_operand:SWI124 0 "memory_operand")
27366 (const_int 0))]
27367 "optimize_insn_for_speed_p ()
27368 && ((<MODE>mode == HImode
27369 && TARGET_LCP_STALL)
27370 || (!TARGET_USE_MOV0
27371 && TARGET_SPLIT_LONG_MOVES
27372 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
27373 && peep2_regno_dead_p (0, FLAGS_REG)"
27374 [(parallel [(set (match_dup 2) (const_int 0))
27375 (clobber (reg:CC FLAGS_REG))])
27376 (set (match_dup 0) (match_dup 1))]
27377 "operands[2] = gen_lowpart (SImode, operands[1]);")
27378
;; General immediate-to-memory store: when the insn is optimized for speed
;; and either the mode is HImode with TARGET_LCP_STALL, or
;; TARGET_SPLIT_LONG_MOVES is set and the insn length is at least the
;; current cost table's large_insn, load the immediate into a scratch
;; register first and store the scratch.  No flags requirement here, since
;; the replacement is two plain moves.
27379 (define_peephole2
27380 [(match_scratch:SWI124 2 "<r>")
27381 (set (match_operand:SWI124 0 "memory_operand")
27382 (match_operand:SWI124 1 "immediate_operand"))]
27383 "optimize_insn_for_speed_p ()
27384 && ((<MODE>mode == HImode
27385 && TARGET_LCP_STALL)
27386 || (TARGET_SPLIT_LONG_MOVES
27387 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
27388 [(set (match_dup 2) (match_dup 1))
27389 (set (match_dup 0) (match_dup 2))])
27390
27391 ;; Don't compare memory with zero, load and use a test instead.
;; Matches a flags-setting comparison of an SImode memory operand with
;; zero when a scratch register is available, the insn is optimized for
;; speed, and the insn is acceptable in CCNOmode.  The memory operand is
;; loaded into the scratch and the same comparison operator is applied to
;; the scratch instead.
27392 (define_peephole2
27393 [(set (match_operand 0 "flags_reg_operand")
27394 (match_operator 1 "compare_operator"
27395 [(match_operand:SI 2 "memory_operand")
27396 (const_int 0)]))
27397 (match_scratch:SI 3 "r")]
27398 "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
27399 [(set (match_dup 3) (match_dup 2))
27400 (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
27401
27402 ;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
27403 ;; Don't split NOTs with a displacement operand, because resulting XOR
27404 ;; will not be pairable anyway.
27405 ;;
27406 ;; On AMD K6, NOT is vector decoded with memory operand that cannot be
27407 ;; represented using a modRM byte. The XOR replacement is long decoded,
27408 ;; so this split helps here as well.
27409 ;;
27410 ;; Note: Can't do this as a regular split because we can't get proper
27411 ;; lifetime information then.
27412
;; Condition summary: the insn is optimized for speed, FLAGS_REG is dead
;; at the insn (the XOR replacement clobbers it), and either
;; TARGET_NOT_UNPAIRABLE holds with operand 0 not being a memory operand
;; with a displacement, or TARGET_NOT_VECTORMODE holds with operand 0
;; being a long_memory_operand.  The NOT is rewritten as XOR with -1.
27413 (define_peephole2
27414 [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
27415 (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
27416 "optimize_insn_for_speed_p ()
27417 && ((TARGET_NOT_UNPAIRABLE
27418 && (!MEM_P (operands[0])
27419 || !memory_displacement_operand (operands[0], <MODE>mode)))
27420 || (TARGET_NOT_VECTORMODE
27421 && long_memory_operand (operands[0], <MODE>mode)))
27422 && peep2_regno_dead_p (0, FLAGS_REG)"
27423 [(parallel [(set (match_dup 0)
27424 (xor:SWI124 (match_dup 1) (const_int -1)))
27425 (clobber (reg:CC FLAGS_REG))])])
27426
27427 ;; Non pairable "test imm, reg" instructions can be translated to
27428 ;; "and imm, reg" if reg dies. The "and" form is also shorter (one
27429 ;; byte opcode instead of two, have a short form for byte operands),
27430 ;; so do it for other CPUs as well. Given that the value was dead,
27431 ;; this should not create any new dependencies. Pass on the sub-word
27432 ;; versions if we're concerned about partial register stalls.
27433
;; SImode case.  Requires that the insn is acceptable in CCNOmode, that
;; operand 2 is dead at peephole offset 1 (checked with peep2_reg_dead_p),
;; and that either the register is not AX or the immediate satisfies
;; constraint K.  The replacement is a parallel that sets the flags from
;; the AND and also writes the AND result back into operand 2.
27434 (define_peephole2
27435 [(set (match_operand 0 "flags_reg_operand")
27436 (match_operator 1 "compare_operator"
27437 [(and:SI (match_operand:SI 2 "register_operand")
27438 (match_operand:SI 3 "immediate_operand"))
27439 (const_int 0)]))]
27440 "ix86_match_ccmode (insn, CCNOmode)
27441 && (REGNO (operands[2]) != AX_REG
27442 || satisfies_constraint_K (operands[3]))
27443 && peep2_reg_dead_p (1, operands[2])"
27444 [(parallel
27445 [(set (match_dup 0)
27446 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
27447 (const_int 0)]))
27448 (set (match_dup 2)
27449 (and:SI (match_dup 2) (match_dup 3)))])])
27450
27451 ;; We don't need to handle HImode case, because it will be promoted to SImode
27452 ;; on ! TARGET_PARTIAL_REG_STALL
27453
;; QImode case of the test-to-and rewrite.  Requires
;; ! TARGET_PARTIAL_REG_STALL, CCNOmode acceptability, a register other
;; than AX (unconditionally here, unlike the SImode case), and operand 2
;; dead at peephole offset 1.  The replacement sets the flags from the AND
;; and writes the AND result back into operand 2.
27454 (define_peephole2
27455 [(set (match_operand 0 "flags_reg_operand")
27456 (match_operator 1 "compare_operator"
27457 [(and:QI (match_operand:QI 2 "register_operand")
27458 (match_operand:QI 3 "immediate_operand"))
27459 (const_int 0)]))]
27460 "! TARGET_PARTIAL_REG_STALL
27461 && ix86_match_ccmode (insn, CCNOmode)
27462 && REGNO (operands[2]) != AX_REG
27463 && peep2_reg_dead_p (1, operands[2])"
27464 [(parallel
27465 [(set (match_dup 0)
27466 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
27467 (const_int 0)]))
27468 (set (match_dup 2)
27469 (and:QI (match_dup 2) (match_dup 3)))])])
27470
;; Test-to-and rewrite for a QImode test of an extracted field: the tested
;; value is the QImode subreg of an extract_operator applied to operand 2
;; with arguments (const_int 8) (const_int 8), ANDed with a const_int.
;; Conditions mirror the QImode register case: ! TARGET_PARTIAL_REG_STALL,
;; CCNOmode acceptability, operand 2 is not AX, and operand 2 is dead at
;; peephole offset 1.
;; The replacement keeps the flags computation and adds a second set that
;; stores the AND result back into the same field of operand 2 through a
;; zero_extract destination.
27471 (define_peephole2
27472 [(set (match_operand 0 "flags_reg_operand")
27473 (match_operator 1 "compare_operator"
27474 [(and:QI
27475 (subreg:QI
27476 (match_operator:SWI248 4 "extract_operator"
27477 [(match_operand 2 "int248_register_operand")
27478 (const_int 8)
27479 (const_int 8)]) 0)
27480 (match_operand 3 "const_int_operand"))
27481 (const_int 0)]))]
27482 "! TARGET_PARTIAL_REG_STALL
27483 && ix86_match_ccmode (insn, CCNOmode)
27484 && REGNO (operands[2]) != AX_REG
27485 && peep2_reg_dead_p (1, operands[2])"
27486 [(parallel
27487 [(set (match_dup 0)
27488 (match_op_dup 1
27489 [(and:QI
27490 (subreg:QI
27491 (match_op_dup 4 [(match_dup 2)
27492 (const_int 8)
27493 (const_int 8)]) 0)
27494 (match_dup 3))
27495 (const_int 0)]))
27496 (set (zero_extract:SWI248 (match_dup 2)
27497 (const_int 8)
27498 (const_int 8))
27499 (subreg:SWI248
27500 (and:QI
27501 (subreg:QI
27502 (match_op_dup 4 [(match_dup 2)
27503 (const_int 8)
27504 (const_int 8)]) 0)
27505 (match_dup 3)) 0))])])
27506
27507 ;; Don't do logical operations with memory inputs.
;; Form with the memory operand as the second input: reg = reg OP mem.
;; Unless TARGET_READ_MODIFY is set or the insn is optimized for size, the
;; memory operand is first loaded into a scratch register and the
;; operation is then performed register to register.  Operand order is
;; kept, so non-commutative operators are handled as well.
27508 (define_peephole2
27509 [(match_scratch:SWI 2 "<r>")
27510 (parallel [(set (match_operand:SWI 0 "register_operand")
27511 (match_operator:SWI 3 "arith_or_logical_operator"
27512 [(match_dup 0)
27513 (match_operand:SWI 1 "memory_operand")]))
27514 (clobber (reg:CC FLAGS_REG))])]
27515 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
27516 [(set (match_dup 2) (match_dup 1))
27517 (parallel [(set (match_dup 0)
27518 (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
27519 (clobber (reg:CC FLAGS_REG))])])
27520
;; Mirror of the memory-input peephole with the memory operand as the
;; first input: reg = mem OP reg.  Under the same condition the memory
;; operand is loaded into a scratch and takes the first input position in
;; the rewritten operation.
27521 (define_peephole2
27522 [(match_scratch:SWI 2 "<r>")
27523 (parallel [(set (match_operand:SWI 0 "register_operand")
27524 (match_operator:SWI 3 "arith_or_logical_operator"
27525 [(match_operand:SWI 1 "memory_operand")
27526 (match_dup 0)]))
27527 (clobber (reg:CC FLAGS_REG))])]
27528 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
27529 [(set (match_dup 2) (match_dup 1))
27530 (parallel [(set (match_dup 0)
27531 (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
27532 (clobber (reg:CC FLAGS_REG))])])
27533
27534 ;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when
27535 ;; the memory address refers to the destination of the load!
27536
;; Matches  op0 = op1;  op0 = op0 OP mem  for a commutative OP on distinct
;; general registers, and rewrites it as  op0 = mem';  op0 = op0 OP op1.
;; mem' (operand 4) is produced by ix86_replace_reg_with_reg (operands[2],
;; operands[0], operands[1]); going by its name and the warning above it
;; substitutes op1 for op0 inside the memory operand, which is needed
;; because op0 no longer holds the copy when the load executes -- confirm
;; against the helper's definition.
;; For QImode, operand 1 must satisfy any_QIreg_operand because it becomes
;; an input of the QImode operation.
27537 (define_peephole2
27538 [(set (match_operand:SWI 0 "general_reg_operand")
27539 (match_operand:SWI 1 "general_reg_operand"))
27540 (parallel [(set (match_dup 0)
27541 (match_operator:SWI 3 "commutative_operator"
27542 [(match_dup 0)
27543 (match_operand:SWI 2 "memory_operand")]))
27544 (clobber (reg:CC FLAGS_REG))])]
27545 "REGNO (operands[0]) != REGNO (operands[1])
27546 && (<MODE>mode != QImode
27547 || any_QIreg_operand (operands[1], QImode))"
27548 [(set (match_dup 0) (match_dup 4))
27549 (parallel [(set (match_dup 0)
27550 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
27551 (clobber (reg:CC FLAGS_REG))])]
27552 {
27553 operands[4]
27554 = ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]);
27555 })
27556
;; Load+RegOp rewrite for MMX registers:  op0 = op1;  op0 = op0 OP mem
;; becomes  op0 = mem;  op0 = op0 OP op1  for a commutative OP on distinct
;; registers.  The memory operand is reused unchanged and there is no
;; FLAGS_REG clobber in either form.
27557 (define_peephole2
27558 [(set (match_operand 0 "mmx_reg_operand")
27559 (match_operand 1 "mmx_reg_operand"))
27560 (set (match_dup 0)
27561 (match_operator 3 "commutative_operator"
27562 [(match_dup 0)
27563 (match_operand 2 "memory_operand")]))]
27564 "REGNO (operands[0]) != REGNO (operands[1])"
27565 [(set (match_dup 0) (match_dup 2))
27566 (set (match_dup 0)
27567 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
27568
;; Load+RegOp rewrite for SSE registers, same shape as the MMX variant.
;; In addition to requiring distinct registers, the rewrite is rejected
;; when operand 1 is an extended (EXT_REX) SSE register unless one of the
;; following holds: TARGET_AVX512BW, an element size above 2 bytes, or the
;; operator is a logic operator.  The in-condition comment gives the
;; reason: the rewritten form uses operand 1 as an instruction input and
;; may then need AVX512BW.
27569 (define_peephole2
27570 [(set (match_operand 0 "sse_reg_operand")
27571 (match_operand 1 "sse_reg_operand"))
27572 (set (match_dup 0)
27573 (match_operator 3 "commutative_operator"
27574 [(match_dup 0)
27575 (match_operand 2 "memory_operand")]))]
27576 "REGNO (operands[0]) != REGNO (operands[1])
27577 /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled,
27578 as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw]
27579 instructions require AVX512BW and AVX512VL, but with the original
27580 instructions it might require just AVX512VL.
27581 AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */
27582 && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1]))
27583 || TARGET_AVX512BW
27584 || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2
27585 || logic_operator (operands[3], VOIDmode))"
27586 [(set (match_dup 0) (match_dup 2))
27587 (set (match_dup 0)
27588 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
27589
27590 ; Don't do logical operations with memory outputs
27591 ;
27592 ; These two don't make sense for PPro/PII -- we're expanding a 4-uop
27593 ; instruction into two 1-uop insns plus a 2-uop insn. That last has
27594 ; the same decoder scheduling characteristics as the original.
27595
; Form with the memory destination as the first input: mem = mem OP op1.
; Unless TARGET_READ_MODIFY_WRITE is set or the insn is optimized for
; size, it is expanded into three insns using a scratch register:
; load the memory operand, operate on the scratch, store the scratch back.
27596 (define_peephole2
27597 [(match_scratch:SWI 2 "<r>")
27598 (parallel [(set (match_operand:SWI 0 "memory_operand")
27599 (match_operator:SWI 3 "arith_or_logical_operator"
27600 [(match_dup 0)
27601 (match_operand:SWI 1 "<nonmemory_operand>")]))
27602 (clobber (reg:CC FLAGS_REG))])]
27603 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
27604 [(set (match_dup 2) (match_dup 0))
27605 (parallel [(set (match_dup 2)
27606 (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
27607 (clobber (reg:CC FLAGS_REG))])
27608 (set (match_dup 0) (match_dup 2))])
27609
;; Mirror of the memory-output peephole with the memory destination as
;; the second input: mem = op1 OP mem.  Under the same condition it is
;; expanded into load to scratch, scratch = op1 OP scratch, store scratch.
27610 (define_peephole2
27611 [(match_scratch:SWI 2 "<r>")
27612 (parallel [(set (match_operand:SWI 0 "memory_operand")
27613 (match_operator:SWI 3 "arith_or_logical_operator"
27614 [(match_operand:SWI 1 "<nonmemory_operand>")
27615 (match_dup 0)]))
27616 (clobber (reg:CC FLAGS_REG))])]
27617 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
27618 [(set (match_dup 2) (match_dup 0))
27619 (parallel [(set (match_dup 2)
27620 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
27621 (clobber (reg:CC FLAGS_REG))])
27622 (set (match_dup 0) (match_dup 2))])
27623
27624 ;; Attempt to use arith or logical operations with memory outputs with
27625 ;; setting of flags.
;; Matched sequence (four insns):
;;   op0 = mem;  op0 = op0 OP op2 (flags clobbered);  mem = op0;
;;   flags = compare (op0, 0)
;; Replacement (one parallel):
;;   flags = compare (mem OP op2, 0);  mem = mem OP op2
;; Enabled with TARGET_READ_MODIFY_WRITE or when optimizing for size.
;; op0 must be dead at peephole offset 4 and must not overlap the memory
;; operand or op2.  For QImode, op2 must be an immediate or satisfy
;; any_QIreg_operand.  The compare insn (offset 3) must be acceptable in
;; CCGOCmode for PLUS/MINUS and in CCNOmode for the other operators.
;; The flags destination (operand 4) is taken from the matched compare
;; insn so that its mode is preserved.
27626 (define_peephole2
27627 [(set (match_operand:SWI 0 "register_operand")
27628 (match_operand:SWI 1 "memory_operand"))
27629 (parallel [(set (match_dup 0)
27630 (match_operator:SWI 3 "plusminuslogic_operator"
27631 [(match_dup 0)
27632 (match_operand:SWI 2 "<nonmemory_operand>")]))
27633 (clobber (reg:CC FLAGS_REG))])
27634 (set (match_dup 1) (match_dup 0))
27635 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
27636 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27637 && peep2_reg_dead_p (4, operands[0])
27638 && !reg_overlap_mentioned_p (operands[0], operands[1])
27639 && !reg_overlap_mentioned_p (operands[0], operands[2])
27640 && (<MODE>mode != QImode
27641 || immediate_operand (operands[2], QImode)
27642 || any_QIreg_operand (operands[2], QImode))
27643 && ix86_match_ccmode (peep2_next_insn (3),
27644 (GET_CODE (operands[3]) == PLUS
27645 || GET_CODE (operands[3]) == MINUS)
27646 ? CCGOCmode : CCNOmode)"
27647 [(parallel [(set (match_dup 4) (match_dup 6))
27648 (set (match_dup 1) (match_dup 5))])]
27649 {
27650 operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
27651 operands[5]
27652 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
27653 copy_rtx (operands[1]),
27654 operands[2]);
27655 operands[6]
27656 = gen_rtx_COMPARE (GET_MODE (operands[4]),
27657 copy_rtx (operands[5]),
27658 const0_rtx);
27659 })
27660
27661 ;; Likewise for cmpelim optimized pattern.
;; Matched sequence (three insns): the compare is already merged into the
;; arithmetic insn as the first element of its parallel.
;;   op0 = mem;  { flags = compare (op0 OP op2, 0); op0 = op0 OP op2 };
;;   mem = op0
;; Replacement: flags = compare (mem OP op2, 0);  mem = mem OP op2
;; op0 must be dead at peephole offset 3 and must not overlap the memory
;; operand or op2.  The flags destination (operand 4) is read from element
;; 0 of the parallel at offset 1, which is also the insn whose CC mode is
;; checked.  Unlike the non-cmpelim variant there is no QImode operand
;; restriction in this condition.
27662 (define_peephole2
27663 [(set (match_operand:SWI 0 "register_operand")
27664 (match_operand:SWI 1 "memory_operand"))
27665 (parallel [(set (reg FLAGS_REG)
27666 (compare (match_operator:SWI 3 "plusminuslogic_operator"
27667 [(match_dup 0)
27668 (match_operand:SWI 2 "<nonmemory_operand>")])
27669 (const_int 0)))
27670 (set (match_dup 0) (match_dup 3))])
27671 (set (match_dup 1) (match_dup 0))]
27672 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27673 && peep2_reg_dead_p (3, operands[0])
27674 && !reg_overlap_mentioned_p (operands[0], operands[1])
27675 && !reg_overlap_mentioned_p (operands[0], operands[2])
27676 && ix86_match_ccmode (peep2_next_insn (1),
27677 (GET_CODE (operands[3]) == PLUS
27678 || GET_CODE (operands[3]) == MINUS)
27679 ? CCGOCmode : CCNOmode)"
27680 [(parallel [(set (match_dup 4) (match_dup 6))
27681 (set (match_dup 1) (match_dup 5))])]
27682 {
27683 operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
27684 operands[5]
27685 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
27686 copy_rtx (operands[1]), operands[2]);
27687 operands[6]
27688 = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
27689 const0_rtx);
27690 })
27691
27692 ;; Likewise for instances where we have a lea pattern.
;; Matched sequence (four insns): the addition is a flag-less PLUS in
;; <LEAMODE> rather than an arithmetic insn with a FLAGS_REG clobber.
;;   op0 = mem;  op3 = op4 + op2;  mem = op5;  flags = compare (op5, 0)
;; The condition ties the registers together by number: op4 must be op0
;; and op5 must be op3.  op3 must be dead at peephole offset 4, and op0
;; must either be the same register as op3 or be dead at offset 2.
;; Neither op0 nor op3 may overlap the memory operand, and op0 may not
;; overlap op2.  For QImode, op2 must be an immediate or satisfy
;; any_QIreg_operand.  The compare insn must be acceptable in CCGOCmode.
;; Replacement: flags = compare (mem + op2', 0);  mem = mem + op2'
;; where op2' is the lowpart of op2 in the memory operand's mode.
27693 (define_peephole2
27694 [(set (match_operand:SWI 0 "register_operand")
27695 (match_operand:SWI 1 "memory_operand"))
27696 (set (match_operand:<LEAMODE> 3 "register_operand")
27697 (plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
27698 (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
27699 (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
27700 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
27701 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27702 && REGNO (operands[4]) == REGNO (operands[0])
27703 && REGNO (operands[5]) == REGNO (operands[3])
27704 && peep2_reg_dead_p (4, operands[3])
27705 && ((REGNO (operands[0]) == REGNO (operands[3]))
27706 || peep2_reg_dead_p (2, operands[0]))
27707 && !reg_overlap_mentioned_p (operands[0], operands[1])
27708 && !reg_overlap_mentioned_p (operands[3], operands[1])
27709 && !reg_overlap_mentioned_p (operands[0], operands[2])
27710 && (<MODE>mode != QImode
27711 || immediate_operand (operands[2], QImode)
27712 || any_QIreg_operand (operands[2], QImode))
27713 && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
27714 [(parallel [(set (match_dup 6) (match_dup 8))
27715 (set (match_dup 1) (match_dup 7))])]
27716 {
27717 operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
27718 operands[7]
27719 = gen_rtx_PLUS (<MODE>mode,
27720 copy_rtx (operands[1]),
27721 gen_lowpart (<MODE>mode, operands[2]));
27722 operands[8]
27723 = gen_rtx_COMPARE (GET_MODE (operands[6]),
27724 copy_rtx (operands[7]),
27725 const0_rtx);
27726 })
27727
;; Read-modify-write with flags where the memory operand is the second
;; input of the operation instead of being loaded first.
;; Matched sequence (three insns):
;;   op0 = op0 OP mem (flags clobbered);  mem = op0;
;;   flags = compare (op0, 0)
;; Replacement: flags = compare (mem OP op0, 0);  mem = mem OP op0
;; The operands are swapped in the replacement, so the operator must
;; satisfy COMMUTATIVE_ARITH_P.  op0 must be dead at peephole offset 3 and
;; must not overlap the memory operand.  The compare insn (offset 2) must
;; be acceptable in CCGOCmode for PLUS and in CCNOmode otherwise.
27728 (define_peephole2
27729 [(parallel [(set (match_operand:SWI 0 "register_operand")
27730 (match_operator:SWI 2 "plusminuslogic_operator"
27731 [(match_dup 0)
27732 (match_operand:SWI 1 "memory_operand")]))
27733 (clobber (reg:CC FLAGS_REG))])
27734 (set (match_dup 1) (match_dup 0))
27735 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
27736 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27737 && COMMUTATIVE_ARITH_P (operands[2])
27738 && peep2_reg_dead_p (3, operands[0])
27739 && !reg_overlap_mentioned_p (operands[0], operands[1])
27740 && ix86_match_ccmode (peep2_next_insn (2),
27741 GET_CODE (operands[2]) == PLUS
27742 ? CCGOCmode : CCNOmode)"
27743 [(parallel [(set (match_dup 3) (match_dup 5))
27744 (set (match_dup 1) (match_dup 4))])]
27745 {
27746 operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
27747 operands[4]
27748 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
27749 copy_rtx (operands[1]),
27750 operands[0]);
27751 operands[5]
27752 = gen_rtx_COMPARE (GET_MODE (operands[3]),
27753 copy_rtx (operands[4]),
27754 const0_rtx);
27755 })
27756
27757 ;; Likewise for cmpelim optimized pattern.
;; Matched sequence (two insns): the compare is already merged into the
;; arithmetic insn.
;;   { flags = compare (op0 OP mem, 0); op0 = op0 OP mem };  mem = op0
;; Replacement: flags = compare (mem OP op0, 0);  mem = mem OP op0
;; Requires a COMMUTATIVE_ARITH_P operator, op0 dead at peephole offset 2
;; and not overlapping the memory operand.  The flags destination
;; (operand 3) is read from element 0 of the parallel at offset 0, which
;; is also the insn whose CC mode is checked.
27758 (define_peephole2
27759 [(parallel [(set (reg FLAGS_REG)
27760 (compare (match_operator:SWI 2 "plusminuslogic_operator"
27761 [(match_operand:SWI 0 "register_operand")
27762 (match_operand:SWI 1 "memory_operand")])
27763 (const_int 0)))
27764 (set (match_dup 0) (match_dup 2))])
27765 (set (match_dup 1) (match_dup 0))]
27766 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27767 && COMMUTATIVE_ARITH_P (operands[2])
27768 && peep2_reg_dead_p (2, operands[0])
27769 && !reg_overlap_mentioned_p (operands[0], operands[1])
27770 && ix86_match_ccmode (peep2_next_insn (0),
27771 GET_CODE (operands[2]) == PLUS
27772 ? CCGOCmode : CCNOmode)"
27773 [(parallel [(set (match_dup 3) (match_dup 5))
27774 (set (match_dup 1) (match_dup 4))])]
27775 {
27776 operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
27777 operands[4]
27778 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
27779 copy_rtx (operands[1]), operands[0]);
27780 operands[5]
27781 = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
27782 const0_rtx);
27783 })
27784
;; Read-modify-write with flags for QImode/HImode memory where the
;; arithmetic in between was done in SImode (for example after promotion).
;; Matched sequence (four insns):
;;   op0 = mem (SWI12);  op4 = op4 OP op2 (SImode, flags clobbered);
;;   mem = op0;  flags = compare (op0, 0)
;; The condition requires op4 to be the same hard register as op0.
;; Replacement: flags = compare (mem OP op2', 0);  mem = mem OP op2'
;; performed in the narrow mode, where op2' is the narrow-mode lowpart of
;; op2.  op0 must be dead at peephole offset 4 and must not overlap the
;; memory operand or op2.  For QImode, op2 must be an immediate or satisfy
;; any_QIreg_operand (both checked in SImode).
27785 (define_peephole2
27786 [(set (match_operand:SWI12 0 "register_operand")
27787 (match_operand:SWI12 1 "memory_operand"))
27788 (parallel [(set (match_operand:SI 4 "register_operand")
27789 (match_operator:SI 3 "plusminuslogic_operator"
27790 [(match_dup 4)
27791 (match_operand:SI 2 "nonmemory_operand")]))
27792 (clobber (reg:CC FLAGS_REG))])
27793 (set (match_dup 1) (match_dup 0))
27794 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
27795 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27796 && REGNO (operands[0]) == REGNO (operands[4])
27797 && peep2_reg_dead_p (4, operands[0])
27798 && (<MODE>mode != QImode
27799 || immediate_operand (operands[2], SImode)
27800 || any_QIreg_operand (operands[2], SImode))
27801 && !reg_overlap_mentioned_p (operands[0], operands[1])
27802 && !reg_overlap_mentioned_p (operands[0], operands[2])
27803 && ix86_match_ccmode (peep2_next_insn (3),
27804 (GET_CODE (operands[3]) == PLUS
27805 || GET_CODE (operands[3]) == MINUS)
27806 ? CCGOCmode : CCNOmode)"
27807 [(parallel [(set (match_dup 5) (match_dup 7))
27808 (set (match_dup 1) (match_dup 6))])]
27809 {
27810 operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
27811 operands[6]
27812 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
27813 copy_rtx (operands[1]),
27814 gen_lowpart (<MODE>mode, operands[2]));
27815 operands[7]
27816 = gen_rtx_COMPARE (GET_MODE (operands[5]),
27817 copy_rtx (operands[6]),
27818 const0_rtx);
27819 })
27820
27821 ;; peephole2 comes before regcprop, so deal also with a case that
27822 ;; would be cleaned up by regcprop.
;; Matched sequence (five insns): as the basic read-modify-write pattern,
;; but the result is copied to a second register before being stored and
;; compared.
;;   op0 = mem;  op0 = op0 OP op2 (flags clobbered);  op4 = op0;
;;   mem = op4;  flags = compare (op4, 0)
;; Replacement: flags = compare (mem OP op2, 0);  mem = mem OP op2
;; op0 must be dead at peephole offset 3 and op4 at offset 5.  op0 must
;; not overlap the memory operand or op2, and op4 must not overlap the
;; memory operand.  For QImode, op2 must be an immediate or satisfy
;; any_QIreg_operand.  The compare insn is the one at offset 4.
27823 (define_peephole2
27824 [(set (match_operand:SWI 0 "register_operand")
27825 (match_operand:SWI 1 "memory_operand"))
27826 (parallel [(set (match_dup 0)
27827 (match_operator:SWI 3 "plusminuslogic_operator"
27828 [(match_dup 0)
27829 (match_operand:SWI 2 "<nonmemory_operand>")]))
27830 (clobber (reg:CC FLAGS_REG))])
27831 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
27832 (set (match_dup 1) (match_dup 4))
27833 (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
27834 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27835 && peep2_reg_dead_p (3, operands[0])
27836 && peep2_reg_dead_p (5, operands[4])
27837 && !reg_overlap_mentioned_p (operands[0], operands[1])
27838 && !reg_overlap_mentioned_p (operands[0], operands[2])
27839 && !reg_overlap_mentioned_p (operands[4], operands[1])
27840 && (<MODE>mode != QImode
27841 || immediate_operand (operands[2], QImode)
27842 || any_QIreg_operand (operands[2], QImode))
27843 && ix86_match_ccmode (peep2_next_insn (4),
27844 (GET_CODE (operands[3]) == PLUS
27845 || GET_CODE (operands[3]) == MINUS)
27846 ? CCGOCmode : CCNOmode)"
27847 [(parallel [(set (match_dup 5) (match_dup 7))
27848 (set (match_dup 1) (match_dup 6))])]
27849 {
27850 operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
27851 operands[6]
27852 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
27853 copy_rtx (operands[1]),
27854 operands[2]);
27855 operands[7]
27856 = gen_rtx_COMPARE (GET_MODE (operands[5]),
27857 copy_rtx (operands[6]),
27858 const0_rtx);
27859 })
27860
;; QImode/HImode memory, SImode arithmetic, plus an extra register copy
;; before the store and compare.
;; Matched sequence (five insns):
;;   op0 = mem (SWI12);  op4 = op4 OP op2 (SImode, flags clobbered);
;;   op5 = op0;  mem = op5;  flags = compare (op5, 0)
;; The condition requires op4 to be the same hard register as op0.
;; Replacement: flags = compare (mem OP op2', 0);  mem = mem OP op2'
;; in the narrow mode, with op2' the narrow-mode lowpart of op2.
;; op0 must be dead at peephole offset 3 and op5 at offset 5.  op0 must
;; not overlap the memory operand or op2, and op5 must not overlap the
;; memory operand.  For QImode, op2 must be an immediate or satisfy
;; any_QIreg_operand (both checked in SImode).
27861 (define_peephole2
27862 [(set (match_operand:SWI12 0 "register_operand")
27863 (match_operand:SWI12 1 "memory_operand"))
27864 (parallel [(set (match_operand:SI 4 "register_operand")
27865 (match_operator:SI 3 "plusminuslogic_operator"
27866 [(match_dup 4)
27867 (match_operand:SI 2 "nonmemory_operand")]))
27868 (clobber (reg:CC FLAGS_REG))])
27869 (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
27870 (set (match_dup 1) (match_dup 5))
27871 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
27872 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27873 && REGNO (operands[0]) == REGNO (operands[4])
27874 && peep2_reg_dead_p (3, operands[0])
27875 && peep2_reg_dead_p (5, operands[5])
27876 && (<MODE>mode != QImode
27877 || immediate_operand (operands[2], SImode)
27878 || any_QIreg_operand (operands[2], SImode))
27879 && !reg_overlap_mentioned_p (operands[0], operands[1])
27880 && !reg_overlap_mentioned_p (operands[0], operands[2])
27881 && !reg_overlap_mentioned_p (operands[5], operands[1])
27882 && ix86_match_ccmode (peep2_next_insn (4),
27883 (GET_CODE (operands[3]) == PLUS
27884 || GET_CODE (operands[3]) == MINUS)
27885 ? CCGOCmode : CCNOmode)"
27886 [(parallel [(set (match_dup 6) (match_dup 8))
27887 (set (match_dup 1) (match_dup 7))])]
27888 {
27889 operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
27890 operands[7]
27891 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
27892 copy_rtx (operands[1]),
27893 gen_lowpart (<MODE>mode, operands[2]));
27894 operands[8]
27895 = gen_rtx_COMPARE (GET_MODE (operands[6]),
27896 copy_rtx (operands[7]),
27897 const0_rtx);
27898 })
27899
27900 ;; Likewise for cmpelim optimized pattern.
;; Matched sequence (four insns): compare already merged into the
;; arithmetic insn, with an extra register copy before the store.
;;   op0 = mem;  { flags = compare (op0 OP op2, 0); op0 = op0 OP op2 };
;;   op4 = op0;  mem = op4
;; Replacement: flags = compare (mem OP op2, 0);  mem = mem OP op2
;; op0 must be dead at peephole offset 3 and op4 at offset 4.  op0 must
;; not overlap the memory operand or op2, and op4 must not overlap the
;; memory operand.  The flags destination (operand 5) is read from element
;; 0 of the parallel at offset 1, which is also the insn whose CC mode is
;; checked.
27901 (define_peephole2
27902 [(set (match_operand:SWI 0 "register_operand")
27903 (match_operand:SWI 1 "memory_operand"))
27904 (parallel [(set (reg FLAGS_REG)
27905 (compare (match_operator:SWI 3 "plusminuslogic_operator"
27906 [(match_dup 0)
27907 (match_operand:SWI 2 "<nonmemory_operand>")])
27908 (const_int 0)))
27909 (set (match_dup 0) (match_dup 3))])
27910 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
27911 (set (match_dup 1) (match_dup 4))]
27912 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27913 && peep2_reg_dead_p (3, operands[0])
27914 && peep2_reg_dead_p (4, operands[4])
27915 && !reg_overlap_mentioned_p (operands[0], operands[1])
27916 && !reg_overlap_mentioned_p (operands[0], operands[2])
27917 && !reg_overlap_mentioned_p (operands[4], operands[1])
27918 && ix86_match_ccmode (peep2_next_insn (1),
27919 (GET_CODE (operands[3]) == PLUS
27920 || GET_CODE (operands[3]) == MINUS)
27921 ? CCGOCmode : CCNOmode)"
27922 [(parallel [(set (match_dup 5) (match_dup 7))
27923 (set (match_dup 1) (match_dup 6))])]
27924 {
27925 operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
27926 operands[6]
27927 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
27928 copy_rtx (operands[1]), operands[2]);
27929 operands[7]
27930 = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
27931 const0_rtx);
27932 })
27933
27934 ;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
27935 ;; into x = z; x ^= y; x != z
;; Matched sequence (five insns):
;;   op0 = mem;  op3 = op0;  op4 = op4 ^ op2 (flags clobbered);
;;   mem = op4;  flags:CCZ = compare (op5, op6)
;; op4 must be the same hard register as op0 or as op3; call the other of
;; those two registers the untouched copy.  The final compare must be
;; between the untouched copy and op2, in either operand order (checked
;; with rtx_equal_p).  op4 must be dead at peephole offset 4 and the
;; untouched copy at offset 5.
;; op0 must not overlap the memory operand or op2; op3 must not overlap
;; op0, the memory operand or op2.  For QImode, op2 must be an immediate
;; or satisfy any_QIreg_operand.
;; Replacement: flags = compare (mem ^ op2, 0);  mem = mem ^ op2
;; with the flags destination (operand 7) taken from the matched compare
;; insn at offset 4.
27936 (define_peephole2
27937 [(set (match_operand:SWI 0 "register_operand")
27938 (match_operand:SWI 1 "memory_operand"))
27939 (set (match_operand:SWI 3 "register_operand") (match_dup 0))
27940 (parallel [(set (match_operand:SWI 4 "register_operand")
27941 (xor:SWI (match_dup 4)
27942 (match_operand:SWI 2 "<nonmemory_operand>")))
27943 (clobber (reg:CC FLAGS_REG))])
27944 (set (match_dup 1) (match_dup 4))
27945 (set (reg:CCZ FLAGS_REG)
27946 (compare:CCZ (match_operand:SWI 5 "register_operand")
27947 (match_operand:SWI 6 "<nonmemory_operand>")))]
27948 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27949 && (REGNO (operands[4]) == REGNO (operands[0])
27950 || REGNO (operands[4]) == REGNO (operands[3]))
27951 && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
27952 ? 3 : 0], operands[5])
27953 ? rtx_equal_p (operands[2], operands[6])
27954 : rtx_equal_p (operands[2], operands[5])
27955 && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
27956 ? 3 : 0], operands[6]))
27957 && peep2_reg_dead_p (4, operands[4])
27958 && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
27959 ? 3 : 0])
27960 && !reg_overlap_mentioned_p (operands[0], operands[1])
27961 && !reg_overlap_mentioned_p (operands[0], operands[2])
27962 && !reg_overlap_mentioned_p (operands[3], operands[0])
27963 && !reg_overlap_mentioned_p (operands[3], operands[1])
27964 && !reg_overlap_mentioned_p (operands[3], operands[2])
27965 && (<MODE>mode != QImode
27966 || immediate_operand (operands[2], QImode)
27967 || any_QIreg_operand (operands[2], QImode))"
27968 [(parallel [(set (match_dup 7) (match_dup 9))
27969 (set (match_dup 1) (match_dup 8))])]
27970 {
27971 operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
27972 operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
27973 operands[2]);
27974 operands[9]
27975 = gen_rtx_COMPARE (GET_MODE (operands[7]),
27976 copy_rtx (operands[8]),
27977 const0_rtx);
27978 })
27979
;; XOR special case for QImode/HImode memory where the XOR itself was done
;; in SImode.
;; Matched sequence (five insns):
;;   op0 = mem (SWI12);  op3 = op0;  op4 = op4 ^ op2 (SImode, flags
;;   clobbered);  mem = op5;  flags:CCZ = compare (op6, op7)
;; op5 (the stored register) must be the same hard register as op0 or as
;; op3, and also the same hard register as op4; call the other of op0/op3
;; the untouched copy.  The final compare must pair the untouched copy
;; with op2 in either order.  Because op2 is SImode while op6/op7 are
;; narrow, a register op2 is matched by REGNO rather than rtx_equal_p;
;; when the untouched copy is op7, op2 must be a register.
;; op5 must be dead at peephole offset 4 and the untouched copy at
;; offset 5.  The overlap and QImode restrictions mirror the full-width
;; XOR peephole (the QImode checks are made in SImode).
;; Replacement: flags = compare (mem ^ op2', 0);  mem = mem ^ op2'
;; in the narrow mode, with op2' the narrow-mode lowpart of op2.
27980 (define_peephole2
27981 [(set (match_operand:SWI12 0 "register_operand")
27982 (match_operand:SWI12 1 "memory_operand"))
27983 (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
27984 (parallel [(set (match_operand:SI 4 "register_operand")
27985 (xor:SI (match_dup 4)
27986 (match_operand:SI 2 "<nonmemory_operand>")))
27987 (clobber (reg:CC FLAGS_REG))])
27988 (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
27989 (set (reg:CCZ FLAGS_REG)
27990 (compare:CCZ (match_operand:SWI12 6 "register_operand")
27991 (match_operand:SWI12 7 "<nonmemory_operand>")))]
27992 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
27993 && (REGNO (operands[5]) == REGNO (operands[0])
27994 || REGNO (operands[5]) == REGNO (operands[3]))
27995 && REGNO (operands[5]) == REGNO (operands[4])
27996 && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
27997 ? 3 : 0], operands[6])
27998 ? (REG_P (operands[2])
27999 ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
28000 : rtx_equal_p (operands[2], operands[7]))
28001 : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
28002 ? 3 : 0], operands[7])
28003 && REG_P (operands[2])
28004 && REGNO (operands[2]) == REGNO (operands[6])))
28005 && peep2_reg_dead_p (4, operands[5])
28006 && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
28007 ? 3 : 0])
28008 && !reg_overlap_mentioned_p (operands[0], operands[1])
28009 && !reg_overlap_mentioned_p (operands[0], operands[2])
28010 && !reg_overlap_mentioned_p (operands[3], operands[0])
28011 && !reg_overlap_mentioned_p (operands[3], operands[1])
28012 && !reg_overlap_mentioned_p (operands[3], operands[2])
28013 && (<MODE>mode != QImode
28014 || immediate_operand (operands[2], SImode)
28015 || any_QIreg_operand (operands[2], SImode))"
28016 [(parallel [(set (match_dup 8) (match_dup 10))
28017 (set (match_dup 1) (match_dup 9))])]
28018 {
28019 operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
28020 operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
28021 gen_lowpart (<MODE>mode, operands[2]));
28022 operands[10]
28023 = gen_rtx_COMPARE (GET_MODE (operands[8]),
28024 copy_rtx (operands[9]),
28025 const0_rtx);
28026 })
28027
28028 ;; Attempt to optimize away memory stores of values the memory already
28029 ;; has. See PR79593.
;; Matches a load of memory operand 1 into register 0 directly followed
;; by a store of register 0 into memory operand 2.  When neither MEM is
;; volatile, the two MEMs are rtx_equal_p, and register 0 is not
;; mentioned in operand 2, the replacement keeps only the load.
28030 (define_peephole2
28031 [(set (match_operand 0 "register_operand")
28032 (match_operand 1 "memory_operand"))
28033 (set (match_operand 2 "memory_operand") (match_dup 0))]
28034 "!MEM_VOLATILE_P (operands[1])
28035 && !MEM_VOLATILE_P (operands[2])
28036 && rtx_equal_p (operands[1], operands[2])
28037 && !reg_overlap_mentioned_p (operands[0], operands[2])"
28038 [(set (match_dup 0) (match_dup 1))])
28039
28040 ;; Attempt to always use XOR for zeroing registers (including FP modes).
;; A move of constant zero into a general register no wider than
;; UNITS_PER_WORD is re-emitted as a (set reg 0) inside a parallel that
;; clobbers FLAGS_REG, with the destination taken in word_mode.
;; Applies only when TARGET_USE_MOV0 is off or when optimizing for size,
;; and only when peep2_regno_dead_p reports FLAGS_REG dead.
28041 (define_peephole2
28042 [(set (match_operand 0 "general_reg_operand")
28043 (match_operand 1 "const0_operand"))]
28044 "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
28045 && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
28046 && peep2_regno_dead_p (0, FLAGS_REG)"
28047 [(parallel [(set (match_dup 0) (const_int 0))
28048 (clobber (reg:CC FLAGS_REG))])]
28049 "operands[0] = gen_lowpart (word_mode, operands[0]);")
28050
;; strict_low_part counterpart of the zeroing peephole for SWI12 modes:
;; the store of zero keeps its strict_low_part destination and only
;; gains a FLAGS_REG clobber.  Same gating: TARGET_USE_MOV0 off or
;; optimizing for size, and FLAGS_REG dead.
28051 (define_peephole2
28052 [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
28053 (const_int 0))]
28054 "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
28055 && peep2_regno_dead_p (0, FLAGS_REG)"
28056 [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
28057 (clobber (reg:CC FLAGS_REG))])])
28058
28059 ;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
;; Re-emits a move of -1 into a general register as a (set reg -1)
;; inside a parallel that clobbers FLAGS_REG.  Enabled by
;; TARGET_MOVE_M1_VIA_OR or size optimization, and only when FLAGS_REG
;; is dead.  A destination narrower than SImode is replaced by its
;; SImode lowpart before the new insn is generated.
28060 (define_peephole2
28061 [(set (match_operand:SWI248 0 "general_reg_operand")
28062 (const_int -1))]
28063 "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
28064 && peep2_regno_dead_p (0, FLAGS_REG)"
28065 [(parallel [(set (match_dup 0) (const_int -1))
28066 (clobber (reg:CC FLAGS_REG))])]
28067 {
28068 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
28069 operands[0] = gen_lowpart (SImode, operands[0]);
28070 })
28071
28072 ;; Attempt to convert simple lea to add/shift.
28073 ;; These can be created by move expanders.
28074 ;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
28075 ;; relevant lea instructions were already split.
28076
;; Destination is also the first addend: the flag-preserving sum
;; reg = reg + operand 1 is re-emitted as a parallel with a FLAGS_REG
;; clobber.  Requires !TARGET_OPT_AGU and FLAGS_REG dead.
28077 (define_peephole2
28078 [(set (match_operand:SWI48 0 "register_operand")
28079 (plus:SWI48 (match_dup 0)
28080 (match_operand:SWI48 1 "<nonmemory_operand>")))]
28081 "!TARGET_OPT_AGU
28082 && peep2_regno_dead_p (0, FLAGS_REG)"
28083 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
28084 (clobber (reg:CC FLAGS_REG))])])
28085
;; Commuted form: the destination appears as the second addend.  The
;; replacement puts the destination first again, i.e.
;; reg = reg + operand 1, with a FLAGS_REG clobber.  Same gating as the
;; non-commuted form: !TARGET_OPT_AGU and FLAGS_REG dead.
28086 (define_peephole2
28087 [(set (match_operand:SWI48 0 "register_operand")
28088 (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
28089 (match_dup 0)))]
28090 "!TARGET_OPT_AGU
28091 && peep2_regno_dead_p (0, FLAGS_REG)"
28092 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
28093 (clobber (reg:CC FLAGS_REG))])])
28094
;; 64-bit only: a DImode destination set to the zero-extension of an
;; SImode sum whose first addend is the same hard register (compared by
;; REGNO) gets a FLAGS_REG clobber added.  Requires !TARGET_OPT_AGU and
;; FLAGS_REG dead.
28095 (define_peephole2
28096 [(set (match_operand:DI 0 "register_operand")
28097 (zero_extend:DI
28098 (plus:SI (match_operand:SI 1 "register_operand")
28099 (match_operand:SI 2 "nonmemory_operand"))))]
28100 "TARGET_64BIT && !TARGET_OPT_AGU
28101 && REGNO (operands[0]) == REGNO (operands[1])
28102 && peep2_regno_dead_p (0, FLAGS_REG)"
28103 [(parallel [(set (match_dup 0)
28104 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
28105 (clobber (reg:CC FLAGS_REG))])])
28106
;; Commuted zero-extending form: the register sharing its REGNO with the
;; DImode destination is the second addend.  The replacement swaps the
;; addends so that register comes first and adds a FLAGS_REG clobber.
;; Requires TARGET_64BIT, !TARGET_OPT_AGU and FLAGS_REG dead.
28107 (define_peephole2
28108 [(set (match_operand:DI 0 "register_operand")
28109 (zero_extend:DI
28110 (plus:SI (match_operand:SI 1 "nonmemory_operand")
28111 (match_operand:SI 2 "register_operand"))))]
28112 "TARGET_64BIT && !TARGET_OPT_AGU
28113 && REGNO (operands[0]) == REGNO (operands[2])
28114 && peep2_regno_dead_p (0, FLAGS_REG)"
28115 [(parallel [(set (match_dup 0)
28116 (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
28117 (clobber (reg:CC FLAGS_REG))])])
28118
;; In-place multiplication of a register by a constant that is a power
;; of two (pow2p_hwi) becomes a left shift with a FLAGS_REG clobber.
;; The preparation statement replaces operand 1 by exact_log2 of the
;; multiplier, i.e. the shift count.  Requires FLAGS_REG dead.
28119 (define_peephole2
28120 [(set (match_operand:SWI48 0 "register_operand")
28121 (mult:SWI48 (match_dup 0)
28122 (match_operand:SWI48 1 "const_int_operand")))]
28123 "pow2p_hwi (INTVAL (operands[1]))
28124 && peep2_regno_dead_p (0, FLAGS_REG)"
28125 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
28126 (clobber (reg:CC FLAGS_REG))])]
28127 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
28128
;; 64-bit only: zero-extended SImode multiplication by a power-of-two
;; constant, where source and DImode destination share a REGNO, becomes
;; a zero-extended SImode left shift with a FLAGS_REG clobber.  Operand 2
;; is replaced by exact_log2 of the multiplier.  Requires FLAGS_REG dead.
28129 (define_peephole2
28130 [(set (match_operand:DI 0 "register_operand")
28131 (zero_extend:DI
28132 (mult:SI (match_operand:SI 1 "register_operand")
28133 (match_operand:SI 2 "const_int_operand"))))]
28134 "TARGET_64BIT
28135 && pow2p_hwi (INTVAL (operands[2]))
28136 && REGNO (operands[0]) == REGNO (operands[1])
28137 && peep2_regno_dead_p (0, FLAGS_REG)"
28138 [(parallel [(set (match_dup 0)
28139 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
28140 (clobber (reg:CC FLAGS_REG))])]
28141 "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
28142
28143 ;; The ESP adjustments can be done by the push and pop instructions. Resulting
28144 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
28145 ;; On many CPUs it is also faster, since special hardware to avoid esp
28146 ;; dependencies is present.
28147
28148 ;; While some of these conversions may be done using splitters, we use
28149 ;; peepholes in order to allow combine_stack_adjustments pass to see
28150 ;; nonobfuscated RTL.
28151
28152 ;; Convert prologue esp subtractions to push.
28153 ;; We need a register to push. In order to keep verify_flow_info happy we have
28154 ;; two choices:
28155 ;; - use a scratch register and clobber it in order to avoid dependencies
28156 ;; - use an already live register
28157 ;; We can't use the second way right now, since there is no reliable way to
28158 ;; verify that a given register is live. The first choice will also most likely
28159 ;; result in fewer dependencies. At the point of esp adjustments it is very likely
28160 ;; that call-clobbered registers are dead. We may want to use the base pointer as
28161 ;; an alternative when no register is available later.
28162
;; Stack-pointer adjustment by exactly minus one word, in the form that
;; also clobbers (mem:BLK (scratch)), becomes a clobber of a scratch
;; register followed by a push of that register (pre_dec store) which
;; keeps the BLK memory clobber.  Needs an available word-mode register
;; (match_scratch), TARGET_SINGLE_PUSH or size optimization, and
;; ix86_red_zone_used to be false.
28163 (define_peephole2
28164 [(match_scratch:W 1 "r")
28165 (parallel [(set (reg:P SP_REG)
28166 (plus:P (reg:P SP_REG)
28167 (match_operand:P 0 "const_int_operand")))
28168 (clobber (reg:CC FLAGS_REG))
28169 (clobber (mem:BLK (scratch)))])]
28170 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
28171 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
28172 && !ix86_red_zone_used"
28173 [(clobber (match_dup 1))
28174 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
28175 (clobber (mem:BLK (scratch)))])])
28176
;; Two-word variant of the prologue push conversion: an adjustment of
;; minus two words (with the BLK memory clobber) becomes a scratch
;; clobber followed by two pushes of the same scratch register; the
;; second push carries the BLK memory clobber.  Gated by
;; TARGET_DOUBLE_PUSH or size optimization, and ix86_red_zone_used false.
28177 (define_peephole2
28178 [(match_scratch:W 1 "r")
28179 (parallel [(set (reg:P SP_REG)
28180 (plus:P (reg:P SP_REG)
28181 (match_operand:P 0 "const_int_operand")))
28182 (clobber (reg:CC FLAGS_REG))
28183 (clobber (mem:BLK (scratch)))])]
28184 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
28185 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
28186 && !ix86_red_zone_used"
28187 [(clobber (match_dup 1))
28188 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
28189 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
28190 (clobber (mem:BLK (scratch)))])])
28191
28192 ;; Convert esp subtractions to push.
;; Same one-word conversion for the adjustment form without the BLK
;; memory clobber: the result is a scratch clobber plus a single plain
;; push.  Gated by TARGET_SINGLE_PUSH or size optimization, and
;; ix86_red_zone_used false.
28193 (define_peephole2
28194 [(match_scratch:W 1 "r")
28195 (parallel [(set (reg:P SP_REG)
28196 (plus:P (reg:P SP_REG)
28197 (match_operand:P 0 "const_int_operand")))
28198 (clobber (reg:CC FLAGS_REG))])]
28199 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
28200 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
28201 && !ix86_red_zone_used"
28202 [(clobber (match_dup 1))
28203 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
28204
;; Two-word conversion for the adjustment form without the BLK memory
;; clobber: a scratch clobber followed by two plain pushes of the same
;; scratch register.  Gated by TARGET_DOUBLE_PUSH or size optimization,
;; and ix86_red_zone_used false.
28205 (define_peephole2
28206 [(match_scratch:W 1 "r")
28207 (parallel [(set (reg:P SP_REG)
28208 (plus:P (reg:P SP_REG)
28209 (match_operand:P 0 "const_int_operand")))
28210 (clobber (reg:CC FLAGS_REG))])]
28211 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
28212 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
28213 && !ix86_red_zone_used"
28214 [(clobber (match_dup 1))
28215 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
28216 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
28217
28218 ;; Convert epilogue deallocator to pop.
;; Stack-pointer adjustment by exactly plus one word, in the form that
;; clobbers (mem:BLK (scratch)), becomes a pop (post_inc load) into a
;; scratch register, keeping the BLK memory clobber.  Gated by
;; TARGET_SINGLE_POP or size optimization.
28219 (define_peephole2
28220 [(match_scratch:W 1 "r")
28221 (parallel [(set (reg:P SP_REG)
28222 (plus:P (reg:P SP_REG)
28223 (match_operand:P 0 "const_int_operand")))
28224 (clobber (reg:CC FLAGS_REG))
28225 (clobber (mem:BLK (scratch)))])]
28226 "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
28227 && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
28228 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
28229 (clobber (mem:BLK (scratch)))])])
28230
28231 ;; Two pops case is tricky, since pop causes dependency
28232 ;; on destination register. We use two registers if available.
;; Two-word epilogue form with two distinct scratch registers: the
;; adjustment becomes one pop into scratch 1 (keeping the BLK memory
;; clobber) and one pop into scratch 2.  Gated by TARGET_DOUBLE_POP or
;; size optimization.
28233 (define_peephole2
28234 [(match_scratch:W 1 "r")
28235 (match_scratch:W 2 "r")
28236 (parallel [(set (reg:P SP_REG)
28237 (plus:P (reg:P SP_REG)
28238 (match_operand:P 0 "const_int_operand")))
28239 (clobber (reg:CC FLAGS_REG))
28240 (clobber (mem:BLK (scratch)))])]
28241 "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
28242 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
28243 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
28244 (clobber (mem:BLK (scratch)))])
28245 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
28246
;; Two-word epilogue form needing only one scratch register: both pops
;; target the same register.  Used only when optimizing for size.
28247 (define_peephole2
28248 [(match_scratch:W 1 "r")
28249 (parallel [(set (reg:P SP_REG)
28250 (plus:P (reg:P SP_REG)
28251 (match_operand:P 0 "const_int_operand")))
28252 (clobber (reg:CC FLAGS_REG))
28253 (clobber (mem:BLK (scratch)))])]
28254 "optimize_insn_for_size_p ()
28255 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
28256 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
28257 (clobber (mem:BLK (scratch)))])
28258 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
28259
28260 ;; Convert esp additions to pop.
;; One-word adjustment without the BLK memory clobber becomes a single
;; pop into a scratch register.  The only condition is the adjustment
;; size; there is no tuning or size-optimization gate on this form.
28261 (define_peephole2
28262 [(match_scratch:W 1 "r")
28263 (parallel [(set (reg:P SP_REG)
28264 (plus:P (reg:P SP_REG)
28265 (match_operand:P 0 "const_int_operand")))
28266 (clobber (reg:CC FLAGS_REG))])]
28267 "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
28268 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
28269
28270 ;; Two pops case is tricky, since pop causes dependency
28271 ;; on destination register. We use two registers if available.
;; Two-word adjustment without the BLK memory clobber, with two scratch
;; registers available: becomes two pops into different registers.  The
;; only condition is the adjustment size.
28272 (define_peephole2
28273 [(match_scratch:W 1 "r")
28274 (match_scratch:W 2 "r")
28275 (parallel [(set (reg:P SP_REG)
28276 (plus:P (reg:P SP_REG)
28277 (match_operand:P 0 "const_int_operand")))
28278 (clobber (reg:CC FLAGS_REG))])]
28279 "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
28280 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
28281 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
28282
;; Two-word adjustment without the BLK memory clobber when only one
;; scratch register is requested: both pops go to the same register.
;; Used only when optimizing for size.
28283 (define_peephole2
28284 [(match_scratch:W 1 "r")
28285 (parallel [(set (reg:P SP_REG)
28286 (plus:P (reg:P SP_REG)
28287 (match_operand:P 0 "const_int_operand")))
28288 (clobber (reg:CC FLAGS_REG))])]
28289 "optimize_insn_for_size_p ()
28290 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
28291 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
28292 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
28293 \f
28294 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
28295 ;; required and register dies. Similarly for 128 to -128.
;; Matches a flags-setting compare of a register against a constant.
;; Accepted constants are incdec_operand values (when compare/branch
;; fusion is not targeted or when optimizing for size) or 128 (when
;; fusion is not targeted).  The insn must also satisfy
;; ix86_match_ccmode for CCGCmode and the peep2_reg_dead_p check on the
;; compared register.  The replacement is the same compare placed in a
;; parallel with a clobber of that register.
28296 (define_peephole2
28297 [(set (match_operand 0 "flags_reg_operand")
28298 (match_operator 1 "compare_operator"
28299 [(match_operand 2 "register_operand")
28300 (match_operand 3 "const_int_operand")]))]
28301 "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
28302 && incdec_operand (operands[3], GET_MODE (operands[3])))
28303 || (!TARGET_FUSE_CMP_AND_BRANCH
28304 && INTVAL (operands[3]) == 128))
28305 && ix86_match_ccmode (insn, CCGCmode)
28306 && peep2_reg_dead_p (1, operands[2])"
28307 [(parallel [(set (match_dup 0)
28308 (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
28309 (clobber (match_dup 2))])])
28310 \f
28311 ;; Convert imul by three, five and nine into lea
;; Register source: reg0 = reg1 * N with N accepted by const359_operand
;; is rewritten as reg0 = reg1 * (N - 1) + reg1 without a flags clobber.
;; The preparation statement decrements operand 2 to obtain the scale.
;; Enabled when !TARGET_PARTIAL_REG_STALL, or for SImode, or when the
;; function is optimized for size.
28312 (define_peephole2
28313 [(parallel
28314 [(set (match_operand:SWI48 0 "register_operand")
28315 (mult:SWI48 (match_operand:SWI48 1 "register_operand")
28316 (match_operand:SWI48 2 "const359_operand")))
28317 (clobber (reg:CC FLAGS_REG))])]
28318 "!TARGET_PARTIAL_REG_STALL
28319 || <MODE>mode == SImode
28320 || optimize_function_for_size_p (cfun)"
28321 [(set (match_dup 0)
28322 (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
28323 (match_dup 1)))]
28324 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
28325
;; nonimmediate_operand source: the source is first copied into the
;; destination, then the destination is rewritten as
;; reg0 * (N - 1) + reg0.  Only when optimizing for speed, and only if
;; !TARGET_PARTIAL_REG_STALL or the mode is SImode.
28326 (define_peephole2
28327 [(parallel
28328 [(set (match_operand:SWI48 0 "register_operand")
28329 (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
28330 (match_operand:SWI48 2 "const359_operand")))
28331 (clobber (reg:CC FLAGS_REG))])]
28332 "optimize_insn_for_speed_p ()
28333 && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
28334 [(set (match_dup 0) (match_dup 1))
28335 (set (match_dup 0)
28336 (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
28337 (match_dup 0)))]
28338 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
28339
28340 ;; imul $32bit_imm, mem, reg is vector decoded, while
28341 ;; imul $32bit_imm, reg, reg is direct decoded.
;; Splits a multiply of a memory operand by an immediate into a load of
;; the memory operand into scratch register 3 followed by a
;; register-by-immediate multiply.  Applies when
;; TARGET_SLOW_IMUL_IMM32_MEM is set, optimizing for speed, and the
;; immediate does not satisfy constraint K.
28342 (define_peephole2
28343 [(match_scratch:SWI48 3 "r")
28344 (parallel [(set (match_operand:SWI48 0 "register_operand")
28345 (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
28346 (match_operand:SWI48 2 "immediate_operand")))
28347 (clobber (reg:CC FLAGS_REG))])]
28348 "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
28349 && !satisfies_constraint_K (operands[2])"
28350 [(set (match_dup 3) (match_dup 1))
28351 (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
28352 (clobber (reg:CC FLAGS_REG))])])
28353
;; 64-bit zero-extending variant of the memory-by-immediate split: the
;; SImode memory operand is loaded into an SImode scratch register, then
;; the zero-extended SImode multiply uses that register.  Same gating
;; (TARGET_SLOW_IMUL_IMM32_MEM, speed, immediate not matching K) plus
;; TARGET_64BIT.
28354 (define_peephole2
28355 [(match_scratch:SI 3 "r")
28356 (parallel [(set (match_operand:DI 0 "register_operand")
28357 (zero_extend:DI
28358 (mult:SI (match_operand:SI 1 "memory_operand")
28359 (match_operand:SI 2 "immediate_operand"))))
28360 (clobber (reg:CC FLAGS_REG))])]
28361 "TARGET_64BIT
28362 && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
28363 && !satisfies_constraint_K (operands[2])"
28364 [(set (match_dup 3) (match_dup 1))
28365 (parallel [(set (match_dup 0)
28366 (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
28367 (clobber (reg:CC FLAGS_REG))])])
28368
28369 ;; imul $8/16bit_imm, regmem, reg is vector decoded.
28370 ;; Convert it into imul reg, reg
28371 ;; It would be better to force assembler to encode instruction using long
28372 ;; immediate, but there is apparently no way to do so.
;; Multiply by a constant that satisfies constraint K: the constant is
;; moved into scratch register 3 and the multiply becomes
;; reg0 = reg0 * scratch.  Because the new multiply reads operand 0, the
;; preparation code first emits a move of operand 1 into operand 0
;; unless the two are already rtx_equal_p.  The scratch is requested
;; after the matched insn (match_scratch follows the parallel).
;; Applies with TARGET_SLOW_IMUL_IMM8 when optimizing for speed.
28373 (define_peephole2
28374 [(parallel [(set (match_operand:SWI248 0 "register_operand")
28375 (mult:SWI248
28376 (match_operand:SWI248 1 "nonimmediate_operand")
28377 (match_operand:SWI248 2 "const_int_operand")))
28378 (clobber (reg:CC FLAGS_REG))])
28379 (match_scratch:SWI248 3 "r")]
28380 "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
28381 && satisfies_constraint_K (operands[2])"
28382 [(set (match_dup 3) (match_dup 2))
28383 (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
28384 (clobber (reg:CC FLAGS_REG))])]
28385 {
28386 if (!rtx_equal_p (operands[0], operands[1]))
28387 emit_move_insn (operands[0], operands[1]);
28388 })
28389
28390 ;; After splitting up read-modify operations, array accesses with memory
28391 ;; operands might end up in form:
28392 ;; sall $2, %eax
28393 ;; movl 4(%esp), %edx
28394 ;; addl %edx, %eax
28395 ;; instead of pre-splitting:
28396 ;; sall $2, %eax
28397 ;; addl 4(%esp), %eax
28398 ;; Turn it into:
28399 ;; movl 4(%esp), %edx
28400 ;; leal (%edx,%eax,4), %eax
28401
;; Matched window: a left shift of register 1 by a constant 1..3 into
;; register 0, followed by an add of operand 4 to register 0 giving
;; register 3.  Register 0 must either be the final destination or pass
;; the peep2_reg_dead_p check, and must not be mentioned in operand 4
;; because the load is moved ahead of the shift.
;; Replacement: scratch register 5 is loaded with operand 4, then the
;; destination is set to base + index * scale, where base is the
;; scratch, index is register 1 and scale is 1 << shift count.  The
;; address arithmetic is built in word_mode; the result mode is DImode
;; for a DImode source and SImode otherwise, using a SUBREG of the
;; word_mode sum when that differs from word_mode.  The preparation
;; code therefore rewrites operands 0, 1 and 5 before the two output
;; sets are generated.
28402 (define_peephole2
28403 [(match_scratch:W 5 "r")
28404 (parallel [(set (match_operand 0 "register_operand")
28405 (ashift (match_operand 1 "register_operand")
28406 (match_operand 2 "const_int_operand")))
28407 (clobber (reg:CC FLAGS_REG))])
28408 (parallel [(set (match_operand 3 "register_operand")
28409 (plus (match_dup 0)
28410 (match_operand 4 "x86_64_general_operand")))
28411 (clobber (reg:CC FLAGS_REG))])]
28412 "IN_RANGE (INTVAL (operands[2]), 1, 3)
28413 /* Validate MODE for lea. */
28414 && ((!TARGET_PARTIAL_REG_STALL
28415 && (GET_MODE (operands[0]) == QImode
28416 || GET_MODE (operands[0]) == HImode))
28417 || GET_MODE (operands[0]) == SImode
28418 || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
28419 && (rtx_equal_p (operands[0], operands[3])
28420 || peep2_reg_dead_p (2, operands[0]))
28421 /* We reorder load and the shift. */
28422 && !reg_overlap_mentioned_p (operands[0], operands[4])"
28423 [(set (match_dup 5) (match_dup 4))
28424 (set (match_dup 0) (match_dup 1))]
28425 {
28426 machine_mode op1mode = GET_MODE (operands[1]);
28427 machine_mode mode = op1mode == DImode ? DImode : SImode;
28428 int scale = 1 << INTVAL (operands[2]);
28429 rtx index = gen_lowpart (word_mode, operands[1]);
28430 rtx base = gen_lowpart (word_mode, operands[5]);
28431 rtx dest = gen_lowpart (mode, operands[3]);
28432
28433 operands[1] = gen_rtx_PLUS (word_mode, base,
28434 gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
28435 if (mode != word_mode)
28436 operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
28437
28438 operands[5] = base;
28439 if (op1mode != word_mode)
28440 operands[5] = gen_lowpart (op1mode, operands[5]);
28441
28442 operands[0] = dest;
28443 })
28444 \f
28445 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
28446 ;; That, however, is usually mapped by the OS to SIGSEGV, which is often
28447 ;; caught for use by garbage collectors and the like. Using an insn that
28448 ;; maps to SIGILL makes it more likely the program will rightfully die.
28449 ;; Keeping with tradition, "6" is in honor of #UD.
;; Unconditional trap: (trap_if (const_int 1) (const_int 6)).
;; Emits the "ud2" mnemonic when HAVE_AS_IX86_UD2 is defined; otherwise
;; the opcode is emitted as data through ASM_SHORT "0x0b0f".  The insn
;; is always enabled (empty condition) and is declared 2 bytes long.
28450 (define_insn "trap"
28451 [(trap_if (const_int 1) (const_int 6))]
28452 ""
28453 {
28454 #ifdef HAVE_AS_IX86_UD2
28455 return "ud2";
28456 #else
28457 return ASM_SHORT "0x0b0f";
28458 #endif
28459 }
28460 [(set_attr "type" "other")
28461
;; Same output as the "trap" insn, but represented as an
;; unspec_volatile (UNSPECV_UD2) instead of a trap_if.  Emits "ud2" when
;; HAVE_AS_IX86_UD2 is defined, otherwise ASM_SHORT "0x0b0f"; length 2.
28462 (define_insn "ud2"
28463 [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
28464 ""
28465 {
28466 #ifdef HAVE_AS_IX86_UD2
28467 return "ud2";
28468 #else
28469 return ASM_SHORT "0x0b0f";
28470 #endif
28471 }
28472 [(set_attr "length" "2")])
28473
;; Expander for the standard "prefetch" pattern.
;;   operand 0: address to prefetch
;;   operand 1: const_int, nonzero for a write prefetch
;;   operand 2: const_int locality hint, asserted to be in 0..3
;; The preparation code only adjusts operands 1 and 2 so that the
;; resulting RTL has either a zero write flag or a locality of 3:
;;   - write prefetch with PRFCHW, or with 3dNOW! on a non-SSE2 target:
;;     locality is forced to 3;
;;   - otherwise, if SSE prefetch exists: a write request is downgraded
;;     to a read (operand 1 becomes 0) and the locality is kept;
;;   - otherwise 3dNOW! must be available and locality is forced to 3.
(define_expand "prefetch"
  [(prefetch (match_operand 0 "address_operand")
	     (match_operand:SI 1 "const_int_operand")
	     (match_operand:SI 2 "const_int_operand"))]
  "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW"
{
  const bool want_write = operands[1] != const0_rtx;

  gcc_assert (IN_RANGE (INTVAL (operands[2]), 0, 3));

  /* The 3dNOW-style prefetch is chosen when a write prefetch is wanted
     and the SSE form cannot express it (PRFCHW, or non-SSE2 Athlon),
     or when no SSE prefetch exists at all (K6).  In every other case
     the SSE prefetch is kept because it can encode the locality.  */
  if (want_write
      && (TARGET_PRFCHW || (TARGET_3DNOW && !TARGET_SSE2)))
    operands[2] = GEN_INT (3);
  else if (TARGET_PREFETCH_SSE)
    {
      /* SSE prefetch has no write variant; fall back to a read.  */
      if (want_write)
	operands[1] = const0_rtx;
    }
  else
    {
      gcc_assert (TARGET_3DNOW);
      operands[2] = GEN_INT (3);
    }
})
28515
;; Read prefetch (write flag fixed to 0) in the SSE encoding.
;; Operand 1 is the locality hint 0..3 and selects, in that order,
;; prefetchnta, prefetcht2, prefetcht1 or prefetcht0.
(define_insn "*prefetch_sse"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (const_int 0)
	     (match_operand:SI 1 "const_int_operand"))]
  "TARGET_PREFETCH_SSE"
{
  /* Output templates indexed by locality hint.  */
  static const char * const insn_for_hint[4] = {
    "prefetchnta\t%a0",
    "prefetcht2\t%a0",
    "prefetcht1\t%a0",
    "prefetcht0\t%a0"
  };
  const int hint = INTVAL (operands[1]);

  gcc_assert (IN_RANGE (hint, 0, 3));
  return insn_for_hint[hint];
}
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "prefetch")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
28536
;; Prefetch with locality fixed to 3 in the 3dNOW!/PRFCHW encoding.
;; Operand 1 is the write flag: zero emits "prefetch", anything else
;; emits "prefetchw".
(define_insn "*prefetch_3dnow"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (match_operand:SI 1 "const_int_operand")
	     (const_int 3))]
  "TARGET_3DNOW || TARGET_PRFCHW"
{
  const bool is_read = operands[1] == const0_rtx;

  return is_read ? "prefetch\t%a0" : "prefetchw\t%a0";
}
  [(set_attr "type" "mmx")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
28552
;; Instruction prefetch (UNSPECV_PREFETCHI), 64-bit only.
;; Operand 0 is a local function symbol, operand 1 a locality hint that
;; is asserted to be 2 or 3: 2 emits prefetchit1, 3 emits prefetchit0.
(define_insn "prefetchi"
  [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
		     (match_operand:SI 1 "const_int_operand")]
		    UNSPECV_PREFETCHI)]
  "TARGET_PREFETCHI && TARGET_64BIT"
{
  const int hint = INTVAL (operands[1]);

  gcc_assert (IN_RANGE (hint, 2, 3));

  /* Only hints 2 and 3 reach this point.  */
  return hint == 2 ? "prefetchit1\t%a0" : "prefetchit0\t%a0";
}
  [(set_attr "type" "sse")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
28572
;; CRC32 accumulation for the SWI124 source modes.  The SImode result
;; (operand 0) is tied to the SImode accumulator input (operand 1) via
;; the "0" constraint; operand 2 is the register or memory data operand.
;; The attribute section requests a data16 prefix when operand 2 is
;; HImode and a REX prefix when it is a QImode ext_QIreg_operand.
28573 (define_insn "sse4_2_crc32<mode>"
28574 [(set (match_operand:SI 0 "register_operand" "=r")
28575 (unspec:SI
28576 [(match_operand:SI 1 "register_operand" "0")
28577 (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
28578 UNSPEC_CRC32))]
28579 "TARGET_CRC32"
28580 "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
28581 [(set_attr "type" "sselog1")
28582 (set_attr "prefix_rep" "1")
28583 (set_attr "prefix_extra" "1")
28584 (set (attr "prefix_data16")
28585 (if_then_else (match_operand:HI 2)
28586 (const_string "1")
28587 (const_string "*")))
28588 (set (attr "prefix_rex")
28589 (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
28590 (const_string "1")
28591 (const_string "*")))
28592 (set_attr "mode" "SI")])
28593
;; CRC32 with a DImode data operand, 64-bit targets only.  The unspec
;; itself is SImode; the DImode destination receives its zero-extension.
;; Operand 0 is tied to the SImode accumulator input through the "0"
;; constraint.
28594 (define_insn "sse4_2_crc32di"
28595 [(set (match_operand:DI 0 "register_operand" "=r")
28596 (zero_extend:DI
28597 (unspec:SI
28598 [(match_operand:SI 1 "register_operand" "0")
28599 (match_operand:DI 2 "nonimmediate_operand" "rm")]
28600 UNSPEC_CRC32)))]
28601 "TARGET_64BIT && TARGET_CRC32"
28602 "crc32{q}\t{%2, %0|%0, %2}"
28603 [(set_attr "type" "sselog1")
28604 (set_attr "prefix_rep" "1")
28605 (set_attr "prefix_extra" "1")
28606 (set_attr "mode" "DI")])
28607
;; RDPMC for 32-bit targets.  The SImode input uses constraint "c" and
;; the DImode result uses constraint "A" (documented in GCC's x86
;; machine constraints as the a/d register pair).  Volatile unspec;
;; declared 2 bytes long.
28608 (define_insn "rdpmc"
28609 [(set (match_operand:DI 0 "register_operand" "=A")
28610 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
28611 UNSPECV_RDPMC))]
28612 "!TARGET_64BIT"
28613 "rdpmc"
28614 [(set_attr "type" "other")
28615 (set_attr "length" "2")])
28616
;; RDPMC for 64-bit targets.  The result is modelled as two separate
;; DImode sets, one constrained to "a" and one to "d", both computed
;; from the same "c"-constrained SImode input (operand 2).
28617 (define_insn "rdpmc_rex64"
28618 [(set (match_operand:DI 0 "register_operand" "=a")
28619 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
28620 UNSPECV_RDPMC))
28621 (set (match_operand:DI 1 "register_operand" "=d")
28622 (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
28623 "TARGET_64BIT"
28624 "rdpmc"
28625 [(set_attr "type" "other")
28626 (set_attr "length" "2")])
28627
;; RDTSC for 32-bit targets: no inputs, one DImode result with
;; constraint "A".  Volatile unspec; declared 2 bytes long.
28628 (define_insn "rdtsc"
28629 [(set (match_operand:DI 0 "register_operand" "=A")
28630 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
28631 "!TARGET_64BIT"
28632 "rdtsc"
28633 [(set_attr "type" "other")
28634 (set_attr "length" "2")])
28635
;; RDTSC for 64-bit targets: the result is modelled as two DImode sets,
;; constrained to "a" and "d" respectively.  Declared 2 bytes long.
28636 (define_insn "rdtsc_rex64"
28637 [(set (match_operand:DI 0 "register_operand" "=a")
28638 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
28639 (set (match_operand:DI 1 "register_operand" "=d")
28640 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
28641 "TARGET_64BIT"
28642 "rdtsc"
28643 [(set_attr "type" "other")
28644 (set_attr "length" "2")])
28645
;; RDTSCP for 32-bit targets: a DImode result with constraint "A" plus
;; an additional SImode result with constraint "c".  Declared 3 bytes
;; long.
28646 (define_insn "rdtscp"
28647 [(set (match_operand:DI 0 "register_operand" "=A")
28648 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
28649 (set (match_operand:SI 1 "register_operand" "=c")
28650 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
28651 "!TARGET_64BIT"
28652 "rdtscp"
28653 [(set_attr "type" "other")
28654 (set_attr "length" "3")])
28655
;; RDTSCP for 64-bit targets: two DImode results constrained to "a" and
;; "d", plus an SImode result constrained to "c".  Declared 3 bytes long.
28656 (define_insn "rdtscp_rex64"
28657 [(set (match_operand:DI 0 "register_operand" "=a")
28658 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
28659 (set (match_operand:DI 1 "register_operand" "=d")
28660 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
28661 (set (match_operand:SI 2 "register_operand" "=c")
28662 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
28663 "TARGET_64BIT"
28664 "rdtscp"
28665 [(set_attr "type" "other")
28666 (set_attr "length" "3")])
28667
28668 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28669 ;;
28670 ;; FXSR, XSAVE and XSAVEOPT instructions
28671 ;;
28672 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28673
;; FXSAVE: stores to a BLKmode memory operand; modelled as a volatile
;; unspec so it is not reordered or deleted.  The length is computed as
;; the default address length of the insn plus 3.
28674 (define_insn "fxsave"
28675 [(set (match_operand:BLK 0 "memory_operand" "=m")
28676 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
28677 "TARGET_FXSR"
28678 "fxsave\t%0"
28679 [(set_attr "type" "other")
28680 (set_attr "memory" "store")
28681 (set (attr "length")
28682 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
28683
;; FXSAVE64, 64-bit targets only.  Differs from "fxsave" in the memory
;; constraint ("jm"), the "addr" attribute set to "gpr16", and a length
;; of the default address length plus 4.
28684 (define_insn "fxsave64"
28685 [(set (match_operand:BLK 0 "memory_operand" "=jm")
28686 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
28687 "TARGET_64BIT && TARGET_FXSR"
28688 "fxsave64\t%0"
28689 [(set_attr "type" "other")
28690 (set_attr "addr" "gpr16")
28691 (set_attr "memory" "store")
28692 (set (attr "length")
28693 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
28694
;; FXRSTOR: reads a BLKmode memory operand; the pattern has no SET, only
;; a volatile unspec.  Length is the default address length plus 3.
28695 (define_insn "fxrstor"
28696 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
28697 UNSPECV_FXRSTOR)]
28698 "TARGET_FXSR"
28699 "fxrstor\t%0"
28700 [(set_attr "type" "other")
28701 (set_attr "memory" "load")
28702 (set (attr "length")
28703 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
28704
;; FXRSTOR64, 64-bit targets only.  Differs from "fxrstor" in the memory
;; constraint ("jm"), the "addr" attribute set to "gpr16", and a length
;; of the default address length plus 4.
28705 (define_insn "fxrstor64"
28706 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "jm")]
28707 UNSPECV_FXRSTOR64)]
28708 "TARGET_64BIT && TARGET_FXSR"
28709 "fxrstor64\t%0"
28710 [(set_attr "type" "other")
28711 (set_attr "addr" "gpr16")
28712 (set_attr "memory" "load")
28713 (set (attr "length")
28714 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
28715
;; Iterates over the non-64 XSAVE-family unspecs.  Plain XSAVE has no
;; extra condition; each other variant is enabled by its own target
;; flag.
28716 (define_int_iterator ANY_XSAVE
28717 [UNSPECV_XSAVE
28718 (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")
28719 (UNSPECV_XSAVEC "TARGET_XSAVEC")
28720 (UNSPECV_XSAVES "TARGET_XSAVES")])
28721
;; Iterates over the "64" XSAVE-family unspecs, with the same per-variant
;; target-flag conditions as ANY_XSAVE.
28722 (define_int_iterator ANY_XSAVE64
28723 [UNSPECV_XSAVE64
28724 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")
28725 (UNSPECV_XSAVEC64 "TARGET_XSAVEC")
28726 (UNSPECV_XSAVES64 "TARGET_XSAVES")])
28727
;; Maps each XSAVE-family unspec to the string used for both the insn
;; name and the assembler mnemonic.  The "64" unspecs carry the "64"
;; suffix in the mapped string itself.
28728 (define_int_attr xsave
28729 [(UNSPECV_XSAVE "xsave")
28730 (UNSPECV_XSAVE64 "xsave64")
28731 (UNSPECV_XSAVEOPT "xsaveopt")
28732 (UNSPECV_XSAVEOPT64 "xsaveopt64")
28733 (UNSPECV_XSAVEC "xsavec")
28734 (UNSPECV_XSAVEC64 "xsavec64")
28735 (UNSPECV_XSAVES "xsaves")
28736 (UNSPECV_XSAVES64 "xsaves64")])
28737
;; Iterates over the non-64 XRSTOR unspecs; XRSTORS additionally
;; requires TARGET_XSAVES.
28738 (define_int_iterator ANY_XRSTOR
28739 [UNSPECV_XRSTOR
28740 (UNSPECV_XRSTORS "TARGET_XSAVES")])
28741
;; Iterates over the "64" XRSTOR unspecs; XRSTORS64 additionally
;; requires TARGET_XSAVES.
28742 (define_int_iterator ANY_XRSTOR64
28743 [UNSPECV_XRSTOR64
28744 (UNSPECV_XRSTORS64 "TARGET_XSAVES")])
28745
;; Maps each XRSTOR unspec to its base mnemonic.  Unlike the xsave
;; attribute, the "64" unspecs map to the same string as the plain
;; ones; the "<xrstor>64" pattern appends the "64" suffix itself.
28746 (define_int_attr xrstor
28747 [(UNSPECV_XRSTOR "xrstor")
28748 (UNSPECV_XRSTOR64 "xrstor")
28749 (UNSPECV_XRSTORS "xrstors")
28750 (UNSPECV_XRSTORS64 "xrstors")])
28751
;; XSAVE family for 32-bit targets, one insn per ANY_XSAVE entry.
;; Stores to a BLKmode memory operand; the DImode input uses constraint
;; "A" (presumably the EDX:EAX component mask -- confirm against the
;; instruction reference).  Only the memory operand appears in the
;; output template.  Length is the default address length plus 3.
28752 (define_insn "<xsave>"
28753 [(set (match_operand:BLK 0 "memory_operand" "=m")
28754 (unspec_volatile:BLK
28755 [(match_operand:DI 1 "register_operand" "A")]
28756 ANY_XSAVE))]
28757 "!TARGET_64BIT && TARGET_XSAVE"
28758 "<xsave>\t%0"
28759 [(set_attr "type" "other")
28760 (set_attr "memory" "store")
28761 (set (attr "length")
28762 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
28763
;; Non-64 XSAVE-family mnemonics on 64-bit targets (ANY_XSAVE entries,
;; insn names suffixed with _rex64).  The register input is split into
;; two SImode operands constrained to "a" and "d".  Memory constraint is
;; "jm" with "addr" set to "gpr16"; length is the default address length
;; plus 3.
28764 (define_insn "<xsave>_rex64"
28765 [(set (match_operand:BLK 0 "memory_operand" "=jm")
28766 (unspec_volatile:BLK
28767 [(match_operand:SI 1 "register_operand" "a")
28768 (match_operand:SI 2 "register_operand" "d")]
28769 ANY_XSAVE))]
28770 "TARGET_64BIT && TARGET_XSAVE"
28771 "<xsave>\t%0"
28772 [(set_attr "type" "other")
28773 (set_attr "memory" "store")
28774 (set_attr "addr" "gpr16")
28775 (set (attr "length")
28776 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
28777
;; "64" XSAVE-family mnemonics (ANY_XSAVE64 entries; the xsave attribute
;; already yields names such as xsave64).  Same operand layout as the
;; _rex64 patterns, but the length is the default address length plus 4.
28778 (define_insn "<xsave>"
28779 [(set (match_operand:BLK 0 "memory_operand" "=jm")
28780 (unspec_volatile:BLK
28781 [(match_operand:SI 1 "register_operand" "a")
28782 (match_operand:SI 2 "register_operand" "d")]
28783 ANY_XSAVE64))]
28784 "TARGET_64BIT && TARGET_XSAVE"
28785 "<xsave>\t%0"
28786 [(set_attr "type" "other")
28787 (set_attr "memory" "store")
28788 (set_attr "addr" "gpr16")
28789 (set (attr "length")
28790 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
28791
;; XRSTOR family for 32-bit targets, one insn per ANY_XRSTOR entry.
;; Reads a BLKmode memory operand and a DImode register input with
;; constraint "A"; there is no SET, only the volatile unspec.  Length is
;; the default address length plus 3.
28792 (define_insn "<xrstor>"
28793 [(unspec_volatile:BLK
28794 [(match_operand:BLK 0 "memory_operand" "m")
28795 (match_operand:DI 1 "register_operand" "A")]
28796 ANY_XRSTOR)]
28797 "!TARGET_64BIT && TARGET_XSAVE"
28798 "<xrstor>\t%0"
28799 [(set_attr "type" "other")
28800 (set_attr "memory" "load")
28801 (set (attr "length")
28802 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
28803
;; Non-64 XRSTOR mnemonics on 64-bit targets (ANY_XRSTOR entries, insn
;; names suffixed with _rex64).  The register input is split into two
;; SImode operands constrained to "a" and "d".  Memory constraint is
;; "jm" with "addr" set to "gpr16"; length is the default address length
;; plus 3.
28804 (define_insn "<xrstor>_rex64"
28805 [(unspec_volatile:BLK
28806 [(match_operand:BLK 0 "memory_operand" "jm")
28807 (match_operand:SI 1 "register_operand" "a")
28808 (match_operand:SI 2 "register_operand" "d")]
28809 ANY_XRSTOR)]
28810 "TARGET_64BIT && TARGET_XSAVE"
28811 "<xrstor>\t%0"
28812 [(set_attr "type" "other")
28813 (set_attr "memory" "load")
28814 (set_attr "addr" "gpr16")
28815 (set (attr "length")
28816 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
28817
;; "64" XRSTOR mnemonics (ANY_XRSTOR64 entries).  The xrstor attribute
;; yields only the base name, so both the insn name and the output
;; template append "64" here.  Same operand layout as the _rex64
;; pattern; length is the default address length plus 4.
28818 (define_insn "<xrstor>64"
28819 [(unspec_volatile:BLK
28820 [(match_operand:BLK 0 "memory_operand" "jm")
28821 (match_operand:SI 1 "register_operand" "a")
28822 (match_operand:SI 2 "register_operand" "d")]
28823 ANY_XRSTOR64)]
28824 "TARGET_64BIT && TARGET_XSAVE"
28825 "<xrstor>64\t%0"
28826 [(set_attr "type" "other")
28827 (set_attr "memory" "load")
28828 (set_attr "addr" "gpr16")
28829 (set (attr "length")
28830 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
28831
;; XSETBV for 32-bit targets.  Operand 0 (SImode, constraint "c") and
;; operand 1 (DImode, constraint "A") are the register inputs; the
;; pattern has no SET, only the volatile unspec.
;; The instruction has no explicit operands and a fixed 3-byte encoding
;; (0F 01 D1), so the length is stated explicitly, as the rdtsc/rdpmc/
;; rdtscp patterns in this file do, instead of falling back to the
;; generic estimate for type "other".
(define_insn "xsetbv"
  [(unspec_volatile:SI
	 [(match_operand:SI 0 "register_operand" "c")
	  (match_operand:DI 1 "register_operand" "A")]
	 UNSPECV_XSETBV)]
  "!TARGET_64BIT && TARGET_XSAVE"
  "xsetbv"
  [(set_attr "type" "other")
   (set_attr "length" "3")])
28840
;; XSETBV for 64-bit targets.  Three SImode register inputs constrained
;; to "c", "a" and "d"; the pattern has no SET, only the volatile
;; unspec.
;; The instruction has no explicit operands and a fixed 3-byte encoding
;; (0F 01 D1), so the length is stated explicitly, as the rdtsc/rdpmc/
;; rdtscp patterns in this file do, instead of falling back to the
;; generic estimate for type "other".
(define_insn "xsetbv_rex64"
  [(unspec_volatile:SI
	 [(match_operand:SI 0 "register_operand" "c")
	  (match_operand:SI 1 "register_operand" "a")
	  (match_operand:SI 2 "register_operand" "d")]
	 UNSPECV_XSETBV)]
  "TARGET_64BIT && TARGET_XSAVE"
  "xsetbv"
  [(set_attr "type" "other")
   (set_attr "length" "3")])
28850
;; 32-bit XGETBV.  The DImode result goes to a register under constraint
;; "A"; the SImode input (operand 1) is under constraint "c".
28851 (define_insn "xgetbv"
28852 [(set (match_operand:DI 0 "register_operand" "=A")
28853 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
28854 UNSPECV_XGETBV))]
28855 "!TARGET_64BIT && TARGET_XSAVE"
28856 "xgetbv"
28857 [(set_attr "type" "other")])
28858
;; 64-bit XGETBV.  The result is modelled as two DImode outputs under
;; constraints "=a" (operand 0) and "=d" (operand 1).  Both are set from
;; an unspec_volatile of the same input, operand 2 (constraint "c").
28859 (define_insn "xgetbv_rex64"
28860 [(set (match_operand:DI 0 "register_operand" "=a")
28861 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
28862 UNSPECV_XGETBV))
28863 (set (match_operand:DI 1 "register_operand" "=d")
28864 (unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))]
28865 "TARGET_64BIT && TARGET_XSAVE"
28866 "xgetbv"
28867 [(set_attr "type" "other")])
28868
28869 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28870 ;;
28871 ;; Floating-point instructions for atomic compound assignments
28872 ;;
28873 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28874
28875 ; Clobber all floating-point registers on environment save and restore
28876 ; to ensure that the TOS value saved at fnstenv is valid after fldenv.
;; Store the x87 environment into the BLKmode memory operand 0.
;; ST0..ST7 are all clobbered (in XFmode).  Length is the address
;; length plus 2.
28877 (define_insn "fnstenv"
28878 [(set (match_operand:BLK 0 "memory_operand" "=m")
28879 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
28880 (clobber (reg:XF ST0_REG))
28881 (clobber (reg:XF ST1_REG))
28882 (clobber (reg:XF ST2_REG))
28883 (clobber (reg:XF ST3_REG))
28884 (clobber (reg:XF ST4_REG))
28885 (clobber (reg:XF ST5_REG))
28886 (clobber (reg:XF ST6_REG))
28887 (clobber (reg:XF ST7_REG))]
28888 "TARGET_80387"
28889 "fnstenv\t%0"
28890 [(set_attr "type" "other")
28891 (set_attr "memory" "store")
28892 (set (attr "length")
28893 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
28894
;; Load the x87 environment from the BLKmode memory operand 0.
;; Mirrors fnstenv: the same ST0..ST7 clobbers and the same length
;; formula, with the memory attribute set to "load".
28895 (define_insn "fldenv"
28896 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
28897 UNSPECV_FLDENV)
28898 (clobber (reg:XF ST0_REG))
28899 (clobber (reg:XF ST1_REG))
28900 (clobber (reg:XF ST2_REG))
28901 (clobber (reg:XF ST3_REG))
28902 (clobber (reg:XF ST4_REG))
28903 (clobber (reg:XF ST5_REG))
28904 (clobber (reg:XF ST6_REG))
28905 (clobber (reg:XF ST7_REG))]
28906 "TARGET_80387"
28907 "fldenv\t%0"
28908 [(set_attr "type" "other")
28909 (set_attr "memory" "load")
28910 (set (attr "length")
28911 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
28912
;; Store the x87 status word into HImode operand 0.  Two alternatives:
;; a register under constraint "a" (memory "none") or a memory
;; destination (memory "store").
28913 (define_insn "fnstsw"
28914 [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
28915 (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
28916 "TARGET_80387"
28917 "fnstsw\t%0"
28918 [(set_attr "type" "other,other")
28919 (set_attr "memory" "none,store")
28920 (set (attr "length")
28921 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
28922
;; Operand-less FNCLEX, modelled as an unspec_volatile.  No memory
;; access; fixed length of 2.
28923 (define_insn "fnclex"
28924 [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
28925 "TARGET_80387"
28926 "fnclex"
28927 [(set_attr "type" "other")
28928 (set_attr "memory" "none")
28929 (set_attr "length" "2")])
28930
28931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28932 ;;
28933 ;; LWP instructions
28934 ;;
28935 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28936
;; LLWPCB with a pointer-mode (P iterator) register operand.
;; The "@" name prefix asks for a mode-parameterized generator function.
;; Fixed length of 5.
28937 (define_insn "@lwp_llwpcb<mode>"
28938 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
28939 UNSPECV_LLWP_INTRINSIC)]
28940 "TARGET_LWP"
28941 "llwpcb\t%0"
28942 [(set_attr "type" "lwp")
28943 (set_attr "mode" "<MODE>")
28944 (set_attr "length" "5")])
28945
;; SLWPCB: sets the pointer-mode register operand 0 from an
;; unspec_volatile with no inputs.  Fixed length of 5.
28946 (define_insn "@lwp_slwpcb<mode>"
28947 [(set (match_operand:P 0 "register_operand" "=r")
28948 (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
28949 "TARGET_LWP"
28950 "slwpcb\t%0"
28951 [(set_attr "type" "lwp")
28952 (set_attr "mode" "<MODE>")
28953 (set_attr "length" "5")])
28954
;; LWPVAL.  Operand 0 is a SWI48 register, operand 1 an SImode register
;; or memory, operand 2 a constant integer.  The template gives both
;; dialect orders ({AT&T|Intel}).  Length is the address length plus 9.
28955 (define_insn "@lwp_lwpval<mode>"
28956 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
28957 (match_operand:SI 1 "nonimmediate_operand" "rm")
28958 (match_operand:SI 2 "const_int_operand")]
28959 UNSPECV_LWPVAL_INTRINSIC)]
28960 "TARGET_LWP"
28961 "lwpval\t{%2, %1, %0|%0, %1, %2}"
28962 [(set_attr "type" "lwp")
28963 (set_attr "mode" "<MODE>")
28964 (set (attr "length")
28965 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
28966
;; LWPINS.  Same operands as lwp_lwpval, but the result is modelled as
;; a set of the flags register in CCCmode.
28967 (define_insn "@lwp_lwpins<mode>"
28968 [(set (reg:CCC FLAGS_REG)
28969 (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
28970 (match_operand:SI 1 "nonimmediate_operand" "rm")
28971 (match_operand:SI 2 "const_int_operand")]
28972 UNSPECV_LWPINS_INTRINSIC))]
28973 "TARGET_LWP"
28974 "lwpins\t{%2, %1, %0|%0, %1, %2}"
28975 [(set_attr "type" "lwp")
28976 (set_attr "mode" "<MODE>")
28977 (set (attr "length")
28978 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
28979
;; Int iterators over the read and write FS/GS-base unspecs, plus the
;; <fsgs> attribute mapping each unspec to its "fs" or "gs" name fragment.
28980 (define_int_iterator RDFSGSBASE
28981 [UNSPECV_RDFSBASE
28982 UNSPECV_RDGSBASE])
28983
28984 (define_int_iterator WRFSGSBASE
28985 [UNSPECV_WRFSBASE
28986 UNSPECV_WRGSBASE])
28987
28988 (define_int_attr fsgs
28989 [(UNSPECV_RDFSBASE "fs")
28990 (UNSPECV_RDGSBASE "gs")
28991 (UNSPECV_WRFSBASE "fs")
28992 (UNSPECV_WRGSBASE "gs")])
28993
;; rdfsbase / rdgsbase: read into a SWI48 register.  64-bit only.
;; Encoding attributes: 0F escape plus a REP prefix.
28994 (define_insn "rd<fsgs>base<mode>"
28995 [(set (match_operand:SWI48 0 "register_operand" "=r")
28996 (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
28997 "TARGET_64BIT && TARGET_FSGSBASE"
28998 "rd<fsgs>base\t%0"
28999 [(set_attr "type" "other")
29000 (set_attr "prefix_0f" "1")
29001 (set_attr "prefix_rep" "1")])
29002
;; wrfsbase / wrgsbase: write from a SWI48 register.  64-bit only.
;; Same encoding attributes as the read patterns.
29003 (define_insn "wr<fsgs>base<mode>"
29004 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
29005 WRFSGSBASE)]
29006 "TARGET_64BIT && TARGET_FSGSBASE"
29007 "wr<fsgs>base\t%0"
29008 [(set_attr "type" "other")
29009 (set_attr "prefix_0f" "1")
29010 (set_attr "prefix_rep" "1")])
29011
;; PTWRITE of a SWI48 register or memory operand.
29012 (define_insn "ptwrite<mode>"
29013 [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
29014 UNSPECV_PTWRITE)]
29015 "TARGET_PTWRITE"
29016 "ptwrite\t%0"
29017 [(set_attr "type" "other")
29018 (set_attr "prefix_0f" "1")
29019 (set_attr "prefix_rep" "1")])
29020
;; RDRAND into a SWI248 register.  The pattern also sets the flags
;; register in CCCmode, so flag consumers see a dependency on this insn.
29021 (define_insn "@rdrand<mode>"
29022 [(set (match_operand:SWI248 0 "register_operand" "=r")
29023 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
29024 (set (reg:CCC FLAGS_REG)
29025 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
29026 "TARGET_RDRND"
29027 "rdrand\t%0"
29028 [(set_attr "type" "other")
29029 (set_attr "prefix_0f" "1")])
29030
;; RDSEED into a SWI248 register.  Same shape as rdrand: a value result
;; plus a CCCmode set of the flags register.
29031 (define_insn "@rdseed<mode>"
29032 [(set (match_operand:SWI248 0 "register_operand" "=r")
29033 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
29034 (set (reg:CCC FLAGS_REG)
29035 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
29036 "TARGET_RDSEED"
29037 "rdseed\t%0"
29038 [(set_attr "type" "other")
29039 (set_attr "prefix_0f" "1")])
29040
;; Expander for the pause builtin.  Operand 0 is created here as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH; the pattern
;; sets that MEM from an unspec of itself.
;; NOTE(review): presumably this makes the insn act as a memory barrier
;; for the optimizers -- confirm.
29041 (define_expand "pause"
29042 [(set (match_dup 0)
29043 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
29044 ""
29045 {
29046 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
29047 MEM_VOLATILE_P (operands[0]) = 1;
29048 })
29049
29050 ;; Use "rep; nop", instead of "pause", to support older assemblers.
29051 ;; They have the same encoding.
;; Matches the RTL produced by the "pause" expander.  No enabling
;; condition; fixed length of 2; memory attribute "unknown".
29052 (define_insn "*pause"
29053 [(set (match_operand:BLK 0)
29054 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
29055 ""
29056 "rep%; nop"
29057 [(set_attr "length" "2")
29058 (set_attr "memory" "unknown")])
29059
29060 ;; CET instructions
;; RDSSP.  Enabled by TARGET_SHSTK or by return-address CF protection.
;; Input operand 1 is tied to the output by the matching constraint "0".
;; NOTE(review): the tie and the UNSPECV_NOP_RDSSP name suggest the insn
;; may execute as a NOP that leaves the register unchanged -- confirm.
29061 (define_insn "@rdssp<mode>"
29062 [(set (match_operand:SWI48 0 "register_operand" "=r")
29063 (unspec_volatile:SWI48 [(match_operand:SWI48 1 "register_operand" "0")]
29064 UNSPECV_NOP_RDSSP))]
29065 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
29066 "rdssp<mskmodesuffix>\t%0"
29067 [(set_attr "length" "6")
29068 (set_attr "type" "other")])
29069
;; INCSSP with a SWI48 register operand.  Same enabling condition as
;; rdssp.  Fixed length of 4.
29070 (define_insn "@incssp<mode>"
29071 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
29072 UNSPECV_INCSSP)]
29073 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
29074 "incssp<mskmodesuffix>\t%0"
29075 [(set_attr "length" "4")
29076 (set_attr "type" "other")])
29077
;; Operand-less SAVEPREVSSP.  Fixed length of 5.
29078 (define_insn "saveprevssp"
29079 [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
29080 "TARGET_SHSTK"
29081 "saveprevssp"
29082 [(set_attr "length" "5")
29083 (set_attr "type" "other")])
29084
;; RSTORSSP with a DImode memory operand.
;; NOTE(review): the length is a constant 5 although the operand is a
;; memory reference; other memory patterns here add
;; ix86_attr_length_address_default -- confirm this is intended.
29085 (define_insn "rstorssp"
29086 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
29087 UNSPECV_RSTORSSP)]
29088 "TARGET_SHSTK"
29089 "rstorssp\t%0"
29090 [(set_attr "length" "5")
29091 (set_attr "type" "other")])
29092
;; WRSS: write register operand 0 to the shadow-stack memory operand 1.
;; The template carries both assembler dialects.  AT&T lists the
;; register source first; Intel lists the memory destination first.
;; This matches the other two-operand patterns in this file (movdiri,
;; enqcmd), so -masm=intel output has the correct operand order.
29093 (define_insn "@wrss<mode>"
29094 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
29095 (match_operand:SWI48 1 "memory_operand" "m")]
29096 UNSPECV_WRSS)]
29097 "TARGET_SHSTK"
29098 "wrss<mskmodesuffix>\t{%0, %1|%1, %0}"
29099 [(set_attr "length" "3")
29100 (set_attr "type" "other")])
29101
;; WRUSS: write register operand 0 to the user shadow-stack memory
;; operand 1.  The template carries both assembler dialects.  AT&T
;; lists the register source first; Intel lists the memory destination
;; first, so -masm=intel output has the correct operand order.
29102 (define_insn "@wruss<mode>"
29103 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
29104 (match_operand:SWI48 1 "memory_operand" "m")]
29105 UNSPECV_WRUSS)]
29106 "TARGET_SHSTK"
29107 "wruss<mskmodesuffix>\t{%0, %1|%1, %0}"
29108 [(set_attr "length" "4")
29109 (set_attr "type" "other")])
29110
;; Operand-less SETSSBSY.  Fixed length of 4.
29111 (define_insn "setssbsy"
29112 [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
29113 "TARGET_SHSTK"
29114 "setssbsy"
29115 [(set_attr "length" "4")
29116 (set_attr "type" "other")])
29117
;; CLRSSBSY with a DImode memory operand.  Length is a constant 4.
29118 (define_insn "clrssbsy"
29119 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
29120 UNSPECV_CLRSSBSY)]
29121 "TARGET_SHSTK"
29122 "clrssbsy\t%0"
29123 [(set_attr "length" "4")
29124 (set_attr "type" "other")])
29125
;; ENDBR marker, available when branch CF protection is requested.
;; The C output block picks "endbr64" for 64-bit targets and "endbr32"
;; otherwise.  Fixed length of 4, with no immediate and no ModR/M byte.
29126 (define_insn "nop_endbr"
29127 [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
29128 "(flag_cf_protection & CF_BRANCH)"
29129 {
29130 return TARGET_64BIT ? "endbr64" : "endbr32";
29131 }
29132 [(set_attr "length" "4")
29133 (set_attr "length_immediate" "0")
29134 (set_attr "modrm" "0")])
29135
29136 ;; For RTM support
;; Expander for XBEGIN.  Emitted sequence:
;;   1. load -1 into the SImode AX hard register;
;;   2. emit xbegin_1 as a jump insn targeting a fresh label;
;;   3. place that label directly after the jump (LABEL_NUSES set to 1);
;;   4. copy AX into operand 0.
29137 (define_expand "xbegin"
29138 [(set (match_operand:SI 0 "register_operand")
29139 (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
29140 "TARGET_RTM"
29141 {
29142 rtx_code_label *label = gen_label_rtx ();
29143
29144 /* xbegin is emitted as jump_insn, so reload won't be able
29145 to reload its operand. Force the value into AX hard register. */
29146 rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
29147 emit_move_insn (ax_reg, constm1_rtx);
29148
29149 emit_jump_insn (gen_xbegin_1 (ax_reg, label));
29150
29151 emit_label (label);
29152 LABEL_NUSES (label) = 1;
29153
29154 emit_move_insn (operands[0], ax_reg);
29155
29156 DONE;
29157 })
29158
;; The XBEGIN instruction itself, modelled as a conditional jump to
;; label operand 1 on an UNSPEC_XBEGIN_ABORT condition.  It also
;; updates operand 0 in place (read-write constraint "+a").
;; Fixed length of 6.
29159 (define_insn "xbegin_1"
29160 [(set (pc)
29161 (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
29162 (const_int 0))
29163 (label_ref (match_operand 1))
29164 (pc)))
29165 (set (match_operand:SI 0 "register_operand" "+a")
29166 (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
29167 "TARGET_RTM"
29168 "xbegin\t%l1"
29169 [(set_attr "type" "other")
29170 (set_attr "length" "6")])
29171
;; Operand-less XEND.  Fixed length of 3.
29172 (define_insn "xend"
29173 [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
29174 "TARGET_RTM"
29175 "xend"
29176 [(set_attr "type" "other")
29177 (set_attr "length" "3")])
29178
;; XABORT with an immediate accepted by const_0_to_255_operand.
;; Fixed length of 3.
29179 (define_insn "xabort"
29180 [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand")]
29181 UNSPECV_XABORT)]
29182 "TARGET_RTM"
29183 "xabort\t%0"
29184 [(set_attr "type" "other")
29185 (set_attr "length" "3")])
29186
;; Expander for XTEST.  Emits xtest_1, which sets the flags in CCZmode,
;; then uses ix86_expand_setcc with an NE test on the flags register to
;; produce the QImode result in operand 0.
29187 (define_expand "xtest"
29188 [(set (match_operand:QI 0 "register_operand")
29189 (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
29190 "TARGET_RTM"
29191 {
29192 emit_insn (gen_xtest_1 ());
29193
29194 ix86_expand_setcc (operands[0], NE,
29195 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
29196 DONE;
29197 })
29198
;; The XTEST instruction, modelled as a CCZmode set of the flags
;; register.  Fixed length of 3.
29199 (define_insn "xtest_1"
29200 [(set (reg:CCZ FLAGS_REG)
29201 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
29202 "TARGET_RTM"
29203 "xtest"
29204 [(set_attr "type" "other")
29205 (set_attr "length" "3")])
29206
;; CLWB on an address operand (predicate address_operand, constraint
;; "p"), printed with the %a modifier.  Memory attribute is "unknown".
29207 (define_insn "clwb"
29208 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
29209 UNSPECV_CLWB)]
29210 "TARGET_CLWB"
29211 "clwb\t%a0"
29212 [(set_attr "type" "sse")
29213 (set_attr "atom_sse_attr" "fence")
29214 (set_attr "memory" "unknown")])
29215
;; CLFLUSHOPT on an address operand.  Same shape and attributes as clwb.
29216 (define_insn "clflushopt"
29217 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
29218 UNSPECV_CLFLUSHOPT)]
29219 "TARGET_CLFLUSHOPT"
29220 "clflushopt\t%a0"
29221 [(set_attr "type" "sse")
29222 (set_attr "atom_sse_attr" "fence")
29223 (set_attr "memory" "unknown")])
29224
29225 ;; MONITORX and MWAITX
;; MWAITX.  Three implicit SImode operands under constraints "c", "a"
;; and "b"; none is printed.  Fixed length of 3.
29226 (define_insn "mwaitx"
29227 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
29228 (match_operand:SI 1 "register_operand" "a")
29229 (match_operand:SI 2 "register_operand" "b")]
29230 UNSPECV_MWAITX)]
29231 "TARGET_MWAITX"
29232 ;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
29233 ;; Since 32bit register operands are implicitly zero extended to 64bit,
29234 ;; we only need to set up 32bit registers.
29235 "mwaitx"
29236 [(set_attr "length" "3")])
29237
;; MONITORX.  Operand 0 is a pointer-mode register under constraint "a";
;; operands 1 and 2 are SImode registers under "c" and "d".  Length is
;; 3, plus 1 when Pmode differs from word_mode.
;; NOTE(review): the "%^" in the template presumably emits the
;; address-size prefix that this extra byte accounts for -- confirm
;; against ix86_print_operand.
29238 (define_insn "@monitorx_<mode>"
29239 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
29240 (match_operand:SI 1 "register_operand" "c")
29241 (match_operand:SI 2 "register_operand" "d")]
29242 UNSPECV_MONITORX)]
29243 "TARGET_MWAITX"
29244 ;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
29245 ;; RCX and RDX are used. Since 32bit register operands are implicitly
29246 ;; zero extended to 64bit, we only need to set up 32bit registers.
29247 "%^monitorx"
29248 [(set (attr "length")
29249 (symbol_ref ("(Pmode != word_mode) + 3")))])
29250
29251 ;; CLZERO
;; CLZERO.  The pointer-mode address is an implicit operand under
;; constraint "a" and is never printed.  Fixed length of 3; memory
;; attribute "unknown".
29252 (define_insn "@clzero_<mode>"
29253 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")]
29254 UNSPECV_CLZERO)]
29255 "TARGET_CLZERO"
29256 "clzero"
29257 [(set_attr "length" "3")
29258 (set_attr "memory" "unknown")])
29259
29260 ;; RDPKRU and WRPKRU
29261
;; Expander for RDPKRU.  Operand 1 (the unspec input) is a register
;; forced to hold zero.  Operand 2 is a fresh SImode pseudo that the
;; pattern sets to zero alongside the result.
29262 (define_expand "rdpkru"
29263 [(parallel
29264 [(set (match_operand:SI 0 "register_operand")
29265 (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
29266 (set (match_dup 2) (const_int 0))])]
29267 "TARGET_PKU"
29268 {
29269 operands[1] = force_reg (SImode, const0_rtx);
29270 operands[2] = gen_reg_rtx (SImode);
29271 })
29272
;; The RDPKRU instruction.  The result is under constraint "=a", the
;; input (operand 2) under "c", and a second output (operand 1) under
;; "=d" is set to zero.  Note the operand numbering differs from the
;; expander's.
29273 (define_insn "*rdpkru"
29274 [(set (match_operand:SI 0 "register_operand" "=a")
29275 (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
29276 UNSPECV_PKU))
29277 (set (match_operand:SI 1 "register_operand" "=d")
29278 (const_int 0))]
29279 "TARGET_PKU"
29280 "rdpkru"
29281 [(set_attr "type" "other")])
29282
;; Expander for WRPKRU.  Operand 0 is the caller's value; operands 1
;; and 2 are both registers forced to hold zero.
29283 (define_expand "wrpkru"
29284 [(unspec_volatile:SI
29285 [(match_operand:SI 0 "register_operand")
29286 (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
29287 "TARGET_PKU"
29288 {
29289 operands[1] = force_reg (SImode, const0_rtx);
29290 operands[2] = force_reg (SImode, const0_rtx);
29291 })
29292
;; The WRPKRU instruction.  Three implicit SImode inputs under
;; constraints "a", "d" and "c"; none is printed.
29293 (define_insn "*wrpkru"
29294 [(unspec_volatile:SI
29295 [(match_operand:SI 0 "register_operand" "a")
29296 (match_operand:SI 1 "register_operand" "d")
29297 (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
29298 "TARGET_PKU"
29299 "wrpkru"
29300 [(set_attr "type" "other")])
29301
;; 32-bit RDPID into an SImode general register.
29302 (define_insn "rdpid"
29303 [(set (match_operand:SI 0 "register_operand" "=r")
29304 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
29305 "!TARGET_64BIT && TARGET_RDPID"
29306 "rdpid\t%0"
29307 [(set_attr "type" "other")])
29308
;; 64-bit RDPID.  Same as rdpid, but the destination is DImode.
29309 (define_insn "rdpid_rex64"
29310 [(set (match_operand:DI 0 "register_operand" "=r")
29311 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
29312 "TARGET_64BIT && TARGET_RDPID"
29313 "rdpid\t%0"
29314 [(set_attr "type" "other")])
29315
29316 ;; Intrinsics for > i486
29317
;; Operand-less WBINVD.  The enabling condition is empty, so the
;; pattern is always available.
29318 (define_insn "wbinvd"
29319 [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
29320 ""
29321 "wbinvd"
29322 [(set_attr "type" "other")])
29323
;; Operand-less WBNOINVD, gated on TARGET_WBNOINVD.
29324 (define_insn "wbnoinvd"
29325 [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
29326 "TARGET_WBNOINVD"
29327 "wbnoinvd"
29328 [(set_attr "type" "other")])
29329
29330 ;; MOVDIRI and MOVDIR64B
29331
;; MOVDIRI: store SWI48 register operand 1 to memory operand 0.
;; Modelled as a plain (non-volatile) unspec.  The template gives both
;; dialect orders.
29332 (define_insn "movdiri<mode>"
29333 [(set (match_operand:SWI48 0 "memory_operand" "=m")
29334 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
29335 UNSPEC_MOVDIRI))]
29336 "TARGET_MOVDIRI"
29337 "movdiri\t{%1, %0|%0, %1}"
29338 [(set_attr "type" "other")])
29339
;; MOVDIR64B.  The destination is an XImode MEM addressed by the
;; pointer-mode register operand 0; the source is XImode memory
;; operand 1.
29340 (define_insn "@movdir64b_<mode>"
29341 [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
29342 (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
29343 UNSPEC_MOVDIR64B))]
29344 "TARGET_MOVDIR64B"
29345 "movdir64b\t{%1, %0|%0, %1}"
29346 [(set_attr "type" "other")])
29347
29348 ;; TSXLDTRK
;; One pattern generates both xsusldtrk and xresldtrk.  The TSXLDTRK
;; iterator supplies the unspec; the <tsxldtrk> attribute supplies the
;; insn name and mnemonic.  No operands; fixed length of 4.
29349 (define_int_iterator TSXLDTRK [UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK])
29350 (define_int_attr tsxldtrk [(UNSPECV_XSUSLDTRK "xsusldtrk")
29351 (UNSPECV_XRESLDTRK "xresldtrk")])
29352 (define_insn "<tsxldtrk>"
29353 [(unspec_volatile [(const_int 0)] TSXLDTRK)]
29354 "TARGET_TSXLDTRK"
29355 "<tsxldtrk>"
29356 [(set_attr "type" "other")
29357 (set_attr "length" "4")])
29358
29359 ;; ENQCMD and ENQCMDS
29360
;; One pattern generates both enqcmd and enqcmds (suffix "" or "s" from
;; <enqcmd_sfx>).  Operand 0 is a pointer-mode register and operand 1 an
;; XImode memory operand.  The result is a CCZmode set of the flags
;; register.
29361 (define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
29362 (define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])
29363
29364 (define_insn "@enqcmd<enqcmd_sfx>_<mode>"
29365 [(set (reg:CCZ FLAGS_REG)
29366 (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
29367 (match_operand:XI 1 "memory_operand" "m")]
29368 ENQCMD))]
29369 "TARGET_ENQCMD"
29370 "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
29371 [(set_attr "type" "other")])
29372
29373 ;; UINTR
;; One pattern generates both clui and stui via the UINTR iterator and
;; <uintr> attribute.  No operands; 64-bit only; fixed length of 4.
29374 (define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
29375 (define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")])
29376
29377 (define_insn "<uintr>"
29378 [(unspec_volatile [(const_int 0)] UINTR)]
29379 "TARGET_UINTR && TARGET_64BIT"
29380 "<uintr>"
29381 [(set_attr "type" "other")
29382 (set_attr "length" "4")])
29383
;; TESTUI, modelled as a CCCmode set of the flags register.
;; 64-bit only; fixed length of 4.
29384 (define_insn "testui"
29385 [(set (reg:CCC FLAGS_REG)
29386 (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
29387 "TARGET_UINTR && TARGET_64BIT"
29388 "testui"
29389 [(set_attr "type" "other")
29390 (set_attr "length" "4")])
29391
;; SENDUIPI with a DImode register operand.  64-bit only; fixed length
;; of 4.
29392 (define_insn "senduipi"
29393 [(unspec_volatile
29394 [(match_operand:DI 0 "register_operand" "r")]
29395 UNSPECV_SENDUIPI)]
29396 "TARGET_UINTR && TARGET_64BIT"
29397 "senduipi\t%0"
29398 [(set_attr "type" "other")
29399 (set_attr "length" "4")])
29400
29401 ;; WAITPKG
29402
;; 32-bit UMWAIT.  Operand 0 is the printed SImode register.  Operand 1
;; is an implicit DImode register under constraint "A".  The result is
;; a CCCmode set of the flags register.
29403 (define_insn "umwait"
29404 [(set (reg:CCC FLAGS_REG)
29405 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
29406 (match_operand:DI 1 "register_operand" "A")]
29407 UNSPECV_UMWAIT))]
29408 "!TARGET_64BIT && TARGET_WAITPKG"
29409 "umwait\t%0"
29410 [(set_attr "length" "3")])
29411
;; 64-bit UMWAIT.  The implicit DImode operand of the 32-bit pattern is
;; split into two SImode registers under constraints "a" and "d".
29412 (define_insn "umwait_rex64"
29413 [(set (reg:CCC FLAGS_REG)
29414 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
29415 (match_operand:SI 1 "register_operand" "a")
29416 (match_operand:SI 2 "register_operand" "d")]
29417 UNSPECV_UMWAIT))]
29418 "TARGET_64BIT && TARGET_WAITPKG"
29419 "umwait\t%0"
29420 [(set_attr "length" "3")])
29421
;; UMONITOR with a pointer-mode register operand.  Length is 3, plus 1
;; when Pmode differs from word_mode.
29422 (define_insn "@umonitor_<mode>"
29423 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
29424 UNSPECV_UMONITOR)]
29425 "TARGET_WAITPKG"
29426 "umonitor\t%0"
29427 [(set (attr "length")
29428 (symbol_ref ("(Pmode != word_mode) + 3")))])
29429
;; 32-bit TPAUSE.  Same operand layout as umwait: a printed SImode
;; register, an implicit DImode register under constraint "A", and a
;; CCCmode flags result.
29430 (define_insn "tpause"
29431 [(set (reg:CCC FLAGS_REG)
29432 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
29433 (match_operand:DI 1 "register_operand" "A")]
29434 UNSPECV_TPAUSE))]
29435 "!TARGET_64BIT && TARGET_WAITPKG"
29436 "tpause\t%0"
29437 [(set_attr "length" "3")])
29438
;; 64-bit TPAUSE.  The implicit operand is split into two SImode
;; registers under constraints "a" and "d".
29439 (define_insn "tpause_rex64"
29440 [(set (reg:CCC FLAGS_REG)
29441 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
29442 (match_operand:SI 1 "register_operand" "a")
29443 (match_operand:SI 2 "register_operand" "d")]
29444 UNSPECV_TPAUSE))]
29445 "TARGET_64BIT && TARGET_WAITPKG"
29446 "tpause\t%0"
29447 [(set_attr "length" "3")])
29448
;; CLDEMOTE on an address operand (predicate address_operand,
;; constraint "p"), printed with the %a modifier.  Memory attribute is
;; "unknown".
29449 (define_insn "cldemote"
29450 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
29451 UNSPECV_CLDEMOTE)]
29452 "TARGET_CLDEMOTE"
29453 "cldemote\t%a0"
29454 [(set_attr "type" "other")
29455 (set_attr "memory" "unknown")])
29456
;; Speculation barrier, emitted as "lfence".  The enabling condition is
;; empty, so the pattern is always available.  Fixed length of 3.
29457 (define_insn "speculation_barrier"
29458 [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
29459 ""
29460 "lfence"
29461 [(set_attr "type" "other")
29462 (set_attr "length" "3")])
29463
;; Operand-less SERIALIZE.  Fixed length of 3.
29464 (define_insn "serialize"
29465 [(unspec_volatile [(const_int 0)] UNSPECV_SERIALIZE)]
29466 "TARGET_SERIALIZE"
29467 "serialize"
29468 [(set_attr "type" "other")
29469 (set_attr "length" "3")])
29470
;; Pseudo-insn for a patchable area.  All output is produced by
;; ix86_output_patchable_area, called with operand 0 and with
;; "operand 1 != 0" as a flag; the template string itself is empty.
;; The length attribute is the value of operand 0.
29471 (define_insn "patchable_area"
29472 [(unspec_volatile [(match_operand 0 "const_int_operand")
29473 (match_operand 1 "const_int_operand")]
29474 UNSPECV_PATCHABLE_AREA)]
29475 ""
29476 {
29477 ix86_output_patchable_area (INTVAL (operands[0]),
29478 INTVAL (operands[1]) != 0);
29479 return "";
29480 }
29481 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
29482 (set_attr "length_immediate" "0")
29483 (set_attr "modrm" "0")])
29484
;; HRESET.  The SImode operand is implicit (constraint "a"); the
;; template prints a literal immediate 0 in either dialect.
;; NOTE(review): the Intel SDM encoding appears to be F3 0F 3A F0 C0 ib
;; (6 bytes), but the length attribute says 4 -- confirm.
29485 (define_insn "hreset"
29486 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
29487 UNSPECV_HRESET)]
29488 "TARGET_HRESET"
29489 "hreset\t{$0|0}"
29490 [(set_attr "type" "other")
29491 (set_attr "length" "4")])
29492
29493 ;; Spaceship optimization
;; Three-way FP comparison for the MODEF modes; all of the work is done
;; by ix86_expand_fp_spaceship.  Requires x87 or SSE math for the mode,
;; plus either CMOV or usable SAHF.
29494 (define_expand "spaceship<mode>3"
29495 [(match_operand:SI 0 "register_operand")
29496 (match_operand:MODEF 1 "cmp_fp_expander_operand")
29497 (match_operand:MODEF 2 "cmp_fp_expander_operand")]
29498 "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
29499 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
29500 {
29501 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
29502 DONE;
29503 })
29504
;; XFmode counterpart of spaceship<mode>3.  It is x87-only, and its
;; operands use nonmemory_operand rather than cmp_fp_expander_operand.
29505 (define_expand "spaceshipxf3"
29506 [(match_operand:SI 0 "register_operand")
29507 (match_operand:XF 1 "nonmemory_operand")
29508 (match_operand:XF 2 "nonmemory_operand")]
29509 "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
29510 {
29511 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
29512 DONE;
29513 })
29514
29515 ;; Defined because the generic expand_builtin_issignaling for XFmode
29516 ;; only tests for sNaNs, but i387 treats also pseudo numbers as always
29517 ;; signaling.
;; Operand 0 receives the SImode result; operand 1 is the XFmode value.
;; A non-memory value is first spilled to a stack temporary.  The value
;; is then read as three integer pieces:
;;   ex: HImode at byte offset 8
;;   hi: SImode at byte offset 4
;;   lo: SImode at byte offset 0
;; The result is built from store-flag and and/or operations, following
;; the formula in the C comment below.
;; NOTE(review): the NE store-flag on `ex' passes SImode as the operand
;; mode, although `ex' was computed in HImode and the following EQ call
;; passes HImode -- confirm the mismatch is intended.
29518 (define_expand "issignalingxf2"
29519 [(match_operand:SI 0 "register_operand")
29520 (match_operand:XF 1 "general_operand")]
29521 ""
29522 {
29523 rtx temp = operands[1];
29524 if (!MEM_P (temp))
29525 {
29526 rtx mem = assign_stack_temp (XFmode, GET_MODE_SIZE (XFmode));
29527 emit_move_insn (mem, temp);
29528 temp = mem;
29529 }
29530 rtx ex = adjust_address (temp, HImode, 8);
29531 rtx hi = adjust_address (temp, SImode, 4);
29532 rtx lo = adjust_address (temp, SImode, 0);
29533 rtx val = GEN_INT (HOST_WIDE_INT_M1U << 30);
29534 rtx mask = GEN_INT (0x7fff);
29535 rtx bit = GEN_INT (HOST_WIDE_INT_1U << 30);
29536 /* Expand to:
29537 ((ex & mask) && (int) hi >= 0)
29538 || ((ex & mask) == mask && ((hi ^ bit) | ((lo | -lo) >> 31)) > val). */
29539 rtx nlo = expand_unop (SImode, neg_optab, lo, NULL_RTX, 0);
29540 lo = expand_binop (SImode, ior_optab, lo, nlo,
29541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29542 lo = expand_shift (RSHIFT_EXPR, SImode, lo, 31, NULL_RTX, 1);
29543 temp = expand_binop (SImode, xor_optab, hi, bit,
29544 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29545 temp = expand_binop (SImode, ior_optab, temp, lo,
29546 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29547 temp = emit_store_flag_force (gen_reg_rtx (SImode), GTU, temp, val,
29548 SImode, 1, 1);
29549 ex = expand_binop (HImode, and_optab, ex, mask,
29550 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29551 rtx temp2 = emit_store_flag_force (gen_reg_rtx (SImode), NE,
29552 ex, const0_rtx, SImode, 1, 1);
29553 ex = emit_store_flag_force (gen_reg_rtx (SImode), EQ,
29554 ex, mask, HImode, 1, 1);
29555 temp = expand_binop (SImode, and_optab, temp, ex,
29556 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29557 rtx temp3 = emit_store_flag_force (gen_reg_rtx (SImode), GE,
29558 hi, const0_rtx, SImode, 0, 1);
29559 temp2 = expand_binop (SImode, and_optab, temp2, temp3,
29560 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29561 temp = expand_binop (SImode, ior_optab, temp, temp2,
29562 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29563 emit_move_insn (operands[0], temp);
29564 DONE;
29565 })
29566
;; URDMSR: the DImode result register is set from an unspec_volatile of
;; operand 1.  Operand 1 is a register or a constant accepted by
;; x86_64_szext_nonmemory_operand (constraints "reZ").  64-bit only.
29567 (define_insn "urdmsr"
29568 [(set (match_operand:DI 0 "register_operand" "=r")
29569 (unspec_volatile:DI
29570 [(match_operand:DI 1 "x86_64_szext_nonmemory_operand" "reZ")]
29571 UNSPECV_URDMSR))]
29572 "TARGET_USER_MSR && TARGET_64BIT"
29573 "urdmsr\t{%1, %0|%0, %1}"
29574 [(set_attr "prefix" "vex")
29575 (set_attr "type" "other")])
29576
;; UWRMSR.  Operand 0 is a register or constant under constraints
;; "reZ"; operand 1 is a DImode register.  64-bit only.
29577 (define_insn "uwrmsr"
29578 [(unspec_volatile
29579 [(match_operand:DI 0 "x86_64_szext_nonmemory_operand" "reZ")
29580 (match_operand:DI 1 "register_operand" "r")]
29581 UNSPECV_UWRMSR)]
29582 "TARGET_USER_MSR && TARGET_64BIT"
29583 "uwrmsr\t{%1, %0|%0, %1}"
29584 [(set_attr "prefix" "vex")
29585 (set_attr "type" "other")])
29586
;; LDTILECFG from a BLKmode memory operand.  Memory attribute "load";
;; prefix attribute "maybe_evex".
29587 (define_insn "ldtilecfg"
29588 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
29589 UNSPECV_LDTILECFG)]
29590 "TARGET_AMX_TILE"
29591 "ldtilecfg\t%0"
29592 [(set_attr "type" "other")
29593 (set_attr "prefix" "maybe_evex")
29594 (set_attr "memory" "load")])
29595
;; STTILECFG into a BLKmode memory operand.  Memory attribute "store";
;; prefix attribute "maybe_evex".
29596 (define_insn "sttilecfg"
29597 [(set (match_operand:BLK 0 "memory_operand" "=m")
29598 (unspec_volatile:BLK [(const_int 0)] UNSPECV_STTILECFG))]
29599 "TARGET_AMX_TILE"
29600 "sttilecfg\t%0"
29601 [(set_attr "type" "other")
29602 (set_attr "prefix" "maybe_evex")
29603 (set_attr "memory" "store")])
29604
29605 (include "mmx.md")
29606 (include "sse.md")
29607 (include "sync.md")
This page took 1.219544 seconds and 6 git commands to generate.