/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)      \
  ((mode) == QImode ? 0       \
   : (mode) == HImode ? 1     \
   : (mode) == SImode ? 2     \
   : (mode) == DImode ? 3     \
   : 4)
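/* Illustration (not part of the original source): MODE_INDEX (QImode) is 0
   and MODE_INDEX (SImode) is 2, so the five-element cost arrays below are
   consulted roughly as

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   where "mult_init" stands for whatever the multiply-cost field of
   struct processor_costs is named; any mode other than QI/HI/SI/DImode
   falls through to the last (TImode) slot.  */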
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,				/* cost of an add instruction */
  3,				/* cost of a lea instruction */
  2,				/* variable shift costs */
  3,				/* constant shift costs */
  {3, 3, 3, 3, 5},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},		/* cost of a divide/mod */
  3,				/* cost of movsx */
  3,				/* cost of movzx */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* cost of FADD and FSUB insns.  */
  2,				/* cost of FMUL instruction.  */
  2,				/* cost of FDIV instruction.  */
  2,				/* cost of FABS instruction.  */
  2,				/* cost of FCHS instruction.  */
  2,				/* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  {6, 6, 6, 6, 6},		/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},		/* cost of a divide/mod */
  3,				/* cost of movsx */
  2,				/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  23,				/* cost of FADD and FSUB insns.  */
  27,				/* cost of FMUL instruction.  */
  88,				/* cost of FDIV instruction.  */
  22,				/* cost of FABS instruction.  */
  24,				/* cost of FCHS instruction.  */
  122,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  {12, 12, 12, 12, 12},		/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},		/* cost of a divide/mod */
  3,				/* cost of movsx */
  2,				/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  8,				/* cost of FADD and FSUB insns.  */
  16,				/* cost of FMUL instruction.  */
  73,				/* cost of FDIV instruction.  */
  3,				/* cost of FABS instruction.  */
  3,				/* cost of FCHS instruction.  */
  83,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  4,				/* variable shift costs */
  1,				/* constant shift costs */
  {11, 11, 11, 11, 11},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},		/* cost of a divide/mod */
  3,				/* cost of movsx */
  2,				/* cost of movzx */
  8,				/* "large" insn */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  3,				/* cost of FADD and FSUB insns.  */
  3,				/* cost of FMUL instruction.  */
  39,				/* cost of FDIV instruction.  */
  1,				/* cost of FABS instruction.  */
  1,				/* cost of FCHS instruction.  */
  70,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  {4, 4, 4, 4, 4},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},		/* cost of a divide/mod */
  1,				/* cost of movsx */
  1,				/* cost of movzx */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* cost of FADD and FSUB insns.  */
  5,				/* cost of FMUL instruction.  */
  56,				/* cost of FDIV instruction.  */
  2,				/* cost of FABS instruction.  */
  2,				/* cost of FCHS instruction.  */
  56,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  {3, 3, 3, 3, 3},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},		/* cost of a divide/mod */
  2,				/* cost of movsx */
  2,				/* cost of movzx */
  8,				/* "large" insn */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  2,				/* cost of FADD and FSUB insns.  */
  2,				/* cost of FMUL instruction.  */
  56,				/* cost of FDIV instruction.  */
  2,				/* cost of FABS instruction.  */
  2,				/* cost of FCHS instruction.  */
  56,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  {5, 5, 5, 5, 5},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},		/* cost of a divide/mod */
  1,				/* cost of movsx */
  1,				/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  4,				/* cost of FADD and FSUB insns.  */
  4,				/* cost of FMUL instruction.  */
  24,				/* cost of FDIV instruction.  */
  2,				/* cost of FABS instruction.  */
  2,				/* cost of FCHS instruction.  */
  35,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  {3, 4, 3, 4, 5},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},		/* cost of a divide/mod */
  1,				/* cost of movsx */
  1,				/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  4,				/* cost of FADD and FSUB insns.  */
  4,				/* cost of FMUL instruction.  */
  19,				/* cost of FDIV instruction.  */
  2,				/* cost of FABS instruction.  */
  2,				/* cost of FCHS instruction.  */
  35,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  1,				/* cost of an add instruction */
  3,				/* cost of a lea instruction */
  4,				/* variable shift costs */
  4,				/* constant shift costs */
  {15, 15, 15, 15, 15},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},		/* cost of a divide/mod */
  1,				/* cost of movsx */
  1,				/* cost of movzx */
  16,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  5,				/* cost of FADD and FSUB insns.  */
  7,				/* cost of FMUL instruction.  */
  43,				/* cost of FDIV instruction.  */
  2,				/* cost of FABS instruction.  */
  2,				/* cost of FCHS instruction.  */
  43,				/* cost of FSQRT instruction.  */
};
static const
struct processor_costs nocona_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  {10, 10, 10, 10, 10},		/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},		/* cost of a divide/mod */
  1,				/* cost of movsx */
  1,				/* cost of movzx */
  16,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  6,				/* cost of FADD and FSUB insns.  */
  8,				/* cost of FMUL instruction.  */
  40,				/* cost of FDIV instruction.  */
  3,				/* cost of FABS instruction.  */
  3,				/* cost of FCHS instruction.  */
  44,				/* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
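/* Sketch of how these masks are consumed (assuming the TUNEMASK macro,
   used further down in override_options, expands to (1 << ix86_tune)):

     if (x86_push_memory & TUNEMASK)
       ... the processor being tuned for prefers push from memory ...

   i.e. each x86_* constant below is a set of PROCESSOR_* bits, and a
   feature is enabled when the bit of the current tuning target is set.  */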
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results, but after P4
   shipped no performance benefit was observed from them, and they also
   increase code size.  As a result, icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486
			  | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8
					   | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
/* ??? Allowing interunit moves makes it all too easy for the compiler to
   put integer data in xmm registers, which results in pretty abysmal
   code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
624 /* The "default" register map used in 32bit mode. */
626 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
628 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
629 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
630 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
631 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
632 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
633 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
634 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
648 /* The "default" register map used in 64bit mode. */
649 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
651 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
652 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
653 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
654 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
655 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
656 8,9,10,11,12,13,14,15, /* extended integer registers */
657 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
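/* Worked example (values assumed from the x86-64 ABI for illustration:
   REGPARM_MAX == 6 integer registers of UNITS_PER_WORD == 8 bytes each,
   SSE_REGPARM_MAX == 8 SSE registers of 16 bytes each):
   6 * 8 + 8 * 16 = 176 bytes for the register save area.  */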
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;
/* ix86_regparm_string as a number */
static int ix86_regparm;
/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx *);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_COMPLEX_X87_CLASS,
  X86_64_MEMORY_CLASS
};
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
1124 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1125 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1126 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1127 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1128 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1129 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1130 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1131 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1132 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE
				       | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1180 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1182 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1183 | PTA_3DNOW
| PTA_3DNOW_A
},
1184 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1185 | PTA_3DNOW_A
| PTA_SSE
},
1186 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1187 | PTA_3DNOW_A
| PTA_SSE
},
1188 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1189 | PTA_3DNOW_A
| PTA_SSE
},
1190 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1191 | PTA_SSE
| PTA_SSE2
},
1192 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1193 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1194 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1195 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1196 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1197 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1198 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1199 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      target_flags
	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
	    & ~target_flags_explicit);

      if (TARGET_SSE)
	ix86_fpmath = FPMATH_SSE;
    }
  else
    {
      ix86_fpmath = FPMATH_387;
      /* i386 ABI does not specify red zone.  It still makes sense to use it
	 when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
1584 /* When scheduling description is not available, disable scheduler pass
1585 so it won't slow down the compilation and make x87 code slower. */
1586 if (!TARGET_SCHEDULE
)
1587 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
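
/* Illustrative note (not part of the original source): with the implication
   chain above, "-msse3" turns on MASK_SSE2, which turns on MASK_SSE, which
   in turn turns on MASK_MMX unless the user set that flag explicitly, e.g.

       gcc -msse3 foo.c            -- SSE3, SSE2, SSE and MMX all enabled
       gcc -msse3 -mno-mmx foo.c   -- MMX stays off; target_flags_explicit wins

   The command lines are a sketch of the behavior, not a tested recipe.  */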
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this moment.  Mark these values with 2 and let the user
     override them.  In case there is no command line option specifying
     them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
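
/* Usage sketch (hypothetical user code, not part of GCC): the table above
   makes declarations like the following acceptable on ia32 targets:

       int __attribute__((stdcall))    f (int a, int b);
       int __attribute__((fastcall))   g (int a, int b);
       int __attribute__((regparm(3))) h (int a, int b, int c);

   "regparm" is the only entry with min_len == max_len == 1, so it is the
   only attribute here that requires an argument.  */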
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  if (decl)
    func = decl;
  else
    func = NULL;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
					   cfun->decl)))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cdecl_attribute (tree *node, tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      if (is_attribute_p ("fastcall", name))
	{
	  if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	    error ("fastcall and stdcall attributes are not compatible");
	  else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	    error ("fastcall and regparm attributes are not compatible");
	}
      else if (is_attribute_p ("stdcall", name))
	{
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	    error ("fastcall and stdcall attributes are not compatible");
	}
    }

  if (TARGET_64BIT)
    {
      warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_regparm_attribute (tree *node, tree name, tree args,
			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("%qs attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to %qs attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
    }

  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall types.  */
  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
		regparm = 2;
	      else
		regparm = 3;
	    }
	}
    }
  return regparm;
}
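
/* Example (illustrative, not part of the original source): for a type
   declared as
       void __attribute__((regparm(2))) f (int, int);
   the lookup above finds the attribute and returns 2.  A static function
   whose address is never taken, compiled with -funit-at-a-time and without
   profiling, may instead be promoted to regparm 3 by the cgraph path.  */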
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
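
/* Worked example (not in the original source): for
       void __attribute__((stdcall)) f (int a, int b);
   SIZE is 8 and the argument list is fixed, so this function returns 8 and
   the callee pops its arguments with "ret $8".  A varargs stdcall function
   would return 0, leaving the caller to pop the arguments.  */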
/* Argument support functions.  */

/* Return true when a register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;

  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));

  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;

  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;

  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;

  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  if (fntype)
    cum->nregs = ix86_function_regparm (fntype, fndecl);
  else
    cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = true;

  /* For local functions, pass SFmode (and DFmode for SSE2) arguments
     in SSE registers even for 32-bit mode and not just 3, but up to
     8 SSE arguments in registers.  */
  if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
      && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (fndecl);
      if (i && i->local)
	cum->float_in_sse = true;
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
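
/* Example (illustrative, not part of the original source): for
       struct s { int i; float f; };
   both fields land in the same eightbyte, so their classes merge as
       merge_classes (X86_64_SSESF_CLASS, X86_64_INTEGERSI_CLASS)
	 == X86_64_INTEGERSI_CLASS		(rule #4)
   and the whole struct is passed in a single general-purpose register.  */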
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int basenum;

	      for (binfo = TYPE_BINFO (type), basenum = 0;
		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
		{
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
		  tree type = BINFO_TYPE (base_binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + bit_offset) % 256);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
	       || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int basenum;

	      for (binfo = TYPE_BINFO (type), basenum = 0;
		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
		{
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
		  tree type = BINFO_TYPE (base_binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + (bit_offset % 64)) % 256);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	gcc_unreachable ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      if (VECTOR_MODE_P (mode))
	{
	  if (bytes > 16)
	    return 0;
	  if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
	    {
	      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
		classes[0] = X86_64_INTEGERSI_CLASS;
	      else
		classes[0] = X86_64_INTEGER_CLASS;
	      classes[1] = X86_64_INTEGER_CLASS;
	      return 1 + (bytes > 8);
	    }
	}
      gcc_unreachable ();
    }
}
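
/* Example (illustrative, not part of the original source):
       struct { double d; int i; };
   occupies two eightbytes; classify_argument fills
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS
   and returns 2, so the struct travels in one SSE register and one
   integer register.  */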
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	  fprintf (stderr, "\n");
	}
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      static bool issued_error;
      if (!issued_error)
	{
	  issued_error = true;
	  if (in_return)
	    error ("SSE register return with SSE disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	}
      return NULL;
    }

  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode, *intreg),
					    GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (SFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (DFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
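
/* For the struct { double d; int i; } example above, construct_container
   builds, schematically,
       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		  (expr_list (reg:DI di) (const_int 8))])
   -- the actual register numbers depend on how many argument registers
   earlier parameters have already consumed.  */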
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;

      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	case SFmode:
	case DFmode:
	  if (!cum->float_in_sse)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V2DFmode:
	case V4SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}
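
/* Call-sequence sketch (illustrative): for each argument the middle end
   effectively does
       rtx reg = FUNCTION_ARG (cum, mode, type, named);
       FUNCTION_ARG_ADVANCE (cum, mode, type, named);
   so the bookkeeping above must mirror exactly what function_arg below
   hands out.  */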
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers[cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case SFmode:
      case DFmode:
	if (!cum->float_in_sse)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V2DFmode:
      case V4SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning ("SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning ("MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */

static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      if (TREE_CODE (type) == RECORD_TYPE
	  || TREE_CODE (type) == UNION_TYPE
	  || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  tree field;

	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int i;

	      for (binfo = TYPE_BINFO (type), i = 0;
		   BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
		  return true;
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL
		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		return true;
	    }
	}
      /* Just in case some languages pass arrays by value.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	}
      else
	gcc_unreachable ();
    }
  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}
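
/* Example (illustrative): on ia32 with -msse, a __m128 (V4SFmode) argument
   yields 128 here, while a plain double yields PARM_BOUNDARY (32); only
   SSE vectors get 16-byte-aligned stack slots.  */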
/* Return true if N is a possible register number of function value.  */
bool
ix86_function_value_regno_p (int regno)
{
  if (!TARGET_64BIT)
    {
      return ((regno) == 0
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
    }
  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree func)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
}
/* Return true iff TYPE is returned in memory.  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned on the stack, since we've
	 got to EMMS/FEMMS before returning.  */
      if (size == 8)
	return 1;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (size > 12)
    return 1;
  return 0;
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warned;

  if (!TARGET_SSE && type && !warned)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (mode == TImode
	  || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	{
	  warned = true;
	  warning ("SSE vector return without SSE enabled changes the ABI");
	}
    }

  return NULL;
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return NULL;
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
}
/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func)
{
  gcc_assert (!TARGET_64BIT);

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return FIRST_SSE_REG;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled.  */
  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
      && flag_unit_at_a_time)
    {
      struct cgraph_local_info *i = cgraph_local_info (func);
      if (i && i->local)
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl =
    build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
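
/* The record built above corresponds to the familiar x86-64 ABI type,
   shown here only for illustration:

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } va_list[1];
*/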
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;
  if (cfun->stack_alignment_needed < 128)
    cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }
}
/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
    {
      t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		 build_int_cst (NULL_TREE, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
      t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
      t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
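
/* Example (illustrative): in "int f (int a, ...)", after the single named
   GP argument, va_start leaves gp_offset = 8 (one 8-byte GP slot consumed),
   fp_offset = 8 * REGPARM_MAX = 48 (no SSE registers consumed), and
   overflow_arg_area pointing at the first stack-passed word.  */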
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
				       size_int (src_offset)));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
					size_int (INTVAL (XEXP (slot, 1)))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		 build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  if (GET_CODE (mem) != MEM)
    abort ();
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1: return "fldz";
    case 2: return "fld1";
    case 3: return "fldlg2";
    case 4: return "fldln2";
    case 5: return "fldl2e";
    case 6: return "fldl2t";
    case 7: return "fldpi";
    default: gcc_unreachable ();
    }
}
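
/* Mapping sketch: standard_80387_constant_p returns 1 for +0.0 ("fldz"),
   2 for 1.0 ("fld1") and 3..7 for the XFmode table entries, which
   standard_80387_constant_opcode renders as "fldlg2", "fldln2", "fldl2e",
   "fldl2t" and "fldpi" respectively.  */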
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if X is an FP constant we can load to an SSE register without
   using memory.  */
int
standard_sse_constant_p (rtx x)
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];
      tree decl;

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

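/* For reference, the pc thunk emitted above looks like this in AT&T
   syntax (an illustrative sketch of the output, not compiled code):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address -- the address of the
   instruction following the call -- into the chosen register.  */
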
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}

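/* Sketch (illustrative only, AT&T syntax) of the two -fpic sequences this
   function can emit on ia32, with %ebx as the destination:

     call/pop form:				pc thunk form:
	call	1f				call	__i686.get_pc_thunk.bx
     1:	popl	%ebx				addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   The call/pop pair unbalances the return-stack predictor, which is why
   the thunk form is used when TARGET_DEEP_BRANCH_PREDICTION is set.  */
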
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload does multiple calls to the function
     and does not expect its decision to change within a single
     iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

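/* Illustrative sketch (not part of GCC): the offset arithmetic above on
   plain integers, for a hypothetical 32-bit function with a frame
   pointer, two saved registers and 20 bytes of locals.  #if 0'd out.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const int units_per_word = 4;		/* 32-bit target */
  const int stack_align = 16;		/* stack_alignment_needed */
  int nregs = 2, size = 20;

  /* Return address plus saved %ebp.  */
  int offset = 2 * units_per_word;
  int hard_frame_pointer_offset = offset;

  offset += nregs * units_per_word;	/* register save area */
  int padding1 = ((offset + stack_align - 1) & -stack_align) - offset;
  offset += padding1;
  int frame_pointer_offset = offset;
  offset += size;			/* locals */

  printf ("hard fp %d, fp %d, end of frame %d\n",
	  hard_frame_pointer_offset, frame_pointer_offset, offset);
  return 0;
}
#endif
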
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();
      rtx t;

      if (TARGET_64BIT)
	abort ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      emit_move_insn (eax, GEN_INT (allocate));

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					    t, REG_NOTES (insn));

      if (eax_live)
	{
	  if (frame_pointer_needed)
	    t = plus_constant (hard_frame_pointer_rtx,
			       allocate
			       - frame.to_allocate
			       - frame.nregs * UNITS_PER_WORD);
	  else
	    t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}

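/* Sketch (illustrative only, AT&T syntax) of the two prologue shapes
   chosen above for a function that saves %ebx and has 16 bytes of locals:

     push form:			move form (save_regs_using_mov):
	pushl	%ebp			pushl	%ebp
	movl	%esp, %ebp		movl	%esp, %ebp
	pushl	%ebx			subl	$20, %esp
	subl	$16, %esp		movl	%ebx, -4(%ebp)

   The moves are longer to encode but may execute in parallel, while
   push/pop serialize on the stack-pointer update.  */
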
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then we use
     a move instruction to restore the register, since it's less work than
     reloading sp and popping the register.

     The default code results in a stack adjustment using add/lea
     instructions, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one register
     to pop.  This heuristic may need some tuning in the future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

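/* Sketch (illustrative only, AT&T syntax) of the indirect return used
   above when more than 64K bytes of arguments must be popped:

	popl	%ecx		# return address
	addl	$NBYTES, %esp	# drop the arguments
	jmp	*%ecx		# return

   The ordinary "ret $NBYTES" cannot be used because its immediate is
   only 16 bits wide.  NBYTES stands for current_function_pops_args.  */
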
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

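/* Illustrative sketch (not part of GCC): the shape this function
   extracts.  An ia32 effective address is base + index*scale + disp,
   with scale constrained to 1, 2, 4 or 8.  #if 0'd out.  */
#if 0
struct toy_address
{
  long base;	/* base register value */
  long index;	/* index register value */
  long scale;	/* 1, 2, 4 or 8 */
  long disp;	/* constant displacement */
};

static long
toy_effective_address (const struct toy_address *a)
{
  return a->base + a->index * a->scale + a->disp;
}
#endif
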
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs -- that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions,
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return true;
	  }
    }

  return false;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (!symbolic_operand (x, Pmode))
	return false;
      break;

    default:
      break;
    }

  /* TLS symbols are never valid.  */
  if (tls_symbolic_operand (x, Pmode))
    return false;

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; they would limit
	 the allowed distance into GOT tables, and we should not need
	 them anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's that span more than a word here.  It can lead to spill
     failures when the base is one word out of a two word structure, which is
     represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG
	       && REG_P (SUBREG_REG (base))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (base);
      else
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG
	       && REG_P (SUBREG_REG (index))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (index);
      else
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in case
	     the output register differs from the input.  While this
	     can be handled by a separate addsi pattern for this case
	     that never results in lea, this seems to be the easier and
	     correct fix for the crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address gets loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

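/* Sketch (illustrative only, AT&T syntax) of the two PIC reference
   shapes built above on ia32, with the PIC register in %ebx:

	movl	foo@GOT(%ebx), %eax	# global: load address from GOT slot
	leal	bar@GOTOFF(%ebx), %eax	# local: offset from the GOT base

   Global symbols go through a GOT slot so the dynamic linker can
   interpose them; local data is addressed directly as PIC reg + offset.  */
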
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}

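/* Sketch (illustrative only, AT&T syntax) of the ia32 GNU-TLS access
   sequences the models above expand to; these follow the usual TLS ABI
   documentation rather than anything specific to this file:

     local exec:	movl	%gs:0, %eax
			leal	x@NTPOFF(%eax), %eax
     initial exec:	movl	%gs:0, %eax
			addl	x@INDNTPOFF, %eax

   The global/local dynamic models instead end up calling
   ___tls_get_addr.  */
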
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

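/* Illustrative sketch (not part of GCC): the shift-to-multiply
   canonicalization above on plain integers.  A shift by 0..3 becomes a
   multiply by 1, 2, 4 or 8, the legal address scales.  #if 0'd out.  */
#if 0
static long
shift_as_scale (long x, unsigned log)
{
  /* Only shifts by 0..3 can be represented as an address scale.  */
  if (log >= 4)
    return x << log;	/* left untransformed by legitimize_address */
  return x * (1L << log);
}
#endif
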
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != UNKNOWN || second_code != UNKNOWN)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
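
/* Illustrative sketch, not from this file: the kind of code-to-suffix
   mapping put_condition_code produces, shown standalone for a few
   integer condition codes.  The enum and function here are
   hypothetical, for illustration only.  */

enum toy_code { TOY_EQ, TOY_NE, TOY_GT, TOY_GTU, TOY_GEU };

static const char *
toy_condition_suffix (enum toy_code code)
{
  switch (code)
    {
    case TOY_EQ:  return "e";	/* sete, je, cmove */
    case TOY_NE:  return "ne";	/* setne, jne, cmovne */
    case TOY_GT:  return "g";	/* signed greater */
    case TOY_GTU: return "a";	/* unsigned greater ("above") */
    case TOY_GEU: return "ae";	/* unsigned >= ("above or equal") */
    default:      return "?";
    }
}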
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts  */
void
print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;
	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    PRINT_OPERAND (file, x, 0);
	  return;
	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	      fputs ("neq", file);
	      break;
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	    }
	  return;
	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;

	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;

	case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;

	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case 'H':
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }
  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char *size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  if (x == CONST0_RTX (GET_MODE (x)))
	    x = const0_rtx;
	  else
	    abort ();
	}

      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (parts.seg != SEG_DEFAULT)
    {
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one-byte-shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
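
/* Illustrative sketch, not from this file: the arithmetic split_di
   performs on a constant, restated on a plain 64-bit integer.  The
   function is hypothetical; offsets 0 and 4 match the little-endian
   subword layout used above.  */

static void
toy_split_di (unsigned long long value,
	      unsigned long *lo, unsigned long *hi)
{
  *lo = (unsigned long) (value & 0xffffffffu);	/* SImode subword at offset 0 */
  *hi = (unsigned long) (value >> 32);		/* SImode subword at offset 4 */
}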
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */

static void
emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (current_mode));
  emit_move_insn (reg, current_mode);

  if (!TARGET_PARTIAL_REG_STALL && !optimize_size)
    {
      switch (mode)
	{
	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  break;

	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  break;

	default:
	  abort ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  break;

	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  break;

	default:
	  abort ();
	}
    }

  emit_move_insn (new_mode, reg);
}
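
/* Illustrative sketch, not from this file: the control-word arithmetic
   performed above, on a plain integer instead of RTL.  Bits 10-11 of
   the x87 control word select the rounding mode; bit 5 masks the
   precision exception.  The function and mode encoding are
   hypothetical.  */

static unsigned short
toy_i387_cw_round (unsigned short cw, int mode)
{
  switch (mode)
    {
    case 0:					/* floor */
      return (cw & ~0x0c00) | 0x0400;		/* RC = 01: toward -oo */
    case 1:					/* ceil */
      return (cw & ~0x0c00) | 0x0800;		/* RC = 10: toward +oo */
    case 2:					/* trunc */
      return cw | 0x0c00;			/* RC = 11: toward zero */
    default:					/* nearbyint */
      return cw | 0x0020;			/* PM: mask precision exception */
    }
}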
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%z0\t%0", operands);
      else
	output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	{
	  if (unordered_p)
	    return "ucomiss\t{%1, %0|%0, %1}";
	  else
	    return "comiss\t{%1, %0|%0, %1}";
	}
      else
	{
	  if (unordered_p)
	    return "ucomisd\t{%1, %0|%0, %1}";
	  else
	    return "comisd\t{%1, %0|%0, %1}";
	}
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 16)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
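
/* Illustrative sketch, not from this file: how the "alt" table above is
   indexed.  The four predicates pack into a 4-bit value, so each
   combination selects one template (NULL slots are combinations that
   cannot occur).  The function is hypothetical.  */

static int
toy_fp_compare_index (int eflags_p, int intmode, int unordered_p,
		      int stack_top_dies)
{
  int mask;

  mask  = eflags_p << 3;	/* bit 3: fcomi-style compare */
  mask |= intmode << 2;		/* bit 2: integer memory operand (ficom) */
  mask |= unordered_p << 1;	/* bit 1: fucom variant */
  mask |= stack_top_dies;	/* bit 0: popping variant */
  return mask;			/* index into alt[0..15] */
}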
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    directive = ASM_QUAD;

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf (file, "\n");
    }
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
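
/* Illustrative sketch, not from this file: the size trade-off behind
   ix86_expand_clear.  "xor %eax, %eax" encodes in 2 bytes where
   "mov $0, %eax" needs 5, but xor clobbers the condition codes, hence
   the CLOBBER attached above.  The function is hypothetical.  */

static const char *
toy_clear_eax (int flags_live)
{
  return flags_live ? "movl\t$0, %eax"		/* longer, flags preserved */
		    : "xorl\t%eax, %eax";	/* shorter, flags clobbered */
}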
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
	{
	  rtx addend = XEXP (XEXP (op1, 0), 1);
	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
	  op1 = force_operand (op1, NULL);
	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (op1 == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
    }
  else
    {
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into a register
	 to get them CSEd.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}
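
/* Illustrative sketch, not from this file: the two-step push expansion
   above, restated in plain C for a toy downward-growing stack.  The
   function is hypothetical.  */

static void
toy_expand_push (unsigned char **sp, const void *x, unsigned long size)
{
  *sp -= size;				/* sp = sp - GET_MODE_SIZE (mode) */
  __builtin_memcpy (*sp, x, size);	/* *(mem:MODE (sp)) = x */
}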
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;

  return 1;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
	v = gen_rtvec (4, mask, mask, mask, mask);
      else
	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
	v = gen_rtvec (2, mask, mask);
      else
	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}
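
/* Illustrative sketch, not from this file: the mask values the function
   above materializes, computed on plain integers.  The IEEE sign bit is
   bit 31 for SFmode and bit 63 for DFmode; INVERT selects everything
   but the sign.  The function is hypothetical.  */

static unsigned long long
toy_signbit_mask (int is_double, int invert)
{
  unsigned long long mask = is_double
			    ? 1ULL << 63	/* DFmode sign bit */
			    : 1ULL << 31;	/* SFmode sign bit */
  if (invert)
    /* Complement within the width of the mode.  */
    mask ^= is_double ? ~0ULL : 0xffffffffULL;
  return mask;
}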
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    /* When not using SSE, we don't use the mask, but prefer to keep the
       same general form of the insn pattern to reduce duplication when
       it comes time to split.  */
    mask = const0_rtx;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
	matching_memory = true;
      else
	dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      use = gen_rtx_USE (VOIDmode, mask);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
	op0 = CONST0_RTX (vmode);
      else
	{
	  if (mode == SFmode)
	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			   CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	  else
	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
	  op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	}

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}
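
/* Illustrative sketch, not from this file: the bit algebra the copysign
   expansion above reduces to once the sign-bit masks exist: keep the
   magnitude bits of one value and the sign bit of the other.  The
   function is hypothetical.  */

static unsigned long long
toy_copysign_bits (unsigned long long magnitude, unsigned long long sign,
		   unsigned long long signmask)
{
  /* (op0 & ~mask) | (op1 & mask), as in the *_const/_var splitters.  */
  return (magnitude & ~signmask) | (sign & signmask);
}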
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases the carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with the sign flag when comparing
	 against zero, but we miss the jump instruction for it,
	 so we need to use relational tests against the overflow
	 flag, which thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us
	 for a proper mode.  */
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  /* These are only compatible with themselves, which we already
     checked above.  */
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The same is
     true of the XFmode compare instructions if not comparing with
     zero (the ftst insn is used in this case).  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1))
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp != 1)
	    op1 = force_reg (op_mode, op1);
	}
      else
	op1 = force_reg (op_mode, op1);
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN
   if no such code is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}
/* Return the cost of a comparison done using fcom + arithmetic operations
   on AX.  All following functions use the number of instructions as the
   cost metric.  In future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */

static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}
/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not supported;
     this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}
/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not preferred;
     this keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}
/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}
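/* Note (added): the three cost functions above correspond to the three
   expansion strategies available to ix86_expand_fp_compare below --
   bit tests on AH after fnstsw, fcomi setting EFLAGS directly, and
   fnstsw followed by sahf -- so ix86_fp_comparison_cost simply returns
   whichever of the three is cheapest in instruction counts.  */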
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != UNKNOWN)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
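/* Illustration (added; not in the original source): for EQ with
   TARGET_IEEE_FP and no profitable fcomi/sahf, the arithmetic path above
   emits approximately

	fucomp	%st(1)
	fnstsw	%ax
	andb	$0x45, %ah	# keep C3, C2, C0
	cmpb	$0x40, %ah	# C3 alone <=> equal and ordered

   and the caller then tests the resulting flags with the returned EQ.  */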
static rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }
    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}
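/* Example (added): for a DImode "a == b" branch on ia32 the equality
   shortcut above reduces to roughly

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	hi(a), lo(a)
	jz	label

   one extra insn compared to compare+branch, but only a single jump.  */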
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 pc_rtx)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
				   GET_MODE (ix86_compare_op0),
				   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
}
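/* Note (added): when the FP comparison needs a SECOND_TEST or BYPASS_TEST,
   the function above materializes both flag tests as QImode values and
   combines them with andqi3/iorqi3, so an IEEE "a == b" setcc becomes
   roughly "sete dest; setnp tmp2; andb tmp2, dest".  */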
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.  Also
     we can't deal with FP compares yet.  This is possible to add.  */
  if ((mode == DImode && !TARGET_64BIT))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with a carry
	 flag based comparison.  This fails to be true only when we decide
	 to expand the comparison using arithmetic, which is not a common
	 scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
	return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
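/* Worked example (added): "a <= 42U" is rewritten above as "a < 43U",
   i.e. LEU with op1 bumped by one becomes a plain LTU test, which maps
   directly onto the carry flag -- exactly the form the sbb-based
   sequences in ix86_expand_int_movcc below consume.  */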
int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
					     ix86_compare_op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}
	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return 1; /* DONE */
	}

      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	  && GET_CODE (ix86_compare_op1) == CONST_INT)
	{
	  if (ix86_compare_op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (ix86_compare_op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */
	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return 1; /* DONE */
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST >= 2)
	{
	  if (cf == 0)
	    {
	      cf = ct;
	      ct = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing unordered compare to normal compare,
		   that is not valid in general (we may convert non-trapping
		   condition to trapping one), however on i386 we currently
		   emit all comparisons unordered.  */
		code = reverse_condition_maybe_unordered (code);
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  copy_rtx (operands[3]),
						  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  copy_rtx (operands[2]),
						  copy_rtx (operands[0]))));

  return 1; /* DONE */
}
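/* Example (added): with BRANCH_COST >= 2 and no cmov, the expansion above
   turns the unsigned "dest = (a < b) ? 3 : 5" into the branchless

	cmpl	b, a
	sbbl	dest, dest	# dest = (a < b) ? -1 : 0
	andl	$-2, dest	# -2 = 3 - 5
	addl	$5, dest	# 3 when a < b, else 5

   matching the "General case" size table in the comments.  */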
int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      rtx cmp_op0, cmp_op1, if_true, if_false;
      rtx clob;
      enum machine_mode vmode, cmode;
      bool is_minmax = false;

      cmp_op0 = ix86_compare_op0;
      cmp_op1 = ix86_compare_op1;
      if_true = operands[2];
      if_false = operands[3];

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (cmp_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (cmp_op1);
      if (cmode != mode)
	return 0;

      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      if (code == LTGT || code == UNEQ)
	return 0;

      /* Massage condition to satisfy sse_comparison_operator.  Try
	 to canonicalize the destination operand to be first in the
	 comparison - this helps reload to avoid extra moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (COMMUTATIVE_P (operands[1])
	      && rtx_equal_p (operands[0], cmp_op1)))
	{
	  tmp = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tmp;
	  code = swap_condition (code);
	}

      /* Detect conditional moves that exactly match min/max operational
	 semantics.  Note that this is IEEE safe, as long as we don't
	 interchange the operands.  Which is why we keep this in the form
	 of an IF_THEN_ELSE instead of reducing to SMIN/SMAX.  */
      if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
	{
	  if (((cmp_op0 == if_true && cmp_op1 == if_false)
	       || (cmp_op0 == if_false && cmp_op1 == if_true)))
	    {
	      is_minmax = true;
	      if (code == UNGE)
		{
		  code = LT;
		  tmp = if_true;
		  if_true = if_false;
		  if_false = tmp;
		}
	    }
	}

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	gcc_unreachable ();

      cmp_op0 = force_reg (mode, cmp_op0);
      if (!nonimmediate_operand (cmp_op1, mode))
	cmp_op1 = force_reg (mode, cmp_op1);

      tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
      gcc_assert (sse_comparison_operator (tmp, VOIDmode));

      tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
      tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);

      if (!is_minmax)
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
	}

      emit_insn (tmp);
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));

  return 1;
}
void
ix86_split_sse_movcc (rtx operands[])
{
  rtx dest, scratch, cmp, op_true, op_false, x;
  enum machine_mode mode, vmode;

  /* Note that the operator CMP has been set up with matching constraints
     such that dest is valid for the comparison.  Unless one of the true
     or false operands are zero, the true operand has already been placed
     in SCRATCH.  */
  dest = operands[0];
  scratch = operands[1];
  op_true = operands[2];
  op_false = operands[3];
  cmp = operands[4];

  mode = GET_MODE (dest);
  vmode = GET_MODE (scratch);

  /* We need to make sure that the TRUE and FALSE operands are out of the
     way of the destination.  Marking the destination earlyclobber doesn't
     work, since we want matching constraints for the actual comparison, so
     at some point we always wind up having to do a copy ourselves here.
     We very much prefer the TRUE value to be in SCRATCH.  If it turns out
     that FALSE overlaps DEST, then we invert the comparison so that we
     still only have to do one move.  */
  if (rtx_equal_p (op_false, dest))
    {
      enum rtx_code code;

      if (rtx_equal_p (op_true, dest))
	{
	  /* ??? Really ought not happen.  It means some optimizer managed
	     to prove the operands were identical, but failed to fold the
	     conditional move to a straight move.  Do so here, because
	     otherwise we'll generate incorrect code.  And since they're
	     both already in the destination register, nothing to do.  */
	  return;
	}

      x = gen_rtx_REG (mode, REGNO (scratch));
      emit_move_insn (x, op_false);
      op_false = op_true;
      op_true = x;

      code = GET_CODE (cmp);
      code = reverse_condition_maybe_unordered (code);
      cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
    }
  else if (op_true == CONST0_RTX (mode))
    ;
  else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
    ;
  else
    {
      x = gen_rtx_REG (mode, REGNO (scratch));
      emit_move_insn (x, op_true);
      op_true = x;
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
  dest = simplify_gen_subreg (vmode, dest, mode, 0);

  if (op_false == CONST0_RTX (mode))
    {
      op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
      x = gen_rtx_AND (vmode, dest, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_false = simplify_gen_subreg (vmode, op_false, mode, 0);

      if (op_true == CONST0_RTX (mode))
	{
	  x = gen_rtx_NOT (vmode, dest);
	  x = gen_rtx_AND (vmode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
      else
	{
	  x = gen_rtx_AND (vmode, scratch, dest);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

	  x = gen_rtx_NOT (vmode, dest);
	  x = gen_rtx_AND (vmode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

	  x = gen_rtx_IOR (vmode, dest, scratch);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
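/* Note (added): the AND/NOT-AND/OR sequence above computes the standard
   SSE blend identity dest = (mask & op_true) | (~mask & op_false), where
   mask is the all-ones/all-zeros result of the cmpss/cmpsd comparison
   that was first written into dest.  */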
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}
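/* Example (added): "x -= (a < b);" on unsigned SImode operands is matched
   here with operands[3] == constm1_rtx and expands to roughly

	cmpl	b, a
	sbbl	$0, x

   i.e. the borrow produced by the compare is folded straight into the
   adjustment, with no setcc and no jump.  */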
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      operand = simplify_subreg (imode, operand, mode, 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these come from an attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (GET_CODE (operands[6]) == CONST_INT
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && GET_CODE (operands[7]) == CONST_INT
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && GET_CODE (operands[6]) == CONST_INT
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && GET_CODE (operands[7]) == CONST_INT
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);
}
/* Helper function of ix86_split_ashldi used to generate an SImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashlsi3_const (rtx operand, int count)
{
  if (count == 1)
    emit_insn (gen_addsi3 (operand, operand, operand));
  else if (!optimize_size
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;
      for (i=0; i<count; i++)
	emit_insn (gen_addsi3 (operand, operand, operand));
    }
  else
    emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
}
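/* Note (added): "addl %reg, %reg" is a one-insn left shift by one, so for
   small constant counts a chain of adds can beat "shll $n" whenever
   count * ix86_cost->add <= ix86_cost->shift_const, which is exactly the
   test above.  */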
void
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    ix86_expand_ashlsi3_const (high[0], count - 32);
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashlsi3_const (low[0], count);
	}
      return;
    }

  split_di (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1LL << N
	 can be done with two 32-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
	  else
	    x = gen_lowpart (SImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
	  emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
	}

      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1LL << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);
      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
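/* Example (added): for a variable 64-bit shift "x << n" the generic path
   above emits roughly

	shldl	%cl, lo, hi
	shll	%cl, lo
	# shift_adj: if (n & 32) { hi = lo; lo = 0; }

   because shld/shl only honor the low 5 bits of the count, so the
   adjustment pattern handles counts of 32 and above.  */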
void
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count == 63)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
void
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
/* Helper function for the string operations below.  Test whether the
   VALUE bits of VARIABLE are clear; if so, jump to the label that is
   returned.  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
10975 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
10977 if (GET_MODE (countreg
) == DImode
)
10978 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10980 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10983 /* Zero extend possibly SImode EXP to Pmode register. */
10985 ix86_zero_extend_to_Pmode (rtx exp
)
10988 if (GET_MODE (exp
) == VOIDmode
)
10989 return force_reg (Pmode
, exp
);
10990 if (GET_MODE (exp
) == Pmode
)
10991 return copy_to_mode_reg (Pmode
, exp
);
10992 r
= gen_reg_rtx (Pmode
);
10993 emit_insn (gen_zero_extendsidi2 (r
, exp
));
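/* Note (added): the three helpers above exist because the rep-string
   expanders below must feed %ecx/%rcx a Pmode count, test pointer
   alignment with a cheap and/jz pair, and step the byte counter after
   each small fixup copy.  */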
10997 /* Expand string move (memcpy) operation. Use i386 string operations when
10998 profitable. expand_clrmem contains similar code. */
11000 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
11002 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
11003 enum machine_mode counter_mode
;
11004 HOST_WIDE_INT align
= 0;
11005 unsigned HOST_WIDE_INT count
= 0;
11007 if (GET_CODE (align_exp
) == CONST_INT
)
11008 align
= INTVAL (align_exp
);
11010 /* Can't use any of this if the user has appropriated esi or edi. */
11011 if (global_regs
[4] || global_regs
[5])
11014 /* This simple hack avoids all inlining code and simplifies code below. */
11015 if (!TARGET_ALIGN_STRINGOPS
)
11018 if (GET_CODE (count_exp
) == CONST_INT
)
11020 count
= INTVAL (count_exp
);
11021 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11025 /* Figure out proper mode for counter. For 32bits it is always SImode,
11026 for 64bits use SImode when possible, otherwise DImode.
11027 Set count to number of bytes copied when known at compile time. */
11029 || GET_MODE (count_exp
) == SImode
11030 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
11031 counter_mode
= SImode
;
11033 counter_mode
= DImode
;
11035 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
11038 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11039 if (destreg
!= XEXP (dst
, 0))
11040 dst
= replace_equiv_address_nv (dst
, destreg
);
11041 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11042 if (srcreg
!= XEXP (src
, 0))
11043 src
= replace_equiv_address_nv (src
, srcreg
);
11045 /* When optimizing for size emit simple rep ; movsb instruction for
11046 counts not divisible by 4. */
11048 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11050 emit_insn (gen_cld ());
11051 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11052 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11053 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
11054 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
11058 /* For constant aligned (or small unaligned) copies use rep movsl
11059 followed by code copying the rest. For PentiumPro ensure 8 byte
11060 alignment to allow rep movsl acceleration. */
11062 else if (count
!= 0
11064 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11065 || optimize_size
|| count
< (unsigned int) 64))
11067 unsigned HOST_WIDE_INT offset
= 0;
11068 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11069 rtx srcmem
, dstmem
;
11071 emit_insn (gen_cld ());
11072 if (count
& ~(size
- 1))
11074 countreg
= copy_to_mode_reg (counter_mode
,
11075 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11076 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11077 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11079 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
11080 GEN_INT (size
== 4 ? 2 : 3));
11081 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11082 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11084 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11085 countreg
, destexp
, srcexp
));
11086 offset
= count
& ~(size
- 1);
11088 if (size
== 8 && (count
& 0x04))
11090 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
11092 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11094 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11099 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
11101 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11103 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11108 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
11110 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11112 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit a call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                              countreg2, destexp, srcexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        {
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        {
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        {
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  return 1;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  /* When optimizing for size, emit a simple rep ; stosb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */

  if ((!optimize || optimize_size)
      && (count == 0
          || ((count & 0x03)
              && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          unsigned HOST_WIDE_INT repcount;
          unsigned int max_nonrep;

          repcount = count >> (size == 4 ? 2 : 3);
          if (!TARGET_64BIT)
            repcount &= 0x3fffffff;

          /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
             movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
             bytes.  In both cases the latter seems to be faster for small
             values of N.  */
          max_nonrep = size == 4 ? 7 : 4;
          if (!optimize_size)
            switch (ix86_tune)
              {
              case PROCESSOR_PENTIUM4:
              case PROCESSOR_NOCONA:
                max_nonrep = 3;
                break;
              default:
                break;
              }

          if (repcount <= max_nonrep)
            while (repcount-- > 0)
              {
                rtx mem = adjust_automodify_address_nv (dst,
                                                        GET_MODE (zeroreg),
                                                        destreg, offset);
                emit_insn (gen_strset (destreg, mem, zeroreg));
                offset += size;
              }
          else
            {
              countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
              countreg = ix86_zero_extend_to_Pmode (countreg);
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
              emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
                                       destexp));
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          offset += 4;
        }
      if (count & 0x02)
        {
          rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          offset += 2;
        }
      if (count & 0x01)
        {
          rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
        }
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit a call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 (TARGET_64BIT
                                  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                  : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  return 1;
}
/* Expand strlen.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
        {
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to a 4-byte boundary.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2 ATTRIBUTE_UNUSED,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
        len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp, which always wants an index.  */
          || base == stack_pointer_rtx
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx)
        len += 1;
    }

  return len;
}
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        if (len)
          abort ();
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          {
            switch (get_attr_mode (insn))
              {
              case MODE_QI:
                len = 1;
                break;
              case MODE_HI:
                len = 2;
                break;
              case MODE_SI:
                len = 4;
                break;
              /* Immediates for DImode instructions are encoded as 32-bit
                 sign-extended values.  */
              case MODE_DI:
                len = 4;
                break;
              default:
                fatal_insn ("unknown insn mode", insn);
              }
          }
      }
  return len;
}
/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);
      if (GET_CODE (set) == SET)
        ;
      else if (GET_CODE (set) == PARALLEL
               && GET_CODE (XVECEXP (set, 0, 0)) == SET)
        set = XVECEXP (set, 0, 0);
      else
        {
#ifdef ENABLE_CHECKING
          abort ();
#endif
          return 0;
        }

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_NOCONA:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
        ;
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      else
        abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  else
    return 0;
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to the r11 */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
        abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS, IX86_BUILTIN_ADDSS, IX86_BUILTIN_DIVPS, IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS, IX86_BUILTIN_MULSS, IX86_BUILTIN_SUBPS, IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS, IX86_BUILTIN_CMPLTPS, IX86_BUILTIN_CMPLEPS, IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS, IX86_BUILTIN_CMPNEQPS, IX86_BUILTIN_CMPNLTPS, IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS, IX86_BUILTIN_CMPNGEPS, IX86_BUILTIN_CMPORDPS, IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPNEPS, IX86_BUILTIN_CMPEQSS, IX86_BUILTIN_CMPLTSS, IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS, IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS, IX86_BUILTIN_CMPUNORDSS, IX86_BUILTIN_CMPNESS,

  IX86_BUILTIN_COMIEQSS, IX86_BUILTIN_COMILTSS, IX86_BUILTIN_COMILESS, IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS, IX86_BUILTIN_COMINEQSS, IX86_BUILTIN_UCOMIEQSS, IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS, IX86_BUILTIN_UCOMIGTSS, IX86_BUILTIN_UCOMIGESS, IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI, IX86_BUILTIN_CVTSI2SS, IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI, IX86_BUILTIN_CVTSS2SI64, IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS, IX86_BUILTIN_MINPS, IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS, IX86_BUILTIN_STOREUPS, IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS, IX86_BUILTIN_MOVLHPS, IX86_BUILTIN_LOADHPS, IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS, IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ, IX86_BUILTIN_MOVMSKPS, IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS, IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU, IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB, IX86_BUILTIN_PACKSSDW, IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB, IX86_BUILTIN_PADDW, IX86_BUILTIN_PADDD, IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB, IX86_BUILTIN_PADDSW, IX86_BUILTIN_PADDUSB, IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB, IX86_BUILTIN_PSUBW, IX86_BUILTIN_PSUBD, IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB, IX86_BUILTIN_PSUBSW, IX86_BUILTIN_PSUBUSB, IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND, IX86_BUILTIN_PANDN, IX86_BUILTIN_POR, IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB, IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB, IX86_BUILTIN_PCMPEQW, IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB, IX86_BUILTIN_PCMPGTW, IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW, IX86_BUILTIN_PMAXUB, IX86_BUILTIN_PMINSW, IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW, IX86_BUILTIN_PMULHW, IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW, IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW, IX86_BUILTIN_PSLLD, IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW, IX86_BUILTIN_PSRAD, IX86_BUILTIN_PSRLW, IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ, IX86_BUILTIN_PSLLWI, IX86_BUILTIN_PSLLDI, IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI, IX86_BUILTIN_PSRADI, IX86_BUILTIN_PSRLWI, IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW, IX86_BUILTIN_PUNPCKHWD, IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW, IX86_BUILTIN_PUNPCKLWD, IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS, IX86_BUILTIN_RCPSS, IX86_BUILTIN_RSQRTPS, IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS, IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS, IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS, IX86_BUILTIN_ANDNPS, IX86_BUILTIN_ORPS, IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR, IX86_BUILTIN_STMXCSR, IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS, IX86_BUILTIN_PAVGUSB, IX86_BUILTIN_PF2ID, IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD, IX86_BUILTIN_PFCMPEQ, IX86_BUILTIN_PFCMPGE, IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX, IX86_BUILTIN_PFMIN, IX86_BUILTIN_PFMUL, IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1, IX86_BUILTIN_PFRCPIT2, IX86_BUILTIN_PFRSQIT1, IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB, IX86_BUILTIN_PFSUBR, IX86_BUILTIN_PI2FD, IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW, IX86_BUILTIN_PFNACC, IX86_BUILTIN_PFPNACC, IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI, IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD, IX86_BUILTIN_ADDSD, IX86_BUILTIN_DIVPD, IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD, IX86_BUILTIN_MULSD, IX86_BUILTIN_SUBPD, IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD, IX86_BUILTIN_CMPLTPD, IX86_BUILTIN_CMPLEPD, IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD, IX86_BUILTIN_CMPNEQPD, IX86_BUILTIN_CMPNLTPD, IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD, IX86_BUILTIN_CMPNGEPD, IX86_BUILTIN_CMPORDPD, IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD, IX86_BUILTIN_CMPEQSD, IX86_BUILTIN_CMPLTSD, IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD, IX86_BUILTIN_CMPNLTSD, IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD, IX86_BUILTIN_CMPUNORDSD, IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD, IX86_BUILTIN_COMILTSD, IX86_BUILTIN_COMILESD, IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD, IX86_BUILTIN_COMINEQSD, IX86_BUILTIN_UCOMIEQSD, IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD, IX86_BUILTIN_UCOMIGTSD, IX86_BUILTIN_UCOMIGESD, IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD, IX86_BUILTIN_MAXSD, IX86_BUILTIN_MINPD, IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD, IX86_BUILTIN_ANDNPD, IX86_BUILTIN_ORPD, IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD, IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD, IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD, IX86_BUILTIN_STOREUPD, IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD, IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD, IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ, IX86_BUILTIN_CVTPD2PI, IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ, IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI, IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI, IX86_BUILTIN_MOVNTPD, IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MASKMOVDQU, IX86_BUILTIN_MOVMSKPD, IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128, IX86_BUILTIN_PACKSSDW128, IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128, IX86_BUILTIN_PADDW128, IX86_BUILTIN_PADDD128, IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128, IX86_BUILTIN_PADDSW128, IX86_BUILTIN_PADDUSB128, IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128, IX86_BUILTIN_PSUBW128, IX86_BUILTIN_PSUBD128, IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128, IX86_BUILTIN_PSUBSW128, IX86_BUILTIN_PSUBUSB128, IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128, IX86_BUILTIN_PANDN128, IX86_BUILTIN_POR128, IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128, IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128, IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128, IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128, IX86_BUILTIN_PMAXUB128, IX86_BUILTIN_PMINSW128, IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ, IX86_BUILTIN_PMULUDQ128, IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128, IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128, IX86_BUILTIN_PSHUFHW, IX86_BUILTIN_PSHUFLW, IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128, IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128, IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128, IX86_BUILTIN_PSRLQ128, IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128, IX86_BUILTIN_PSLLDI128, IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128, IX86_BUILTIN_PSRADI128, IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128, IX86_BUILTIN_PSRLDI128, IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128, IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128, IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128, IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH, IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS, IX86_BUILTIN_HADDPS, IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP, IX86_BUILTIN_MOVSLDUP, IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD, IX86_BUILTIN_HSUBPD, IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI, IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF, IX86_BUILTIN_VEC_EXT_V2DI, IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI, IX86_BUILTIN_VEC_EXT_V8HI, IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI, IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI,

  IX86_BUILTIN_MAX
};
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
13039 static const struct builtin_description bdesc_2arg
[] =
13042 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
13043 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
13044 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
13045 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
13046 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
13047 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
13048 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
13049 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
13051 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
13052 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
13053 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
13054 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
13055 BUILTIN_DESC_SWAP_OPERANDS
},
13056 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
13057 BUILTIN_DESC_SWAP_OPERANDS
},
13058 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
13059 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
13060 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
13061 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
13062 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
13063 BUILTIN_DESC_SWAP_OPERANDS
},
13064 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
13065 BUILTIN_DESC_SWAP_OPERANDS
},
13066 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
13067 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
13068 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
13069 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
13070 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
13071 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
13072 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
13073 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
13074 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
13075 BUILTIN_DESC_SWAP_OPERANDS
},
13076 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
13077 BUILTIN_DESC_SWAP_OPERANDS
},
13078 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
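  /* The four bitwise entries above work on the raw bit pattern of the
     V4SF operands; e.g. ANDNPS with a sign-bit mask is the usual way
     to get a vectorized fabs.  */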
  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
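  /* A zero name field means the builtin is not registered by the
     generic two-operand loop in ix86_init_mmx_sse_builtins; these
     entries get their user-visible names and hand-picked prototypes
     from the explicit def_builtin calls further down (e.g.
     "__builtin_ia32_psllw" takes a DImode shift count).  */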
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};
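/* Example: once registered, the SQRTPS descriptor above is reachable
   from user code as
       __v4sf y = __builtin_ia32_sqrtps (x);
   with the expansion handled by ix86_expand_unop_builtin below.  */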
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
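  /* These correspond to GCC's generic vector types; e.g. a V4SF value
     is what user code declares as
         float __attribute__ ((vector_size (16)))
     (the __v4sf typedef used by xmmintrin.h).  */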
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
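  /* For example, the "__builtin_ia32_minps" entry has operand 1 in
     V4SFmode, so the loop registers it as v4sf_ftype_v4sf_v4sf; the
     maskcmp entries are overridden to return an integer vector of the
     same width instead.  */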
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
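  /* So every comi/ucomi builtin gets an int-returning prototype:
     int (*) (v2df, v2df) for the SSE2 entries and int (*) (v4sf, v4sf)
     for the rest.  */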
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
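  /* The trailing int argument of "__builtin_ia32_shufps" (and of the
     pshuf* builtins elsewhere in this function) is the 8-bit immediate
     selector that is encoded into the instruction; e.g.
         __builtin_ia32_shufps (a, a, 0x1b)
     reverses the four elements of a.  */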
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
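  /* MONITOR/MWAIT take ordinary scalar arguments, e.g.
         __builtin_ia32_monitor (addr, 0, 0);
         __builtin_ia32_mwait (0, 0);
     where addr is the const void * address to arm and the zeros are
     the extension and hint words.  */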
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);
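  /* E.g. "__builtin_ia32_vec_ext_v4sf (v, 2)" extracts element 2 of a
     V4SF vector as a float; the element index is expected to be a
     compile-time constant.  */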
  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
               ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    gcc_unreachable ();

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return op0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
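/* Editor's illustration (not part of the compiler): the three insns handled
   here merge the scalar result back into the upper elements of the first
   operand, which is why OP1 is initialized from OP0 above.  From the user's
   side, e.g. via xmmintrin.h:  */
#if 0
#include <xmmintrin.h>

__m128
sqrt_low (__m128 v)
{
  /* sqrtss of element 0; elements 1-3 of V pass through unchanged.  */
  return _mm_sqrt_ss (v);
}
#endif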
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
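/* Editor's illustration (not part of the compiler): the comi builtins
   produce an int from a scalar compare, e.g. xmmintrin.h's _mm_comieq_ss.
   A minimal sketch:  */
#if 0
#include <xmmintrin.h>

int
equal_low (__m128 a, __m128 b)
{
  /* comiss sets EFLAGS; the expander above materializes the result with
     a setcc into the low byte of a zeroed SImode register, hence the
     QImode SUBREG dance.  */
  return _mm_comieq_ss (a, b);
}
#endif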
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%i", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
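/* Editor's illustration (not part of the compiler): mmintrin.h builds MMX
   vectors through these builtins instead of (type){...} syntax, so MMX
   register code is only emitted when the user asked for it.  A sketch,
   assuming the usual header wrappers:  */
#if 0
#include <mmintrin.h>

__m64
make_pair (int hi, int lo)
{
  /* In GCC's mmintrin.h this wraps __builtin_ia32_vec_init_v2si,
     which lands in ix86_expand_vec_init_builtin above.  */
  return _mm_set_pi32 (hi, lo);
}
#endif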
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
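/* Editor's illustration (not part of the compiler): the MXCSR builtins
   back _mm_getcsr and _mm_setcsr.  Both go through the SImode stack slot
   allocated above because ldmxcsr/stmxcsr take only memory operands.
   A typical use, enabling flush-to-zero:  */
#if 0
#include <xmmintrin.h>

void
enable_ftz (void)
{
  /* FTZ is bit 15 of MXCSR.  */
  _mm_setcsr (_mm_getcsr () | 0x8000);
}
#endif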
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
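/* Editor's illustration (not part of the compiler): the "mask must be an
   immediate" diagnostic above fires when the selector argument is not a
   compile-time constant, since shufps/shufpd encode it in an imm8 field.
   A correct use via xmmintrin.h:  */
#if 0
#include <xmmintrin.h>

__m128
swap_pairs (__m128 a, __m128 b)
{
  /* Constant selector: accepted.  A runtime variable here would be
     rejected by the predicate check above rather than the assembler.  */
  return _mm_shuffle_ps (a, b, _MM_SHUFFLE (1, 0, 3, 2));
}
#endif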
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
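/* Editor's illustration (not part of the compiler): these are the
   whole-register byte shifts behind _mm_slli_si128/_mm_srli_si128.  The
   ashlti3/lshrti3 patterns count the immediate in bits, so GCC's
   emmintrin.h multiplies the byte count by 8 before calling the builtin.
   A sketch:  */
#if 0
#include <emmintrin.h>

__m128i
shift_left_4_bytes (__m128i v)
{
  /* Constant byte count required; a variable count triggers the
     "shift must be an immediate" error above.  */
  return _mm_slli_si128 (v, 4);
}
#endif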
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_sse3_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
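/* Editor's illustration (not part of the compiler): the SSE3 monitor/mwait
   pair is exposed by pmmintrin.h, typically as an idle-loop primitive.
   The operands are forced into registers above because the instructions
   take their arguments in fixed registers (eax/ecx/edx).  A sketch:  */
#if 0
#include <pmmintrin.h>

void
wait_on (void const *addr)
{
  _mm_monitor (addr, 0, 0);   /* arm the monitor on ADDR's cache line */
  _mm_mwait (0, 0);           /* sleep until it is written, or an intr */
}
#endif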
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return class;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && (TARGET_MIX_SSE_I387
	      || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}

      return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;

      /* ??? For the cost of one register reformat penalty, we could use
	 the same instructions to move SFmode and DFmode data, but the
	 relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
	return true;
    }

  return false;
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
      return (VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
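/* Worked example (editor's note, hypothetical numbers): for a 12-byte
   XFmode value moved in/out of GENERAL_REGS on a 32-bit target with
   UNITS_PER_WORD == 4 and an int_load[2] entry of 2, the default branch
   above yields

     cost = 2 * ((12 + 4 - 1) / 4) = 2 * 3 = 6

   i.e. one word-sized move per 4-byte piece, which is why the allocator
   prefers keeping such values in FP registers.  */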
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = COSTS_N_INSNS (ix86_cost->add);
      else
	*total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->add);
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && ix86_cost->lea <= ix86_cost->shift_const)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->lea);
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
	      else
		*total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
	      else
		*total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
	    }
	}
      else
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
	  else
	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
	}
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fmul);
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
	         == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (GET_CODE (op1) == CONST_INT)
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
				  + nbits * ix86_cost->mult_bit)
		   + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = COSTS_N_INSNS (ix86_cost->lea);
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = COSTS_N_INSNS (ix86_cost->lea);
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->lea);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
	      *total += rtx_cost (XEXP (x, 1), outer_code);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fadd);
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
		    + (rtx_cost (XEXP (x, 0), outer_code)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fchs);
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
	*total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (COSTS_N_INSNS (ix86_cost->add)
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
		    + rtx_cost (const1_rtx, outer_code));
	  return true;
	}
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
	  || mode == XFmode
	  || (mode == DFmode && !TARGET_SSE2))
	*total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    default:
      return false;
    }
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
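/* Editor's illustration (not part of the compiler): for the non-PIC
   (!MACHOPIC_PURE) branch, the fprintf calls above produce a stub of
   roughly this shape, with placeholder names standing in for the
   generated stub/binder/lazy-pointer labels:

	my_stub:
		.indirect_symbol _foo
		jmp *L42$lz
	my_stub_binder:
		pushl $L42$lz
		jmp dyld_stub_binding_helper
	L42$lz:
		.indirect_symbol _foo
		.long my_stub_binder

   The first call goes through the binder; dyld then rewrites the lazy
   pointer so that later calls jump straight to _foo.  */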
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("%qs incompatible attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
15837 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
15838 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
15839 HOST_WIDE_INT vcall_offset
, tree function
)
15841 /* 64-bit can handle anything. */
15845 /* For 32-bit, everything's fine if we have one free register. */
15846 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
15849 /* Need a free register for vcall_offset. */
15853 /* Need a free register for GOT references. */
15854 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
15857 /* Otherwise ok. */
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
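/* Editor's illustration (not part of the compiler): on 32-bit x86 without
   -malign-double this caps the field alignment of doubles at 32 bits to
   match the ia32 ABI, so given

     struct s { char c; double d; };

   the member D lands at offset 4 rather than 8.  With TARGET_64BIT or
   TARGET_ALIGN_DOUBLE, the natural alignment computed by the middle end
   is kept instead.  */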
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
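/* Worked example (editor's note): suppose INSN is the fourth jump in the
   current interval, the interval holds NBYTES = 12 bytes, and INSN itself
   is 2 bytes.  Then

     padsize = 15 - nbytes + min_insn_size (insn) = 15 - 12 + 2 = 5

   and the align emitted before INSN guarantees that START's end and INSN
   can no longer share one 16-byte page, keeping at most 3 jumps per
   16-byte window.  */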
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */

static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump
	     destination is not visible to us.  */
	  if (!prev
	      && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
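/* The return_internal_long pattern used above expands to a two byte
   "rep ; ret" rather than a plain one byte "ret", so the return is no
   longer a one byte instruction that directly follows a jump or sits
   at a branch target - the situations that trigger the Athlon/K8
   branch-prediction penalty described in the comment above.  */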
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */

static void
ix86_reorg (void)
{
  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */

bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}
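/* The REGNO >= 4 test works because without a REX prefix the byte
   register encodings 4 through 7 mean %ah/%ch/%dh/%bh; touching the low
   byte of %esi/%edi/%ebp/%esp (hard registers 4-7 here) or any of the
   extended registers instead requires a REX prefix in 64-bit mode.  */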
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */

static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;

  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */

bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    gcc_unreachable ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
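/* A rough C-level sketch of the expansion above (illustration only; the
   names HALF, F and RESULT are ad hoc, not from the sources), for a
   DImode input X and a double result:

     if ((long long) x >= 0)
       result = (double) (long long) x;
     else
       {
	 unsigned long long half = (x >> 1) | (x & 1);
	 double f = (double) (long long) half;
	 result = f + f;
       }

   Halving brings the value into the signed range so the ordinary signed
   conversion applies; ORing the shifted-out low bit back in as a sticky
   bit keeps the final rounding correct, and the add doubles the result
   back to the original magnitude.  */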
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok && !TARGET_SSE)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
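/* For instance, duplicating the QImode value 0xAB first builds the
   HImode value (0xAB << 8) | 0xAB = 0xABAB, recurses to splat that into
   the wider vector mode, and finally takes the lowpart, so every byte
   lane of the result holds 0xAB.  */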
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var)
{
  enum machine_mode vsimode;
  rtx x;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
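/* As an example of the widening path above: to set byte 3 of a V8QImode
   vector, the variable byte is combined with the constant byte 2
   (one_var ^ 1) into a single HImode value - the variable byte shifted
   into the high half since one_var is odd - and that halfword is then
   inserted as element 1 (one_var >> 1) of the matching V4HImode vector.  */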
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtx v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
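/* In the word-building path above the inner loop visits the elements of
   each word starting from the highest-numbered one, so after the final
   shift and IOR the lowest-numbered element sits in the least
   significant bits - matching the little-endian lane order the vector
   has in memory.  */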
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero && one_var == 0
	  && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, 0)))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals,
					   one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
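/* A hypothetical caller, e.g. a vec_init expander in the machine
   description, would package the element rtxes into a PARALLEL of the
   vector mode (illustrative usage sketch; op0..op3 are ad hoc names):

     rtvec v = gen_rtvec (4, op0, op1, op2, op3);
     ix86_expand_vector_init (false, target,
			      gen_rtx_PARALLEL (V4SImode, v));  */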
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = op0 = A B C D */
	  tmp = copy_to_reg (target);

	  /* op0 = C C D D */
	  emit_insn (gen_sse_unpcklps (target, target, target));

	  /* op0 = C C D X */
	  ix86_expand_vector_set (false, target, val, 0);

	  /* op0 = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  tmp = copy_to_reg (target);
	  ix86_expand_vector_set (false, target, val, 0);
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  tmp = copy_to_reg (target);
	  ix86_expand_vector_set (false, target, val, 0);
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
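/* The fallback path above bounces the vector through memory: the whole
   vector is spilled to a stack temporary, the single element at byte
   offset ELT * GET_MODE_SIZE (inner_mode) is overwritten, and the
   vector is reloaded.  This is correct for any mode but typically far
   slower than a register-based vec_merge, so it is a last resort.  */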
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
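/* For example, reversing GT yields LE for integer comparisons, but UNLE
   for CCFPmode/CCFPUmode: with a NaN operand both GT and LE are false,
   so only the unordered-inclusive reversal preserves "branch taken iff
   the original condition is false".  */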
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
	  && TARGET_USE_FFREEP)
	return "ffreep\t%y0";
      return "fstp\t%y0";
    }

  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";

  return "fst\t%y0";
}
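/* The popping forms are legal above only because the REG_DEAD note
   shows the source register is dead: "fstp" stores and pops the x87
   register stack, while "ffreep" frees the slot and pops without
   storing at all, which is cheaper on CPUs where TARGET_USE_FFREEP
   is set.  */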
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
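/* "fnstsw %ax" puts the FPU status word into %ax, so C2 (status word
   bit 10) lands in bit 2 of %ah.  With SAHF, %ah is copied into EFLAGS
   and C2 becomes PF, hence the UNORDERED test on CCmode; without SAHF
   the same bit is tested directly via "testb $4, %ah".  */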
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
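/* The magic constant is 1 - sqrt(2)/2, the bound below which the x87
   fyl2xp1 instruction (y * log2(x + 1)) is defined.  Within that range
   the code computes ln2 * log2(1 + op1) directly from op1, which keeps
   full precision near zero; outside it, the code falls back to fyl2x
   on the explicitly formed 1 + op1.  */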
/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

#include "gt-i386.h"